Diffstat (limited to 'src')
243 files changed, 9916 insertions, 7052 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9d0af02fd..0913be72c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -53,7 +53,11 @@ if (MSVC) else() add_compile_options( -Wall + -Werror=implicit-fallthrough + -Werror=reorder + -Wextra -Wno-attributes + -Wno-unused-parameter ) if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang) diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index c187d8ac5..7a9dc61d4 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -36,9 +36,9 @@ public: } void SetWaveIndex(std::size_t index); - std::vector<s16> DequeueSamples(std::size_t sample_count, Memory::Memory& memory); + std::vector<s16> DequeueSamples(std::size_t sample_count, Core::Memory::Memory& memory); void UpdateState(); - void RefreshBuffer(Memory::Memory& memory); + void RefreshBuffer(Core::Memory::Memory& memory); private: bool is_in_use{}; @@ -66,13 +66,14 @@ public: return info; } - void UpdateState(Memory::Memory& memory); + void UpdateState(Core::Memory::Memory& memory); private: EffectOutStatus out_status{}; EffectInStatus info{}; }; -AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Memory::Memory& memory_, + +AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_, AudioRendererParameter params, std::shared_ptr<Kernel::WritableEvent> buffer_event, std::size_t instance_number) @@ -208,7 +209,7 @@ void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) { } std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_count, - Memory::Memory& memory) { + Core::Memory::Memory& memory) { if (!IsPlaying()) { return {}; } @@ -258,7 +259,7 @@ void AudioRenderer::VoiceState::UpdateState() { is_in_use = info.is_in_use; } -void AudioRenderer::VoiceState::RefreshBuffer(Memory::Memory& memory) { +void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory) { const auto wave_buffer_address = info.wave_buffer[wave_index].buffer_addr; const auto wave_buffer_size = info.wave_buffer[wave_index].buffer_sz; std::vector<s16> new_samples(wave_buffer_size / sizeof(s16)); @@ -310,7 +311,7 @@ void AudioRenderer::VoiceState::RefreshBuffer(Memory::Memory& memory) { is_refresh_pending = false; } -void AudioRenderer::EffectState::UpdateState(Memory::Memory& memory) { +void AudioRenderer::EffectState::UpdateState(Core::Memory::Memory& memory) { if (info.is_new) { out_status.state = EffectStatus::New; } else { diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index c0fae669e..62faf9f19 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -22,7 +22,7 @@ namespace Kernel { class WritableEvent; } -namespace Memory { +namespace Core::Memory { class Memory; } @@ -221,7 +221,7 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size class AudioRenderer { public: - AudioRenderer(Core::Timing::CoreTiming& core_timing, Memory::Memory& memory_, + AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_, AudioRendererParameter params, std::shared_ptr<Kernel::WritableEvent> buffer_event, std::size_t instance_number); ~AudioRenderer(); @@ -244,7 +244,7 @@ private: std::vector<EffectState> effects; std::unique_ptr<AudioOut> audio_out; StreamPtr stream; - Memory::Memory& memory; + Core::Memory::Memory& memory; }; } // namespace AudioCore diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index fbebed715..6ffc612e7 
100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -106,6 +106,8 @@ add_library(common STATIC common_funcs.h common_paths.h common_types.h + dynamic_library.cpp + dynamic_library.h file_util.cpp file_util.h hash.h @@ -153,6 +155,8 @@ add_library(common STATIC uuid.cpp uuid.h vector_math.h + virtual_buffer.cpp + virtual_buffer.h web_result.h zstd_compression.cpp zstd_compression.h diff --git a/src/common/alignment.h b/src/common/alignment.h index cdd4833f8..f8c49e079 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -38,6 +38,13 @@ constexpr bool IsWordAligned(T value) { return (value & 0b11) == 0; } +template <typename T> +constexpr bool IsAligned(T value, std::size_t alignment) { + using U = typename std::make_unsigned<T>::type; + const U mask = static_cast<U>(alignment - 1); + return (value & mask) == 0; +} + template <typename T, std::size_t Align = 16> class AlignmentAllocator { public: diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 052254678..88cf5250a 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -55,6 +55,38 @@ __declspec(dllimport) void __stdcall DebugBreak(void); // Defined in Misc.cpp. std::string GetLastErrorMsg(); +#define DECLARE_ENUM_FLAG_OPERATORS(type) \ + constexpr type operator|(type a, type b) noexcept { \ + using T = std::underlying_type_t<type>; \ + return static_cast<type>(static_cast<T>(a) | static_cast<T>(b)); \ + } \ + constexpr type operator&(type a, type b) noexcept { \ + using T = std::underlying_type_t<type>; \ + return static_cast<type>(static_cast<T>(a) & static_cast<T>(b)); \ + } \ + constexpr type& operator|=(type& a, type b) noexcept { \ + using T = std::underlying_type_t<type>; \ + a = static_cast<type>(static_cast<T>(a) | static_cast<T>(b)); \ + return a; \ + } \ + constexpr type& operator&=(type& a, type b) noexcept { \ + using T = std::underlying_type_t<type>; \ + a = static_cast<type>(static_cast<T>(a) & static_cast<T>(b)); \ + return a; \ + } \ + constexpr type operator~(type key) noexcept { \ + using T = std::underlying_type_t<type>; \ + return static_cast<type>(~static_cast<T>(key)); \ + } \ + constexpr bool True(type key) noexcept { \ + using T = std::underlying_type_t<type>; \ + return static_cast<T>(key) != 0; \ + } \ + constexpr bool False(type key) noexcept { \ + using T = std::underlying_type_t<type>; \ + return static_cast<T>(key) == 0; \ + } + namespace Common { constexpr u32 MakeMagic(char a, char b, char c, char d) { diff --git a/src/common/dynamic_library.cpp b/src/common/dynamic_library.cpp new file mode 100644 index 000000000..7ab54e9e4 --- /dev/null +++ b/src/common/dynamic_library.cpp @@ -0,0 +1,106 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
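As a usage sketch for the DECLARE_ENUM_FLAG_OPERATORS macro above (the ExampleFlags enum here is hypothetical and only illustrates the generated operators; the macro itself relies on std::underlying_type_t from <type_traits>):

#include <type_traits>

#include "common/common_funcs.h"
#include "common/common_types.h"

enum class ExampleFlags : u32 {
    None = 0,
    Read = 1 << 0,
    Write = 1 << 1,
};
DECLARE_ENUM_FLAG_OPERATORS(ExampleFlags)

// The generated operators allow combining and masking flags without manual
// casts, and True()/False() replace explicit comparisons against zero.
constexpr auto rw = ExampleFlags::Read | ExampleFlags::Write;
static_assert(True(rw & ExampleFlags::Read));
static_assert(False(rw & ~rw));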
+ +#include <cstring> +#include <string> +#include <utility> + +#include <fmt/format.h> + +#include "common/dynamic_library.h" + +#ifdef _WIN32 +#include <windows.h> +#else +#include <dlfcn.h> +#endif + +namespace Common { + +DynamicLibrary::DynamicLibrary() = default; + +DynamicLibrary::DynamicLibrary(const char* filename) { + Open(filename); +} + +DynamicLibrary::DynamicLibrary(DynamicLibrary&& rhs) noexcept + : handle{std::exchange(rhs.handle, nullptr)} {} + +DynamicLibrary& DynamicLibrary::operator=(DynamicLibrary&& rhs) noexcept { + Close(); + handle = std::exchange(rhs.handle, nullptr); + return *this; +} + +DynamicLibrary::~DynamicLibrary() { + Close(); +} + +std::string DynamicLibrary::GetUnprefixedFilename(const char* filename) { +#if defined(_WIN32) + return std::string(filename) + ".dll"; +#elif defined(__APPLE__) + return std::string(filename) + ".dylib"; +#else + return std::string(filename) + ".so"; +#endif +} + +std::string DynamicLibrary::GetVersionedFilename(const char* libname, int major, int minor) { +#if defined(_WIN32) + if (major >= 0 && minor >= 0) + return fmt::format("{}-{}-{}.dll", libname, major, minor); + else if (major >= 0) + return fmt::format("{}-{}.dll", libname, major); + else + return fmt::format("{}.dll", libname); +#elif defined(__APPLE__) + const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : ""; + if (major >= 0 && minor >= 0) + return fmt::format("{}{}.{}.{}.dylib", prefix, libname, major, minor); + else if (major >= 0) + return fmt::format("{}{}.{}.dylib", prefix, libname, major); + else + return fmt::format("{}{}.dylib", prefix, libname); +#else + const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : ""; + if (major >= 0 && minor >= 0) + return fmt::format("{}{}.so.{}.{}", prefix, libname, major, minor); + else if (major >= 0) + return fmt::format("{}{}.so.{}", prefix, libname, major); + else + return fmt::format("{}{}.so", prefix, libname); +#endif +} + +bool DynamicLibrary::Open(const char* filename) { +#ifdef _WIN32 + handle = reinterpret_cast<void*>(LoadLibraryA(filename)); +#else + handle = dlopen(filename, RTLD_NOW); +#endif + return handle != nullptr; +} + +void DynamicLibrary::Close() { + if (!IsOpen()) + return; + +#ifdef _WIN32 + FreeLibrary(reinterpret_cast<HMODULE>(handle)); +#else + dlclose(handle); +#endif + handle = nullptr; +} + +void* DynamicLibrary::GetSymbolAddress(const char* name) const { +#ifdef _WIN32 + return reinterpret_cast<void*>(GetProcAddress(reinterpret_cast<HMODULE>(handle), name)); +#else + return reinterpret_cast<void*>(dlsym(handle, name)); +#endif +} + +} // namespace Common diff --git a/src/common/dynamic_library.h b/src/common/dynamic_library.h new file mode 100644 index 000000000..2a06372fd --- /dev/null +++ b/src/common/dynamic_library.h @@ -0,0 +1,75 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +namespace Common { + +/** + * Provides a platform-independent interface for loading a dynamic library and retrieving symbols. + * The interface maintains an internal reference count to allow one handle to be shared between + * multiple users. + */ +class DynamicLibrary final { +public: + /// Default constructor, does not load a library. + explicit DynamicLibrary(); + + /// Automatically loads the specified library. Call IsOpen() to check validity before use. + explicit DynamicLibrary(const char* filename); + + /// Moves the library. 
+ DynamicLibrary(DynamicLibrary&&) noexcept; + DynamicLibrary& operator=(DynamicLibrary&&) noexcept; + + /// Delete copies, we can't copy a dynamic library. + DynamicLibrary(const DynamicLibrary&) = delete; + DynamicLibrary& operator=(const DynamicLibrary&) = delete; + + /// Closes the library. + ~DynamicLibrary(); + + /// Returns the specified library name with the platform-specific suffix added. + static std::string GetUnprefixedFilename(const char* filename); + + /// Returns the specified library name in platform-specific format. + /// Major/minor versions will not be included if set to -1. + /// If libname already contains the "lib" prefix, it will not be added again. + /// Windows: LIBNAME-MAJOR-MINOR.dll + /// Linux: libLIBNAME.so.MAJOR.MINOR + /// Mac: libLIBNAME.MAJOR.MINOR.dylib + static std::string GetVersionedFilename(const char* libname, int major = -1, int minor = -1); + + /// Returns true if a module is loaded, otherwise false. + bool IsOpen() const { + return handle != nullptr; + } + + /// Loads (or replaces) the handle with the specified library file name. + /// Returns true if the library was loaded and can be used. + bool Open(const char* filename); + + /// Unloads the library, any function pointers from this library are no longer valid. + void Close(); + + /// Returns the address of the specified symbol (function or variable) as an untyped pointer. + /// If the specified symbol does not exist in this library, nullptr is returned. + void* GetSymbolAddress(const char* name) const; + + /// Obtains the address of the specified symbol, automatically casting to the correct type. + /// Returns true if the symbol was found and assigned, otherwise false. + template <typename T> + bool GetSymbol(const char* name, T* ptr) const { + *ptr = reinterpret_cast<T>(GetSymbolAddress(name)); + return *ptr != nullptr; + } + +private: + /// Platform-dependent data type representing a dynamic library handle. 
+ void* handle = nullptr; +}; + +} // namespace Common diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp index 28a16077d..45b750e1e 100644 --- a/src/common/file_util.cpp +++ b/src/common/file_util.cpp @@ -974,6 +974,34 @@ bool IOFile::Flush() { return IsOpen() && 0 == std::fflush(m_file); } +std::size_t IOFile::ReadImpl(void* data, std::size_t length, std::size_t data_size) const { + if (!IsOpen()) { + return std::numeric_limits<std::size_t>::max(); + } + + if (length == 0) { + return 0; + } + + DEBUG_ASSERT(data != nullptr); + + return std::fread(data, data_size, length, m_file); +} + +std::size_t IOFile::WriteImpl(const void* data, std::size_t length, std::size_t data_size) { + if (!IsOpen()) { + return std::numeric_limits<std::size_t>::max(); + } + + if (length == 0) { + return 0; + } + + DEBUG_ASSERT(data != nullptr); + + return std::fwrite(data, data_size, length, m_file); +} + bool IOFile::Resize(u64 size) { return IsOpen() && 0 == #ifdef _WIN32 diff --git a/src/common/file_util.h b/src/common/file_util.h index cde7ddf2d..f7a0c33fa 100644 --- a/src/common/file_util.h +++ b/src/common/file_util.h @@ -222,22 +222,15 @@ public: static_assert(std::is_trivially_copyable_v<T>, "Given array does not consist of trivially copyable objects"); - if (!IsOpen()) { - return std::numeric_limits<std::size_t>::max(); - } - - return std::fread(data, sizeof(T), length, m_file); + return ReadImpl(data, length, sizeof(T)); } template <typename T> std::size_t WriteArray(const T* data, std::size_t length) { static_assert(std::is_trivially_copyable_v<T>, "Given array does not consist of trivially copyable objects"); - if (!IsOpen()) { - return std::numeric_limits<std::size_t>::max(); - } - return std::fwrite(data, sizeof(T), length, m_file); + return WriteImpl(data, length, sizeof(T)); } template <typename T> @@ -278,6 +271,9 @@ public: } private: + std::size_t ReadImpl(void* data, std::size_t length, std::size_t data_size) const; + std::size_t WriteImpl(const void* data, std::size_t length, std::size_t data_size); + std::FILE* m_file = nullptr; }; diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp index 566b57b62..e5d3090d5 100644 --- a/src/common/page_table.cpp +++ b/src/common/page_table.cpp @@ -6,36 +6,20 @@ namespace Common { -PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {} +PageTable::PageTable() = default; PageTable::~PageTable() = default; -void PageTable::Resize(std::size_t address_space_width_in_bits) { - const std::size_t num_page_table_entries = 1ULL - << (address_space_width_in_bits - page_size_in_bits); - +void PageTable::Resize(std::size_t address_space_width_in_bits, std::size_t page_size_in_bits, + bool has_attribute) { + const std::size_t num_page_table_entries{1ULL + << (address_space_width_in_bits - page_size_in_bits)}; pointers.resize(num_page_table_entries); - attributes.resize(num_page_table_entries); - - // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the - // vector size is subsequently decreased (via resize), the vector might not automatically - // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for - // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use. 
- - pointers.shrink_to_fit(); - attributes.shrink_to_fit(); -} - -BackingPageTable::BackingPageTable(std::size_t page_size_in_bits) : PageTable{page_size_in_bits} {} - -BackingPageTable::~BackingPageTable() = default; - -void BackingPageTable::Resize(std::size_t address_space_width_in_bits) { - PageTable::Resize(address_space_width_in_bits); - const std::size_t num_page_table_entries = 1ULL - << (address_space_width_in_bits - page_size_in_bits); backing_addr.resize(num_page_table_entries); - backing_addr.shrink_to_fit(); + + if (has_attribute) { + attributes.resize(num_page_table_entries); + } } } // namespace Common diff --git a/src/common/page_table.h b/src/common/page_table.h index dbc272ab7..1e8bd3187 100644 --- a/src/common/page_table.h +++ b/src/common/page_table.h @@ -5,9 +5,12 @@ #pragma once #include <vector> + #include <boost/icl/interval_map.hpp> + #include "common/common_types.h" #include "common/memory_hook.h" +#include "common/virtual_buffer.h" namespace Common { @@ -47,7 +50,7 @@ struct SpecialRegion { * mimics the way a real CPU page table works. */ struct PageTable { - explicit PageTable(std::size_t page_size_in_bits); + PageTable(); ~PageTable(); /** @@ -56,40 +59,18 @@ struct PageTable { * * @param address_space_width_in_bits The address size width in bits. */ - void Resize(std::size_t address_space_width_in_bits); + void Resize(std::size_t address_space_width_in_bits, std::size_t page_size_in_bits, + bool has_attribute); /** * Vector of memory pointers backing each page. An entry can only be non-null if the * corresponding entry in the `attributes` vector is of type `Memory`. */ - std::vector<u8*> pointers; - - /** - * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is - * of type `Special`. - */ - boost::icl::interval_map<u64, std::set<SpecialRegion>> special_regions; - - /** - * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then - * the corresponding entry in `pointers` MUST be set to null. - */ - std::vector<PageType> attributes; - - const std::size_t page_size_in_bits{}; -}; - -/** - * A more advanced Page Table with the ability to save a backing address when using it - * depends on another MMU. - */ -struct BackingPageTable : PageTable { - explicit BackingPageTable(std::size_t page_size_in_bits); - ~BackingPageTable(); + VirtualBuffer<u8*> pointers; - void Resize(std::size_t address_space_width_in_bits); + VirtualBuffer<u64> backing_addr; - std::vector<u64> backing_addr; + VirtualBuffer<PageType> attributes; }; } // namespace Common diff --git a/src/common/scope_exit.h b/src/common/scope_exit.h index 1176a72b1..68ef5f197 100644 --- a/src/common/scope_exit.h +++ b/src/common/scope_exit.h @@ -12,10 +12,17 @@ template <typename Func> struct ScopeExitHelper { explicit ScopeExitHelper(Func&& func) : func(std::move(func)) {} ~ScopeExitHelper() { - func(); + if (active) { + func(); + } + } + + void Cancel() { + active = false; } Func func; + bool active{true}; }; template <typename Func> diff --git a/src/common/virtual_buffer.cpp b/src/common/virtual_buffer.cpp new file mode 100644 index 000000000..b426f4747 --- /dev/null +++ b/src/common/virtual_buffer.cpp @@ -0,0 +1,52 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
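A minimal sketch of the new cancellation path added to ScopeExitHelper in the scope_exit.h hunk above (the helper's enclosing namespace is not shown in the hunk, so it appears unqualified here, and Commit() is a hypothetical caller):

#include "common/scope_exit.h"

bool Commit(bool ok) {
    bool rolled_back = false;
    {
        // The guard arms a rollback that runs at scope exit by default
        ScopeExitHelper guard{[&] { rolled_back = true; }};
        if (ok) {
            guard.Cancel(); // success path: the rollback lambda is skipped
        }
    } // if not cancelled, the lambda fires here via ~ScopeExitHelper
    return rolled_back;
}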
+ +#ifdef _WIN32 +#include <windows.h> +#else +#include <stdio.h> +#include <sys/mman.h> +#include <sys/types.h> +#if defined __APPLE__ || defined __FreeBSD__ || defined __OpenBSD__ +#include <sys/sysctl.h> +#elif defined __HAIKU__ +#include <OS.h> +#else +#include <sys/sysinfo.h> +#endif +#endif + +#include "common/assert.h" +#include "common/virtual_buffer.h" + +namespace Common { + +void* AllocateMemoryPages(std::size_t size) { +#ifdef _WIN32 + void* base{VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE)}; +#else + void* base{mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0)}; + + if (base == MAP_FAILED) { + base = nullptr; + } +#endif + + ASSERT(base); + + return base; +} + +void FreeMemoryPages(void* base, std::size_t size) { + if (!base) { + return; + } +#ifdef _WIN32 + ASSERT(VirtualFree(base, 0, MEM_RELEASE)); +#else + ASSERT(munmap(base, size) == 0); +#endif +} + +} // namespace Common diff --git a/src/common/virtual_buffer.h b/src/common/virtual_buffer.h new file mode 100644 index 000000000..da064e59e --- /dev/null +++ b/src/common/virtual_buffer.h @@ -0,0 +1,58 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_funcs.h" + +namespace Common { + +void* AllocateMemoryPages(std::size_t size); +void FreeMemoryPages(void* base, std::size_t size); + +template <typename T> +class VirtualBuffer final : NonCopyable { +public: + constexpr VirtualBuffer() = default; + explicit VirtualBuffer(std::size_t count) : alloc_size{count * sizeof(T)} { + base_ptr = reinterpret_cast<T*>(AllocateMemoryPages(alloc_size)); + } + + ~VirtualBuffer() { + FreeMemoryPages(base_ptr, alloc_size); + } + + void resize(std::size_t count) { + FreeMemoryPages(base_ptr, alloc_size); + + alloc_size = count * sizeof(T); + base_ptr = reinterpret_cast<T*>(AllocateMemoryPages(alloc_size)); + } + + constexpr const T& operator[](std::size_t index) const { + return base_ptr[index]; + } + + constexpr T& operator[](std::size_t index) { + return base_ptr[index]; + } + + constexpr T* data() { + return base_ptr; + } + + constexpr const T* data() const { + return base_ptr; + } + + constexpr std::size_t size() const { + return alloc_size / sizeof(T); + } + +private: + std::size_t alloc_size{}; + T* base_ptr{}; +}; + +} // namespace Common diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 66497a386..8546d3602 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -35,6 +35,8 @@ add_library(core STATIC crypto/ctr_encryption_layer.h crypto/xts_encryption_layer.cpp crypto/xts_encryption_layer.h + device_memory.cpp + device_memory.h file_sys/bis_factory.cpp file_sys/bis_factory.h file_sys/card_image.cpp @@ -152,6 +154,23 @@ add_library(core STATIC hle/kernel/hle_ipc.h hle/kernel/kernel.cpp hle/kernel/kernel.h + hle/kernel/memory/address_space_info.cpp + hle/kernel/memory/address_space_info.h + hle/kernel/memory/memory_block.h + hle/kernel/memory/memory_block_manager.cpp + hle/kernel/memory/memory_block_manager.h + hle/kernel/memory/memory_layout.h + hle/kernel/memory/memory_manager.cpp + hle/kernel/memory/memory_manager.h + hle/kernel/memory/memory_types.h + hle/kernel/memory/page_linked_list.h + hle/kernel/memory/page_heap.cpp + hle/kernel/memory/page_heap.h + hle/kernel/memory/page_table.cpp + hle/kernel/memory/page_table.h + hle/kernel/memory/slab_heap.h + hle/kernel/memory/system_control.cpp + hle/kernel/memory/system_control.h 
hle/kernel/mutex.cpp hle/kernel/mutex.h hle/kernel/object.cpp @@ -178,6 +197,7 @@ add_library(core STATIC hle/kernel/shared_memory.h hle/kernel/svc.cpp hle/kernel/svc.h + hle/kernel/svc_types.h hle/kernel/svc_wrap.h hle/kernel/synchronization_object.cpp hle/kernel/synchronization_object.h @@ -189,8 +209,6 @@ add_library(core STATIC hle/kernel/time_manager.h hle/kernel/transfer_memory.cpp hle/kernel/transfer_memory.h - hle/kernel/vm_manager.cpp - hle/kernel/vm_manager.h hle/kernel/writable_event.cpp hle/kernel/writable_event.h hle/lock.cpp @@ -591,11 +609,8 @@ target_link_libraries(core PUBLIC common PRIVATE audio_core video_core) target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt json-headers mbedtls opus unicorn) if (YUZU_ENABLE_BOXCAT) - get_directory_property(OPENSSL_LIBS - DIRECTORY ${PROJECT_SOURCE_DIR}/externals/libressl - DEFINITION OPENSSL_LIBS) - target_compile_definitions(core PRIVATE -DCPPHTTPLIB_OPENSSL_SUPPORT -DYUZU_ENABLE_BOXCAT) - target_link_libraries(core PRIVATE httplib json-headers ${OPENSSL_LIBS} zip) + target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT) + target_link_libraries(core PRIVATE httplib json-headers zip) endif() if (ENABLE_WEB_SERVICE) diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp index 7e846ddd5..d079a1bc8 100644 --- a/src/core/arm/arm_interface.cpp +++ b/src/core/arm/arm_interface.cpp @@ -60,7 +60,7 @@ static_assert(sizeof(ELFSymbol) == 0x18, "ELFSymbol has incorrect size."); using Symbols = std::vector<std::pair<ELFSymbol, std::string>>; -Symbols GetSymbols(VAddr text_offset, Memory::Memory& memory) { +Symbols GetSymbols(VAddr text_offset, Core::Memory::Memory& memory) { const auto mod_offset = text_offset + memory.Read32(text_offset + 4); if (mod_offset < text_offset || (mod_offset & 0b11) != 0 || @@ -123,7 +123,7 @@ Symbols GetSymbols(VAddr text_offset, Memory::Memory& memory) { std::optional<std::string> GetSymbolName(const Symbols& symbols, VAddr func_address) { const auto iter = std::find_if(symbols.begin(), symbols.end(), [func_address](const auto& pair) { - const auto& [symbol, name] = pair; + const auto& symbol = pair.first; const auto end_address = symbol.value + symbol.size; return func_address >= symbol.value && func_address < end_address; }); @@ -146,7 +146,7 @@ std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktrace() const { auto fp = GetReg(29); auto lr = GetReg(30); while (true) { - out.push_back({"", 0, lr, 0}); + out.push_back({"", 0, lr, 0, ""}); if (!fp) { break; } diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 57eae839e..cb2e640e2 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -26,28 +26,28 @@ public: virtual ~ARM_Interface() = default; struct ThreadContext32 { - std::array<u32, 16> cpu_registers; - u32 cpsr; - std::array<u8, 4> padding; - std::array<u64, 32> fprs; - u32 fpscr; - u32 fpexc; - u32 tpidr; + std::array<u32, 16> cpu_registers{}; + u32 cpsr{}; + std::array<u8, 4> padding{}; + std::array<u64, 32> fprs{}; + u32 fpscr{}; + u32 fpexc{}; + u32 tpidr{}; }; // Internally within the kernel, it expects the AArch32 version of the // thread context to be 344 bytes in size. 
static_assert(sizeof(ThreadContext32) == 0x158); struct ThreadContext64 { - std::array<u64, 31> cpu_registers; - u64 sp; - u64 pc; - u32 pstate; - std::array<u8, 4> padding; - std::array<u128, 32> vector_registers; - u32 fpcr; - u32 fpsr; - u64 tpidr; + std::array<u64, 31> cpu_registers{}; + u64 sp{}; + u64 pc{}; + u32 pstate{}; + std::array<u8, 4> padding{}; + std::array<u128, 32> vector_registers{}; + u32 fpcr{}; + u32 fpsr{}; + u64 tpidr{}; }; // Internally within the kernel, it expects the AArch64 version of the // thread context to be 800 bytes in size. diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 187a972ac..9bc86e3b9 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -67,7 +67,7 @@ public: } void CallSVC(u32 swi) override { - Kernel::CallSVC(parent.system, swi); + Kernel::Svc::Call(parent.system, swi); } void AddTicks(u64 ticks) override { diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index 143e46e4d..8ba9cea8f 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -15,7 +15,7 @@ #include "core/arm/arm_interface.h" #include "core/arm/exclusive_monitor.h" -namespace Memory { +namespace Core::Memory { class Memory; } diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index a53a58ba0..9add5d363 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -8,6 +8,7 @@ #include <dynarmic/A64/config.h> #include "common/logging/log.h" #include "common/microprofile.h" +#include "common/page_table.h" #include "core/arm/dynarmic/arm_dynarmic_64.h" #include "core/core.h" #include "core/core_manager.h" @@ -18,7 +19,6 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/svc.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" namespace Core { @@ -103,7 +103,7 @@ public: } void CallSVC(u32 swi) override { - Kernel::CallSVC(parent.system, swi); + Kernel::Svc::Call(parent.system, swi); } void AddTicks(u64 ticks) override { @@ -159,6 +159,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& // Unpredictable instructions config.define_unpredictable_behaviour = true; + config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; + config.only_detect_misalignment_via_page_table_on_page_boundary = true; + return std::make_shared<Dynarmic::A64::Jit>(config); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index e71240a96..647cecaf0 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -15,7 +15,7 @@ #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" -namespace Memory { +namespace Core::Memory { class Memory; } @@ -92,7 +92,7 @@ public: private: friend class ARM_Dynarmic_64; Dynarmic::A64::ExclusiveMonitor monitor; - Memory::Memory& memory; + Core::Memory::Memory& memory; }; } // namespace Core diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index 4ef418b90..ccd73b80f 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -8,7 +8,7 @@ #include "common/common_types.h" -namespace Memory { +namespace Core::Memory { class Memory; } diff --git a/src/core/arm/unicorn/arm_unicorn.cpp 
b/src/core/arm/unicorn/arm_unicorn.cpp index 8a9800a96..d189efb63 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -266,7 +266,7 @@ void ARM_Unicorn::InterruptHook(uc_engine* uc, u32 int_no, void* user_data) { switch (ec) { case 0x15: // SVC - Kernel::CallSVC(arm_instance->system, iss); + Kernel::Svc::Call(arm_instance->system, iss); break; } } diff --git a/src/core/core.cpp b/src/core/core.cpp index 3bd90d79f..f9f8a3000 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -14,6 +14,7 @@ #include "core/core_manager.h" #include "core/core_timing.h" #include "core/cpu_manager.h" +#include "core/device_memory.h" #include "core/file_sys/bis_factory.h" #include "core/file_sys/card_image.h" #include "core/file_sys/mode.h" @@ -140,6 +141,8 @@ struct System::Impl { ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) { LOG_DEBUG(HW_Memory, "initialized OK"); + device_memory = std::make_unique<Core::DeviceMemory>(system); + core_timing.Initialize(); kernel.Initialize(); cpu_manager.Initialize(); @@ -276,6 +279,7 @@ struct System::Impl { telemetry_session.reset(); perf_stats.reset(); gpu_core.reset(); + device_memory.reset(); // Close all CPU/threading state cpu_manager.Shutdown(); @@ -346,7 +350,8 @@ struct System::Impl { std::unique_ptr<Loader::AppLoader> app_loader; std::unique_ptr<Tegra::GPU> gpu_core; std::unique_ptr<Hardware::InterruptManager> interrupt_manager; - Memory::Memory memory; + std::unique_ptr<Core::DeviceMemory> device_memory; + Core::Memory::Memory memory; CpuManager cpu_manager; bool is_powered_on = false; bool exit_lock = false; @@ -472,6 +477,14 @@ Kernel::Process* System::CurrentProcess() { return impl->kernel.CurrentProcess(); } +Core::DeviceMemory& System::DeviceMemory() { + return *impl->device_memory; +} + +const Core::DeviceMemory& System::DeviceMemory() const { + return *impl->device_memory; +} + const Kernel::Process* System::CurrentProcess() const { return impl->kernel.CurrentProcess(); } @@ -505,7 +518,7 @@ Memory::Memory& System::Memory() { return impl->memory; } -const Memory::Memory& System::Memory() const { +const Core::Memory::Memory& System::Memory() const { return impl->memory; } diff --git a/src/core/core.h b/src/core/core.h index 8d862a8e6..acc53d6a1 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -36,9 +36,10 @@ class AppLoader; enum class ResultStatus : u16; } // namespace Loader -namespace Memory { +namespace Core::Memory { struct CheatEntry; -} // namespace Memory +class Memory; +} // namespace Core::Memory namespace Service { @@ -86,14 +87,11 @@ namespace Core::Hardware { class InterruptManager; } -namespace Memory { -class Memory; -} - namespace Core { class ARM_Interface; class CoreManager; +class DeviceMemory; class ExclusiveMonitor; class FrameLimiter; class PerfStats; @@ -230,10 +228,10 @@ public: const ExclusiveMonitor& Monitor() const; /// Gets a mutable reference to the system memory instance. - Memory::Memory& Memory(); + Core::Memory::Memory& Memory(); /// Gets a constant reference to the system memory instance. 
- const Memory::Memory& Memory() const; + const Core::Memory::Memory& Memory() const; /// Gets a mutable reference to the GPU interface Tegra::GPU& GPU(); @@ -259,6 +257,12 @@ public: /// Gets the global scheduler const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Gets the manager for the guest device memory + Core::DeviceMemory& DeviceMemory(); + + /// Gets the manager for the guest device memory + const Core::DeviceMemory& DeviceMemory() const; + /// Provides a pointer to the current process Kernel::Process* CurrentProcess(); diff --git a/src/core/core_manager.h b/src/core/core_manager.h index b14e723d7..d525de00a 100644 --- a/src/core/core_manager.h +++ b/src/core/core_manager.h @@ -22,7 +22,7 @@ namespace Core::Timing { class CoreTiming; } -namespace Memory { +namespace Core::Memory { class Memory; } diff --git a/src/core/device_memory.cpp b/src/core/device_memory.cpp new file mode 100644 index 000000000..51097ced3 --- /dev/null +++ b/src/core/device_memory.cpp @@ -0,0 +1,15 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/core.h" +#include "core/device_memory.h" +#include "core/memory.h" + +namespace Core { + +DeviceMemory::DeviceMemory(System& system) : buffer{DramMemoryMap::Size}, system{system} {} + +DeviceMemory::~DeviceMemory() = default; + +} // namespace Core diff --git a/src/core/device_memory.h b/src/core/device_memory.h new file mode 100644 index 000000000..9efa088d0 --- /dev/null +++ b/src/core/device_memory.h @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/assert.h" +#include "common/common_funcs.h" +#include "common/virtual_buffer.h" + +namespace Core { + +class System; + +namespace DramMemoryMap { +enum : u64 { + Base = 0x80000000ULL, + Size = 0x100000000ULL, + End = Base + Size, + KernelReserveBase = Base + 0x60000, + SlabHeapBase = KernelReserveBase + 0x85000, + SlapHeapSize = 0xa21000, + SlabHeapEnd = SlabHeapBase + SlapHeapSize, +}; +}; // namespace DramMemoryMap + +class DeviceMemory : NonCopyable { +public: + explicit DeviceMemory(Core::System& system); + ~DeviceMemory(); + + template <typename T> + PAddr GetPhysicalAddr(const T* ptr) const { + return (reinterpret_cast<uintptr_t>(ptr) - reinterpret_cast<uintptr_t>(buffer.data())) + + DramMemoryMap::Base; + } + + u8* GetPointer(PAddr addr) { + return buffer.data() + (addr - DramMemoryMap::Base); + } + + const u8* GetPointer(PAddr addr) const { + return buffer.data() + (addr - DramMemoryMap::Base); + } + +private: + Common::VirtualBuffer<u8> buffer; + Core::System& system; +}; + +} // namespace Core diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp index e226e9711..b93aa6935 100644 --- a/src/core/file_sys/patch_manager.cpp +++ b/src/core/file_sys/patch_manager.cpp @@ -249,7 +249,7 @@ bool PatchManager::HasNSOPatch(const std::array<u8, 32>& build_id_) const { } namespace { -std::optional<std::vector<Memory::CheatEntry>> ReadCheatFileFromFolder( +std::optional<std::vector<Core::Memory::CheatEntry>> ReadCheatFileFromFolder( const Core::System& system, u64 title_id, const std::array<u8, 0x20>& build_id_, const VirtualDir& base_path, bool upper) { const auto build_id_raw = Common::HexToString(build_id_, upper); @@ -269,14 +269,14 @@ std::optional<std::vector<Memory::CheatEntry>> ReadCheatFileFromFolder( return std::nullopt; } - 
Memory::TextCheatParser parser; + Core::Memory::TextCheatParser parser; return parser.Parse( system, std::string_view(reinterpret_cast<const char* const>(data.data()), data.size())); } } // Anonymous namespace -std::vector<Memory::CheatEntry> PatchManager::CreateCheatList( +std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList( const Core::System& system, const std::array<u8, 32>& build_id_) const { const auto load_dir = system.GetFileSystemController().GetModificationLoadRoot(title_id); if (load_dir == nullptr) { @@ -289,7 +289,7 @@ std::vector<Memory::CheatEntry> PatchManager::CreateCheatList( std::sort(patch_dirs.begin(), patch_dirs.end(), [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); }); - std::vector<Memory::CheatEntry> out; + std::vector<Core::Memory::CheatEntry> out; for (const auto& subdir : patch_dirs) { if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend()) { continue; @@ -348,6 +348,12 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t if (ext_dir != nullptr) layers_ext.push_back(std::move(ext_dir)); } + + // When there are no layers to apply, return early as there is no need to rebuild the RomFS + if (layers.empty() && layers_ext.empty()) { + return; + } + layers.push_back(std::move(extracted)); auto layered = LayeredVfsDirectory::MakeLayeredDirectory(std::move(layers)); @@ -434,7 +440,8 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam // Game Updates const auto update_tid = GetUpdateTitleID(title_id); PatchManager update{update_tid}; - auto [nacp, discard_icon_file] = update.GetControlMetadata(); + const auto metadata = update.GetControlMetadata(); + const auto& nacp = metadata.first; const auto update_disabled = std::find(disabled.cbegin(), disabled.cend(), "Update") != disabled.cend(); diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h index e857e6e82..ec6db524d 100644 --- a/src/core/file_sys/patch_manager.h +++ b/src/core/file_sys/patch_manager.h @@ -51,8 +51,8 @@ public: bool HasNSOPatch(const std::array<u8, 0x20>& build_id) const; // Creates a CheatList object with all - std::vector<Memory::CheatEntry> CreateCheatList(const Core::System& system, - const std::array<u8, 0x20>& build_id) const; + std::vector<Core::Memory::CheatEntry> CreateCheatList( + const Core::System& system, const std::array<u8, 0x20>& build_id) const; // Currently tracked RomFS patches: // - Game Updates diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp index 6e9cf67ef..ba5f76288 100644 --- a/src/core/file_sys/registered_cache.cpp +++ b/src/core/file_sys/registered_cache.cpp @@ -591,14 +591,18 @@ InstallResult RegisteredCache::InstallEntry(const NSP& nsp, bool overwrite_if_ex InstallResult RegisteredCache::InstallEntry(const NCA& nca, TitleType type, bool overwrite_if_exists, const VfsCopyFunction& copy) { CNMTHeader header{ - nca.GetTitleId(), ///< Title ID - 0, ///< Ignore/Default title version - type, ///< Type - {}, ///< Padding - 0x10, ///< Default table offset - 1, ///< 1 Content Entry - 0, ///< No Meta Entries - {}, ///< Padding + nca.GetTitleId(), // Title ID + 0, // Ignore/Default title version + type, // Type + {}, // Padding + 0x10, // Default table offset + 1, // 1 Content Entry + 0, // No Meta Entries + {}, // Padding + {}, // Reserved 1 + 0, // Is committed + 0, // Required download system version + {}, // Reserved 2 }; OptionalHeader opt_header{0, 0}; 
ContentRecord c_rec{{}, {}, {}, GetCRTypeFromNCAType(nca.GetType()), {}}; @@ -848,7 +852,8 @@ VirtualFile ManualContentProvider::GetEntryUnparsed(u64 title_id, ContentRecordT VirtualFile ManualContentProvider::GetEntryRaw(u64 title_id, ContentRecordType type) const { const auto iter = std::find_if(entries.begin(), entries.end(), [title_id, type](const auto& entry) { - const auto [title_type, content_type, e_title_id] = entry.first; + const auto content_type = std::get<1>(entry.first); + const auto e_title_id = std::get<2>(entry.first); return content_type == type && e_title_id == title_id; }); if (iter == entries.end()) diff --git a/src/core/file_sys/vfs_libzip.cpp b/src/core/file_sys/vfs_libzip.cpp index 11d1978ea..d69952940 100644 --- a/src/core/file_sys/vfs_libzip.cpp +++ b/src/core/file_sys/vfs_libzip.cpp @@ -42,11 +42,11 @@ VirtualDir ExtractZIP(VirtualFile file) { continue; if (name.back() != '/') { - std::unique_ptr<zip_file_t, decltype(&zip_fclose)> file{ + std::unique_ptr<zip_file_t, decltype(&zip_fclose)> file2{ zip_fopen_index(zip.get(), i, 0), zip_fclose}; std::vector<u8> buf(stat.size); - if (zip_fread(file.get(), buf.data(), buf.size()) != buf.size()) + if (zip_fread(file2.get(), buf.data(), buf.size()) != s64(buf.size())) return nullptr; const auto parts = FileUtil::SplitPathComponents(stat.name); diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 72294d4d8..13aa14934 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -12,6 +12,15 @@ namespace Core::Frontend { +/// Information for the Graphics Backends signifying what type of screen pointer is in +/// WindowInformation +enum class WindowSystemType { + Headless, + Windows, + X11, + Wayland, +}; + /** * Represents a drawing context that supports graphics operations. */ @@ -76,6 +85,23 @@ public: std::pair<unsigned, unsigned> min_client_area_size; }; + /// Data describing host window system information + struct WindowSystemInfo { + // Window system type. Determines which GL context or Vulkan WSI is used. + WindowSystemType type = WindowSystemType::Headless; + + // Connection to a display server. This is used on X11 and Wayland platforms. + void* display_connection = nullptr; + + // Render surface. This is a pointer to the native window handle, which depends + // on the platform. e.g. HWND for Windows, Window for X11. If the surface is + // set to nullptr, the video backend will run in headless mode. + void* render_surface = nullptr; + + // Scale of the render surface. For hidpi systems, this will be >1. + float render_surface_scale = 1.0f; + }; + /// Polls window events virtual void PollEvents() = 0; @@ -87,10 +113,6 @@ public: /// Returns if window is shown (not minimized) virtual bool IsShown() const = 0; - /// Retrieves Vulkan specific handlers from the window - virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const = 0; - /** * Signal that a touch pressed event has occurred (e.g. mouse click pressed) * @param framebuffer_x Framebuffer x-coordinate that was pressed @@ -128,6 +150,13 @@ public: } /** + * Returns system information about the drawing area. 
+ */ + const WindowSystemInfo& GetWindowInfo() const { + return window_info; + } + + /** * Gets the framebuffer layout (width, height, and screen regions) * @note This method is thread-safe */ @@ -142,7 +171,7 @@ public: void UpdateCurrentFramebufferLayout(unsigned width, unsigned height); protected: - EmuWindow(); + explicit EmuWindow(); virtual ~EmuWindow(); /** @@ -179,6 +208,8 @@ protected: client_area_height = size.second; } + WindowSystemInfo window_info; + private: /** * Handler called when the minimal client area was requested to be changed via SetConfig. diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index 68a0e0906..d0c43447c 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -25,7 +25,7 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) { ASSERT(height > 0); // The drawing code needs at least somewhat valid values for both screens // so just calculate them both even if the other isn't showing. - FramebufferLayout res{width, height}; + FramebufferLayout res{width, height, false, {}}; const float window_aspect_ratio = static_cast<float>(height) / width; const float emulation_aspect_ratio = EmulationAspectRatio( diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 6d15aeed9..2f15635c5 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -37,9 +37,9 @@ #include "core/core.h" #include "core/core_manager.h" #include "core/gdbstub/gdbstub.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" -#include "core/hle/kernel/vm_manager.h" #include "core/loader/loader.h" #include "core/memory.h" @@ -643,7 +643,7 @@ static void HandleQuery() { SendReply(target_xml); } else if (strncmp(query, "Offsets", strlen("Offsets")) == 0) { const VAddr base_address = - Core::System::GetInstance().CurrentProcess()->VMManager().GetCodeRegionBaseAddress(); + Core::System::GetInstance().CurrentProcess()->PageTable().GetCodeRegionStart(); std::string buffer = fmt::format("TextSeg={:0x}", base_address); SendReply(buffer.c_str()); } else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) { diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp index 6d66276bc..5ab204b9b 100644 --- a/src/core/hle/kernel/client_session.cpp +++ b/src/core/hle/kernel/client_session.cpp @@ -47,7 +47,8 @@ ResultVal<std::shared_ptr<ClientSession>> ClientSession::Create(KernelCore& kern return MakeResult(std::move(client_session)); } -ResultCode ClientSession::SendSyncRequest(std::shared_ptr<Thread> thread, Memory::Memory& memory) { +ResultCode ClientSession::SendSyncRequest(std::shared_ptr<Thread> thread, + Core::Memory::Memory& memory) { // Keep ServerSession alive until we're done working with it. 
if (!parent->Server()) { return ERR_SESSION_CLOSED_BY_REMOTE; diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h index d15b09554..c5f760d7d 100644 --- a/src/core/hle/kernel/client_session.h +++ b/src/core/hle/kernel/client_session.h @@ -12,7 +12,7 @@ union ResultCode; -namespace Memory { +namespace Core::Memory { class Memory; } @@ -42,7 +42,7 @@ public: return HANDLE_TYPE; } - ResultCode SendSyncRequest(std::shared_ptr<Thread> thread, Memory::Memory& memory); + ResultCode SendSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory); bool ShouldWait(const Thread* thread) const override; diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h index 8097b3863..29bfa3621 100644 --- a/src/core/hle/kernel/errors.h +++ b/src/core/hle/kernel/errors.h @@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7}; constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; +constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103}; constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104}; constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index c558a2f33..d65dae3ae 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -284,17 +284,17 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) { std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const { std::vector<u8> buffer; - const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && + const bool is_buffer_a{BufferDescriptorA().size() > std::size_t(buffer_index) && BufferDescriptorA()[buffer_index].Size()}; auto& memory = Core::System::GetInstance().Memory(); if (is_buffer_a) { - ASSERT_MSG(BufferDescriptorA().size() > buffer_index, + ASSERT_MSG(BufferDescriptorA().size() > std::size_t(buffer_index), "BufferDescriptorA invalid buffer_index {}", buffer_index); buffer.resize(BufferDescriptorA()[buffer_index].Size()); memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size()); } else { - ASSERT_MSG(BufferDescriptorX().size() > buffer_index, + ASSERT_MSG(BufferDescriptorX().size() > std::size_t(buffer_index), "BufferDescriptorX invalid buffer_index {}", buffer_index); buffer.resize(BufferDescriptorX()[buffer_index].Size()); memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size()); @@ -310,7 +310,7 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, return 0; } - const bool is_buffer_b{BufferDescriptorB().size() > buffer_index && + const bool is_buffer_b{BufferDescriptorB().size() > std::size_t(buffer_index) && BufferDescriptorB()[buffer_index].Size()}; const std::size_t buffer_size{GetWriteBufferSize(buffer_index)}; if (size > buffer_size) { @@ -321,13 +321,13 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, auto& memory = Core::System::GetInstance().Memory(); if (is_buffer_b) { - ASSERT_MSG(BufferDescriptorB().size() > buffer_index, + ASSERT_MSG(BufferDescriptorB().size() > std::size_t(buffer_index), "BufferDescriptorB invalid buffer_index {}", buffer_index); ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() 
>= size, "BufferDescriptorB buffer_index {} is not large enough", buffer_index); memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size); } else { - ASSERT_MSG(BufferDescriptorC().size() > buffer_index, + ASSERT_MSG(BufferDescriptorC().size() > std::size_t(buffer_index), "BufferDescriptorC invalid buffer_index {}", buffer_index); ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size, "BufferDescriptorC buffer_index {} is not large enough", buffer_index); @@ -338,16 +338,16 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, } std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const { - const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && + const bool is_buffer_a{BufferDescriptorA().size() > std::size_t(buffer_index) && BufferDescriptorA()[buffer_index].Size()}; if (is_buffer_a) { - ASSERT_MSG(BufferDescriptorA().size() > buffer_index, + ASSERT_MSG(BufferDescriptorA().size() > std::size_t(buffer_index), "BufferDescriptorA invalid buffer_index {}", buffer_index); ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0, "BufferDescriptorA buffer_index {} is empty", buffer_index); return BufferDescriptorA()[buffer_index].Size(); } else { - ASSERT_MSG(BufferDescriptorX().size() > buffer_index, + ASSERT_MSG(BufferDescriptorX().size() > std::size_t(buffer_index), "BufferDescriptorX invalid buffer_index {}", buffer_index); ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0, "BufferDescriptorX buffer_index {} is empty", buffer_index); @@ -356,14 +356,14 @@ std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const { } std::size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const { - const bool is_buffer_b{BufferDescriptorB().size() > buffer_index && + const bool is_buffer_b{BufferDescriptorB().size() > std::size_t(buffer_index) && BufferDescriptorB()[buffer_index].Size()}; if (is_buffer_b) { - ASSERT_MSG(BufferDescriptorB().size() > buffer_index, + ASSERT_MSG(BufferDescriptorB().size() > std::size_t(buffer_index), "BufferDescriptorB invalid buffer_index {}", buffer_index); return BufferDescriptorB()[buffer_index].Size(); } else { - ASSERT_MSG(BufferDescriptorC().size() > buffer_index, + ASSERT_MSG(BufferDescriptorC().size() > std::size_t(buffer_index), "BufferDescriptorC invalid buffer_index {}", buffer_index); return BufferDescriptorC()[buffer_index].Size(); } diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index e47f1deed..7655382fa 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -18,15 +18,20 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/device_memory.h" #include "core/hardware_properties.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/memory/memory_layout.h" +#include "core/hle/kernel/memory/memory_manager.h" +#include "core/hle/kernel/memory/slab_heap.h" #include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/time_manager.h" @@ -103,13 +108,14 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ struct KernelCore::Impl { explicit 
Impl(Core::System& system, KernelCore& kernel) - : system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {} + : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {} void Initialize(KernelCore& kernel) { Shutdown(); InitializePhysicalCores(); InitializeSystemResourceLimit(kernel); + InitializeMemoryLayout(); InitializeThreads(); InitializePreemption(); } @@ -154,12 +160,17 @@ struct KernelCore::Impl { system_resource_limit = ResourceLimit::Create(kernel); // If setting the default system values fails, then something seriously wrong has occurred. - ASSERT(system_resource_limit->SetLimitValue(ResourceType::PhysicalMemory, 0x200000000) + ASSERT(system_resource_limit->SetLimitValue(ResourceType::PhysicalMemory, 0x100000000) .IsSuccess()); ASSERT(system_resource_limit->SetLimitValue(ResourceType::Threads, 800).IsSuccess()); ASSERT(system_resource_limit->SetLimitValue(ResourceType::Events, 700).IsSuccess()); ASSERT(system_resource_limit->SetLimitValue(ResourceType::TransferMemory, 200).IsSuccess()); ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess()); + + if (!system_resource_limit->Reserve(ResourceType::PhysicalMemory, 0) || + !system_resource_limit->Reserve(ResourceType::PhysicalMemory, 0x60000)) { + UNREACHABLE(); + } } void InitializeThreads() { @@ -237,6 +248,57 @@ struct KernelCore::Impl { return result; } + void InitializeMemoryLayout() { + // Initialize memory layout + constexpr Memory::MemoryLayout layout{Memory::MemoryLayout::GetDefaultLayout()}; + constexpr std::size_t hid_size{0x40000}; + constexpr std::size_t font_size{0x1100000}; + constexpr std::size_t irs_size{0x8000}; + constexpr std::size_t time_size{0x1000}; + constexpr PAddr hid_addr{layout.System().StartAddress()}; + constexpr PAddr font_pa{layout.System().StartAddress() + hid_size}; + constexpr PAddr irs_addr{layout.System().StartAddress() + hid_size + font_size}; + constexpr PAddr time_addr{layout.System().StartAddress() + hid_size + font_size + irs_size}; + + // Initialize memory manager + memory_manager = std::make_unique<Memory::MemoryManager>(); + memory_manager->InitializeManager(Memory::MemoryManager::Pool::Application, + layout.Application().StartAddress(), + layout.Application().EndAddress()); + memory_manager->InitializeManager(Memory::MemoryManager::Pool::Applet, + layout.Applet().StartAddress(), + layout.Applet().EndAddress()); + memory_manager->InitializeManager(Memory::MemoryManager::Pool::System, + layout.System().StartAddress(), + layout.System().EndAddress()); + + hid_shared_mem = Kernel::SharedMemory::Create( + system.Kernel(), system.DeviceMemory(), nullptr, + {hid_addr, hid_size / Memory::PageSize}, Memory::MemoryPermission::None, + Memory::MemoryPermission::Read, hid_addr, hid_size, "HID:SharedMemory"); + font_shared_mem = Kernel::SharedMemory::Create( + system.Kernel(), system.DeviceMemory(), nullptr, + {font_pa, font_size / Memory::PageSize}, Memory::MemoryPermission::None, + Memory::MemoryPermission::Read, font_pa, font_size, "Font:SharedMemory"); + irs_shared_mem = Kernel::SharedMemory::Create( + system.Kernel(), system.DeviceMemory(), nullptr, + {irs_addr, irs_size / Memory::PageSize}, Memory::MemoryPermission::None, + Memory::MemoryPermission::Read, irs_addr, irs_size, "IRS:SharedMemory"); + time_shared_mem = Kernel::SharedMemory::Create( + system.Kernel(), system.DeviceMemory(), nullptr, + {time_addr, time_size / Memory::PageSize}, Memory::MemoryPermission::None, + 
Memory::MemoryPermission::Read, time_addr, time_size, "Time:SharedMemory"); + + // Allocate slab heaps + user_slab_heap_pages = std::make_unique<Memory::SlabHeap<Memory::Page>>(); + + // Initialize slab heaps + constexpr u64 user_slab_heap_size{0x3de000}; + user_slab_heap_pages->Initialize( + system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase), + user_slab_heap_size); + } + std::atomic<u32> next_object_id{0}; std::atomic<u64> next_kernel_process_id{Process::InitialKIPIDMin}; std::atomic<u64> next_user_process_id{Process::ProcessIDMin}; @@ -271,6 +333,16 @@ struct KernelCore::Impl { std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads; std::mutex register_thread_mutex; + // Kernel memory management + std::unique_ptr<Memory::MemoryManager> memory_manager; + std::unique_ptr<Memory::SlabHeap<Memory::Page>> user_slab_heap_pages; + + // Shared memory for services + std::shared_ptr<Kernel::SharedMemory> hid_shared_mem; + std::shared_ptr<Kernel::SharedMemory> font_shared_mem; + std::shared_ptr<Kernel::SharedMemory> irs_shared_mem; + std::shared_ptr<Kernel::SharedMemory> time_shared_mem; + // System context Core::System& system; }; @@ -437,4 +509,52 @@ Core::EmuThreadHandle KernelCore::GetCurrentEmuThreadID() const { return impl->GetCurrentEmuThreadID(); } +Memory::MemoryManager& KernelCore::MemoryManager() { + return *impl->memory_manager; +} + +const Memory::MemoryManager& KernelCore::MemoryManager() const { + return *impl->memory_manager; +} + +Memory::SlabHeap<Memory::Page>& KernelCore::GetUserSlabHeapPages() { + return *impl->user_slab_heap_pages; +} + +const Memory::SlabHeap<Memory::Page>& KernelCore::GetUserSlabHeapPages() const { + return *impl->user_slab_heap_pages; +} + +Kernel::SharedMemory& KernelCore::GetHidSharedMem() { + return *impl->hid_shared_mem; +} + +const Kernel::SharedMemory& KernelCore::GetHidSharedMem() const { + return *impl->hid_shared_mem; +} + +Kernel::SharedMemory& KernelCore::GetFontSharedMem() { + return *impl->font_shared_mem; +} + +const Kernel::SharedMemory& KernelCore::GetFontSharedMem() const { + return *impl->font_shared_mem; +} + +Kernel::SharedMemory& KernelCore::GetIrsSharedMem() { + return *impl->irs_shared_mem; +} + +const Kernel::SharedMemory& KernelCore::GetIrsSharedMem() const { + return *impl->irs_shared_mem; +} + +Kernel::SharedMemory& KernelCore::GetTimeSharedMem() { + return *impl->time_shared_mem; +} + +const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const { + return *impl->time_shared_mem; +} + } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index c4f78ab71..83de1f542 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -8,6 +8,7 @@ #include <string> #include <unordered_map> #include <vector> +#include "core/hle/kernel/memory/memory_types.h" #include "core/hle/kernel/object.h" namespace Core { @@ -23,6 +24,12 @@ struct EventType; namespace Kernel { +namespace Memory { +class MemoryManager; +template <typename T> +class SlabHeap; +} // namespace Memory + class AddressArbiter; class ClientPort; class GlobalScheduler; @@ -31,6 +38,7 @@ class PhysicalCore; class Process; class ResourceLimit; class Scheduler; +class SharedMemory; class Synchronization; class Thread; class TimeManager; @@ -147,6 +155,42 @@ public: /// Register the current thread as a non CPU core thread. void RegisterHostThread(); + /// Gets the virtual memory manager for the kernel. 
+ Memory::MemoryManager& MemoryManager(); + + /// Gets the virtual memory manager for the kernel. + const Memory::MemoryManager& MemoryManager() const; + + /// Gets the slab heap allocated for user space pages. + Memory::SlabHeap<Memory::Page>& GetUserSlabHeapPages(); + + /// Gets the slab heap allocated for user space pages. + const Memory::SlabHeap<Memory::Page>& GetUserSlabHeapPages() const; + + /// Gets the shared memory object for HID services. + Kernel::SharedMemory& GetHidSharedMem(); + + /// Gets the shared memory object for HID services. + const Kernel::SharedMemory& GetHidSharedMem() const; + + /// Gets the shared memory object for font services. + Kernel::SharedMemory& GetFontSharedMem(); + + /// Gets the shared memory object for font services. + const Kernel::SharedMemory& GetFontSharedMem() const; + + /// Gets the shared memory object for IRS services. + Kernel::SharedMemory& GetIrsSharedMem(); + + /// Gets the shared memory object for IRS services. + const Kernel::SharedMemory& GetIrsSharedMem() const; + + /// Gets the shared memory object for Time services. + Kernel::SharedMemory& GetTimeSharedMem(); + + /// Gets the shared memory object for Time services. + const Kernel::SharedMemory& GetTimeSharedMem() const; + private: friend class Object; friend class Process; diff --git a/src/core/hle/kernel/memory/address_space_info.cpp b/src/core/hle/kernel/memory/address_space_info.cpp new file mode 100644 index 000000000..27fae05e7 --- /dev/null +++ b/src/core/hle/kernel/memory/address_space_info.cpp @@ -0,0 +1,118 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphère, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphère-NX. 
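The KernelCore accessors declared above are the single point through which the rest of the emulator reaches this new memory machinery. A minimal consumer sketch (hypothetical call site; `system` stands for an already-initialized Core::System; illustrative only, not part of the change):

    // Hypothetical usage of the accessors added to KernelCore above.
    Kernel::Memory::MemoryManager& mm = system.Kernel().MemoryManager();
    Kernel::SharedMemory& hid = system.Kernel().GetHidSharedMem();
    auto& user_slab = system.Kernel().GetUserSlabHeapPages();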
+
+#include <array>
+#include <limits>
+
+#include "common/assert.h"
+#include "core/hle/kernel/memory/address_space_info.h"
+
+namespace Kernel::Memory {
+
+namespace {
+
+enum : u64 {
+    Size_1_MB = 0x100000,
+    Size_2_MB = 2 * Size_1_MB,
+    Size_128_MB = 128 * Size_1_MB,
+    Size_1_GB = 0x40000000,
+    Size_2_GB = 2 * Size_1_GB,
+    Size_4_GB = 4 * Size_1_GB,
+    Size_6_GB = 6 * Size_1_GB,
+    Size_64_GB = 64 * Size_1_GB,
+    Size_512_GB = 512 * Size_1_GB,
+    Invalid = std::numeric_limits<u64>::max(),
+};
+
+// clang-format off
+constexpr std::array<AddressSpaceInfo, 13> AddressSpaceInfos{{
+    { 32 /*bit_width*/, Size_2_MB /*addr*/, Size_1_GB - Size_2_MB /*size*/, AddressSpaceInfo::Type::Is32Bit, },
+    { 32 /*bit_width*/, Size_1_GB /*addr*/, Size_4_GB - Size_1_GB /*size*/, AddressSpaceInfo::Type::Small64Bit, },
+    { 32 /*bit_width*/, Invalid /*addr*/, Size_1_GB /*size*/, AddressSpaceInfo::Type::Heap, },
+    { 32 /*bit_width*/, Invalid /*addr*/, Size_1_GB /*size*/, AddressSpaceInfo::Type::Alias, },
+    { 36 /*bit_width*/, Size_128_MB /*addr*/, Size_2_GB - Size_128_MB /*size*/, AddressSpaceInfo::Type::Is32Bit, },
+    { 36 /*bit_width*/, Size_2_GB /*addr*/, Size_64_GB - Size_2_GB /*size*/, AddressSpaceInfo::Type::Small64Bit, },
+    { 36 /*bit_width*/, Invalid /*addr*/, Size_6_GB /*size*/, AddressSpaceInfo::Type::Heap, },
+    { 36 /*bit_width*/, Invalid /*addr*/, Size_6_GB /*size*/, AddressSpaceInfo::Type::Alias, },
+    { 39 /*bit_width*/, Size_128_MB /*addr*/, Size_512_GB - Size_128_MB /*size*/, AddressSpaceInfo::Type::Large64Bit, },
+    { 39 /*bit_width*/, Invalid /*addr*/, Size_64_GB /*size*/, AddressSpaceInfo::Type::Is32Bit, },
+    { 39 /*bit_width*/, Invalid /*addr*/, Size_6_GB /*size*/, AddressSpaceInfo::Type::Heap, },
+    { 39 /*bit_width*/, Invalid /*addr*/, Size_64_GB /*size*/, AddressSpaceInfo::Type::Alias, },
+    { 39 /*bit_width*/, Invalid /*addr*/, Size_2_GB /*size*/, AddressSpaceInfo::Type::Stack, },
+}};
+// clang-format on
+
+constexpr bool IsAllowedIndexForAddress(std::size_t index) {
+    return index < std::size(AddressSpaceInfos) && AddressSpaceInfos[index].GetAddress() != Invalid;
+}
+
+constexpr std::size_t
+    AddressSpaceIndices32Bit[static_cast<std::size_t>(AddressSpaceInfo::Type::Count)]{
+        0, 1, 0, 2, 0, 3,
+    };
+
+constexpr std::size_t
+    AddressSpaceIndices36Bit[static_cast<std::size_t>(AddressSpaceInfo::Type::Count)]{
+        4, 5, 4, 6, 4, 7,
+    };
+
+constexpr std::size_t
+    AddressSpaceIndices39Bit[static_cast<std::size_t>(AddressSpaceInfo::Type::Count)]{
+        9, 8, 8, 10, 12, 11,
+    };
+
+constexpr bool IsAllowed32BitType(AddressSpaceInfo::Type type) {
+    return type < AddressSpaceInfo::Type::Count && type != AddressSpaceInfo::Type::Large64Bit &&
+           type != AddressSpaceInfo::Type::Stack;
+}
+
+constexpr bool IsAllowed36BitType(AddressSpaceInfo::Type type) {
+    return type < AddressSpaceInfo::Type::Count && type != AddressSpaceInfo::Type::Large64Bit &&
+           type != AddressSpaceInfo::Type::Stack;
+}
+
+constexpr bool IsAllowed39BitType(AddressSpaceInfo::Type type) {
+    return type < AddressSpaceInfo::Type::Count && type != AddressSpaceInfo::Type::Small64Bit;
+}
+
+} // namespace
+
+u64 AddressSpaceInfo::GetAddressSpaceStart(std::size_t width, AddressSpaceInfo::Type type) {
+    const std::size_t index{static_cast<std::size_t>(type)};
+    switch (width) {
+    case 32:
+        ASSERT(IsAllowed32BitType(type));
+        ASSERT(IsAllowedIndexForAddress(AddressSpaceIndices32Bit[index]));
+        return AddressSpaceInfos[AddressSpaceIndices32Bit[index]].GetAddress();
+    case 36:
+        ASSERT(IsAllowed36BitType(type));
ASSERT(IsAllowedIndexForAddress(AddressSpaceIndices36Bit[index])); + return AddressSpaceInfos[AddressSpaceIndices36Bit[index]].GetAddress(); + case 39: + ASSERT(IsAllowed39BitType(type)); + ASSERT(IsAllowedIndexForAddress(AddressSpaceIndices39Bit[index])); + return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].GetAddress(); + } + UNREACHABLE(); +} + +std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, AddressSpaceInfo::Type type) { + const std::size_t index{static_cast<std::size_t>(type)}; + switch (width) { + case 32: + ASSERT(IsAllowed32BitType(type)); + return AddressSpaceInfos[AddressSpaceIndices32Bit[index]].GetSize(); + case 36: + ASSERT(IsAllowed36BitType(type)); + return AddressSpaceInfos[AddressSpaceIndices36Bit[index]].GetSize(); + case 39: + ASSERT(IsAllowed39BitType(type)); + return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].GetSize(); + } + UNREACHABLE(); +} + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/address_space_info.h b/src/core/hle/kernel/memory/address_space_info.h new file mode 100644 index 000000000..cc9a6421e --- /dev/null +++ b/src/core/hle/kernel/memory/address_space_info.h @@ -0,0 +1,54 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphère, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphère-NX. + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Kernel::Memory { + +class AddressSpaceInfo final : NonCopyable { +public: + enum class Type : u32 { + Is32Bit = 0, + Small64Bit = 1, + Large64Bit = 2, + Heap = 3, + Stack = 4, + Alias = 5, + Count, + }; + +private: + std::size_t bit_width{}; + std::size_t addr{}; + std::size_t size{}; + Type type{}; + +public: + static u64 GetAddressSpaceStart(std::size_t width, Type type); + static std::size_t GetAddressSpaceSize(std::size_t width, Type type); + + constexpr AddressSpaceInfo(std::size_t bit_width, std::size_t addr, std::size_t size, Type type) + : bit_width{bit_width}, addr{addr}, size{size}, type{type} {} + + constexpr std::size_t GetWidth() const { + return bit_width; + } + constexpr std::size_t GetAddress() const { + return addr; + } + constexpr std::size_t GetSize() const { + return size; + } + constexpr Type GetType() const { + return type; + } +}; + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/memory_block.h b/src/core/hle/kernel/memory/memory_block.h new file mode 100644 index 000000000..e11043b60 --- /dev/null +++ b/src/core/hle/kernel/memory/memory_block.h @@ -0,0 +1,318 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphère, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphère-NX. 
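The address_space_info files just shown resolve a (bit width, region type) pair through the index arrays into the table. A small usage sketch, with the constants following from the table's 39-bit rows:

    using Kernel::Memory::AddressSpaceInfo;
    // For 39-bit processes: the code region starts at 128 MiB, the heap region is 6 GiB.
    const u64 code_base =
        AddressSpaceInfo::GetAddressSpaceStart(39, AddressSpaceInfo::Type::Large64Bit);
    const std::size_t heap_size =
        AddressSpaceInfo::GetAddressSpaceSize(39, AddressSpaceInfo::Type::Heap);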
+
+#pragma once
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/hle/kernel/memory/memory_types.h"
+#include "core/hle/kernel/svc_types.h"
+
+namespace Kernel::Memory {
+
+enum class MemoryState : u32 {
+    None = 0,
+    Mask = 0xFFFFFFFF, // TODO(bunnei): This should probably be 0xFF
+    All = ~None,
+
+    FlagCanReprotect = (1 << 8),
+    FlagCanDebug = (1 << 9),
+    FlagCanUseIpc = (1 << 10),
+    FlagCanUseNonDeviceIpc = (1 << 11),
+    FlagCanUseNonSecureIpc = (1 << 12),
+    FlagMapped = (1 << 13),
+    FlagCode = (1 << 14),
+    FlagCanAlias = (1 << 15),
+    FlagCanCodeAlias = (1 << 16),
+    FlagCanTransfer = (1 << 17),
+    FlagCanQueryPhysical = (1 << 18),
+    FlagCanDeviceMap = (1 << 19),
+    FlagCanAlignedDeviceMap = (1 << 20),
+    FlagCanIpcUserBuffer = (1 << 21),
+    FlagReferenceCounted = (1 << 22),
+    FlagCanMapProcess = (1 << 23),
+    FlagCanChangeAttribute = (1 << 24),
+    FlagCanCodeMemory = (1 << 25),
+
+    FlagsData = FlagCanReprotect | FlagCanUseIpc | FlagCanUseNonDeviceIpc | FlagCanUseNonSecureIpc |
+                FlagMapped | FlagCanAlias | FlagCanTransfer | FlagCanQueryPhysical |
+                FlagCanDeviceMap | FlagCanAlignedDeviceMap | FlagCanIpcUserBuffer |
+                FlagReferenceCounted | FlagCanChangeAttribute,
+
+    FlagsCode = FlagCanDebug | FlagCanUseIpc | FlagCanUseNonDeviceIpc | FlagCanUseNonSecureIpc |
+                FlagMapped | FlagCode | FlagCanQueryPhysical | FlagCanDeviceMap |
+                FlagCanAlignedDeviceMap | FlagReferenceCounted,
+
+    FlagsMisc = FlagMapped | FlagReferenceCounted | FlagCanQueryPhysical | FlagCanDeviceMap,
+
+    Free = static_cast<u32>(Svc::MemoryState::Free),
+    Io = static_cast<u32>(Svc::MemoryState::Io) | FlagMapped,
+    Static = static_cast<u32>(Svc::MemoryState::Static) | FlagMapped | FlagCanQueryPhysical,
+    Code = static_cast<u32>(Svc::MemoryState::Code) | FlagsCode | FlagCanMapProcess,
+    CodeData = static_cast<u32>(Svc::MemoryState::CodeData) | FlagsData | FlagCanMapProcess |
+               FlagCanCodeMemory,
+    Shared = static_cast<u32>(Svc::MemoryState::Shared) | FlagMapped | FlagReferenceCounted,
+    Normal = static_cast<u32>(Svc::MemoryState::Normal) | FlagsData | FlagCanCodeMemory,
+
+    AliasCode = static_cast<u32>(Svc::MemoryState::AliasCode) | FlagsCode | FlagCanMapProcess |
+                FlagCanCodeAlias,
+    AliasCodeData = static_cast<u32>(Svc::MemoryState::AliasCodeData) | FlagsData |
+                    FlagCanMapProcess | FlagCanCodeAlias | FlagCanCodeMemory,
+
+    Ipc = static_cast<u32>(Svc::MemoryState::Ipc) | FlagsMisc | FlagCanAlignedDeviceMap |
+          FlagCanUseIpc | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
+
+    Stack = static_cast<u32>(Svc::MemoryState::Stack) | FlagsMisc | FlagCanAlignedDeviceMap |
+            FlagCanUseIpc | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
+
+    ThreadLocal =
+        static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagReferenceCounted,
+
+    Transfered = static_cast<u32>(Svc::MemoryState::Transfered) | FlagsMisc |
+                 FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc |
+                 FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
+
+    SharedTransfered = static_cast<u32>(Svc::MemoryState::SharedTransfered) | FlagsMisc |
+                       FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
+
+    SharedCode = static_cast<u32>(Svc::MemoryState::SharedCode) | FlagMapped |
+                 FlagReferenceCounted | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
+
+    Inaccessible = static_cast<u32>(Svc::MemoryState::Inaccessible),
+
+    NonSecureIpc = static_cast<u32>(Svc::MemoryState::NonSecureIpc) | FlagsMisc |
+                   FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
+
+ NonDeviceIpc = + static_cast<u32>(Svc::MemoryState::NonDeviceIpc) | FlagsMisc | FlagCanUseNonDeviceIpc, + + Kernel = static_cast<u32>(Svc::MemoryState::Kernel) | FlagMapped, + + GeneratedCode = static_cast<u32>(Svc::MemoryState::GeneratedCode) | FlagMapped | + FlagReferenceCounted | FlagCanDebug, + CodeOut = static_cast<u32>(Svc::MemoryState::CodeOut) | FlagMapped | FlagReferenceCounted, +}; +DECLARE_ENUM_FLAG_OPERATORS(MemoryState); + +static_assert(static_cast<u32>(MemoryState::Free) == 0x00000000); +static_assert(static_cast<u32>(MemoryState::Io) == 0x00002001); +static_assert(static_cast<u32>(MemoryState::Static) == 0x00042002); +static_assert(static_cast<u32>(MemoryState::Code) == 0x00DC7E03); +static_assert(static_cast<u32>(MemoryState::CodeData) == 0x03FEBD04); +static_assert(static_cast<u32>(MemoryState::Normal) == 0x037EBD05); +static_assert(static_cast<u32>(MemoryState::Shared) == 0x00402006); +static_assert(static_cast<u32>(MemoryState::AliasCode) == 0x00DD7E08); +static_assert(static_cast<u32>(MemoryState::AliasCodeData) == 0x03FFBD09); +static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A); +static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B); +static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C); +static_assert(static_cast<u32>(MemoryState::Transfered) == 0x015C3C0D); +static_assert(static_cast<u32>(MemoryState::SharedTransfered) == 0x005C380E); +static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F); +static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010); +static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811); +static_assert(static_cast<u32>(MemoryState::NonDeviceIpc) == 0x004C2812); +static_assert(static_cast<u32>(MemoryState::Kernel) == 0x00002013); +static_assert(static_cast<u32>(MemoryState::GeneratedCode) == 0x00402214); +static_assert(static_cast<u32>(MemoryState::CodeOut) == 0x00402015); + +enum class MemoryPermission : u8 { + None = 0, + Mask = static_cast<u8>(~None), + + Read = 1 << 0, + Write = 1 << 1, + Execute = 1 << 2, + + ReadAndWrite = Read | Write, + ReadAndExecute = Read | Execute, + + UserMask = static_cast<u8>(Svc::MemoryPermission::Read | Svc::MemoryPermission::Write | + Svc::MemoryPermission::Execute), +}; +DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission); + +enum class MemoryAttribute : u8 { + None = 0x00, + Mask = 0x7F, + All = Mask, + DontCareMask = 0x80, + + Locked = static_cast<u8>(Svc::MemoryAttribute::Locked), + IpcLocked = static_cast<u8>(Svc::MemoryAttribute::IpcLocked), + DeviceShared = static_cast<u8>(Svc::MemoryAttribute::DeviceShared), + Uncached = static_cast<u8>(Svc::MemoryAttribute::Uncached), + + IpcAndDeviceMapped = IpcLocked | DeviceShared, + LockedAndIpcLocked = Locked | IpcLocked, + DeviceSharedAndUncached = DeviceShared | Uncached +}; +DECLARE_ENUM_FLAG_OPERATORS(MemoryAttribute); + +static_assert((static_cast<u8>(MemoryAttribute::Mask) & + static_cast<u8>(MemoryAttribute::DontCareMask)) == 0); + +struct MemoryInfo { + VAddr addr{}; + std::size_t size{}; + MemoryState state{}; + MemoryPermission perm{}; + MemoryAttribute attribute{}; + MemoryPermission original_perm{}; + u16 ipc_lock_count{}; + u16 device_use_count{}; + + constexpr Svc::MemoryInfo GetSvcMemoryInfo() const { + return { + addr, + size, + static_cast<Svc::MemoryState>(state & MemoryState::Mask), + static_cast<Svc::MemoryAttribute>(attribute & MemoryAttribute::Mask), + static_cast<Svc::MemoryPermission>(perm & MemoryPermission::UserMask), + ipc_lock_count, + device_use_count, 
+ }; + } + + constexpr VAddr GetAddress() const { + return addr; + } + constexpr std::size_t GetSize() const { + return size; + } + constexpr std::size_t GetNumPages() const { + return GetSize() / PageSize; + } + constexpr VAddr GetEndAddress() const { + return GetAddress() + GetSize(); + } + constexpr VAddr GetLastAddress() const { + return GetEndAddress() - 1; + } +}; + +class MemoryBlock final { + friend class MemoryBlockManager; + +private: + VAddr addr{}; + std::size_t num_pages{}; + MemoryState state{MemoryState::None}; + u16 ipc_lock_count{}; + u16 device_use_count{}; + MemoryPermission perm{MemoryPermission::None}; + MemoryPermission original_perm{MemoryPermission::None}; + MemoryAttribute attribute{MemoryAttribute::None}; + +public: + static constexpr int Compare(const MemoryBlock& lhs, const MemoryBlock& rhs) { + if (lhs.GetAddress() < rhs.GetAddress()) { + return -1; + } else if (lhs.GetAddress() <= rhs.GetLastAddress()) { + return 0; + } else { + return 1; + } + } + +public: + constexpr MemoryBlock() = default; + constexpr MemoryBlock(VAddr addr, std::size_t num_pages, MemoryState state, + MemoryPermission perm, MemoryAttribute attribute) + : addr{addr}, num_pages(num_pages), state{state}, perm{perm}, attribute{attribute} {} + + constexpr VAddr GetAddress() const { + return addr; + } + + constexpr std::size_t GetNumPages() const { + return num_pages; + } + + constexpr std::size_t GetSize() const { + return GetNumPages() * PageSize; + } + + constexpr VAddr GetEndAddress() const { + return GetAddress() + GetSize(); + } + + constexpr VAddr GetLastAddress() const { + return GetEndAddress() - 1; + } + + constexpr MemoryInfo GetMemoryInfo() const { + return { + GetAddress(), GetSize(), state, perm, + attribute, original_perm, ipc_lock_count, device_use_count, + }; + } + +private: + constexpr bool HasProperties(MemoryState s, MemoryPermission p, MemoryAttribute a) const { + constexpr MemoryAttribute AttributeIgnoreMask{MemoryAttribute::DontCareMask | + MemoryAttribute::IpcLocked | + MemoryAttribute::DeviceShared}; + return state == s && perm == p && + (attribute | AttributeIgnoreMask) == (a | AttributeIgnoreMask); + } + + constexpr bool HasSameProperties(const MemoryBlock& rhs) const { + return state == rhs.state && perm == rhs.perm && original_perm == rhs.original_perm && + attribute == rhs.attribute && ipc_lock_count == rhs.ipc_lock_count && + device_use_count == rhs.device_use_count; + } + + constexpr bool Contains(VAddr start) const { + return GetAddress() <= start && start <= GetEndAddress(); + } + + constexpr void Add(std::size_t count) { + ASSERT(count > 0); + ASSERT(GetAddress() + count * PageSize - 1 < GetEndAddress() + count * PageSize - 1); + + num_pages += count; + } + + constexpr void Update(MemoryState new_state, MemoryPermission new_perm, + MemoryAttribute new_attribute) { + ASSERT(original_perm == MemoryPermission::None); + ASSERT((attribute & MemoryAttribute::IpcLocked) == MemoryAttribute::None); + + state = new_state; + perm = new_perm; + + // TODO(bunnei): Is this right? 
+ attribute = static_cast<MemoryAttribute>( + new_attribute /*| (attribute & (MemoryAttribute::IpcLocked | MemoryAttribute::DeviceShared))*/); + } + + constexpr MemoryBlock Split(VAddr split_addr) { + ASSERT(GetAddress() < split_addr); + ASSERT(Contains(split_addr)); + ASSERT(Common::IsAligned(split_addr, PageSize)); + + MemoryBlock block; + block.addr = addr; + block.num_pages = (split_addr - GetAddress()) / PageSize; + block.state = state; + block.ipc_lock_count = ipc_lock_count; + block.device_use_count = device_use_count; + block.perm = perm; + block.original_perm = original_perm; + block.attribute = attribute; + + addr = split_addr; + num_pages -= block.num_pages; + + return block; + } +}; +static_assert(std::is_trivially_destructible<MemoryBlock>::value); + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/memory_block_manager.cpp b/src/core/hle/kernel/memory/memory_block_manager.cpp new file mode 100644 index 000000000..1ebc126c0 --- /dev/null +++ b/src/core/hle/kernel/memory/memory_block_manager.cpp @@ -0,0 +1,190 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/hle/kernel/memory/memory_block_manager.h" +#include "core/hle/kernel/memory/memory_types.h" + +namespace Kernel::Memory { + +MemoryBlockManager::MemoryBlockManager(VAddr start_addr, VAddr end_addr) + : start_addr{start_addr}, end_addr{end_addr} { + const u64 num_pages{(end_addr - start_addr) / PageSize}; + memory_block_tree.emplace_back(start_addr, num_pages, MemoryState::Free, MemoryPermission::None, + MemoryAttribute::None); +} + +MemoryBlockManager::iterator MemoryBlockManager::FindIterator(VAddr addr) { + auto node{memory_block_tree.begin()}; + while (node != end()) { + const VAddr end_addr{node->GetNumPages() * PageSize + node->GetAddress()}; + if (node->GetAddress() <= addr && end_addr - 1 >= addr) { + return node; + } + node = std::next(node); + } + return end(); +} + +VAddr MemoryBlockManager::FindFreeArea(VAddr region_start, std::size_t region_num_pages, + std::size_t num_pages, std::size_t align, std::size_t offset, + std::size_t guard_pages) { + if (num_pages == 0) { + return {}; + } + + const VAddr region_end{region_start + region_num_pages * PageSize}; + const VAddr region_last{region_end - 1}; + for (auto it{FindIterator(region_start)}; it != memory_block_tree.cend(); it++) { + const auto info{it->GetMemoryInfo()}; + if (region_last < info.GetAddress()) { + break; + } + + if (info.state != MemoryState::Free) { + continue; + } + + VAddr area{(info.GetAddress() <= region_start) ? region_start : info.GetAddress()}; + area += guard_pages * PageSize; + + const VAddr offset_area{Common::AlignDown(area, align) + offset}; + area = (area <= offset_area) ? 
offset_area : offset_area + align; + + const VAddr area_end{area + num_pages * PageSize + guard_pages * PageSize}; + const VAddr area_last{area_end - 1}; + + if (info.GetAddress() <= area && area < area_last && area_last <= region_last && + area_last <= info.GetLastAddress()) { + return area; + } + } + + return {}; +} + +void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState prev_state, + MemoryPermission prev_perm, MemoryAttribute prev_attribute, + MemoryState state, MemoryPermission perm, + MemoryAttribute attribute) { + const std::size_t prev_count{memory_block_tree.size()}; + const VAddr end_addr{addr + num_pages * PageSize}; + iterator node{memory_block_tree.begin()}; + + prev_attribute |= MemoryAttribute::IpcAndDeviceMapped; + + while (node != memory_block_tree.end()) { + MemoryBlock* block{&(*node)}; + iterator next_node{std::next(node)}; + const VAddr cur_addr{block->GetAddress()}; + const VAddr cur_end_addr{block->GetNumPages() * PageSize + cur_addr}; + + if (addr < cur_end_addr && cur_addr < end_addr) { + if (!block->HasProperties(prev_state, prev_perm, prev_attribute)) { + node = next_node; + continue; + } + + iterator new_node{node}; + if (addr > cur_addr) { + memory_block_tree.insert(node, block->Split(addr)); + } + + if (end_addr < cur_end_addr) { + new_node = memory_block_tree.insert(node, block->Split(end_addr)); + } + + new_node->Update(state, perm, attribute); + + MergeAdjacent(new_node, next_node); + } + + if (cur_end_addr - 1 >= end_addr - 1) { + break; + } + + node = next_node; + } +} + +void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState state, + MemoryPermission perm, MemoryAttribute attribute) { + const std::size_t prev_count{memory_block_tree.size()}; + const VAddr end_addr{addr + num_pages * PageSize}; + iterator node{memory_block_tree.begin()}; + + while (node != memory_block_tree.end()) { + MemoryBlock* block{&(*node)}; + iterator next_node{std::next(node)}; + const VAddr cur_addr{block->GetAddress()}; + const VAddr cur_end_addr{block->GetNumPages() * PageSize + cur_addr}; + + if (addr < cur_end_addr && cur_addr < end_addr) { + iterator new_node{node}; + + if (addr > cur_addr) { + memory_block_tree.insert(node, block->Split(addr)); + } + + if (end_addr < cur_end_addr) { + new_node = memory_block_tree.insert(node, block->Split(end_addr)); + } + + new_node->Update(state, perm, attribute); + + MergeAdjacent(new_node, next_node); + } + + if (cur_end_addr - 1 >= end_addr - 1) { + break; + } + + node = next_node; + } +} + +void MemoryBlockManager::IterateForRange(VAddr start, VAddr end, IterateFunc&& func) { + const_iterator it{FindIterator(start)}; + MemoryInfo info{}; + do { + info = it->GetMemoryInfo(); + func(info); + it = std::next(it); + } while (info.addr + info.size - 1 < end - 1 && it != cend()); +} + +void MemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) { + MemoryBlock* block{&(*it)}; + + auto EraseIt = [&](const iterator it_to_erase) { + if (next_it == it_to_erase) { + next_it = std::next(next_it); + } + memory_block_tree.erase(it_to_erase); + }; + + if (it != memory_block_tree.begin()) { + MemoryBlock* prev{&(*std::prev(it))}; + + if (block->HasSameProperties(*prev)) { + const iterator prev_it{std::prev(it)}; + + prev->Add(block->GetNumPages()); + EraseIt(it); + + it = prev_it; + block = prev; + } + } + + if (it != cend()) { + const MemoryBlock* const next{&(*std::next(it))}; + + if (block->HasSameProperties(*next)) { + block->Add(next->GetNumPages()); + EraseIt(std::next(it)); + } + } +} 
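A worked example of the split/merge flow implemented above (the addresses are illustrative, not from the change):

    using namespace Kernel::Memory;
    MemoryBlockManager manager(0x8000000, 0x8000000 + 16 * PageSize);
    // One Free block covers the range; reprotecting four pages in the middle
    // splits it into Free / Normal / Free.
    manager.Update(0x8000000 + 4 * PageSize, 4, MemoryState::Normal,
                   MemoryPermission::ReadAndWrite, MemoryAttribute::None);
    // Restoring those pages to Free yields three blocks with identical
    // properties, and MergeAdjacent collapses them back into a single node.
    manager.Update(0x8000000 + 4 * PageSize, 4, MemoryState::Free);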
+ +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/memory_block_manager.h b/src/core/hle/kernel/memory/memory_block_manager.h new file mode 100644 index 000000000..0f2270f0f --- /dev/null +++ b/src/core/hle/kernel/memory/memory_block_manager.h @@ -0,0 +1,64 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <functional> +#include <list> +#include <memory> + +#include "common/common_types.h" +#include "core/hle/kernel/memory/memory_block.h" + +namespace Kernel::Memory { + +class MemoryBlockManager final { +public: + using MemoryBlockTree = std::list<MemoryBlock>; + using iterator = MemoryBlockTree::iterator; + using const_iterator = MemoryBlockTree::const_iterator; + +public: + MemoryBlockManager(VAddr start_addr, VAddr end_addr); + + iterator end() { + return memory_block_tree.end(); + } + const_iterator end() const { + return memory_block_tree.end(); + } + const_iterator cend() const { + return memory_block_tree.cend(); + } + + iterator FindIterator(VAddr addr); + + VAddr FindFreeArea(VAddr region_start, std::size_t region_num_pages, std::size_t num_pages, + std::size_t align, std::size_t offset, std::size_t guard_pages); + + void Update(VAddr addr, std::size_t num_pages, MemoryState prev_state, + MemoryPermission prev_perm, MemoryAttribute prev_attribute, MemoryState state, + MemoryPermission perm, MemoryAttribute attribute); + + void Update(VAddr addr, std::size_t num_pages, MemoryState state, + MemoryPermission perm = MemoryPermission::None, + MemoryAttribute attribute = MemoryAttribute::None); + + using IterateFunc = std::function<void(const MemoryInfo&)>; + void IterateForRange(VAddr start, VAddr end, IterateFunc&& func); + + MemoryBlock& FindBlock(VAddr addr) { + return *FindIterator(addr); + } + +private: + void MergeAdjacent(iterator it, iterator& next_it); + + const VAddr start_addr; + const VAddr end_addr; + + MemoryBlockTree memory_block_tree; +}; + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/memory_layout.h b/src/core/hle/kernel/memory/memory_layout.h new file mode 100644 index 000000000..830c6f0d7 --- /dev/null +++ b/src/core/hle/kernel/memory/memory_layout.h @@ -0,0 +1,73 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
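The memory_block_manager.h interface just shown also exposes FindFreeArea() for carving new mappings out of Free blocks; a zero return signals that no suitable gap exists. Sketch (parameter values are illustrative):

    // Find room for 4 pages, page-aligned, keeping one guard page on each side.
    const VAddr addr = manager.FindFreeArea(region_start, region_num_pages,
                                            /*num_pages=*/4, /*align=*/PageSize,
                                            /*offset=*/0, /*guard_pages=*/1);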
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Kernel::Memory {
+
+class MemoryRegion final {
+    friend class MemoryLayout;
+
+public:
+    constexpr PAddr StartAddress() const {
+        return start_address;
+    }
+
+    constexpr PAddr EndAddress() const {
+        return end_address;
+    }
+
+private:
+    constexpr MemoryRegion() = default;
+    constexpr MemoryRegion(PAddr start_address, PAddr end_address)
+        : start_address{start_address}, end_address{end_address} {}
+
+    const PAddr start_address{};
+    const PAddr end_address{};
+};
+
+class MemoryLayout final {
+public:
+    constexpr const MemoryRegion& Application() const {
+        return application;
+    }
+
+    constexpr const MemoryRegion& Applet() const {
+        return applet;
+    }
+
+    constexpr const MemoryRegion& System() const {
+        return system;
+    }
+
+    static constexpr MemoryLayout GetDefaultLayout() {
+        constexpr std::size_t application_size{0xcd500000};
+        constexpr std::size_t applet_size{0x1fb00000};
+        constexpr PAddr application_start_address{Core::DramMemoryMap::End - application_size};
+        constexpr PAddr application_end_address{Core::DramMemoryMap::End};
+        constexpr PAddr applet_start_address{application_start_address - applet_size};
+        constexpr PAddr applet_end_address{applet_start_address + applet_size};
+        constexpr PAddr system_start_address{Core::DramMemoryMap::SlabHeapEnd};
+        constexpr PAddr system_end_address{applet_start_address};
+        return {application_start_address, application_end_address, applet_start_address,
+                applet_end_address, system_start_address, system_end_address};
+    }
+
+private:
+    constexpr MemoryLayout(PAddr application_start_address, PAddr application_end_address,
+                           PAddr applet_start_address, PAddr applet_end_address,
+                           PAddr system_start_address, PAddr system_end_address)
+        : application{application_start_address, application_end_address},
+          applet{applet_start_address, applet_end_address},
+          system{system_start_address, system_end_address} {}
+
+    const MemoryRegion application;
+    const MemoryRegion applet;
+    const MemoryRegion system;
+
+    const PAddr start_address{};
+};
+
+} // namespace Kernel::Memory
diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/memory/memory_manager.cpp
new file mode 100644
index 000000000..3cd4f9e85
--- /dev/null
+++ b/src/core/hle/kernel/memory/memory_manager.cpp
@@ -0,0 +1,176 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
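Because GetDefaultLayout() above is constexpr, the pool boundaries can be checked at compile time; the three regions tile DRAM back to back:

    constexpr auto layout = Kernel::Memory::MemoryLayout::GetDefaultLayout();
    static_assert(layout.System().EndAddress() == layout.Applet().StartAddress());
    static_assert(layout.Applet().EndAddress() == layout.Application().StartAddress());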
+ +#include <algorithm> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/common_types.h" +#include "common/scope_exit.h" +#include "core/hle/kernel/errors.h" +#include "core/hle/kernel/memory/memory_manager.h" +#include "core/hle/kernel/memory/page_linked_list.h" + +namespace Kernel::Memory { + +std::size_t MemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u64 end_address) { + const auto size{end_address - start_address}; + + // Calculate metadata sizes + const auto ref_count_size{(size / PageSize) * sizeof(u16)}; + const auto optimize_map_size{(Common::AlignUp((size / PageSize), 64) / 64) * sizeof(u64)}; + const auto manager_size{Common::AlignUp(optimize_map_size + ref_count_size, PageSize)}; + const auto page_heap_size{PageHeap::CalculateMetadataOverheadSize(size)}; + const auto total_metadata_size{manager_size + page_heap_size}; + ASSERT(manager_size <= total_metadata_size); + ASSERT(Common::IsAligned(total_metadata_size, PageSize)); + + // Setup region + pool = new_pool; + + // Initialize the manager's KPageHeap + heap.Initialize(start_address, size, page_heap_size); + + // Free the memory to the heap + heap.Free(start_address, size / PageSize); + + // Update the heap's used size + heap.UpdateUsedSize(); + + return total_metadata_size; +} + +void MemoryManager::InitializeManager(Pool pool, u64 start_address, u64 end_address) { + ASSERT(pool < Pool::Count); + managers[static_cast<std::size_t>(pool)].Initialize(pool, start_address, end_address); +} + +VAddr MemoryManager::AllocateContinuous(std::size_t num_pages, std::size_t align_pages, Pool pool, + Direction dir) { + // Early return if we're allocating no pages + if (num_pages == 0) { + return {}; + } + + // Lock the pool that we're allocating from + const auto pool_index{static_cast<std::size_t>(pool)}; + std::lock_guard lock{pool_locks[pool_index]}; + + // Choose a heap based on our page size request + const s32 heap_index{PageHeap::GetAlignedBlockIndex(num_pages, align_pages)}; + + // Loop, trying to iterate from each block + // TODO (bunnei): Support multiple managers + Impl& chosen_manager{managers[pool_index]}; + VAddr allocated_block{chosen_manager.AllocateBlock(heap_index)}; + + // If we failed to allocate, quit now + if (!allocated_block) { + return {}; + } + + // If we allocated more than we need, free some + const auto allocated_pages{PageHeap::GetBlockNumPages(heap_index)}; + if (allocated_pages > num_pages) { + chosen_manager.Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages); + } + + return allocated_block; +} + +ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pages, Pool pool, + Direction dir) { + ASSERT(page_list.GetNumPages() == 0); + + // Early return if we're allocating no pages + if (num_pages == 0) { + return RESULT_SUCCESS; + } + + // Lock the pool that we're allocating from + const auto pool_index{static_cast<std::size_t>(pool)}; + std::lock_guard lock{pool_locks[pool_index]}; + + // Choose a heap based on our page size request + const s32 heap_index{PageHeap::GetBlockIndex(num_pages)}; + if (heap_index < 0) { + return ERR_OUT_OF_MEMORY; + } + + // TODO (bunnei): Support multiple managers + Impl& chosen_manager{managers[pool_index]}; + + // Ensure that we don't leave anything un-freed + auto group_guard = detail::ScopeExit([&] { + for (const auto& it : page_list.Nodes()) { + const auto num_pages{std::min( + it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)}; + 
chosen_manager.Free(it.GetAddress(), num_pages); + } + }); + + // Keep allocating until we've allocated all our pages + for (s32 index{heap_index}; index >= 0 && num_pages > 0; index--) { + const auto pages_per_alloc{PageHeap::GetBlockNumPages(index)}; + + while (num_pages >= pages_per_alloc) { + // Allocate a block + VAddr allocated_block{chosen_manager.AllocateBlock(index)}; + if (!allocated_block) { + break; + } + + // Safely add it to our group + { + auto block_guard = detail::ScopeExit( + [&] { chosen_manager.Free(allocated_block, pages_per_alloc); }); + + if (const ResultCode result{page_list.AddBlock(allocated_block, pages_per_alloc)}; + result.IsError()) { + return result; + } + + block_guard.Cancel(); + } + + num_pages -= pages_per_alloc; + } + } + + // Only succeed if we allocated as many pages as we wanted + ASSERT(num_pages >= 0); + if (num_pages) { + return ERR_OUT_OF_MEMORY; + } + + // We succeeded! + group_guard.Cancel(); + return RESULT_SUCCESS; +} + +ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages, Pool pool, + Direction dir) { + // Early return if we're freeing no pages + if (!num_pages) { + return RESULT_SUCCESS; + } + + // Lock the pool that we're freeing from + const auto pool_index{static_cast<std::size_t>(pool)}; + std::lock_guard lock{pool_locks[pool_index]}; + + // TODO (bunnei): Support multiple managers + Impl& chosen_manager{managers[pool_index]}; + + // Free all of the pages + for (const auto& it : page_list.Nodes()) { + const auto num_pages{std::min( + it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)}; + chosen_manager.Free(it.GetAddress(), num_pages); + } + + return RESULT_SUCCESS; +} + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/memory_manager.h b/src/core/hle/kernel/memory/memory_manager.h new file mode 100644 index 000000000..b078d7a5e --- /dev/null +++ b/src/core/hle/kernel/memory/memory_manager.h @@ -0,0 +1,97 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
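A sketch of the allocate/free round trip the manager above supports (hypothetical call site; `kernel` is a KernelCore and the surrounding error handling is illustrative):

    using Kernel::Memory::MemoryManager;
    Kernel::Memory::PageLinkedList pages;
    CASCADE_CODE(kernel.MemoryManager().Allocate(pages, /*num_pages=*/16,
                                                 MemoryManager::Pool::Application));
    // ... map the blocks collected in `pages` into a page table ...
    kernel.MemoryManager().Free(pages, 16, MemoryManager::Pool::Application);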
+ +#pragma once + +#include <array> +#include <mutex> + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "core/hle/kernel/memory/page_heap.h" +#include "core/hle/result.h" + +namespace Kernel::Memory { + +class PageLinkedList; + +class MemoryManager final : NonCopyable { +public: + enum class Pool : u32 { + Application = 0, + Applet = 1, + System = 2, + SystemNonSecure = 3, + + Count, + + Shift = 4, + Mask = (0xF << Shift), + }; + + enum class Direction : u32 { + FromFront = 0, + FromBack = 1, + + Shift = 0, + Mask = (0xF << Shift), + }; + + MemoryManager() = default; + + constexpr std::size_t GetSize(Pool pool) const { + return managers[static_cast<std::size_t>(pool)].GetSize(); + } + + void InitializeManager(Pool pool, u64 start_address, u64 end_address); + VAddr AllocateContinuous(std::size_t num_pages, std::size_t align_pages, Pool pool, + Direction dir = Direction::FromFront); + ResultCode Allocate(PageLinkedList& page_list, std::size_t num_pages, Pool pool, + Direction dir = Direction::FromFront); + ResultCode Free(PageLinkedList& page_list, std::size_t num_pages, Pool pool, + Direction dir = Direction::FromFront); + + static constexpr std::size_t MaxManagerCount = 10; + +private: + class Impl final : NonCopyable { + private: + using RefCount = u16; + + private: + PageHeap heap; + Pool pool{}; + + public: + Impl() = default; + + std::size_t Initialize(Pool new_pool, u64 start_address, u64 end_address); + + VAddr AllocateBlock(s32 index) { + return heap.AllocateBlock(index); + } + + void Free(VAddr addr, std::size_t num_pages) { + heap.Free(addr, num_pages); + } + + constexpr std::size_t GetSize() const { + return heap.GetSize(); + } + + constexpr VAddr GetAddress() const { + return heap.GetAddress(); + } + + constexpr VAddr GetEndAddress() const { + return heap.GetEndAddress(); + } + }; + +private: + std::array<std::mutex, static_cast<std::size_t>(Pool::Count)> pool_locks; + std::array<Impl, MaxManagerCount> managers; +}; + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/memory_types.h b/src/core/hle/kernel/memory/memory_types.h new file mode 100644 index 000000000..a75bf77c0 --- /dev/null +++ b/src/core/hle/kernel/memory/memory_types.h @@ -0,0 +1,18 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> + +#include "common/common_types.h" + +namespace Kernel::Memory { + +constexpr std::size_t PageBits{12}; +constexpr std::size_t PageSize{1 << PageBits}; + +using Page = std::array<u8, PageSize>; + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/page_heap.cpp b/src/core/hle/kernel/memory/page_heap.cpp new file mode 100644 index 000000000..efcbb3cad --- /dev/null +++ b/src/core/hle/kernel/memory/page_heap.cpp @@ -0,0 +1,119 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphère, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphère-NX. 
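Per the memory_manager.h interface just shown, each pool must be initialized against its physical range before use; this mirrors KernelCore::Impl::InitializeMemoryLayout() earlier in the diff:

    Kernel::Memory::MemoryManager mm;
    constexpr auto layout = Kernel::Memory::MemoryLayout::GetDefaultLayout();
    mm.InitializeManager(Kernel::Memory::MemoryManager::Pool::Application,
                         layout.Application().StartAddress(),
                         layout.Application().EndAddress());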
+ +#include "core/core.h" +#include "core/hle/kernel/memory/page_heap.h" +#include "core/memory.h" + +namespace Kernel::Memory { + +void PageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_size) { + // Check our assumptions + ASSERT(Common::IsAligned((address), PageSize)); + ASSERT(Common::IsAligned(size, PageSize)); + + // Set our members + heap_address = address; + heap_size = size; + + // Setup bitmaps + metadata.resize(metadata_size / sizeof(u64)); + u64* cur_bitmap_storage{metadata.data()}; + for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) { + const std::size_t cur_block_shift{MemoryBlockPageShifts[i]}; + const std::size_t next_block_shift{ + (i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0}; + cur_bitmap_storage = blocks[i].Initialize(heap_address, heap_size, cur_block_shift, + next_block_shift, cur_bitmap_storage); + } +} + +VAddr PageHeap::AllocateBlock(s32 index) { + const std::size_t needed_size{blocks[index].GetSize()}; + + for (s32 i{index}; i < static_cast<s32>(MemoryBlockPageShifts.size()); i++) { + if (const VAddr addr{blocks[i].PopBlock()}; addr) { + if (const std::size_t allocated_size{blocks[i].GetSize()}; + allocated_size > needed_size) { + Free(addr + needed_size, (allocated_size - needed_size) / PageSize); + } + return addr; + } + } + + return 0; +} + +void PageHeap::FreeBlock(VAddr block, s32 index) { + do { + block = blocks[index++].PushBlock(block); + } while (block != 0); +} + +void PageHeap::Free(VAddr addr, std::size_t num_pages) { + // Freeing no pages is a no-op + if (num_pages == 0) { + return; + } + + // Find the largest block size that we can free, and free as many as possible + s32 big_index{static_cast<s32>(MemoryBlockPageShifts.size()) - 1}; + const VAddr start{addr}; + const VAddr end{(num_pages * PageSize) + addr}; + VAddr before_start{start}; + VAddr before_end{start}; + VAddr after_start{end}; + VAddr after_end{end}; + while (big_index >= 0) { + const std::size_t block_size{blocks[big_index].GetSize()}; + const VAddr big_start{Common::AlignUp((start), block_size)}; + const VAddr big_end{Common::AlignDown((end), block_size)}; + if (big_start < big_end) { + // Free as many big blocks as we can + for (auto block{big_start}; block < big_end; block += block_size) { + FreeBlock(block, big_index); + } + before_end = big_start; + after_start = big_end; + break; + } + big_index--; + } + ASSERT(big_index >= 0); + + // Free space before the big blocks + for (s32 i{big_index - 1}; i >= 0; i--) { + const std::size_t block_size{blocks[i].GetSize()}; + while (before_start + block_size <= before_end) { + before_end -= block_size; + FreeBlock(before_end, i); + } + } + + // Free space after the big blocks + for (s32 i{big_index - 1}; i >= 0; i--) { + const std::size_t block_size{blocks[i].GetSize()}; + while (after_start + block_size <= after_end) { + FreeBlock(after_start, i); + after_start += block_size; + } + } +} + +std::size_t PageHeap::CalculateMetadataOverheadSize(std::size_t region_size) { + std::size_t overhead_size = 0; + for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) { + const std::size_t cur_block_shift{MemoryBlockPageShifts[i]}; + const std::size_t next_block_shift{ + (i != MemoryBlockPageShifts.size() - 1) ? 
MemoryBlockPageShifts[i + 1] : 0};
+        overhead_size += PageHeap::Block::CalculateMetadataOverheadSize(
+            region_size, cur_block_shift, next_block_shift);
+    }
+    return Common::AlignUp(overhead_size, PageSize);
+}
+
+} // namespace Kernel::Memory
diff --git a/src/core/hle/kernel/memory/page_heap.h b/src/core/hle/kernel/memory/page_heap.h
new file mode 100644
index 000000000..380c3f5a1
--- /dev/null
+++ b/src/core/hle/kernel/memory/page_heap.h
@@ -0,0 +1,370 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file references various implementation details from Atmosphère, an open-source firmware for
+// the Nintendo Switch. Copyright 2018-2020 Atmosphère-NX.
+
+#pragma once
+
+#include <array>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "core/hle/kernel/memory/memory_types.h"
+
+namespace Kernel::Memory {
+
+class PageHeap final : NonCopyable {
+public:
+    static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) {
+        const auto target_pages{std::max(num_pages, align_pages)};
+        for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) {
+            if (target_pages <=
+                (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
+                return static_cast<s32>(i);
+            }
+        }
+        return -1;
+    }
+
+    static constexpr s32 GetBlockIndex(std::size_t num_pages) {
+        for (s32 i{static_cast<s32>(NumMemoryBlockPageShifts) - 1}; i >= 0; i--) {
+            if (num_pages >= (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    static constexpr std::size_t GetBlockSize(std::size_t index) {
+        return static_cast<std::size_t>(1) << MemoryBlockPageShifts[index];
+    }
+
+    static constexpr std::size_t GetBlockNumPages(std::size_t index) {
+        return GetBlockSize(index) / PageSize;
+    }
+
+private:
+    static constexpr std::size_t NumMemoryBlockPageShifts{7};
+    static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{
+        0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E,
+    };
+
+    class Block final : NonCopyable {
+    private:
+        class Bitmap final : NonCopyable {
+        public:
+            static constexpr std::size_t MaxDepth{4};
+
+        private:
+            std::array<u64*, MaxDepth> bit_storages{};
+            std::size_t num_bits{};
+            std::size_t used_depths{};
+
+        public:
+            constexpr Bitmap() = default;
+
+            constexpr std::size_t GetNumBits() const {
+                return num_bits;
+            }
+            constexpr s32 GetHighestDepthIndex() const {
+                return static_cast<s32>(used_depths) - 1;
+            }
+
+            constexpr u64* Initialize(u64* storage, std::size_t size) {
+                // Initially, everything is un-set
+                num_bits = 0;
+
+                // Calculate the needed bitmap depth
+                used_depths = static_cast<std::size_t>(GetRequiredDepth(size));
+                ASSERT(used_depths <= MaxDepth);
+
+                // Set the bitmap pointers
+                for (s32 depth{GetHighestDepthIndex()}; depth >= 0; depth--) {
+                    bit_storages[depth] = storage;
+                    size = Common::AlignUp(size, 64) / 64;
+                    storage += size;
+                }
+
+                return storage;
+            }
+
+            s64 FindFreeBlock() const {
+                uintptr_t offset{};
+                s32 depth{};
+
+                do {
+                    const u64 v{bit_storages[depth][offset]};
+                    if (v == 0) {
+                        // Non-zero depth indicates that a previous level had a free block
+                        ASSERT(depth == 0);
+                        return -1;
+                    }
+                    offset = offset * 64 + Common::CountTrailingZeroes64(v);
+                    ++depth;
+                } while (depth < static_cast<s32>(used_depths));
+
+                return static_cast<s64>(offset);
+            }
+
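To make the hierarchy above concrete: a bit at depth N is set exactly when the corresponding u64 at depth N+1 is non-zero, which is why FindFreeBlock() needs only one word read per level. The depth math for an illustrative region of 4096 blocks:

    // Illustrative trace of GetRequiredDepth(4096), matching the code below:
    // 4096 / 64 = 64  -> depth 1
    //   64 / 64 = 1   -> depth 2
    //    1 / 64 = 0   -> depth 3 (one summary u64, one mid-level u64, 64 leaf u64s)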
+ constexpr void SetBit(std::size_t offset) { + SetBit(GetHighestDepthIndex(), offset); + num_bits++; + } + + constexpr void ClearBit(std::size_t offset) { + ClearBit(GetHighestDepthIndex(), offset); + num_bits--; + } + + constexpr bool ClearRange(std::size_t offset, std::size_t count) { + const s32 depth{GetHighestDepthIndex()}; + const auto bit_ind{offset / 64}; + u64* bits{bit_storages[depth]}; + if (count < 64) { + const auto shift{offset % 64}; + ASSERT(shift + count <= 64); + // Check that all the bits are set + const u64 mask{((1ULL << count) - 1) << shift}; + u64 v{bits[bit_ind]}; + if ((v & mask) != mask) { + return false; + } + + // Clear the bits + v &= ~mask; + bits[bit_ind] = v; + if (v == 0) { + ClearBit(depth - 1, bit_ind); + } + } else { + ASSERT(offset % 64 == 0); + ASSERT(count % 64 == 0); + // Check that all the bits are set + std::size_t remaining{count}; + std::size_t i = 0; + do { + if (bits[bit_ind + i++] != ~u64(0)) { + return false; + } + remaining -= 64; + } while (remaining > 0); + + // Clear the bits + remaining = count; + i = 0; + do { + bits[bit_ind + i] = 0; + ClearBit(depth - 1, bit_ind + i); + i++; + remaining -= 64; + } while (remaining > 0); + } + + num_bits -= count; + return true; + } + + private: + constexpr void SetBit(s32 depth, std::size_t offset) { + while (depth >= 0) { + const auto ind{offset / 64}; + const auto which{offset % 64}; + const u64 mask{1ULL << which}; + + u64* bit{std::addressof(bit_storages[depth][ind])}; + const u64 v{*bit}; + ASSERT((v & mask) == 0); + *bit = v | mask; + if (v) { + break; + } + offset = ind; + depth--; + } + } + + constexpr void ClearBit(s32 depth, std::size_t offset) { + while (depth >= 0) { + const auto ind{offset / 64}; + const auto which{offset % 64}; + const u64 mask{1ULL << which}; + + u64* bit{std::addressof(bit_storages[depth][ind])}; + u64 v{*bit}; + ASSERT((v & mask) != 0); + v &= ~mask; + *bit = v; + if (v) { + break; + } + offset = ind; + depth--; + } + } + + private: + static constexpr s32 GetRequiredDepth(std::size_t region_size) { + s32 depth = 0; + while (true) { + region_size /= 64; + depth++; + if (region_size == 0) { + return depth; + } + } + } + + public: + static constexpr std::size_t CalculateMetadataOverheadSize(std::size_t region_size) { + std::size_t overhead_bits = 0; + for (s32 depth{GetRequiredDepth(region_size) - 1}; depth >= 0; depth--) { + region_size = Common::AlignUp(region_size, 64) / 64; + overhead_bits += region_size; + } + return overhead_bits * sizeof(u64); + } + }; + + private: + Bitmap bitmap; + VAddr heap_address{}; + uintptr_t end_offset{}; + std::size_t block_shift{}; + std::size_t next_block_shift{}; + + public: + constexpr Block() = default; + + constexpr std::size_t GetShift() const { + return block_shift; + } + constexpr std::size_t GetNextShift() const { + return next_block_shift; + } + constexpr std::size_t GetSize() const { + return static_cast<std::size_t>(1) << GetShift(); + } + constexpr std::size_t GetNumPages() const { + return GetSize() / PageSize; + } + constexpr std::size_t GetNumFreeBlocks() const { + return bitmap.GetNumBits(); + } + constexpr std::size_t GetNumFreePages() const { + return GetNumFreeBlocks() * GetNumPages(); + } + + constexpr u64* Initialize(VAddr addr, std::size_t size, std::size_t bs, std::size_t nbs, + u64* bit_storage) { + // Set shifts + block_shift = bs; + next_block_shift = nbs; + + // Align up the address + VAddr end{addr + size}; + const auto align{(next_block_shift != 0) ? 
(1ULL << next_block_shift) + : (1ULL << block_shift)}; + addr = Common::AlignDown((addr), align); + end = Common::AlignUp((end), align); + + heap_address = addr; + end_offset = (end - addr) / (1ULL << block_shift); + return bitmap.Initialize(bit_storage, end_offset); + } + + constexpr VAddr PushBlock(VAddr address) { + // Set the bit for the free block + std::size_t offset{(address - heap_address) >> GetShift()}; + bitmap.SetBit(offset); + + // If we have a next shift, try to clear the blocks below and return the address + if (GetNextShift()) { + const auto diff{1ULL << (GetNextShift() - GetShift())}; + offset = Common::AlignDown(offset, diff); + if (bitmap.ClearRange(offset, diff)) { + return heap_address + (offset << GetShift()); + } + } + + // We couldn't coalesce, or we're already as big as possible + return 0; + } + + VAddr PopBlock() { + // Find a free block + const s64 soffset{bitmap.FindFreeBlock()}; + if (soffset < 0) { + return 0; + } + const auto offset{static_cast<std::size_t>(soffset)}; + + // Update our tracking and return it + bitmap.ClearBit(offset); + return heap_address + (offset << GetShift()); + } + + public: + static constexpr std::size_t CalculateMetadataOverheadSize(std::size_t region_size, + std::size_t cur_block_shift, + std::size_t next_block_shift) { + const auto cur_block_size{(1ULL << cur_block_shift)}; + const auto next_block_size{(1ULL << next_block_shift)}; + const auto align{(next_block_shift != 0) ? next_block_size : cur_block_size}; + return Bitmap::CalculateMetadataOverheadSize( + (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size); + } + }; + +public: + PageHeap() = default; + + constexpr VAddr GetAddress() const { + return heap_address; + } + constexpr std::size_t GetSize() const { + return heap_size; + } + constexpr VAddr GetEndAddress() const { + return GetAddress() + GetSize(); + } + constexpr std::size_t GetPageOffset(VAddr block) const { + return (block - GetAddress()) / PageSize; + } + + void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size); + VAddr AllocateBlock(s32 index); + void Free(VAddr addr, std::size_t num_pages); + + void UpdateUsedSize() { + used_size = heap_size - (GetNumFreePages() * PageSize); + } + + static std::size_t CalculateMetadataOverheadSize(std::size_t region_size); + +private: + constexpr std::size_t GetNumFreePages() const { + std::size_t num_free{}; + + for (const auto& block : blocks) { + num_free += block.GetNumFreePages(); + } + + return num_free; + } + + void FreeBlock(VAddr block, s32 index); + + VAddr heap_address{}; + std::size_t heap_size{}; + std::size_t used_size{}; + std::array<Block, NumMemoryBlockPageShifts> blocks{}; + std::vector<u64> metadata; +}; + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/page_linked_list.h b/src/core/hle/kernel/memory/page_linked_list.h new file mode 100644 index 000000000..0668d00c6 --- /dev/null +++ b/src/core/hle/kernel/memory/page_linked_list.h @@ -0,0 +1,93 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
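End to end, the heap above is given a metadata budget computed from the region size and is then seeded by freeing the whole range, exactly as MemoryManager::Impl::Initialize() does earlier in this diff. A condensed sketch (region_base and region_size are page-aligned placeholders):

    using Kernel::Memory::PageHeap;
    using Kernel::Memory::PageSize;
    PageHeap heap;
    heap.Initialize(region_base, region_size,
                    PageHeap::CalculateMetadataOverheadSize(region_size));
    heap.Free(region_base, region_size / PageSize); // seed: everything starts free
    heap.UpdateUsedSize();
    const VAddr block = heap.AllocateBlock(PageHeap::GetBlockIndex(16)); // a 16-page block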
+ +#pragma once + +#include <list> + +#include "common/assert.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "core/hle/kernel/memory/memory_types.h" +#include "core/hle/result.h" + +namespace Kernel::Memory { + +class PageLinkedList final { +public: + class Node final { + public: + constexpr Node(u64 addr, std::size_t num_pages) : addr{addr}, num_pages{num_pages} {} + + constexpr u64 GetAddress() const { + return addr; + } + + constexpr std::size_t GetNumPages() const { + return num_pages; + } + + private: + u64 addr{}; + std::size_t num_pages{}; + }; + +public: + PageLinkedList() = default; + PageLinkedList(u64 address, u64 num_pages) { + ASSERT(AddBlock(address, num_pages).IsSuccess()); + } + + constexpr std::list<Node>& Nodes() { + return nodes; + } + + constexpr const std::list<Node>& Nodes() const { + return nodes; + } + + std::size_t GetNumPages() const { + std::size_t num_pages = 0; + for (const Node& node : nodes) { + num_pages += node.GetNumPages(); + } + return num_pages; + } + + bool IsEqual(PageLinkedList& other) const { + auto this_node = nodes.begin(); + auto other_node = other.nodes.begin(); + while (this_node != nodes.end() && other_node != other.nodes.end()) { + if (this_node->GetAddress() != other_node->GetAddress() || + this_node->GetNumPages() != other_node->GetNumPages()) { + return false; + } + this_node = std::next(this_node); + other_node = std::next(other_node); + } + + return this_node == nodes.end() && other_node == other.nodes.end(); + } + + ResultCode AddBlock(u64 address, u64 num_pages) { + if (!num_pages) { + return RESULT_SUCCESS; + } + if (!nodes.empty()) { + const auto node = nodes.back(); + if (node.GetAddress() + node.GetNumPages() * PageSize == address) { + address = node.GetAddress(); + num_pages += node.GetNumPages(); + nodes.pop_back(); + } + } + nodes.push_back({address, num_pages}); + return RESULT_SUCCESS; + } + +private: + std::list<Node> nodes; +}; + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/memory/page_table.cpp new file mode 100644 index 000000000..091e52ca4 --- /dev/null +++ b/src/core/hle/kernel/memory/page_table.cpp @@ -0,0 +1,1130 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
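AddBlock() in the page_linked_list.h header just shown coalesces a block that starts exactly where the last node ends, so physically contiguous allocations collapse into a single node. For example (PageSize is 0x1000):

    Kernel::Memory::PageLinkedList list;
    ASSERT(list.AddBlock(0x10000, 1).IsSuccess());
    ASSERT(list.AddBlock(0x11000, 1).IsSuccess()); // contiguous: merged with the node above
    // list.Nodes().size() == 1 and list.GetNumPages() == 2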
+ +#include "common/alignment.h" +#include "common/assert.h" +#include "common/scope_exit.h" +#include "core/core.h" +#include "core/device_memory.h" +#include "core/hle/kernel/errors.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/memory/address_space_info.h" +#include "core/hle/kernel/memory/memory_block.h" +#include "core/hle/kernel/memory/memory_block_manager.h" +#include "core/hle/kernel/memory/page_linked_list.h" +#include "core/hle/kernel/memory/page_table.h" +#include "core/hle/kernel/memory/system_control.h" +#include "core/hle/kernel/process.h" +#include "core/hle/kernel/resource_limit.h" +#include "core/memory.h" + +namespace Kernel::Memory { + +namespace { + +constexpr std::size_t GetAddressSpaceWidthFromType(FileSys::ProgramAddressSpaceType as_type) { + switch (as_type) { + case FileSys::ProgramAddressSpaceType::Is32Bit: + case FileSys::ProgramAddressSpaceType::Is32BitNoMap: + return 32; + case FileSys::ProgramAddressSpaceType::Is36Bit: + return 36; + case FileSys::ProgramAddressSpaceType::Is39Bit: + return 39; + default: + UNREACHABLE(); + return {}; + } +} + +constexpr u64 GetAddressInRange(const MemoryInfo& info, VAddr addr) { + if (info.GetAddress() < addr) { + return addr; + } + return info.GetAddress(); +} + +constexpr std::size_t GetSizeInRange(const MemoryInfo& info, VAddr start, VAddr end) { + std::size_t size{info.GetSize()}; + if (info.GetAddress() < start) { + size -= start - info.GetAddress(); + } + if (info.GetEndAddress() > end) { + size -= info.GetEndAddress() - end; + } + return size; +} + +} // namespace + +PageTable::PageTable(Core::System& system) : system{system} {} + +ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_type, + bool enable_aslr, VAddr code_addr, std::size_t code_size, + Memory::MemoryManager::Pool pool) { + + const auto GetSpaceStart = [this](AddressSpaceInfo::Type type) { + return AddressSpaceInfo::GetAddressSpaceStart(address_space_width, type); + }; + const auto GetSpaceSize = [this](AddressSpaceInfo::Type type) { + return AddressSpaceInfo::GetAddressSpaceSize(address_space_width, type); + }; + + // Set our width and heap/alias sizes + address_space_width = GetAddressSpaceWidthFromType(as_type); + const VAddr start = 0; + const VAddr end{1ULL << address_space_width}; + std::size_t alias_region_size{GetSpaceSize(AddressSpaceInfo::Type::Alias)}; + std::size_t heap_region_size{GetSpaceSize(AddressSpaceInfo::Type::Heap)}; + + ASSERT(start <= code_addr); + ASSERT(code_addr < code_addr + code_size); + ASSERT(code_addr + code_size - 1 <= end - 1); + + // Adjust heap/alias size if we don't have an alias region + if (as_type == FileSys::ProgramAddressSpaceType::Is32BitNoMap) { + heap_region_size += alias_region_size; + alias_region_size = 0; + } + + // Set code regions and determine remaining + constexpr std::size_t RegionAlignment{2 * 1024 * 1024}; + VAddr process_code_start{}; + VAddr process_code_end{}; + std::size_t stack_region_size{}; + std::size_t kernel_map_region_size{}; + + if (address_space_width == 39) { + alias_region_size = GetSpaceSize(AddressSpaceInfo::Type::Alias); + heap_region_size = GetSpaceSize(AddressSpaceInfo::Type::Heap); + stack_region_size = GetSpaceSize(AddressSpaceInfo::Type::Stack); + kernel_map_region_size = GetSpaceSize(AddressSpaceInfo::Type::Is32Bit); + code_region_start = GetSpaceStart(AddressSpaceInfo::Type::Large64Bit); + code_region_end = code_region_start + GetSpaceSize(AddressSpaceInfo::Type::Large64Bit); + alias_code_region_start = code_region_start; + 
alias_code_region_end = code_region_end; + process_code_start = Common::AlignDown(code_addr, RegionAlignment); + process_code_end = Common::AlignUp(code_addr + code_size, RegionAlignment); + } else { + stack_region_size = 0; + kernel_map_region_size = 0; + code_region_start = GetSpaceStart(AddressSpaceInfo::Type::Is32Bit); + code_region_end = code_region_start + GetSpaceSize(AddressSpaceInfo::Type::Is32Bit); + stack_region_start = code_region_start; + alias_code_region_start = code_region_start; + alias_code_region_end = GetSpaceStart(AddressSpaceInfo::Type::Small64Bit) + + GetSpaceSize(AddressSpaceInfo::Type::Small64Bit); + stack_region_end = code_region_end; + kernel_map_region_start = code_region_start; + kernel_map_region_end = code_region_end; + process_code_start = code_region_start; + process_code_end = code_region_end; + } + + // Set other basic fields + is_aslr_enabled = enable_aslr; + address_space_start = start; + address_space_end = end; + is_kernel = false; + + // Determine the region in which we can place the as-yet-undetermined regions + VAddr alloc_start{}; + std::size_t alloc_size{}; + if ((process_code_start - code_region_start) >= (end - process_code_end)) { + alloc_start = code_region_start; + alloc_size = process_code_start - code_region_start; + } else { + alloc_start = process_code_end; + alloc_size = end - process_code_end; + } + const std::size_t needed_size{ + (alias_region_size + heap_region_size + stack_region_size + kernel_map_region_size)}; + if (alloc_size < needed_size) { + UNREACHABLE(); + return ERR_OUT_OF_MEMORY; + } + + const std::size_t remaining_size{alloc_size - needed_size}; + + // Determine random placements for each region + std::size_t alias_rnd{}, heap_rnd{}, stack_rnd{}, kmap_rnd{}; + if (enable_aslr) { + alias_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * + RegionAlignment; + heap_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * + RegionAlignment; + stack_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * + RegionAlignment; + kmap_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * + RegionAlignment; + } + + // Set up the heap and alias regions + alias_region_start = alloc_start + alias_rnd; + alias_region_end = alias_region_start + alias_region_size; + heap_region_start = alloc_start + heap_rnd; + heap_region_end = heap_region_start + heap_region_size; + + if (alias_rnd <= heap_rnd) { + heap_region_start += alias_region_size; + heap_region_end += alias_region_size; + } else { + alias_region_start += heap_region_size; + alias_region_end += heap_region_size; + } + + // Set up the stack region + if (stack_region_size) { + stack_region_start = alloc_start + stack_rnd; + stack_region_end = stack_region_start + stack_region_size; + + if (alias_rnd < stack_rnd) { + stack_region_start += alias_region_size; + stack_region_end += alias_region_size; + } else { + alias_region_start += stack_region_size; + alias_region_end += stack_region_size; + } + + if (heap_rnd < stack_rnd) { + stack_region_start += heap_region_size; + stack_region_end += heap_region_size; + } else { + heap_region_start += stack_region_size; + heap_region_end += stack_region_size; + } + } + + // Set up the kernel map region + if (kernel_map_region_size) { + kernel_map_region_start = alloc_start + kmap_rnd; + kernel_map_region_end = kernel_map_region_start + kernel_map_region_size; + + if (alias_rnd < kmap_rnd) { + kernel_map_region_start += alias_region_size; + kernel_map_region_end +=
alias_region_size; + } else { + alias_region_start += kernel_map_region_size; + alias_region_end += kernel_map_region_size; + } + + if (heap_rnd < kmap_rnd) { + kernel_map_region_start += heap_region_size; + kernel_map_region_end += heap_region_size; + } else { + heap_region_start += kernel_map_region_size; + heap_region_end += kernel_map_region_size; + } + + if (stack_region_size) { + if (stack_rnd < kmap_rnd) { + kernel_map_region_start += stack_region_size; + kernel_map_region_end += stack_region_size; + } else { + stack_region_start += kernel_map_region_size; + stack_region_end += kernel_map_region_size; + } + } + } + + // Set heap members + current_heap_end = heap_region_start; + max_heap_size = 0; + max_physical_memory_size = 0; + + // Ensure that the selected regions are inside our address space + auto IsInAddressSpace = [&](VAddr addr) { + return address_space_start <= addr && addr <= address_space_end; + }; + ASSERT(IsInAddressSpace(alias_region_start)); + ASSERT(IsInAddressSpace(alias_region_end)); + ASSERT(IsInAddressSpace(heap_region_start)); + ASSERT(IsInAddressSpace(heap_region_end)); + ASSERT(IsInAddressSpace(stack_region_start)); + ASSERT(IsInAddressSpace(stack_region_end)); + ASSERT(IsInAddressSpace(kernel_map_region_start)); + ASSERT(IsInAddressSpace(kernel_map_region_end)); + + // Ensure that the selected regions don't overlap + const VAddr alias_start{alias_region_start}; + const VAddr alias_last{alias_region_end - 1}; + const VAddr heap_start{heap_region_start}; + const VAddr heap_last{heap_region_end - 1}; + const VAddr stack_start{stack_region_start}; + const VAddr stack_last{stack_region_end - 1}; + const VAddr kmap_start{kernel_map_region_start}; + const VAddr kmap_last{kernel_map_region_end - 1}; + ASSERT(alias_last < heap_start || heap_last < alias_start); + ASSERT(alias_last < stack_start || stack_last < alias_start); + ASSERT(alias_last < kmap_start || kmap_last < alias_start); + ASSERT(heap_last < stack_start || stack_last < heap_start); + ASSERT(heap_last < kmap_start || kmap_last < heap_start); + + current_heap_addr = heap_region_start; + heap_capacity = 0; + physical_memory_usage = 0; + memory_pool = pool; + + page_table_impl.Resize(address_space_width, PageBits, true); + + return InitializeMemoryLayout(start, end); +} + +ResultCode PageTable::MapProcessCode(VAddr addr, std::size_t num_pages, MemoryState state, + MemoryPermission perm) { + std::lock_guard lock{page_table_lock}; + + const u64 size{num_pages * PageSize}; + + if (!CanContain(addr, size, state)) { + return ERR_INVALID_ADDRESS_STATE; + } + + if (IsRegionMapped(addr, size)) { + return ERR_INVALID_ADDRESS_STATE; + } + + PageLinkedList page_linked_list; + CASCADE_CODE( + system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool)); + CASCADE_CODE(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup)); + + block_manager->Update(addr, num_pages, state, perm); + + return RESULT_SUCCESS; +} + +ResultCode PageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { + std::lock_guard lock{page_table_lock}; + + const std::size_t num_pages{size / PageSize}; + + MemoryState state{}; + MemoryPermission perm{}; + CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, src_addr, size, MemoryState::All, + MemoryState::Normal, MemoryPermission::Mask, + MemoryPermission::ReadAndWrite, MemoryAttribute::Mask, + MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); + + if (IsRegionMapped(dst_addr, size)) { + return ERR_INVALID_ADDRESS_STATE; + } + +
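// Gather the physical pages that currently back the source region so the + // same frames can be aliased at the destination. +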
PageLinkedList page_linked_list; + AddRegionToPages(src_addr, num_pages, page_linked_list); + + { + auto block_guard = detail::ScopeExit( + [&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); }); + + CASCADE_CODE( + Operate(src_addr, num_pages, MemoryPermission::None, OperationType::ChangePermissions)); + CASCADE_CODE(MapPages(dst_addr, page_linked_list, MemoryPermission::None)); + + block_guard.Cancel(); + } + + block_manager->Update(src_addr, num_pages, state, MemoryPermission::None, + MemoryAttribute::Locked); + block_manager->Update(dst_addr, num_pages, MemoryState::AliasCode); + + return RESULT_SUCCESS; +} + +ResultCode PageTable::UnmapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { + std::lock_guard lock{page_table_lock}; + + if (!size) { + return RESULT_SUCCESS; + } + + const std::size_t num_pages{size / PageSize}; + + CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, src_addr, size, MemoryState::All, + MemoryState::Normal, MemoryPermission::None, + MemoryPermission::None, MemoryAttribute::Mask, + MemoryAttribute::Locked, MemoryAttribute::IpcAndDeviceMapped)); + + MemoryState state{}; + CASCADE_CODE(CheckMemoryState( + &state, nullptr, nullptr, dst_addr, PageSize, MemoryState::FlagCanCodeAlias, + MemoryState::FlagCanCodeAlias, MemoryPermission::None, MemoryPermission::None, + MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); + CASCADE_CODE(CheckMemoryState(dst_addr, size, MemoryState::All, state, MemoryPermission::None, + MemoryPermission::None, MemoryAttribute::Mask, + MemoryAttribute::None)); + CASCADE_CODE(Operate(dst_addr, num_pages, MemoryPermission::None, OperationType::Unmap)); + + block_manager->Update(dst_addr, num_pages, MemoryState::Free); + block_manager->Update(src_addr, num_pages, MemoryState::Normal, MemoryPermission::ReadAndWrite); + + return RESULT_SUCCESS; +} + +void PageTable::MapPhysicalMemory(PageLinkedList& page_linked_list, VAddr start, VAddr end) { + auto node{page_linked_list.Nodes().begin()}; + PAddr map_addr{node->GetAddress()}; + std::size_t src_num_pages{node->GetNumPages()}; + + block_manager->IterateForRange(start, end, [&](const MemoryInfo& info) { + if (info.state != MemoryState::Free) { + return; + } + + std::size_t dst_num_pages{GetSizeInRange(info, start, end) / PageSize}; + VAddr dst_addr{GetAddressInRange(info, start)}; + + while (dst_num_pages) { + if (!src_num_pages) { + node = std::next(node); + map_addr = node->GetAddress(); + src_num_pages = node->GetNumPages(); + } + + const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)}; + Operate(dst_addr, num_pages, MemoryPermission::ReadAndWrite, OperationType::Map, + map_addr); + + dst_addr += num_pages * PageSize; + map_addr += num_pages * PageSize; + src_num_pages -= num_pages; + dst_num_pages -= num_pages; + } + }); +} + +ResultCode PageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { + std::lock_guard lock{page_table_lock}; + + std::size_t mapped_size{}; + const VAddr end_addr{addr + size}; + + block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) { + if (info.state != MemoryState::Free) { + mapped_size += GetSizeInRange(info, addr, end_addr); + } + }); + + if (mapped_size == size) { + return RESULT_SUCCESS; + } + + auto process{system.Kernel().CurrentProcess()}; + const std::size_t remaining_size{size - mapped_size}; + const std::size_t remaining_pages{remaining_size / PageSize}; + + if (process->GetResourceLimit() && + 
!process->GetResourceLimit()->Reserve(ResourceType::PhysicalMemory, remaining_size)) { + return ERR_RESOURCE_LIMIT_EXCEEDED; + } + + PageLinkedList page_linked_list; + { + auto block_guard = detail::ScopeExit([&] { + system.Kernel().MemoryManager().Free(page_linked_list, remaining_pages, memory_pool); + process->GetResourceLimit()->Release(ResourceType::PhysicalMemory, remaining_size); + }); + + CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, + memory_pool)); + + block_guard.Cancel(); + } + + MapPhysicalMemory(page_linked_list, addr, end_addr); + + physical_memory_usage += remaining_size; + + const std::size_t num_pages{size / PageSize}; + block_manager->Update(addr, num_pages, MemoryState::Free, MemoryPermission::None, + MemoryAttribute::None, MemoryState::Normal, + MemoryPermission::ReadAndWrite, MemoryAttribute::None); + + return RESULT_SUCCESS; +} + +ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) { + std::lock_guard lock{page_table_lock}; + + const VAddr end_addr{addr + size}; + ResultCode result{RESULT_SUCCESS}; + std::size_t mapped_size{}; + + // Verify that the region can be unmapped + block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) { + if (info.state == MemoryState::Normal) { + if (info.attribute != MemoryAttribute::None) { + result = ERR_INVALID_ADDRESS_STATE; + return; + } + mapped_size += GetSizeInRange(info, addr, end_addr); + } else if (info.state != MemoryState::Free) { + result = ERR_INVALID_ADDRESS_STATE; + } + }); + + if (result.IsError()) { + return result; + } + + if (!mapped_size) { + return RESULT_SUCCESS; + } + + CASCADE_CODE(UnmapMemory(addr, size)); + + auto process{system.Kernel().CurrentProcess()}; + process->GetResourceLimit()->Release(ResourceType::PhysicalMemory, mapped_size); + physical_memory_usage -= mapped_size; + + return RESULT_SUCCESS; +} + +ResultCode PageTable::UnmapMemory(VAddr addr, std::size_t size) { + std::lock_guard lock{page_table_lock}; + + const VAddr end_addr{addr + size}; + ResultCode result{RESULT_SUCCESS}; + PageLinkedList page_linked_list; + + // Unmap each region within the range + block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) { + if (info.state == MemoryState::Normal) { + const std::size_t block_size{GetSizeInRange(info, addr, end_addr)}; + const std::size_t block_num_pages{block_size / PageSize}; + const VAddr block_addr{GetAddressInRange(info, addr)}; + + AddRegionToPages(block_addr, block_size / PageSize, page_linked_list); + + if (result = Operate(block_addr, block_num_pages, MemoryPermission::None, + OperationType::Unmap); + result.IsError()) { + return; + } + } + }); + + if (result.IsError()) { + return result; + } + + const std::size_t num_pages{size / PageSize}; + system.Kernel().MemoryManager().Free(page_linked_list, num_pages, memory_pool); + + block_manager->Update(addr, num_pages, MemoryState::Free); + + return RESULT_SUCCESS; +} + +ResultCode PageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) { + std::lock_guard lock{page_table_lock}; + + MemoryState src_state{}; + CASCADE_CODE(CheckMemoryState( + &src_state, nullptr, nullptr, src_addr, size, MemoryState::FlagCanAlias, + MemoryState::FlagCanAlias, MemoryPermission::Mask, MemoryPermission::ReadAndWrite, + MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); + + if (IsRegionMapped(dst_addr, size)) { + return ERR_INVALID_ADDRESS_STATE; + } + + PageLinkedList page_linked_list; + const std::size_t num_pages{size / 
PageSize}; + + AddRegionToPages(src_addr, num_pages, page_linked_list); + + { + auto block_guard = detail::ScopeExit([&] { + Operate(src_addr, num_pages, MemoryPermission::ReadAndWrite, + OperationType::ChangePermissions); + }); + + CASCADE_CODE( + Operate(src_addr, num_pages, MemoryPermission::None, OperationType::ChangePermissions)); + CASCADE_CODE(MapPages(dst_addr, page_linked_list, MemoryPermission::ReadAndWrite)); + + block_guard.Cancel(); + } + + block_manager->Update(src_addr, num_pages, src_state, MemoryPermission::None, + MemoryAttribute::Locked); + block_manager->Update(dst_addr, num_pages, MemoryState::Stack, MemoryPermission::ReadAndWrite); + + return RESULT_SUCCESS; +} + +ResultCode PageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) { + std::lock_guard lock{page_table_lock}; + + MemoryState src_state{}; + CASCADE_CODE(CheckMemoryState( + &src_state, nullptr, nullptr, src_addr, size, MemoryState::FlagCanAlias, + MemoryState::FlagCanAlias, MemoryPermission::Mask, MemoryPermission::None, + MemoryAttribute::Mask, MemoryAttribute::Locked, MemoryAttribute::IpcAndDeviceMapped)); + + MemoryPermission dst_perm{}; + CASCADE_CODE(CheckMemoryState(nullptr, &dst_perm, nullptr, dst_addr, size, MemoryState::All, + MemoryState::Stack, MemoryPermission::None, + MemoryPermission::None, MemoryAttribute::Mask, + MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); + + PageLinkedList src_pages; + PageLinkedList dst_pages; + const std::size_t num_pages{size / PageSize}; + + AddRegionToPages(src_addr, num_pages, src_pages); + AddRegionToPages(dst_addr, num_pages, dst_pages); + + if (!dst_pages.IsEqual(src_pages)) { + return ERR_INVALID_MEMORY_RANGE; + } + + { + auto block_guard = detail::ScopeExit([&] { MapPages(dst_addr, dst_pages, dst_perm); }); + + CASCADE_CODE(Operate(dst_addr, num_pages, MemoryPermission::None, OperationType::Unmap)); + CASCADE_CODE(Operate(src_addr, num_pages, MemoryPermission::ReadAndWrite, + OperationType::ChangePermissions)); + + block_guard.Cancel(); + } + + block_manager->Update(src_addr, num_pages, src_state, MemoryPermission::ReadAndWrite); + block_manager->Update(dst_addr, num_pages, MemoryState::Free); + + return RESULT_SUCCESS; +} + +ResultCode PageTable::MapPages(VAddr addr, const PageLinkedList& page_linked_list, + MemoryPermission perm) { + VAddr cur_addr{addr}; + + for (const auto& node : page_linked_list.Nodes()) { + if (const auto result{ + Operate(cur_addr, node.GetNumPages(), perm, OperationType::Map, node.GetAddress())}; + result.IsError()) { + const MemoryInfo info{block_manager->FindBlock(cur_addr).GetMemoryInfo()}; + const std::size_t num_pages{(addr - cur_addr) / PageSize}; + + ASSERT( + Operate(addr, num_pages, MemoryPermission::None, OperationType::Unmap).IsSuccess()); + + return result; + } + + cur_addr += node.GetNumPages() * PageSize; + } + + return RESULT_SUCCESS; +} + +ResultCode PageTable::MapPages(VAddr addr, PageLinkedList& page_linked_list, MemoryState state, + MemoryPermission perm) { + std::lock_guard lock{page_table_lock}; + + const std::size_t num_pages{page_linked_list.GetNumPages()}; + const std::size_t size{num_pages * PageSize}; + + if (!CanContain(addr, size, state)) { + return ERR_INVALID_ADDRESS_STATE; + } + + if (IsRegionMapped(addr, num_pages * PageSize)) { + return ERR_INVALID_ADDRESS_STATE; + } + + CASCADE_CODE(MapPages(addr, page_linked_list, perm)); + + block_manager->Update(addr, num_pages, state, perm); + + return RESULT_SUCCESS; +} + +ResultCode PageTable::SetCodeMemoryPermission(VAddr addr, 
std::size_t size, MemoryPermission perm) { + + std::lock_guard lock{page_table_lock}; + + MemoryState prev_state{}; + MemoryPermission prev_perm{}; + + CASCADE_CODE(CheckMemoryState( + &prev_state, &prev_perm, nullptr, addr, size, MemoryState::FlagCode, MemoryState::FlagCode, + MemoryPermission::None, MemoryPermission::None, MemoryAttribute::Mask, + MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); + + MemoryState state{prev_state}; + + // Ensure state is mutable if permission allows write + if ((perm & MemoryPermission::Write) != MemoryPermission::None) { + if (prev_state == MemoryState::Code) { + state = MemoryState::CodeData; + } else if (prev_state == MemoryState::AliasCode) { + state = MemoryState::AliasCodeData; + } else { + UNREACHABLE(); + } + } + + // Return early if there is nothing to change + if (state == prev_state && perm == prev_perm) { + return RESULT_SUCCESS; + } + + const std::size_t num_pages{size / PageSize}; + const OperationType operation{(perm & MemoryPermission::Execute) != MemoryPermission::None + ? OperationType::ChangePermissionsAndRefresh + : OperationType::ChangePermissions}; + + CASCADE_CODE(Operate(addr, num_pages, perm, operation)); + + block_manager->Update(addr, num_pages, state, perm); + + return RESULT_SUCCESS; +} + +MemoryInfo PageTable::QueryInfoImpl(VAddr addr) { + std::lock_guard lock{page_table_lock}; + + return block_manager->FindBlock(addr).GetMemoryInfo(); +} + +MemoryInfo PageTable::QueryInfo(VAddr addr) { + if (!Contains(addr, 1)) { + return {address_space_end, 0 - address_space_end, MemoryState::Inaccessible, + MemoryPermission::None, MemoryAttribute::None, MemoryPermission::None}; + } + + return QueryInfoImpl(addr); +} + +ResultCode PageTable::ReserveTransferMemory(VAddr addr, std::size_t size, MemoryPermission perm) { + std::lock_guard lock{page_table_lock}; + + MemoryState state{}; + MemoryAttribute attribute{}; + + CASCADE_CODE(CheckMemoryState(&state, nullptr, &attribute, addr, size, + MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted, + MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted, + MemoryPermission::Mask, MemoryPermission::ReadAndWrite, + MemoryAttribute::Mask, MemoryAttribute::None, + MemoryAttribute::IpcAndDeviceMapped)); + + block_manager->Update(addr, size / PageSize, state, perm, attribute | MemoryAttribute::Locked); + + return RESULT_SUCCESS; +} + +ResultCode PageTable::ResetTransferMemory(VAddr addr, std::size_t size) { + std::lock_guard lock{page_table_lock}; + + MemoryState state{}; + + CASCADE_CODE(CheckMemoryState(&state, nullptr, nullptr, addr, size, + MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted, + MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted, + MemoryPermission::None, MemoryPermission::None, + MemoryAttribute::Mask, MemoryAttribute::Locked, + MemoryAttribute::IpcAndDeviceMapped)); + + block_manager->Update(addr, size / PageSize, state, MemoryPermission::ReadAndWrite); + + return RESULT_SUCCESS; +} + +ResultCode PageTable::SetMemoryAttribute(VAddr addr, std::size_t size, MemoryAttribute mask, + MemoryAttribute value) { + std::lock_guard lock{page_table_lock}; + + MemoryState state{}; + MemoryPermission perm{}; + MemoryAttribute attribute{}; + + CASCADE_CODE(CheckMemoryState(&state, &perm, &attribute, addr, size, + MemoryState::FlagCanChangeAttribute, + MemoryState::FlagCanChangeAttribute, MemoryPermission::None, + MemoryPermission::None, MemoryAttribute::LockedAndIpcLocked, + MemoryAttribute::None, 
MemoryAttribute::DeviceSharedAndUncached)); + + attribute = attribute & ~mask; + attribute = attribute | (mask & value); + + block_manager->Update(addr, size / PageSize, state, perm, attribute); + + return RESULT_SUCCESS; +} + +ResultCode PageTable::SetHeapCapacity(std::size_t new_heap_capacity) { + std::lock_guard lock{page_table_lock}; + heap_capacity = new_heap_capacity; + return RESULT_SUCCESS; +} + +ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) { + + if (size > heap_region_end - heap_region_start) { + return ERR_OUT_OF_MEMORY; + } + + const u64 previous_heap_size{GetHeapSize()}; + + UNIMPLEMENTED_IF_MSG(previous_heap_size > size, "Heap shrink is unimplemented"); + + // Increase the heap size + { + std::lock_guard lock{page_table_lock}; + + const u64 delta{size - previous_heap_size}; + + auto process{system.Kernel().CurrentProcess()}; + if (process->GetResourceLimit() && delta != 0 && + !process->GetResourceLimit()->Reserve(ResourceType::PhysicalMemory, delta)) { + return ERR_RESOURCE_LIMIT_EXCEEDED; + } + + PageLinkedList page_linked_list; + const std::size_t num_pages{delta / PageSize}; + + CASCADE_CODE( + system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool)); + + if (IsRegionMapped(current_heap_addr, delta)) { + return ERR_INVALID_ADDRESS_STATE; + } + + CASCADE_CODE( + Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup)); + + block_manager->Update(current_heap_addr, num_pages, MemoryState::Normal, + MemoryPermission::ReadAndWrite); + + current_heap_addr = heap_region_start + size; + } + + return MakeResult<VAddr>(heap_region_start); +} + +ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align, + bool is_map_only, VAddr region_start, + std::size_t region_num_pages, MemoryState state, + MemoryPermission perm, PAddr map_addr) { + std::lock_guard lock{page_table_lock}; + + if (!CanContain(region_start, region_num_pages * PageSize, state)) { + return ERR_INVALID_ADDRESS_STATE; + } + + if (region_num_pages <= needed_num_pages) { + return ERR_OUT_OF_MEMORY; + } + + const VAddr addr{ + AllocateVirtualMemory(region_start, region_num_pages, needed_num_pages, align)}; + if (!addr) { + return ERR_OUT_OF_MEMORY; + } + + if (is_map_only) { + CASCADE_CODE(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); + } else { + PageLinkedList page_group; + CASCADE_CODE( + system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool)); + CASCADE_CODE(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); + } + + block_manager->Update(addr, needed_num_pages, state, perm); + + return MakeResult<VAddr>(addr); +} + +ResultCode PageTable::InitializeMemoryLayout(VAddr start, VAddr end) { + block_manager = std::make_unique<MemoryBlockManager>(start, end); + + return RESULT_SUCCESS; +} + +bool PageTable::IsRegionMapped(VAddr address, u64 size) { + return CheckMemoryState(address, size, MemoryState::All, MemoryState::Free, + MemoryPermission::Mask, MemoryPermission::None, MemoryAttribute::Mask, + MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped) + .IsError(); +} + +bool PageTable::IsRegionContiguous(VAddr addr, u64 size) const { + auto start_ptr = system.Memory().GetPointer(addr); + for (u64 offset{}; offset < size; offset += PageSize) { + if (start_ptr != system.Memory().GetPointer(addr + offset)) { + return false; + } + start_ptr += PageSize; + } + return true; +} + +void PageTable::AddRegionToPages(VAddr start, std::size_t num_pages, + 
PageLinkedList& page_linked_list) { + VAddr addr{start}; + while (addr < start + (num_pages * PageSize)) { + const PAddr paddr{GetPhysicalAddr(addr)}; + if (!paddr) { + UNREACHABLE(); + } + page_linked_list.AddBlock(paddr, 1); + addr += PageSize; + } +} + +VAddr PageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_pages, + u64 needed_num_pages, std::size_t align) { + if (is_aslr_enabled) { + UNIMPLEMENTED(); + } + return block_manager->FindFreeArea(start, region_num_pages, needed_num_pages, align, 0, + IsKernel() ? 1 : 4); +} + +ResultCode PageTable::Operate(VAddr addr, std::size_t num_pages, const PageLinkedList& page_group, + OperationType operation) { + std::lock_guard lock{page_table_lock}; + + ASSERT(Common::IsAligned(addr, PageSize)); + ASSERT(num_pages > 0); + ASSERT(num_pages == page_group.GetNumPages()); + + for (const auto& node : page_group.Nodes()) { + const std::size_t size{node.GetNumPages() * PageSize}; + + switch (operation) { + case OperationType::MapGroup: + system.Memory().MapMemoryRegion(page_table_impl, addr, size, node.GetAddress()); + break; + default: + UNREACHABLE(); + } + + addr += size; + } + + return RESULT_SUCCESS; +} + +ResultCode PageTable::Operate(VAddr addr, std::size_t num_pages, MemoryPermission perm, + OperationType operation, PAddr map_addr) { + std::lock_guard lock{page_table_lock}; + + ASSERT(num_pages > 0); + ASSERT(Common::IsAligned(addr, PageSize)); + ASSERT(ContainsPages(addr, num_pages)); + + switch (operation) { + case OperationType::Unmap: + system.Memory().UnmapRegion(page_table_impl, addr, num_pages * PageSize); + break; + case OperationType::Map: { + ASSERT(map_addr); + ASSERT(Common::IsAligned(map_addr, PageSize)); + system.Memory().MapMemoryRegion(page_table_impl, addr, num_pages * PageSize, map_addr); + break; + } + case OperationType::ChangePermissions: + case OperationType::ChangePermissionsAndRefresh: + break; + default: + UNREACHABLE(); + } + return RESULT_SUCCESS; +} + +constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const { + switch (state) { + case MemoryState::Free: + case MemoryState::Kernel: + return address_space_start; + case MemoryState::Normal: + return heap_region_start; + case MemoryState::Ipc: + case MemoryState::NonSecureIpc: + case MemoryState::NonDeviceIpc: + return alias_region_start; + case MemoryState::Stack: + return stack_region_start; + case MemoryState::Io: + case MemoryState::Static: + case MemoryState::ThreadLocal: + return kernel_map_region_start; + case MemoryState::Shared: + case MemoryState::AliasCode: + case MemoryState::AliasCodeData: + case MemoryState::Transfered: + case MemoryState::SharedTransfered: + case MemoryState::SharedCode: + case MemoryState::GeneratedCode: + case MemoryState::CodeOut: + return alias_code_region_start; + case MemoryState::Code: + case MemoryState::CodeData: + return code_region_start; + default: + UNREACHABLE(); + return {}; + } +} + +constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const { + switch (state) { + case MemoryState::Free: + case MemoryState::Kernel: + return address_space_end - address_space_start; + case MemoryState::Normal: + return heap_region_end - heap_region_start; + case MemoryState::Ipc: + case MemoryState::NonSecureIpc: + case MemoryState::NonDeviceIpc: + return alias_region_end - alias_region_start; + case MemoryState::Stack: + return stack_region_end - stack_region_start; + case MemoryState::Io: + case MemoryState::Static: + case MemoryState::ThreadLocal: + return kernel_map_region_end - 
kernel_map_region_start; + case MemoryState::Shared: + case MemoryState::AliasCode: + case MemoryState::AliasCodeData: + case MemoryState::Transfered: + case MemoryState::SharedTransfered: + case MemoryState::SharedCode: + case MemoryState::GeneratedCode: + case MemoryState::CodeOut: + return alias_code_region_end - alias_code_region_start; + case MemoryState::Code: + case MemoryState::CodeData: + return code_region_end - code_region_start; + default: + UNREACHABLE(); + return {}; + } +} + +constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState state) const { + const VAddr end{addr + size}; + const VAddr last{end - 1}; + const VAddr region_start{GetRegionAddress(state)}; + const std::size_t region_size{GetRegionSize(state)}; + const bool is_in_region{region_start <= addr && addr < end && + last <= region_start + region_size - 1}; + const bool is_in_heap{!(end <= heap_region_start || heap_region_end <= addr)}; + const bool is_in_alias{!(end <= alias_region_start || alias_region_end <= addr)}; + + switch (state) { + case MemoryState::Free: + case MemoryState::Kernel: + return is_in_region; + case MemoryState::Io: + case MemoryState::Static: + case MemoryState::Code: + case MemoryState::CodeData: + case MemoryState::Shared: + case MemoryState::AliasCode: + case MemoryState::AliasCodeData: + case MemoryState::Stack: + case MemoryState::ThreadLocal: + case MemoryState::Transfered: + case MemoryState::SharedTransfered: + case MemoryState::SharedCode: + case MemoryState::GeneratedCode: + case MemoryState::CodeOut: + return is_in_region && !is_in_heap && !is_in_alias; + case MemoryState::Normal: + ASSERT(is_in_heap); + return is_in_region && !is_in_alias; + case MemoryState::Ipc: + case MemoryState::NonSecureIpc: + case MemoryState::NonDeviceIpc: + ASSERT(is_in_alias); + return is_in_region && !is_in_heap; + default: + return false; + } +} + +constexpr ResultCode PageTable::CheckMemoryState(const MemoryInfo& info, MemoryState state_mask, + MemoryState state, MemoryPermission perm_mask, + MemoryPermission perm, MemoryAttribute attr_mask, + MemoryAttribute attr) const { + // Validate the states match expectation + if ((info.state & state_mask) != state) { + return ERR_INVALID_ADDRESS_STATE; + } + if ((info.perm & perm_mask) != perm) { + return ERR_INVALID_ADDRESS_STATE; + } + if ((info.attribute & attr_mask) != attr) { + return ERR_INVALID_ADDRESS_STATE; + } + + return RESULT_SUCCESS; +} + +ResultCode PageTable::CheckMemoryState(MemoryState* out_state, MemoryPermission* out_perm, + MemoryAttribute* out_attr, VAddr addr, std::size_t size, + MemoryState state_mask, MemoryState state, + MemoryPermission perm_mask, MemoryPermission perm, + MemoryAttribute attr_mask, MemoryAttribute attr, + MemoryAttribute ignore_attr) { + std::lock_guard lock{page_table_lock}; + + // Get information about the first block + const VAddr last_addr{addr + size - 1}; + MemoryBlockManager::const_iterator it{block_manager->FindIterator(addr)}; + MemoryInfo info{it->GetMemoryInfo()}; + + // Validate all blocks in the range have correct state + const MemoryState first_state{info.state}; + const MemoryPermission first_perm{info.perm}; + const MemoryAttribute first_attr{info.attribute}; + + while (true) { + // Validate the current block + if (!(info.state == first_state)) { + return ERR_INVALID_ADDRESS_STATE; + } + if (!(info.perm == first_perm)) { + return ERR_INVALID_ADDRESS_STATE; + } + if (!((info.attribute | static_cast<MemoryAttribute>(ignore_attr)) == + (first_attr | 
static_cast<MemoryAttribute>(ignore_attr)))) { + return ERR_INVALID_ADDRESS_STATE; + } + + // Validate against the provided masks + CASCADE_CODE(CheckMemoryState(info, state_mask, state, perm_mask, perm, attr_mask, attr)); + + // Break once we're done + if (last_addr <= info.GetLastAddress()) { + break; + } + + // Advance our iterator + it++; + ASSERT(it != block_manager->cend()); + info = it->GetMemoryInfo(); + } + + // Write output state + if (out_state) { + *out_state = first_state; + } + if (out_perm) { + *out_perm = first_perm; + } + if (out_attr) { + *out_attr = first_attr & static_cast<MemoryAttribute>(~ignore_attr); + } + + return RESULT_SUCCESS; +} + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/page_table.h b/src/core/hle/kernel/memory/page_table.h new file mode 100644 index 000000000..80384ab0f --- /dev/null +++ b/src/core/hle/kernel/memory/page_table.h @@ -0,0 +1,276 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <list> +#include <memory> +#include <mutex> + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/page_table.h" +#include "core/file_sys/program_metadata.h" +#include "core/hle/kernel/memory/memory_block.h" +#include "core/hle/kernel/memory/memory_manager.h" + +namespace Core { +class System; +} + +namespace Kernel::Memory { + +class MemoryBlockManager; + +class PageTable final : NonCopyable { +public: + explicit PageTable(Core::System& system); + + ResultCode InitializeForProcess(FileSys::ProgramAddressSpaceType as_type, bool enable_aslr, + VAddr code_addr, std::size_t code_size, + Memory::MemoryManager::Pool pool); + ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, MemoryState state, + MemoryPermission perm); + ResultCode MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); + ResultCode UnmapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); + ResultCode MapPhysicalMemory(VAddr addr, std::size_t size); + ResultCode UnmapPhysicalMemory(VAddr addr, std::size_t size); + ResultCode UnmapMemory(VAddr addr, std::size_t size); + ResultCode Map(VAddr dst_addr, VAddr src_addr, std::size_t size); + ResultCode Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size); + ResultCode MapPages(VAddr addr, PageLinkedList& page_linked_list, MemoryState state, + MemoryPermission perm); + ResultCode SetCodeMemoryPermission(VAddr addr, std::size_t size, MemoryPermission perm); + MemoryInfo QueryInfo(VAddr addr); + ResultCode ReserveTransferMemory(VAddr addr, std::size_t size, MemoryPermission perm); + ResultCode ResetTransferMemory(VAddr addr, std::size_t size); + ResultCode SetMemoryAttribute(VAddr addr, std::size_t size, MemoryAttribute mask, + MemoryAttribute value); + ResultCode SetHeapCapacity(std::size_t new_heap_capacity); + ResultVal<VAddr> SetHeapSize(std::size_t size); + ResultVal<VAddr> AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align, + bool is_map_only, VAddr region_start, + std::size_t region_num_pages, MemoryState state, + MemoryPermission perm, PAddr map_addr = 0); + + Common::PageTable& PageTableImpl() { + return page_table_impl; + } + + const Common::PageTable& PageTableImpl() const { + return page_table_impl; + } + +private: + enum class OperationType : u32 { + Map, + MapGroup, + Unmap, + ChangePermissions, + ChangePermissionsAndRefresh, + }; + + static constexpr MemoryAttribute DefaultMemoryIgnoreAttr = + 
MemoryAttribute::DontCareMask | MemoryAttribute::IpcLocked | MemoryAttribute::DeviceShared; + + ResultCode InitializeMemoryLayout(VAddr start, VAddr end); + ResultCode MapPages(VAddr addr, const PageLinkedList& page_linked_list, MemoryPermission perm); + void MapPhysicalMemory(PageLinkedList& page_linked_list, VAddr start, VAddr end); + bool IsRegionMapped(VAddr address, u64 size); + bool IsRegionContiguous(VAddr addr, u64 size) const; + void AddRegionToPages(VAddr start, std::size_t num_pages, PageLinkedList& page_linked_list); + MemoryInfo QueryInfoImpl(VAddr addr); + VAddr AllocateVirtualMemory(VAddr start, std::size_t region_num_pages, u64 needed_num_pages, + std::size_t align); + ResultCode Operate(VAddr addr, std::size_t num_pages, const PageLinkedList& page_group, + OperationType operation); + ResultCode Operate(VAddr addr, std::size_t num_pages, MemoryPermission perm, + OperationType operation, PAddr map_addr = 0); + constexpr VAddr GetRegionAddress(MemoryState state) const; + constexpr std::size_t GetRegionSize(MemoryState state) const; + constexpr bool CanContain(VAddr addr, std::size_t size, MemoryState state) const; + + constexpr ResultCode CheckMemoryState(const MemoryInfo& info, MemoryState state_mask, + MemoryState state, MemoryPermission perm_mask, + MemoryPermission perm, MemoryAttribute attr_mask, + MemoryAttribute attr) const; + ResultCode CheckMemoryState(MemoryState* out_state, MemoryPermission* out_perm, + MemoryAttribute* out_attr, VAddr addr, std::size_t size, + MemoryState state_mask, MemoryState state, + MemoryPermission perm_mask, MemoryPermission perm, + MemoryAttribute attr_mask, MemoryAttribute attr, + MemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr); + ResultCode CheckMemoryState(VAddr addr, std::size_t size, MemoryState state_mask, + MemoryState state, MemoryPermission perm_mask, + MemoryPermission perm, MemoryAttribute attr_mask, + MemoryAttribute attr, + MemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr) { + return CheckMemoryState(nullptr, nullptr, nullptr, addr, size, state_mask, state, perm_mask, + perm, attr_mask, attr, ignore_attr); + } + + std::recursive_mutex page_table_lock; + std::unique_ptr<MemoryBlockManager> block_manager; + +public: + constexpr VAddr GetAddressSpaceStart() const { + return address_space_start; + } + constexpr VAddr GetAddressSpaceEnd() const { + return address_space_end; + } + constexpr std::size_t GetAddressSpaceSize() const { + return address_space_end - address_space_start; + } + constexpr VAddr GetHeapRegionStart() const { + return heap_region_start; + } + constexpr VAddr GetHeapRegionEnd() const { + return heap_region_end; + } + constexpr std::size_t GetHeapRegionSize() const { + return heap_region_end - heap_region_start; + } + constexpr VAddr GetAliasRegionStart() const { + return alias_region_start; + } + constexpr VAddr GetAliasRegionEnd() const { + return alias_region_end; + } + constexpr std::size_t GetAliasRegionSize() const { + return alias_region_end - alias_region_start; + } + constexpr VAddr GetStackRegionStart() const { + return stack_region_start; + } + constexpr VAddr GetStackRegionEnd() const { + return stack_region_end; + } + constexpr std::size_t GetStackRegionSize() const { + return stack_region_end - stack_region_start; + } + constexpr VAddr GetKernelMapRegionStart() const { + return kernel_map_region_start; + } + constexpr VAddr GetKernelMapRegionEnd() const { + return kernel_map_region_end; + } + constexpr VAddr GetCodeRegionStart() const { + return code_region_start; + } + constexpr VAddr 
GetCodeRegionEnd() const { + return code_region_end; + } + constexpr VAddr GetAliasCodeRegionStart() const { + return alias_code_region_start; + } + constexpr VAddr GetAliasCodeRegionSize() const { + return alias_code_region_end - alias_code_region_start; + } + constexpr std::size_t GetAddressSpaceWidth() const { + return address_space_width; + } + constexpr std::size_t GetHeapSize() { + return current_heap_addr - heap_region_start; + } + constexpr std::size_t GetTotalHeapSize() { + return GetHeapSize() + physical_memory_usage; + } + constexpr bool IsInsideAddressSpace(VAddr address, std::size_t size) const { + return address_space_start <= address && address + size - 1 <= address_space_end - 1; + } + constexpr bool IsOutsideAliasRegion(VAddr address, std::size_t size) const { + return alias_region_start > address || address + size - 1 > alias_region_end - 1; + } + constexpr bool IsOutsideStackRegion(VAddr address, std::size_t size) const { + return stack_region_start > address || address + size - 1 > stack_region_end - 1; + } + constexpr bool IsInvalidRegion(VAddr address, std::size_t size) const { + return address + size - 1 > GetAliasCodeRegionStart() + GetAliasCodeRegionSize() - 1; + } + constexpr bool IsInsideHeapRegion(VAddr address, std::size_t size) const { + return address + size > heap_region_start && heap_region_end > address; + } + constexpr bool IsInsideAliasRegion(VAddr address, std::size_t size) const { + return address + size > alias_region_start && alias_region_end > address; + } + constexpr bool IsOutsideASLRRegion(VAddr address, std::size_t size) const { + if (IsInvalidRegion(address, size)) { + return true; + } + if (IsInsideHeapRegion(address, size)) { + return true; + } + if (IsInsideAliasRegion(address, size)) { + return true; + } + return {}; + } + constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const { + return !IsOutsideASLRRegion(address, size); + } + constexpr PAddr GetPhysicalAddr(VAddr addr) { + return page_table_impl.backing_addr[addr >> Memory::PageBits] + addr; + } + +private: + constexpr bool Contains(VAddr addr) const { + return address_space_start <= addr && addr <= address_space_end - 1; + } + constexpr bool Contains(VAddr addr, std::size_t size) const { + return address_space_start <= addr && addr < addr + size && + addr + size - 1 <= address_space_end - 1; + } + constexpr bool IsKernel() const { + return is_kernel; + } + constexpr bool IsAslrEnabled() const { + return is_aslr_enabled; + } + + constexpr std::size_t GetNumGuardPages() const { + return IsKernel() ? 
1 : 4; + } + + constexpr bool ContainsPages(VAddr addr, std::size_t num_pages) const { + return (address_space_start <= addr) && + (num_pages <= (address_space_end - address_space_start) / PageSize) && + (addr + num_pages * PageSize - 1 <= address_space_end - 1); + } + +private: + VAddr address_space_start{}; + VAddr address_space_end{}; + VAddr heap_region_start{}; + VAddr heap_region_end{}; + VAddr current_heap_end{}; + VAddr alias_region_start{}; + VAddr alias_region_end{}; + VAddr stack_region_start{}; + VAddr stack_region_end{}; + VAddr kernel_map_region_start{}; + VAddr kernel_map_region_end{}; + VAddr code_region_start{}; + VAddr code_region_end{}; + VAddr alias_code_region_start{}; + VAddr alias_code_region_end{}; + VAddr current_heap_addr{}; + + std::size_t heap_capacity{}; + std::size_t physical_memory_usage{}; + std::size_t max_heap_size{}; + std::size_t max_physical_memory_size{}; + std::size_t address_space_width{}; + + bool is_kernel{}; + bool is_aslr_enabled{}; + + MemoryManager::Pool memory_pool{MemoryManager::Pool::Application}; + + Common::PageTable page_table_impl; + + Core::System& system; +}; + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/slab_heap.h b/src/core/hle/kernel/memory/slab_heap.h new file mode 100644 index 000000000..049403e15 --- /dev/null +++ b/src/core/hle/kernel/memory/slab_heap.h @@ -0,0 +1,164 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphère, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphère-NX. + +#pragma once + +#include <atomic> + +#include "common/assert.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Kernel::Memory { + +namespace impl { + +class SlabHeapImpl final : NonCopyable { +public: + struct Node { + Node* next{}; + }; + + constexpr SlabHeapImpl() = default; + + void Initialize(std::size_t size) { + ASSERT(head == nullptr); + obj_size = size; + } + + constexpr std::size_t GetObjectSize() const { + return obj_size; + } + + Node* GetHead() const { + return head; + } + + void* Allocate() { + Node* ret = head.load(); + + do { + if (ret == nullptr) { + break; + } + } while (!head.compare_exchange_weak(ret, ret->next)); + + return ret; + } + + void Free(void* obj) { + Node* node = reinterpret_cast<Node*>(obj); + + Node* cur_head = head.load(); + do { + node->next = cur_head; + } while (!head.compare_exchange_weak(cur_head, node)); + } + +private: + std::atomic<Node*> head{}; + std::size_t obj_size{}; +}; + +} // namespace impl + +class SlabHeapBase : NonCopyable { +public: + constexpr SlabHeapBase() = default; + + constexpr bool Contains(uintptr_t addr) const { + return start <= addr && addr < end; + } + + constexpr std::size_t GetSlabHeapSize() const { + return (end - start) / GetObjectSize(); + } + + constexpr std::size_t GetObjectSize() const { + return impl.GetObjectSize(); + } + + constexpr uintptr_t GetSlabHeapAddress() const { + return start; + } + + std::size_t GetObjectIndexImpl(const void* obj) const { + return (reinterpret_cast<uintptr_t>(obj) - start) / GetObjectSize(); + } + + std::size_t GetPeakIndex() const { + return GetObjectIndexImpl(reinterpret_cast<const void*>(peak)); + } + + void* AllocateImpl() { + return impl.Allocate(); + } + + void FreeImpl(void* obj) { + // Don't allow freeing an object that wasn't allocated from this heap + 
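// (Range check only; this cannot detect a double free or an interior pointer.) +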
ASSERT(Contains(reinterpret_cast<uintptr_t>(obj))); + impl.Free(obj); + } + + void InitializeImpl(std::size_t obj_size, void* memory, std::size_t memory_size) { + // Ensure we don't initialize a slab using null memory + ASSERT(memory != nullptr); + + // Initialize the base allocator + impl.Initialize(obj_size); + + // Set our tracking variables + const std::size_t num_obj = (memory_size / obj_size); + start = reinterpret_cast<uintptr_t>(memory); + end = start + num_obj * obj_size; + peak = start; + + // Free the objects + u8* cur = reinterpret_cast<u8*>(end); + + for (std::size_t i{}; i < num_obj; i++) { + cur -= obj_size; + impl.Free(cur); + } + } + +private: + using Impl = impl::SlabHeapImpl; + + Impl impl; + uintptr_t peak{}; + uintptr_t start{}; + uintptr_t end{}; +}; + +template <typename T> +class SlabHeap final : public SlabHeapBase { +public: + constexpr SlabHeap() : SlabHeapBase() {} + + void Initialize(void* memory, std::size_t memory_size) { + InitializeImpl(sizeof(T), memory, memory_size); + } + + T* Allocate() { + T* obj = reinterpret_cast<T*>(AllocateImpl()); + if (obj != nullptr) { + new (obj) T(); + } + return obj; + } + + void Free(T* obj) { + FreeImpl(obj); + } + + constexpr std::size_t GetObjectIndex(const T* obj) const { + return GetObjectIndexImpl(obj); + } +}; + +} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/system_control.cpp b/src/core/hle/kernel/memory/system_control.cpp new file mode 100644 index 000000000..9cae3c6cb --- /dev/null +++ b/src/core/hle/kernel/memory/system_control.cpp @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <limits> +#include <random> + +#include "core/hle/kernel/memory/system_control.h" + +namespace Kernel::Memory::SystemControl { + +u64 GenerateRandomU64ForInit() { + static std::random_device device; + static std::mt19937 gen(device()); + static std::uniform_int_distribution<u64> distribution(1, std::numeric_limits<u64>::max()); + return distribution(gen); +} + +template <typename F> +u64 GenerateUniformRange(u64 min, u64 max, F f) { + /* Handle the case where the difference is too large to represent. */ + if (max == std::numeric_limits<u64>::max() && min == std::numeric_limits<u64>::min()) { + return f(); + } + + /* Iterate until we get a value in range. */ + const u64 range_size = ((max + 1) - min); + const u64 effective_max = (std::numeric_limits<u64>::max() / range_size) * range_size; + while (true) { + if (const u64 rnd = f(); rnd < effective_max) { + return min + (rnd % range_size); + } + } +} + +u64 GenerateRandomRange(u64 min, u64 max) { + return GenerateUniformRange(min, max, GenerateRandomU64ForInit); +} + +} // namespace Kernel::Memory::SystemControl diff --git a/src/core/hle/kernel/memory/system_control.h b/src/core/hle/kernel/memory/system_control.h new file mode 100644 index 000000000..3fa93111d --- /dev/null +++ b/src/core/hle/kernel/memory/system_control.h @@ -0,0 +1,18 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
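+ +// Example (see PageTable::InitializeForProcess): a RegionAlignment-aligned +// random offset within spare address space can be chosen as +// GenerateRandomRange(0, remaining_size / RegionAlignment) * RegionAlignment.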
+ +#pragma once + +#include "common/common_types.h" + +namespace Kernel::Memory::SystemControl { + +u64 GenerateRandomU64ForInit(); + +template <typename F> +u64 GenerateUniformRange(u64 min, u64 max, F f); + +u64 GenerateRandomRange(u64 min, u64 max); + +} // namespace Kernel::Memory::SystemControl diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h index b689e8e8b..7a0266780 100644 --- a/src/core/hle/kernel/physical_memory.h +++ b/src/core/hle/kernel/physical_memory.h @@ -4,6 +4,8 @@ #pragma once +#include <vector> + #include "common/alignment.h" namespace Kernel { diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index edc414d69..36724569f 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -10,15 +10,18 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/device_memory.h" #include "core/file_sys/program_metadata.h" #include "core/hle/kernel/code_set.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/memory/memory_block_manager.h" +#include "core/hle/kernel/memory/page_table.h" +#include "core/hle/kernel/memory/slab_heap.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "core/settings.h" @@ -31,10 +34,8 @@ namespace { * @param kernel The kernel instance to create the main thread under. * @param priority The priority to give the main thread */ -void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) { - const auto& vm_manager = owner_process.VMManager(); - const VAddr entry_point = vm_manager.GetCodeRegionBaseAddress(); - const VAddr stack_top = vm_manager.GetTLSIORegionEndAddress(); +void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, VAddr stack_top) { + const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart(); auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, owner_process.GetIdealCore(), stack_top, owner_process); @@ -42,6 +43,8 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) { // Register 1 must be a handle to the main thread const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); + thread->GetContext32().cpu_registers[0] = 0; + thread->GetContext64().cpu_registers[0] = 0; thread->GetContext32().cpu_registers[1] = thread_handle; thread->GetContext64().cpu_registers[1] = thread_handle; @@ -57,7 +60,8 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) { // (whichever page happens to have an available slot). class TLSPage { public: - static constexpr std::size_t num_slot_entries = Memory::PAGE_SIZE / Memory::TLS_ENTRY_SIZE; + static constexpr std::size_t num_slot_entries = + Core::Memory::PAGE_SIZE / Core::Memory::TLS_ENTRY_SIZE; explicit TLSPage(VAddr address) : base_address{address} {} @@ -76,7 +80,7 @@ public: } is_slot_used[i] = true; - return base_address + (i * Memory::TLS_ENTRY_SIZE); + return base_address + (i * Core::Memory::TLS_ENTRY_SIZE); } return std::nullopt; @@ -86,15 +90,15 @@ public: // Ensure that all given addresses are consistent with how TLS pages // are intended to be used when releasing slots. 
ASSERT(IsWithinPage(address)); - ASSERT((address % Memory::TLS_ENTRY_SIZE) == 0); + ASSERT((address % Core::Memory::TLS_ENTRY_SIZE) == 0); - const std::size_t index = (address - base_address) / Memory::TLS_ENTRY_SIZE; + const std::size_t index = (address - base_address) / Core::Memory::TLS_ENTRY_SIZE; is_slot_used[index] = false; } private: bool IsWithinPage(VAddr address) const { - return base_address <= address && address < base_address + Memory::PAGE_SIZE; + return base_address <= address && address < base_address + Core::Memory::PAGE_SIZE; } VAddr base_address; @@ -106,7 +110,7 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name, std::shared_ptr<Process> process = std::make_shared<Process>(system); process->name = std::move(name); - process->resource_limit = kernel.GetSystemResourceLimit(); + process->resource_limit = ResourceLimit::Create(kernel); process->status = ProcessStatus::Created; process->program_id = 0; process->process_id = type == ProcessType::KernelInternal ? kernel.CreateNewKernelProcessID() @@ -127,7 +131,14 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const { } u64 Process::GetTotalPhysicalMemoryAvailable() const { - return vm_manager.GetTotalPhysicalMemoryAvailable(); + const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) + + page_table->GetTotalHeapSize() + image_size + main_thread_stack_size}; + + if (capacity < memory_usage_capacity) { + return capacity; + } + + return memory_usage_capacity; } u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const { @@ -135,8 +146,7 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const { } u64 Process::GetTotalPhysicalMemoryUsed() const { - return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size + - GetSystemResourceUsage(); + return image_size + main_thread_stack_size + page_table->GetTotalHeapSize(); } u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { @@ -209,33 +219,82 @@ ResultCode Process::ClearSignalState() { return RESULT_SUCCESS; } -ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { +ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, + std::size_t code_size) { program_id = metadata.GetTitleID(); ideal_core = metadata.GetMainThreadCore(); is_64bit_process = metadata.Is64BitProgram(); system_resource_size = metadata.GetSystemResourceSize(); + image_size = code_size; + + // Initialize process address space + if (const ResultCode result{ + page_table->InitializeForProcess(metadata.GetAddressSpaceType(), false, 0x8000000, + code_size, Memory::MemoryManager::Pool::Application)}; + result.IsError()) { + return result; + } - vm_manager.Reset(metadata.GetAddressSpaceType()); + // Map process code region + if (const ResultCode result{page_table->MapProcessCode( + page_table->GetCodeRegionStart(), code_size / Memory::PageSize, + Memory::MemoryState::Code, Memory::MemoryPermission::None)}; + result.IsError()) { + return result; + } - const auto& caps = metadata.GetKernelCapabilities(); - const auto capability_init_result = - capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); - if (capability_init_result.IsError()) { - return capability_init_result; + // Initialize process capabilities + const auto& caps{metadata.GetKernelCapabilities()}; + if (const ResultCode result{ + capabilities.InitializeForUserProcess(caps.data(), caps.size(), *page_table)}; + result.IsError()) { + return
result; } + // Set memory usage capacity + switch (metadata.GetAddressSpaceType()) { + case FileSys::ProgramAddressSpaceType::Is32Bit: + case FileSys::ProgramAddressSpaceType::Is36Bit: + case FileSys::ProgramAddressSpaceType::Is39Bit: + memory_usage_capacity = page_table->GetHeapRegionEnd() - page_table->GetHeapRegionStart(); + break; + + case FileSys::ProgramAddressSpaceType::Is32BitNoMap: + memory_usage_capacity = page_table->GetHeapRegionEnd() - page_table->GetHeapRegionStart() + + page_table->GetAliasRegionEnd() - page_table->GetAliasRegionStart(); + break; + + default: + UNREACHABLE(); + } + + // Set initial resource limits + resource_limit->SetLimitValue( + ResourceType::PhysicalMemory, + kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application)); + resource_limit->SetLimitValue(ResourceType::Threads, 608); + resource_limit->SetLimitValue(ResourceType::Events, 700); + resource_limit->SetLimitValue(ResourceType::TransferMemory, 128); + resource_limit->SetLimitValue(ResourceType::Sessions, 894); + ASSERT(resource_limit->Reserve(ResourceType::PhysicalMemory, code_size)); + + // Create TLS region + tls_region_address = CreateTLSRegion(); + return handle_table.SetSize(capabilities.GetHandleTableSize()); } void Process::Run(s32 main_thread_priority, u64 stack_size) { AllocateMainThreadStack(stack_size); - tls_region_address = CreateTLSRegion(); - vm_manager.LogLayout(); + const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size}; + ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError()); ChangeStatus(ProcessStatus::Running); - SetupMainThread(*this, kernel, main_thread_priority); + SetupMainThread(*this, kernel, main_thread_priority, main_thread_stack_top); + resource_limit->Reserve(ResourceType::Threads, 1); + resource_limit->Reserve(ResourceType::PhysicalMemory, main_thread_stack_size); } void Process::PrepareForTermination() { @@ -279,32 +338,37 @@ static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) { } VAddr Process::CreateTLSRegion() { - auto tls_page_iter = FindTLSPageWithAvailableSlots(tls_pages); + if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)}; + tls_page_iter != tls_pages.cend()) { + return *tls_page_iter->ReserveSlot(); + } - if (tls_page_iter == tls_pages.cend()) { - const auto region_address = - vm_manager.FindFreeRegion(vm_manager.GetTLSIORegionBaseAddress(), - vm_manager.GetTLSIORegionEndAddress(), Memory::PAGE_SIZE); - ASSERT(region_address.Succeeded()); + Memory::Page* const tls_page_ptr{kernel.GetUserSlabHeapPages().Allocate()}; + ASSERT(tls_page_ptr); - const auto map_result = vm_manager.MapMemoryBlock( - *region_address, std::make_shared<PhysicalMemory>(Memory::PAGE_SIZE), 0, - Memory::PAGE_SIZE, MemoryState::ThreadLocal); - ASSERT(map_result.Succeeded()); + const VAddr start{page_table->GetKernelMapRegionStart()}; + const VAddr size{page_table->GetKernelMapRegionEnd() - start}; + const PAddr tls_map_addr{system.DeviceMemory().GetPhysicalAddr(tls_page_ptr)}; + const VAddr tls_page_addr{ + page_table + ->AllocateAndMapMemory(1, Memory::PageSize, true, start, size / Memory::PageSize, + Memory::MemoryState::ThreadLocal, + Memory::MemoryPermission::ReadAndWrite, tls_map_addr) + .ValueOr(0)}; - tls_pages.emplace_back(*region_address); + ASSERT(tls_page_addr); - const auto reserve_result = tls_pages.back().ReserveSlot(); - ASSERT(reserve_result.has_value()); + std::memset(tls_page_ptr, 0, Memory::PageSize); + tls_pages.emplace_back(tls_page_addr); - return *reserve_result; - } + 
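// Reserve a slot in the TLS page that was just appended to the list. +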
const auto reserve_result{tls_pages.back().ReserveSlot()}; + ASSERT(reserve_result.has_value()); - return *tls_page_iter->ReserveSlot(); + return *reserve_result; } void Process::FreeTLSRegion(VAddr tls_address) { - const VAddr aligned_address = Common::AlignDown(tls_address, Memory::PAGE_SIZE); + const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE); auto iter = std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) { return page.GetBaseAddress() == aligned_address; @@ -317,28 +381,22 @@ void Process::FreeTLSRegion(VAddr tls_address) { iter->ReleaseSlot(tls_address); } -void Process::LoadModule(CodeSet module_, VAddr base_addr) { - code_memory_size += module_.memory.size(); - - const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory)); - - const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, - MemoryState memory_state) { - const auto vma = vm_manager - .MapMemoryBlock(segment.addr + base_addr, memory, segment.offset, - segment.size, memory_state) - .Unwrap(); - vm_manager.Reprotect(vma, permissions); +void Process::LoadModule(CodeSet code_set, VAddr base_addr) { + const auto ReprotectSegment = [&](const CodeSet::Segment& segment, + Memory::MemoryPermission permission) { + page_table->SetCodeMemoryPermission(segment.addr + base_addr, segment.size, permission); }; - // Map CodeSet segments - MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code); - MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData); - MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData); + system.Memory().WriteBlock(*this, base_addr, code_set.memory.data(), code_set.memory.size()); + + ReprotectSegment(code_set.CodeSegment(), Memory::MemoryPermission::ReadAndExecute); + ReprotectSegment(code_set.RODataSegment(), Memory::MemoryPermission::Read); + ReprotectSegment(code_set.DataSegment(), Memory::MemoryPermission::ReadAndWrite); } Process::Process(Core::System& system) - : SynchronizationObject{system.Kernel()}, vm_manager{system}, + : SynchronizationObject{system.Kernel()}, page_table{std::make_unique<Memory::PageTable>( + system)}, address_arbiter{system}, mutex{system}, system{system} {} Process::~Process() = default; @@ -361,16 +419,24 @@ void Process::ChangeStatus(ProcessStatus new_status) { Signal(); } -void Process::AllocateMainThreadStack(u64 stack_size) { +ResultCode Process::AllocateMainThreadStack(std::size_t stack_size) { + ASSERT(stack_size); + // The kernel always ensures that the given stack size is page aligned. 
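// [Illustrative sketch, not part of this change] The arithmetic below rounds the
// requested stack size up to the next page boundary and places the initial stack
// pointer at the high end of the mapping (stacks grow downward). A standalone
// model, assuming a 4 KiB page size:
#include <cstdint>

namespace sketch {
constexpr std::uint64_t kPageSize = 0x1000; // assumed 4 KiB page

constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    // Round up to the next multiple of a power-of-two alignment.
    return (value + align - 1) & ~(align - 1);
}

constexpr std::uint64_t StackTop(std::uint64_t mapped_base, std::uint64_t stack_size) {
    return mapped_base + AlignUp(stack_size, kPageSize);
}

static_assert(AlignUp(0x1234, kPageSize) == 0x2000, "rounds up to next page");
static_assert(StackTop(0x8000000, 0x1000) == 0x8001000, "SP starts at end of mapping");
} // namespace sketch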
- main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); - - // Allocate and map the main thread stack - const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; - vm_manager - .MapMemoryBlock(mapping_address, std::make_shared<PhysicalMemory>(main_thread_stack_size), - 0, main_thread_stack_size, MemoryState::Stack) - .Unwrap(); + main_thread_stack_size = Common::AlignUp(stack_size, Memory::PageSize); + + const VAddr start{page_table->GetStackRegionStart()}; + const std::size_t size{page_table->GetStackRegionEnd() - start}; + + CASCADE_RESULT(main_thread_stack_top, + page_table->AllocateAndMapMemory( + main_thread_stack_size / Memory::PageSize, Memory::PageSize, false, start, + size / Memory::PageSize, Memory::MemoryState::Stack, + Memory::MemoryPermission::ReadAndWrite)); + + main_thread_stack_top += main_thread_stack_size; + + return RESULT_SUCCESS; + } } // namespace Kernel diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 4887132a7..9dabe3568 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -16,7 +16,6 @@ #include "core/hle/kernel/mutex.h" #include "core/hle/kernel/process_capability.h" #include "core/hle/kernel/synchronization_object.h" -#include "core/hle/kernel/vm_manager.h" #include "core/hle/result.h" namespace Core { @@ -36,6 +35,10 @@ class TLSPage; struct CodeSet; +namespace Memory { +class PageTable; +} + enum class MemoryRegion : u16 { APPLICATION = 1, SYSTEM = 2, @@ -100,14 +103,14 @@ public: return HANDLE_TYPE; } - /// Gets a reference to the process' memory manager. - Kernel::VMManager& VMManager() { - return vm_manager; + /// Gets a reference to the process' page table. + Memory::PageTable& PageTable() { + return *page_table; } - /// Gets a const reference to the process' memory manager. - const Kernel::VMManager& VMManager() const { - return vm_manager; + /// Gets a const reference to the process' page table. + const Memory::PageTable& PageTable() const { + return *page_table; } /// Gets a reference to the process' handle table. @@ -273,7 +276,7 @@ public: * @returns RESULT_SUCCESS if all relevant metadata was able to be * loaded and parsed. Otherwise, an error code is returned. */ - ResultCode LoadFromMetadata(const FileSys::ProgramMetadata& metadata); + ResultCode LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size); /** * Starts the main application thread for this process. @@ -289,7 +292,7 @@ public: */ void PrepareForTermination(); - void LoadModule(CodeSet module_, VAddr base_addr); + void LoadModule(CodeSet code_set, VAddr base_addr); /////////////////////////////////////////////////////////////////////////////////////////////// // Thread-local storage management @@ -313,16 +316,10 @@ private: void ChangeStatus(ProcessStatus new_status); /// Allocates the main thread stack for the process, given the stack size in bytes. - void AllocateMainThreadStack(u64 stack_size); - - /// Memory manager for this process. - Kernel::VMManager vm_manager; - - /// Size of the main thread's stack in bytes. - u64 main_thread_stack_size = 0; + ResultCode AllocateMainThreadStack(std::size_t stack_size); - /// Size of the loaded code memory in bytes. 
- u64 code_memory_size = 0; + /// Memory manager for this process + std::unique_ptr<Memory::PageTable> page_table; /// Current status of the process ProcessStatus status{}; @@ -390,6 +387,18 @@ private: /// Name of this process std::string name; + + /// Address of the top of the main thread's stack + VAddr main_thread_stack_top{}; + + /// Size of the main thread's stack + std::size_t main_thread_stack_size{}; + + /// Memory usage capacity for the process + std::size_t memory_usage_capacity{}; + + /// Process total image size + std::size_t image_size{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp index 583e35b79..48e5ae682 100644 --- a/src/core/hle/kernel/process_capability.cpp +++ b/src/core/hle/kernel/process_capability.cpp @@ -5,8 +5,8 @@ #include "common/bit_util.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process_capability.h" -#include "core/hle/kernel/vm_manager.h" namespace Kernel { namespace { @@ -66,7 +66,7 @@ u32 GetFlagBitOffset(CapabilityType type) { ResultCode ProcessCapabilities::InitializeForKernelProcess(const u32* capabilities, std::size_t num_capabilities, - VMManager& vm_manager) { + Memory::PageTable& page_table) { Clear(); // Allow all cores and priorities. @@ -74,15 +74,15 @@ ResultCode ProcessCapabilities::InitializeForKernelProcess(const u32* capabiliti priority_mask = 0xFFFFFFFFFFFFFFFF; kernel_version = PackedKernelVersion; - return ParseCapabilities(capabilities, num_capabilities, vm_manager); + return ParseCapabilities(capabilities, num_capabilities, page_table); } ResultCode ProcessCapabilities::InitializeForUserProcess(const u32* capabilities, std::size_t num_capabilities, - VMManager& vm_manager) { + Memory::PageTable& page_table) { Clear(); - return ParseCapabilities(capabilities, num_capabilities, vm_manager); + return ParseCapabilities(capabilities, num_capabilities, page_table); } void ProcessCapabilities::InitializeForMetadatalessProcess() { @@ -105,7 +105,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() { ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities, std::size_t num_capabilities, - VMManager& vm_manager) { + Memory::PageTable& page_table) { u32 set_flags = 0; u32 set_svc_bits = 0; @@ -127,13 +127,13 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities, return ERR_INVALID_COMBINATION; } - const auto result = HandleMapPhysicalFlags(descriptor, size_flags, vm_manager); + const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table); if (result.IsError()) { return result; } } else { const auto result = - ParseSingleFlagCapability(set_flags, set_svc_bits, descriptor, vm_manager); + ParseSingleFlagCapability(set_flags, set_svc_bits, descriptor, page_table); if (result.IsError()) { return result; } @@ -144,7 +144,7 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities, } ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& set_svc_bits, - u32 flag, VMManager& vm_manager) { + u32 flag, Memory::PageTable& page_table) { const auto type = GetCapabilityType(flag); if (type == CapabilityType::Unset) { @@ -172,7 +172,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s case CapabilityType::Syscall: return HandleSyscallFlags(set_svc_bits, flag); case CapabilityType::MapIO: - return HandleMapIOFlags(flag, 
vm_manager); + return HandleMapIOFlags(flag, page_table); case CapabilityType::Interrupt: return HandleInterruptFlags(flag); case CapabilityType::ProgramType: @@ -269,12 +269,12 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags) } ResultCode ProcessCapabilities::HandleMapPhysicalFlags(u32 flags, u32 size_flags, - VMManager& vm_manager) { + Memory::PageTable& page_table) { // TODO(Lioncache): Implement once the memory manager can handle this. return RESULT_SUCCESS; } -ResultCode ProcessCapabilities::HandleMapIOFlags(u32 flags, VMManager& vm_manager) { +ResultCode ProcessCapabilities::HandleMapIOFlags(u32 flags, Memory::PageTable& page_table) { // TODO(Lioncache): Implement once the memory manager can handle this. return RESULT_SUCCESS; } diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h index 5cdd80747..ea9d12c16 100644 --- a/src/core/hle/kernel/process_capability.h +++ b/src/core/hle/kernel/process_capability.h @@ -12,7 +12,9 @@ union ResultCode; namespace Kernel { -class VMManager; +namespace Memory { +class PageTable; +} /// The possible types of programs that may be indicated /// by the program type capability descriptor. @@ -81,27 +83,27 @@ public: /// /// @param capabilities The capabilities to parse /// @param num_capabilities The number of capabilities to parse. - /// @param vm_manager The memory manager to use for handling any mapping-related + /// @param page_table The memory manager to use for handling any mapping-related /// operations (such as mapping IO memory, etc). /// /// @returns RESULT_SUCCESS if this capabilities instance was able to be initialized, /// otherwise, an error code upon failure. /// ResultCode InitializeForKernelProcess(const u32* capabilities, std::size_t num_capabilities, - VMManager& vm_manager); + Memory::PageTable& page_table); /// Initializes this process capabilities instance for a userland process. /// /// @param capabilities The capabilities to parse. /// @param num_capabilities The total number of capabilities to parse. - /// @param vm_manager The memory manager to use for handling any mapping-related + /// @param page_table The memory manager to use for handling any mapping-related /// operations (such as mapping IO memory, etc). /// /// @returns RESULT_SUCCESS if this capabilities instance was able to be initialized, /// otherwise, an error code upon failure. /// ResultCode InitializeForUserProcess(const u32* capabilities, std::size_t num_capabilities, - VMManager& vm_manager); + Memory::PageTable& page_table); /// Initializes this process capabilities instance for a process that does not /// have any metadata to parse. @@ -181,13 +183,13 @@ private: /// /// @param capabilities The sequence of capability descriptors to parse. /// @param num_capabilities The number of descriptors within the given sequence. - /// @param vm_manager The memory manager that will perform any memory + /// @param page_table The memory manager that will perform any memory /// mapping if necessary. /// /// @return RESULT_SUCCESS if no errors occur, otherwise an error code. /// ResultCode ParseCapabilities(const u32* capabilities, std::size_t num_capabilities, - VMManager& vm_manager); + Memory::PageTable& page_table); /// Attempts to parse a capability descriptor that is only represented by a /// single flag set. @@ -196,13 +198,13 @@ private: /// flags being initialized more than once when they shouldn't be. 
/// @param set_svc_bits Running set of bits representing the allowed supervisor calls mask. /// @param flag The flag to attempt to parse. - /// @param vm_manager The memory manager that will perform any memory + /// @param page_table The memory manager that will perform any memory /// mapping if necessary. /// /// @return RESULT_SUCCESS if no errors occurred, otherwise an error code. /// ResultCode ParseSingleFlagCapability(u32& set_flags, u32& set_svc_bits, u32 flag, - VMManager& vm_manager); + Memory::PageTable& page_table); /// Clears the internal state of this process capability instance. Necessary, /// to have a sane starting point due to us allowing running executables without @@ -226,10 +228,10 @@ private: ResultCode HandleSyscallFlags(u32& set_svc_bits, u32 flags); /// Handles flags related to mapping physical memory pages. - ResultCode HandleMapPhysicalFlags(u32 flags, u32 size_flags, VMManager& vm_manager); + ResultCode HandleMapPhysicalFlags(u32 flags, u32 size_flags, Memory::PageTable& page_table); /// Handles flags related to mapping IO pages. - ResultCode HandleMapIOFlags(u32 flags, VMManager& vm_manager); + ResultCode HandleMapIOFlags(u32 flags, Memory::PageTable& page_table); /// Handles flags related to the interrupt capability flags. ResultCode HandleInterruptFlags(u32 flags); diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp index b53423462..96e5b9892 100644 --- a/src/core/hle/kernel/resource_limit.cpp +++ b/src/core/hle/kernel/resource_limit.cpp @@ -16,26 +16,60 @@ constexpr std::size_t ResourceTypeToIndex(ResourceType type) { ResourceLimit::ResourceLimit(KernelCore& kernel) : Object{kernel} {} ResourceLimit::~ResourceLimit() = default; +bool ResourceLimit::Reserve(ResourceType resource, s64 amount) { + return Reserve(resource, amount, 10000000000); +} + +bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) { + const std::size_t index{ResourceTypeToIndex(resource)}; + + s64 new_value = current[index] + amount; + while (new_value > limit[index] && available[index] + amount <= limit[index]) { + // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout + new_value = current[index] + amount; + + if (timeout >= 0) { + break; + } + } + + if (new_value <= limit[index]) { + current[index] = new_value; + return true; + } + return false; +} + +void ResourceLimit::Release(ResourceType resource, u64 amount) { + Release(resource, amount, amount); +} + +void ResourceLimit::Release(ResourceType resource, u64 used_amount, u64 available_amount) { + const std::size_t index{ResourceTypeToIndex(resource)}; + + current[index] -= used_amount; + available[index] -= available_amount; +} + std::shared_ptr<ResourceLimit> ResourceLimit::Create(KernelCore& kernel) { return std::make_shared<ResourceLimit>(kernel); } s64 ResourceLimit::GetCurrentResourceValue(ResourceType resource) const { - return values.at(ResourceTypeToIndex(resource)); + return limit.at(ResourceTypeToIndex(resource)) - current.at(ResourceTypeToIndex(resource)); } s64 ResourceLimit::GetMaxResourceValue(ResourceType resource) const { - return limits.at(ResourceTypeToIndex(resource)); + return limit.at(ResourceTypeToIndex(resource)); } ResultCode ResourceLimit::SetLimitValue(ResourceType resource, s64 value) { - const auto index = ResourceTypeToIndex(resource); - - if (value < values[index]) { + const std::size_t index{ResourceTypeToIndex(resource)}; + if (current[index] <= value) { + limit[index] = value; + return RESULT_SUCCESS; + 
} else { return ERR_INVALID_STATE; } - - values[index] = value; - return RESULT_SUCCESS; } } // namespace Kernel diff --git a/src/core/hle/kernel/resource_limit.h b/src/core/hle/kernel/resource_limit.h index 53b89e621..936cc4d0f 100644 --- a/src/core/hle/kernel/resource_limit.h +++ b/src/core/hle/kernel/resource_limit.h @@ -51,6 +51,11 @@ public: return HANDLE_TYPE; } + bool Reserve(ResourceType resource, s64 amount); + bool Reserve(ResourceType resource, s64 amount, u64 timeout); + void Release(ResourceType resource, u64 amount); + void Release(ResourceType resource, u64 used_amount, u64 available_amount); + /** * Gets the current value for the specified resource. * @param resource Requested resource type @@ -91,10 +96,9 @@ private: using ResourceArray = std::array<s64, static_cast<std::size_t>(ResourceType::ResourceTypeCount)>; - /// Maximum values a resource type may reach. - ResourceArray limits{}; - /// Current resource limit values. - ResourceArray values{}; + ResourceArray limit{}; + ResourceArray current{}; + ResourceArray available{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 4604e35c5..0f102ca44 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -134,7 +134,8 @@ ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& con return RESULT_SUCCESS; } -ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> thread, Memory::Memory& memory) { +ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> thread, + Core::Memory::Memory& memory) { u32* cmd_buf{reinterpret_cast<u32*>(memory.GetPointer(thread->GetTLSAddress()))}; std::shared_ptr<Kernel::HLERequestContext> context{ std::make_shared<Kernel::HLERequestContext>(SharedFrom(this), std::move(thread))}; @@ -178,7 +179,7 @@ ResultCode ServerSession::CompleteSyncRequest() { } ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread, - Memory::Memory& memory) { + Core::Memory::Memory& memory) { Core::System::GetInstance().CoreTiming().ScheduleEvent(20000, request_event, {}); return QueueSyncRequest(std::move(thread), memory); } diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index 77e4f6721..403aaf10b 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h @@ -13,7 +13,7 @@ #include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" -namespace Memory { +namespace Core::Memory { class Memory; } @@ -92,7 +92,7 @@ public: * * @returns ResultCode from the operation. */ - ResultCode HandleSyncRequest(std::shared_ptr<Thread> thread, Memory::Memory& memory); + ResultCode HandleSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory); bool ShouldWait(const Thread* thread) const override; @@ -126,7 +126,7 @@ public: private: /// Queues a sync request from the emulated application. - ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Memory::Memory& memory); + ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory); /// Completes a sync request from the emulated application. 
ResultCode CompleteSyncRequest(); diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index afb2e3fc2..c67696757 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp @@ -2,149 +2,56 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <utility> - #include "common/assert.h" -#include "common/logging/log.h" -#include "core/hle/kernel/errors.h" +#include "core/core.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/shared_memory.h" namespace Kernel { -SharedMemory::SharedMemory(KernelCore& kernel) : Object{kernel} {} -SharedMemory::~SharedMemory() = default; - -std::shared_ptr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_process, - u64 size, MemoryPermission permissions, - MemoryPermission other_permissions, - VAddr address, MemoryRegion region, - std::string name) { - std::shared_ptr<SharedMemory> shared_memory = std::make_shared<SharedMemory>(kernel); - - shared_memory->owner_process = owner_process; - shared_memory->name = std::move(name); - shared_memory->size = size; - shared_memory->permissions = permissions; - shared_memory->other_permissions = other_permissions; - - if (address == 0) { - shared_memory->backing_block = std::make_shared<Kernel::PhysicalMemory>(size); - shared_memory->backing_block_offset = 0; - - // Refresh the address mappings for the current process. - if (kernel.CurrentProcess() != nullptr) { - kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings( - shared_memory->backing_block.get()); - } - } else { - const auto& vm_manager = shared_memory->owner_process->VMManager(); +SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory) + : Object{kernel}, device_memory{device_memory} {} - // The memory is already available and mapped in the owner process. - const auto vma = vm_manager.FindVMA(address); - ASSERT_MSG(vm_manager.IsValidHandle(vma), "Invalid memory address"); - ASSERT_MSG(vma->second.backing_block, "Backing block doesn't exist for address"); - - // The returned VMA might be a bigger one encompassing the desired address. 
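// [Illustrative sketch, not part of this change] The removed lookup above may
// return a VMA that starts before the requested address; the backing-block
// offset is then recovered from the distance into that VMA, with a bounds check
// that the shared block still fits. A minimal standalone model (hypothetical
// Vma struct, field names chosen for illustration):
#include <cassert>
#include <cstdint>

namespace sketch {
struct Vma {
    std::uint64_t base;   // first virtual address covered by the mapping
    std::uint64_t size;   // mapping size in bytes
    std::uint64_t offset; // offset of the mapping into its backing block
};

std::uint64_t BackingOffset(const Vma& vma, std::uint64_t address, std::uint64_t size) {
    const std::uint64_t vma_offset = address - vma.base;
    assert(vma_offset + size <= vma.size && "Shared memory exceeds bounds of mapped block");
    return vma.offset + vma_offset;
}
} // namespace sketch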
- const auto vma_offset = address - vma->first; - ASSERT_MSG(vma_offset + size <= vma->second.size, - "Shared memory exceeds bounds of mapped block"); - - shared_memory->backing_block = vma->second.backing_block; - shared_memory->backing_block_offset = vma->second.offset + vma_offset; - } - - shared_memory->base_address = address; +SharedMemory::~SharedMemory() = default; - return shared_memory; -} +std::shared_ptr<SharedMemory> SharedMemory::Create( + KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process, + Memory::PageLinkedList&& page_list, Memory::MemoryPermission owner_permission, + Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size, + std::string name) { -std::shared_ptr<SharedMemory> SharedMemory::CreateForApplet( - KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset, - u64 size, MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { - std::shared_ptr<SharedMemory> shared_memory = std::make_shared<SharedMemory>(kernel); + std::shared_ptr<SharedMemory> shared_memory{ + std::make_shared<SharedMemory>(kernel, device_memory)}; - shared_memory->owner_process = nullptr; - shared_memory->name = std::move(name); + shared_memory->owner_process = owner_process; + shared_memory->page_list = std::move(page_list); + shared_memory->owner_permission = owner_permission; + shared_memory->user_permission = user_permission; + shared_memory->physical_address = physical_address; shared_memory->size = size; - shared_memory->permissions = permissions; - shared_memory->other_permissions = other_permissions; - shared_memory->backing_block = std::move(heap_block); - shared_memory->backing_block_offset = offset; - shared_memory->base_address = - kernel.CurrentProcess()->VMManager().GetHeapRegionBaseAddress() + offset; + shared_memory->name = name; return shared_memory; } -ResultCode SharedMemory::Map(Process& target_process, VAddr address, MemoryPermission permissions, - MemoryPermission other_permissions) { - const MemoryPermission own_other_permissions = - &target_process == owner_process ? this->permissions : this->other_permissions; - - // Automatically allocated memory blocks can only be mapped with other_permissions = DontCare - if (base_address == 0 && other_permissions != MemoryPermission::DontCare) { - return ERR_INVALID_MEMORY_PERMISSIONS; - } - - // Error out if the requested permissions don't match what the creator process allows. - if (static_cast<u32>(permissions) & ~static_cast<u32>(own_other_permissions)) { - LOG_ERROR(Kernel, "cannot map id={}, address=0x{:X} name={}, permissions don't match", - GetObjectId(), address, name); - return ERR_INVALID_MEMORY_PERMISSIONS; - } +ResultCode SharedMemory::Map(Process& target_process, VAddr address, std::size_t size, + Memory::MemoryPermission permission) { + const u64 page_count{(size + Memory::PageSize - 1) / Memory::PageSize}; - // Error out if the provided permissions are not compatible with what the creator process needs. - if (other_permissions != MemoryPermission::DontCare && - static_cast<u32>(this->permissions) & ~static_cast<u32>(other_permissions)) { - LOG_ERROR(Kernel, "cannot map id={}, address=0x{:X} name={}, permissions don't match", - GetObjectId(), address, name); - return ERR_INVALID_MEMORY_PERMISSIONS; + if (page_list.GetNumPages() != page_count) { + UNIMPLEMENTED_MSG("Page count does not match"); } - VAddr target_address = address; + Memory::MemoryPermission expected = + &target_process == owner_process ? 
owner_permission : user_permission; - // Map the memory block into the target process - auto result = target_process.VMManager().MapMemoryBlock( - target_address, backing_block, backing_block_offset, size, MemoryState::Shared); - if (result.Failed()) { - LOG_ERROR( - Kernel, - "cannot map id={}, target_address=0x{:X} name={}, error mapping to virtual memory", - GetObjectId(), target_address, name); - return result.Code(); + if (permission != expected) { + UNIMPLEMENTED_MSG("Permission does not match"); } - return target_process.VMManager().ReprotectRange(target_address, size, - ConvertPermissions(permissions)); -} - -ResultCode SharedMemory::Unmap(Process& target_process, VAddr address, u64 unmap_size) { - if (unmap_size != size) { - LOG_ERROR(Kernel, - "Invalid size passed to Unmap. Size must be equal to the size of the " - "memory managed. Shared memory size=0x{:016X}, Unmap size=0x{:016X}", - size, unmap_size); - return ERR_INVALID_SIZE; - } - - // TODO(Subv): Verify what happens if the application tries to unmap an address that is not - // mapped to a SharedMemory. - return target_process.VMManager().UnmapRange(address, size); -} - -VMAPermission SharedMemory::ConvertPermissions(MemoryPermission permission) { - u32 masked_permissions = - static_cast<u32>(permission) & static_cast<u32>(MemoryPermission::ReadWriteExecute); - return static_cast<VMAPermission>(masked_permissions); -} - -u8* SharedMemory::GetPointer(std::size_t offset) { - return backing_block->data() + backing_block_offset + offset; -} - -const u8* SharedMemory::GetPointer(std::size_t offset) const { - return backing_block->data() + backing_block_offset + offset; + return target_process.PageTable().MapPages(address, page_list, Memory::MemoryState::Shared, + permission); } } // namespace Kernel diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h index 014951d82..cd16d6412 100644 --- a/src/core/hle/kernel/shared_memory.h +++ b/src/core/hle/kernel/shared_memory.h @@ -8,8 +8,10 @@ #include <string> #include "common/common_types.h" +#include "core/device_memory.h" +#include "core/hle/kernel/memory/memory_block.h" +#include "core/hle/kernel/memory/page_linked_list.h" #include "core/hle/kernel/object.h" -#include "core/hle/kernel/physical_memory.h" #include "core/hle/kernel/process.h" #include "core/hle/result.h" @@ -17,63 +19,21 @@ namespace Kernel { class KernelCore; -/// Permissions for mapped shared memory blocks -enum class MemoryPermission : u32 { - None = 0, - Read = (1u << 0), - Write = (1u << 1), - ReadWrite = (Read | Write), - Execute = (1u << 2), - ReadExecute = (Read | Execute), - WriteExecute = (Write | Execute), - ReadWriteExecute = (Read | Write | Execute), - DontCare = (1u << 28) -}; - class SharedMemory final : public Object { public: - explicit SharedMemory(KernelCore& kernel); + explicit SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory); ~SharedMemory() override; - /** - * Creates a shared memory object. - * @param kernel The kernel instance to create a shared memory instance under. - * @param owner_process Process that created this shared memory object. - * @param size Size of the memory block. Must be page-aligned. - * @param permissions Permission restrictions applied to the process which created the block. - * @param other_permissions Permission restrictions applied to other processes mapping the - * block. - * @param address The address from which to map the Shared Memory. 
- * @param region If the address is 0, the shared memory will be allocated in this region of the - * linear heap. - * @param name Optional object name, used for debugging purposes. - */ - static std::shared_ptr<SharedMemory> Create(KernelCore& kernel, Process* owner_process, - u64 size, MemoryPermission permissions, - MemoryPermission other_permissions, - VAddr address = 0, - MemoryRegion region = MemoryRegion::BASE, - std::string name = "Unknown"); - - /** - * Creates a shared memory object from a block of memory managed by an HLE applet. - * @param kernel The kernel instance to create a shared memory instance under. - * @param heap_block Heap block of the HLE applet. - * @param offset The offset into the heap block that the SharedMemory will map. - * @param size Size of the memory block. Must be page-aligned. - * @param permissions Permission restrictions applied to the process which created the block. - * @param other_permissions Permission restrictions applied to other processes mapping the - * block. - * @param name Optional object name, used for debugging purposes. - */ - static std::shared_ptr<SharedMemory> CreateForApplet( - KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset, - u64 size, MemoryPermission permissions, MemoryPermission other_permissions, - std::string name = "Unknown Applet"); + static std::shared_ptr<SharedMemory> Create( + KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process, + Memory::PageLinkedList&& page_list, Memory::MemoryPermission owner_permission, + Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size, + std::string name); std::string GetTypeName() const override { return "SharedMemory"; } + std::string GetName() const override { return name; } @@ -83,71 +43,42 @@ public: return HANDLE_TYPE; } - /// Gets the size of the underlying memory block in bytes. - u64 GetSize() const { - return size; - } - - /** - * Converts the specified MemoryPermission into the equivalent VMAPermission. - * @param permission The MemoryPermission to convert. - */ - static VMAPermission ConvertPermissions(MemoryPermission permission); - /** * Maps a shared memory block to an address in the target process' address space - * @param target_process Process on which to map the memory block. + * @param target_process Process on which to map the memory block * @param address Address in system memory to map shared memory block to + * @param size Size of the shared memory block to map * @param permissions Memory block map permissions (specified by SVC field) - * @param other_permissions Memory block map other permissions (specified by SVC field) - */ - ResultCode Map(Process& target_process, VAddr address, MemoryPermission permissions, - MemoryPermission other_permissions); - - /** - * Unmaps a shared memory block from the specified address in system memory - * - * @param target_process Process from which to unmap the memory block. - * @param address Address in system memory where the shared memory block is mapped. - * @param unmap_size The amount of bytes to unmap from this shared memory instance. - * - * @return Result code of the unmap operation - * - * @pre The given size to unmap must be the same size as the amount of memory managed by - * the SharedMemory instance itself, otherwise ERR_INVALID_SIZE will be returned. 
*/ - ResultCode Unmap(Process& target_process, VAddr address, u64 unmap_size); + ResultCode Map(Process& target_process, VAddr address, std::size_t size, + Memory::MemoryPermission permission); /** * Gets a pointer to the shared memory block * @param offset Offset from the start of the shared memory block to get pointer * @return A pointer to the shared memory block from the specified offset */ - u8* GetPointer(std::size_t offset = 0); + u8* GetPointer(std::size_t offset = 0) { + return device_memory.GetPointer(physical_address + offset); + } /** - * Gets a constant pointer to the shared memory block + * Gets a pointer to the shared memory block * @param offset Offset from the start of the shared memory block to get pointer - * @return A constant pointer to the shared memory block from the specified offset + * @return A pointer to the shared memory block from the specified offset */ - const u8* GetPointer(std::size_t offset = 0) const; + const u8* GetPointer(std::size_t offset = 0) const { + return device_memory.GetPointer(physical_address + offset); + } private: - /// Backing memory for this shared memory block. - std::shared_ptr<PhysicalMemory> backing_block; - /// Offset into the backing block for this shared memory. - std::size_t backing_block_offset = 0; - /// Size of the memory block. Page-aligned. - u64 size = 0; - /// Permission restrictions applied to the process which created the block. - MemoryPermission permissions{}; - /// Permission restrictions applied to other processes mapping the block. - MemoryPermission other_permissions{}; - /// Process that created this shared memory block. - Process* owner_process; - /// Address of shared memory block in the owner process if specified. - VAddr base_address = 0; - /// Name of shared memory object. + Core::DeviceMemory& device_memory; + Process* owner_process{}; + Memory::PageLinkedList page_list; + Memory::MemoryPermission owner_permission{}; + Memory::MemoryPermission user_permission{}; + PAddr physical_address{}; + std::size_t size{}; std::string name; }; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 4ffc113c2..4134acf65 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -24,6 +24,8 @@ #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/memory/memory_block.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/mutex.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/readable_event.h" @@ -31,6 +33,7 @@ #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/svc.h" +#include "core/hle/kernel/svc_types.h" #include "core/hle/kernel/svc_wrap.h" #include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" @@ -42,7 +45,7 @@ #include "core/memory.h" #include "core/reporter.h" -namespace Kernel { +namespace Kernel::Svc { namespace { // Checks if address + size is greater than the given address @@ -58,8 +61,8 @@ constexpr u64 MAIN_MEMORY_SIZE = 0x200000000; // Helper function that performs the common sanity checks for svcMapMemory // and svcUnmapMemory. This is doable, as both functions perform their sanitizing // in the same order. 
-ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_addr, VAddr src_addr, - u64 size) { +ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr dst_addr, + VAddr src_addr, u64 size) { if (!Common::Is4KBAligned(dst_addr)) { LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr); return ERR_INVALID_ADDRESS; @@ -93,36 +96,33 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add return ERR_INVALID_ADDRESS_STATE; } - if (!vm_manager.IsWithinAddressSpace(src_addr, size)) { + if (!manager.IsInsideAddressSpace(src_addr, size)) { LOG_ERROR(Kernel_SVC, "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", src_addr, size); return ERR_INVALID_ADDRESS_STATE; } - if (!vm_manager.IsWithinStackRegion(dst_addr, size)) { + if (manager.IsOutsideStackRegion(dst_addr, size)) { LOG_ERROR(Kernel_SVC, "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}", dst_addr, size); return ERR_INVALID_MEMORY_RANGE; } - const VAddr dst_end_address = dst_addr + size; - if (dst_end_address > vm_manager.GetHeapRegionBaseAddress() && - vm_manager.GetHeapRegionEndAddress() > dst_addr) { + if (manager.IsInsideHeapRegion(dst_addr, size)) { LOG_ERROR(Kernel_SVC, "Destination does not fit within the heap region, addr=0x{:016X}, " - "size=0x{:016X}, end_addr=0x{:016X}", - dst_addr, size, dst_end_address); + "size=0x{:016X}", + dst_addr, size); return ERR_INVALID_MEMORY_RANGE; } - if (dst_end_address > vm_manager.GetMapRegionBaseAddress() && - vm_manager.GetMapRegionEndAddress() > dst_addr) { + if (manager.IsInsideAliasRegion(dst_addr, size)) { LOG_ERROR(Kernel_SVC, "Destination does not fit within the map region, addr=0x{:016X}, " - "size=0x{:016X}, end_addr=0x{:016X}", - dst_addr, size, dst_end_address); + "size=0x{:016X}", + dst_addr, size); return ERR_INVALID_MEMORY_RANGE; } @@ -177,13 +177,10 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s return ERR_INVALID_SIZE; } - auto& vm_manager = system.Kernel().CurrentProcess()->VMManager(); - const auto alloc_result = vm_manager.SetHeapSize(heap_size); - if (alloc_result.Failed()) { - return alloc_result.Code(); - } + auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; + + CASCADE_RESULT(*heap_addr, page_table.SetHeapSize(heap_size)); - *heap_addr = *alloc_result; return RESULT_SUCCESS; } @@ -194,63 +191,6 @@ static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_s return result; } -static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) { - LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot); - - if (!Common::Is4KBAligned(addr)) { - LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr); - return ERR_INVALID_ADDRESS; - } - - if (size == 0) { - LOG_ERROR(Kernel_SVC, "Size is 0"); - return ERR_INVALID_SIZE; - } - - if (!Common::Is4KBAligned(size)) { - LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size); - return ERR_INVALID_SIZE; - } - - if (!IsValidAddressRange(addr, size)) { - LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}", - addr, size); - return ERR_INVALID_ADDRESS_STATE; - } - - const auto permission = static_cast<MemoryPermission>(prot); - if (permission != MemoryPermission::None && permission != MemoryPermission::Read && - permission != MemoryPermission::ReadWrite) { - LOG_ERROR(Kernel_SVC, "Invalid 
memory permission specified, Got memory permission=0x{:08X}", - static_cast<u32>(permission)); - return ERR_INVALID_MEMORY_PERMISSIONS; - } - - auto* const current_process = system.Kernel().CurrentProcess(); - auto& vm_manager = current_process->VMManager(); - - if (!vm_manager.IsWithinAddressSpace(addr, size)) { - LOG_ERROR(Kernel_SVC, - "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, - size); - return ERR_INVALID_ADDRESS_STATE; - } - - const VMManager::VMAHandle iter = vm_manager.FindVMA(addr); - if (!vm_manager.IsValidHandle(iter)) { - LOG_ERROR(Kernel_SVC, "Unable to find VMA for address=0x{:016X}", addr); - return ERR_INVALID_ADDRESS_STATE; - } - - LOG_WARNING(Kernel_SVC, "Uniformity check on protected memory is not implemented."); - // TODO: Performs a uniformity check to make sure only protected memory is changed (it doesn't - // make sense to allow changing permissions on kernel memory itself, etc). - - const auto converted_permissions = SharedMemory::ConvertPermissions(permission); - - return vm_manager.ReprotectRange(addr, size, converted_permissions); -} - static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask, u32 attribute) { LOG_DEBUG(Kernel_SVC, @@ -274,30 +214,19 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si return ERR_INVALID_ADDRESS_STATE; } - const auto mem_attribute = static_cast<MemoryAttribute>(attribute); - const auto mem_mask = static_cast<MemoryAttribute>(mask); - const auto attribute_with_mask = mem_attribute | mem_mask; - - if (attribute_with_mask != mem_mask) { + const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)}; + if (attributes != static_cast<Memory::MemoryAttribute>(mask) || + (attributes | Memory::MemoryAttribute::Uncached) != Memory::MemoryAttribute::Uncached) { LOG_ERROR(Kernel_SVC, "Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}", attribute, mask); return ERR_INVALID_COMBINATION; } - if ((attribute_with_mask | MemoryAttribute::Uncached) != MemoryAttribute::Uncached) { - LOG_ERROR(Kernel_SVC, "Specified attribute isn't equal to MemoryAttributeUncached (8)."); - return ERR_INVALID_COMBINATION; - } - - auto& vm_manager = system.Kernel().CurrentProcess()->VMManager(); - if (!vm_manager.IsWithinAddressSpace(address, size)) { - LOG_ERROR(Kernel_SVC, - "Given address (0x{:016X}) is outside the bounds of the address space.", address); - return ERR_INVALID_ADDRESS_STATE; - } + auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; - return vm_manager.SetMemoryAttribute(address, size, mem_mask, mem_attribute); + return page_table.SetMemoryAttribute(address, size, static_cast<Memory::MemoryAttribute>(mask), + static_cast<Memory::MemoryAttribute>(attribute)); } /// Maps a memory range into a different range. 
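// [Illustrative sketch, not part of this change] The SetMemoryAttribute
// validation above collapses to two bitmask conditions: the attribute bits must
// be a subset of the mask, and only the Uncached bit (value 8, per the removed
// error message) may be touched. A standalone model with that assumed bit value:
#include <cstdint>

namespace sketch {
constexpr std::uint32_t kUncached = 1u << 3; // assumed: MemoryAttribute::Uncached == 8

constexpr bool IsValidAttributeUpdate(std::uint32_t mask, std::uint32_t attribute) {
    const std::uint32_t attributes = mask | attribute;
    // Same shape as the check above: attributes == mask, and nothing beyond Uncached.
    return attributes == mask && (attributes & ~kUncached) == 0;
}

static_assert(IsValidAttributeUpdate(kUncached, kUncached)); // set Uncached
static_assert(IsValidAttributeUpdate(kUncached, 0));         // clear Uncached
static_assert(!IsValidAttributeUpdate(0, kUncached));        // attribute outside mask
} // namespace sketch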
@@ -305,14 +234,14 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, src_addr, size); - auto& vm_manager = system.Kernel().CurrentProcess()->VMManager(); - const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size); + auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; - if (result.IsError()) { + if (const ResultCode result{MapUnmapMemorySanityChecks(page_table, dst_addr, src_addr, size)}; + result.IsError()) { return result; } - return vm_manager.MirrorMemory(dst_addr, src_addr, size, MemoryState::Stack); + return page_table.Map(dst_addr, src_addr, size); } /// Unmaps a region that was previously mapped with svcMapMemory @@ -320,21 +249,14 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, src_addr, size); - auto& vm_manager = system.Kernel().CurrentProcess()->VMManager(); - const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size); + auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; - if (result.IsError()) { + if (const ResultCode result{MapUnmapMemorySanityChecks(page_table, dst_addr, src_addr, size)}; + result.IsError()) { return result; } - const auto unmap_res = vm_manager.UnmapRange(dst_addr, size); - - // Reprotect the source mapping on success - if (unmap_res.IsSuccess()) { - ASSERT(vm_manager.ReprotectRange(src_addr, size, VMAPermission::ReadWrite).IsSuccess()); - } - - return unmap_res; + return page_table.Unmap(dst_addr, src_addr, size); } /// Connect to an OS service given the port name, returns the handle to the port to out @@ -367,6 +289,8 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, return ERR_NOT_FOUND; } + ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(ResourceType::Sessions, 1)); + auto client_port = it->second; std::shared_ptr<ClientSession> client_session; @@ -538,7 +462,7 @@ static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_hand "requesting_current_thread_handle=0x{:08X}", holding_thread_handle, mutex_addr, requesting_thread_handle); - if (Memory::IsKernelVirtualAddress(mutex_addr)) { + if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) { LOG_ERROR(Kernel_SVC, "Mutex Address is a kernel virtual address, mutex_addr={:016X}", mutex_addr); return ERR_INVALID_ADDRESS_STATE; @@ -558,7 +482,7 @@ static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_hand static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) { LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr); - if (Memory::IsKernelVirtualAddress(mutex_addr)) { + if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) { LOG_ERROR(Kernel_SVC, "Mutex Address is a kernel virtual address, mutex_addr={:016X}", mutex_addr); return ERR_INVALID_ADDRESS_STATE; @@ -683,7 +607,6 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) { auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); const auto thread_processor_id = current_thread->GetProcessorID(); system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace(); - ASSERT(false); system.Kernel().CurrentProcess()->PrepareForTermination(); @@ -785,35 +708,35 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha return RESULT_SUCCESS; case 
GetInfoType::MapRegionBaseAddr: - *result = process->VMManager().GetMapRegionBaseAddress(); + *result = process->PageTable().GetAliasRegionStart(); return RESULT_SUCCESS; case GetInfoType::MapRegionSize: - *result = process->VMManager().GetMapRegionSize(); + *result = process->PageTable().GetAliasRegionSize(); return RESULT_SUCCESS; case GetInfoType::HeapRegionBaseAddr: - *result = process->VMManager().GetHeapRegionBaseAddress(); + *result = process->PageTable().GetHeapRegionStart(); return RESULT_SUCCESS; case GetInfoType::HeapRegionSize: - *result = process->VMManager().GetHeapRegionSize(); + *result = process->PageTable().GetHeapRegionSize(); return RESULT_SUCCESS; case GetInfoType::ASLRRegionBaseAddr: - *result = process->VMManager().GetASLRRegionBaseAddress(); + *result = process->PageTable().GetAliasCodeRegionStart(); return RESULT_SUCCESS; case GetInfoType::ASLRRegionSize: - *result = process->VMManager().GetASLRRegionSize(); + *result = process->PageTable().GetAliasCodeRegionSize(); return RESULT_SUCCESS; case GetInfoType::StackRegionBaseAddr: - *result = process->VMManager().GetStackRegionBaseAddress(); + *result = process->PageTable().GetStackRegionStart(); return RESULT_SUCCESS; case GetInfoType::StackRegionSize: - *result = process->VMManager().GetStackRegionSize(); + *result = process->PageTable().GetStackRegionSize(); return RESULT_SUCCESS; case GetInfoType::TotalPhysicalMemoryAvailable: @@ -987,20 +910,29 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) return ERR_INVALID_MEMORY_RANGE; } - Process* const current_process = system.Kernel().CurrentProcess(); - auto& vm_manager = current_process->VMManager(); + Process* const current_process{system.Kernel().CurrentProcess()}; + auto& page_table{current_process->PageTable()}; if (current_process->GetSystemResourceSize() == 0) { LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); return ERR_INVALID_STATE; } - if (!vm_manager.IsWithinMapRegion(addr, size)) { - LOG_ERROR(Kernel_SVC, "Range not within map region"); + if (!page_table.IsInsideAddressSpace(addr, size)) { + LOG_ERROR(Kernel_SVC, + "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, + size); + return ERR_INVALID_MEMORY_RANGE; + } + + if (page_table.IsOutsideAliasRegion(addr, size)) { + LOG_ERROR(Kernel_SVC, + "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, + size); return ERR_INVALID_MEMORY_RANGE; } - return vm_manager.MapPhysicalMemory(addr, size); + return page_table.MapPhysicalMemory(addr, size); } /// Unmaps memory previously mapped via MapPhysicalMemory @@ -1027,20 +959,29 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size return ERR_INVALID_MEMORY_RANGE; } - Process* const current_process = system.Kernel().CurrentProcess(); - auto& vm_manager = current_process->VMManager(); + Process* const current_process{system.Kernel().CurrentProcess()}; + auto& page_table{current_process->PageTable()}; if (current_process->GetSystemResourceSize() == 0) { LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); return ERR_INVALID_STATE; } - if (!vm_manager.IsWithinMapRegion(addr, size)) { - LOG_ERROR(Kernel_SVC, "Range not within map region"); + if (!page_table.IsInsideAddressSpace(addr, size)) { + LOG_ERROR(Kernel_SVC, + "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, + size); + return ERR_INVALID_MEMORY_RANGE; + } + + if (page_table.IsOutsideAliasRegion(addr, size)) { + LOG_ERROR(Kernel_SVC, + "Address is not within 
the alias region, addr=0x{:016X}, size=0x{:016X}", addr, + size); return ERR_INVALID_MEMORY_RANGE; } - return vm_manager.UnmapPhysicalMemory(addr, size); + return page_table.UnmapPhysicalMemory(addr, size); } /// Sets the thread activity @@ -1197,74 +1138,49 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han return ERR_INVALID_ADDRESS_STATE; } - const auto permissions_type = static_cast<MemoryPermission>(permissions); - if (permissions_type != MemoryPermission::Read && - permissions_type != MemoryPermission::ReadWrite) { + const auto permission_type = static_cast<Memory::MemoryPermission>(permissions); + if ((permission_type | Memory::MemoryPermission::Write) != + Memory::MemoryPermission::ReadAndWrite) { LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}", permissions); return ERR_INVALID_MEMORY_PERMISSIONS; } - auto* const current_process = system.Kernel().CurrentProcess(); - auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle); - if (!shared_memory) { - LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", - shared_memory_handle); - return ERR_INVALID_HANDLE; - } + auto* const current_process{system.Kernel().CurrentProcess()}; + auto& page_table{current_process->PageTable()}; - const auto& vm_manager = current_process->VMManager(); - if (!vm_manager.IsWithinASLRRegion(addr, size)) { - LOG_ERROR(Kernel_SVC, "Region is not within the ASLR region. addr=0x{:016X}, size={:016X}", + if (page_table.IsInvalidRegion(addr, size)) { + LOG_ERROR(Kernel_SVC, + "Addr does not fit within the valid region, addr=0x{:016X}, " + "size=0x{:016X}", addr, size); return ERR_INVALID_MEMORY_RANGE; } - return shared_memory->Map(*current_process, addr, permissions_type, MemoryPermission::DontCare); -} - -static ResultCode UnmapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr, - u64 size) { - LOG_WARNING(Kernel_SVC, "called, shared_memory_handle=0x{:08X}, addr=0x{:X}, size=0x{:X}", - shared_memory_handle, addr, size); - - if (!Common::Is4KBAligned(addr)) { - LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr); - return ERR_INVALID_ADDRESS; - } - - if (size == 0) { - LOG_ERROR(Kernel_SVC, "Size is 0"); - return ERR_INVALID_SIZE; - } - - if (!Common::Is4KBAligned(size)) { - LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size); - return ERR_INVALID_SIZE; + if (page_table.IsInsideHeapRegion(addr, size)) { + LOG_ERROR(Kernel_SVC, + "Addr does not fit within the heap region, addr=0x{:016X}, " + "size=0x{:016X}", + addr, size); + return ERR_INVALID_MEMORY_RANGE; } - if (!IsValidAddressRange(addr, size)) { - LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}", + if (page_table.IsInsideAliasRegion(addr, size)) { + LOG_ERROR(Kernel_SVC, + "Address does not fit within the map region, addr=0x{:016X}, " + "size=0x{:016X}", addr, size); - return ERR_INVALID_ADDRESS_STATE; + return ERR_INVALID_MEMORY_RANGE; } - auto* const current_process = system.Kernel().CurrentProcess(); - auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle); + auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)}; if (!shared_memory) { LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", shared_memory_handle); return ERR_INVALID_HANDLE; } - const auto& vm_manager = current_process->VMManager(); 
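// [Illustrative sketch, not part of this change] The MapSharedMemory permission
// test above, (permission_type | Write) != ReadAndWrite, accepts exactly Read
// and ReadAndWrite: OR-ing in the Write bit produces ReadAndWrite only for
// those two inputs. A standalone model, assuming Read = bit 0 and Write = bit 1:
#include <cstdint>

namespace sketch {
constexpr std::uint32_t kRead = 1u << 0;
constexpr std::uint32_t kWrite = 1u << 1;
constexpr std::uint32_t kReadAndWrite = kRead | kWrite;

constexpr bool IsMappablePermission(std::uint32_t permission) {
    return (permission | kWrite) == kReadAndWrite;
}

static_assert(IsMappablePermission(kRead));
static_assert(IsMappablePermission(kReadAndWrite));
static_assert(!IsMappablePermission(0));      // None rejected
static_assert(!IsMappablePermission(kWrite)); // Write-only rejected
} // namespace sketch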
- if (!vm_manager.IsWithinASLRRegion(addr, size)) { - LOG_ERROR(Kernel_SVC, "Region is not within the ASLR region. addr=0x{:016X}, size={:016X}", - addr, size); - return ERR_INVALID_MEMORY_RANGE; - } - - return shared_memory->Unmap(*current_process, addr, size); + return shared_memory->Map(*current_process, addr, size, permission_type); } static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address, @@ -1279,18 +1195,17 @@ static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_add return ERR_INVALID_HANDLE; } - auto& memory = system.Memory(); - const auto& vm_manager = process->VMManager(); - const MemoryInfo memory_info = vm_manager.QueryMemory(address); - - memory.Write64(memory_info_address, memory_info.base_address); - memory.Write64(memory_info_address + 8, memory_info.size); - memory.Write32(memory_info_address + 16, memory_info.state); - memory.Write32(memory_info_address + 20, memory_info.attributes); - memory.Write32(memory_info_address + 24, memory_info.permission); - memory.Write32(memory_info_address + 32, memory_info.ipc_ref_count); - memory.Write32(memory_info_address + 28, memory_info.device_ref_count); - memory.Write32(memory_info_address + 36, 0); + auto& memory{system.Memory()}; + const auto memory_info{process->PageTable().QueryInfo(address).GetSvcMemoryInfo()}; + + memory.Write64(memory_info_address + 0x00, memory_info.addr); + memory.Write64(memory_info_address + 0x08, memory_info.size); + memory.Write32(memory_info_address + 0x10, static_cast<u32>(memory_info.state) & 0xff); + memory.Write32(memory_info_address + 0x14, static_cast<u32>(memory_info.attr)); + memory.Write32(memory_info_address + 0x18, static_cast<u32>(memory_info.perm)); + memory.Write32(memory_info_address + 0x1c, memory_info.ipc_refcount); + memory.Write32(memory_info_address + 0x20, memory_info.device_refcount); + memory.Write32(memory_info_address + 0x24, 0); // Page info appears to be currently unused by the kernel and is always set to zero. memory.Write32(page_info_address, 0); @@ -1314,142 +1229,6 @@ static ResultCode QueryMemory32(Core::System& system, u32 memory_info_address, return QueryMemory(system, memory_info_address, page_info_address, query_address); } -static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address, - u64 src_address, u64 size) { - LOG_DEBUG(Kernel_SVC, - "called. 
process_handle=0x{:08X}, dst_address=0x{:016X}, " - "src_address=0x{:016X}, size=0x{:016X}", - process_handle, dst_address, src_address, size); - - if (!Common::Is4KBAligned(src_address)) { - LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", - src_address); - return ERR_INVALID_ADDRESS; - } - - if (!Common::Is4KBAligned(dst_address)) { - LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", - dst_address); - return ERR_INVALID_ADDRESS; - } - - if (size == 0 || !Common::Is4KBAligned(size)) { - LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size); - return ERR_INVALID_SIZE; - } - - if (!IsValidAddressRange(dst_address, size)) { - LOG_ERROR(Kernel_SVC, - "Destination address range overflows the address space (dst_address=0x{:016X}, " - "size=0x{:016X}).", - dst_address, size); - return ERR_INVALID_ADDRESS_STATE; - } - - if (!IsValidAddressRange(src_address, size)) { - LOG_ERROR(Kernel_SVC, - "Source address range overflows the address space (src_address=0x{:016X}, " - "size=0x{:016X}).", - src_address, size); - return ERR_INVALID_ADDRESS_STATE; - } - - const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); - auto process = handle_table.Get<Process>(process_handle); - if (!process) { - LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", - process_handle); - return ERR_INVALID_HANDLE; - } - - auto& vm_manager = process->VMManager(); - if (!vm_manager.IsWithinAddressSpace(src_address, size)) { - LOG_ERROR(Kernel_SVC, - "Source address range is not within the address space (src_address=0x{:016X}, " - "size=0x{:016X}).", - src_address, size); - return ERR_INVALID_ADDRESS_STATE; - } - - if (!vm_manager.IsWithinASLRRegion(dst_address, size)) { - LOG_ERROR(Kernel_SVC, - "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " - "size=0x{:016X}).", - dst_address, size); - return ERR_INVALID_MEMORY_RANGE; - } - - return vm_manager.MapCodeMemory(dst_address, src_address, size); -} - -static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle, - u64 dst_address, u64 src_address, u64 size) { - LOG_DEBUG(Kernel_SVC, - "called. 
process_handle=0x{:08X}, dst_address=0x{:016X}, src_address=0x{:016X}, " - "size=0x{:016X}", - process_handle, dst_address, src_address, size); - - if (!Common::Is4KBAligned(dst_address)) { - LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", - dst_address); - return ERR_INVALID_ADDRESS; - } - - if (!Common::Is4KBAligned(src_address)) { - LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", - src_address); - return ERR_INVALID_ADDRESS; - } - - if (size == 0 || !Common::Is4KBAligned(size)) { - LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size); - return ERR_INVALID_SIZE; - } - - if (!IsValidAddressRange(dst_address, size)) { - LOG_ERROR(Kernel_SVC, - "Destination address range overflows the address space (dst_address=0x{:016X}, " - "size=0x{:016X}).", - dst_address, size); - return ERR_INVALID_ADDRESS_STATE; - } - - if (!IsValidAddressRange(src_address, size)) { - LOG_ERROR(Kernel_SVC, - "Source address range overflows the address space (src_address=0x{:016X}, " - "size=0x{:016X}).", - src_address, size); - return ERR_INVALID_ADDRESS_STATE; - } - - const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); - auto process = handle_table.Get<Process>(process_handle); - if (!process) { - LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", - process_handle); - return ERR_INVALID_HANDLE; - } - - auto& vm_manager = process->VMManager(); - if (!vm_manager.IsWithinAddressSpace(src_address, size)) { - LOG_ERROR(Kernel_SVC, - "Source address range is not within the address space (src_address=0x{:016X}, " - "size=0x{:016X}).", - src_address, size); - return ERR_INVALID_ADDRESS_STATE; - } - - if (!vm_manager.IsWithinASLRRegion(dst_address, size)) { - LOG_ERROR(Kernel_SVC, - "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " - "size=0x{:016X}).", - dst_address, size); - return ERR_INVALID_MEMORY_RANGE; - } - - return vm_manager.UnmapCodeMemory(dst_address, src_address, size); -} - /// Exits the current process static void ExitProcess(Core::System& system) { auto* current_process = system.Kernel().CurrentProcess(); @@ -1506,6 +1285,9 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e } auto& kernel = system.Kernel(); + + ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(ResourceType::Threads, 1)); + CASCADE_RESULT(std::shared_ptr<Thread> thread, Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top, *current_process)); @@ -1610,7 +1392,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}", mutex_addr, condition_variable_addr, thread_handle, nano_seconds); - if (Memory::IsKernelVirtualAddress(mutex_addr)) { + if (Core::Memory::IsKernelVirtualAddress(mutex_addr)) { LOG_ERROR( Kernel_SVC, "Given mutex address must not be within the kernel address space. address=0x{:016X}", @@ -1741,7 +1523,7 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, type, value, timeout); // If the passed address is a kernel virtual address, return invalid memory state. 
- if (Memory::IsKernelVirtualAddress(address)) { + if (Core::Memory::IsKernelVirtualAddress(address)) { LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address); return ERR_INVALID_ADDRESS_STATE; } @@ -1769,7 +1551,7 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, address, type, value, num_to_wake); // If the passed address is a kernel virtual address, return invalid memory state. - if (Memory::IsKernelVirtualAddress(address)) { + if (Core::Memory::IsKernelVirtualAddress(address)) { LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address); return ERR_INVALID_ADDRESS_STATE; } @@ -1865,9 +1647,9 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd return ERR_INVALID_ADDRESS_STATE; } - const auto perms = static_cast<MemoryPermission>(permissions); - if (perms != MemoryPermission::None && perms != MemoryPermission::Read && - perms != MemoryPermission::ReadWrite) { + const auto perms{static_cast<Memory::MemoryPermission>(permissions)}; + if (perms > Memory::MemoryPermission::ReadAndWrite || + perms == Memory::MemoryPermission::Write) { LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})", permissions); return ERR_INVALID_MEMORY_PERMISSIONS; @@ -1890,111 +1672,6 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd return RESULT_SUCCESS; } -static ResultCode MapTransferMemory(Core::System& system, Handle handle, VAddr address, u64 size, - u32 permission_raw) { - LOG_DEBUG(Kernel_SVC, - "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}, permissions=0x{:08X}", - handle, address, size, permission_raw); - - if (!Common::Is4KBAligned(address)) { - LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (size=0x{:016X}).", - address); - return ERR_INVALID_ADDRESS; - } - - if (size == 0 || !Common::Is4KBAligned(size)) { - LOG_ERROR(Kernel_SVC, - "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).", - size); - return ERR_INVALID_SIZE; - } - - if (!IsValidAddressRange(address, size)) { - LOG_ERROR(Kernel_SVC, - "Given address and size overflows the 64-bit range (address=0x{:016X}, " - "size=0x{:016X}).", - address, size); - return ERR_INVALID_ADDRESS_STATE; - } - - const auto permissions = static_cast<MemoryPermission>(permission_raw); - if (permissions != MemoryPermission::None && permissions != MemoryPermission::Read && - permissions != MemoryPermission::ReadWrite) { - LOG_ERROR(Kernel_SVC, "Invalid transfer memory permissions given (permissions=0x{:08X}).", - permission_raw); - return ERR_INVALID_STATE; - } - - const auto& kernel = system.Kernel(); - const auto* const current_process = kernel.CurrentProcess(); - const auto& handle_table = current_process->GetHandleTable(); - - auto transfer_memory = handle_table.Get<TransferMemory>(handle); - if (!transfer_memory) { - LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).", - handle); - return ERR_INVALID_HANDLE; - } - - if (!current_process->VMManager().IsWithinASLRRegion(address, size)) { - LOG_ERROR(Kernel_SVC, - "Given address and size don't fully fit within the ASLR region " - "(address=0x{:016X}, size=0x{:016X}).", - address, size); - return ERR_INVALID_MEMORY_RANGE; - } - - return transfer_memory->MapMemory(address, size, permissions); -} - -static ResultCode UnmapTransferMemory(Core::System& system, Handle handle, VAddr address, - u64 size) { - LOG_DEBUG(Kernel_SVC, "called. 
handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}", handle, - address, size); - - if (!Common::Is4KBAligned(address)) { - LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (size=0x{:016X}).", - address); - return ERR_INVALID_ADDRESS; - } - - if (size == 0 || !Common::Is4KBAligned(size)) { - LOG_ERROR(Kernel_SVC, - "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).", - size); - return ERR_INVALID_SIZE; - } - - if (!IsValidAddressRange(address, size)) { - LOG_ERROR(Kernel_SVC, - "Given address and size overflows the 64-bit range (address=0x{:016X}, " - "size=0x{:016X}).", - address, size); - return ERR_INVALID_ADDRESS_STATE; - } - - const auto& kernel = system.Kernel(); - const auto* const current_process = kernel.CurrentProcess(); - const auto& handle_table = current_process->GetHandleTable(); - - auto transfer_memory = handle_table.Get<TransferMemory>(handle); - if (!transfer_memory) { - LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).", - handle); - return ERR_INVALID_HANDLE; - } - - if (!current_process->VMManager().IsWithinASLRRegion(address, size)) { - LOG_ERROR(Kernel_SVC, - "Given address and size don't fully fit within the ASLR region " - "(address=0x{:016X}, size=0x{:016X}).", - address, size); - return ERR_INVALID_MEMORY_RANGE; - } - - return transfer_memory->UnmapMemory(address, size); -} - static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core, u64* mask) { LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle); @@ -2073,52 +1750,6 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, return RESULT_SUCCESS; } -static ResultCode CreateSharedMemory(Core::System& system, Handle* handle, u64 size, - u32 local_permissions, u32 remote_permissions) { - LOG_TRACE(Kernel_SVC, "called, size=0x{:X}, localPerms=0x{:08X}, remotePerms=0x{:08X}", size, - local_permissions, remote_permissions); - if (size == 0) { - LOG_ERROR(Kernel_SVC, "Size is 0"); - return ERR_INVALID_SIZE; - } - if (!Common::Is4KBAligned(size)) { - LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size); - return ERR_INVALID_SIZE; - } - - if (size >= MAIN_MEMORY_SIZE) { - LOG_ERROR(Kernel_SVC, "Size is not less than 8GB, 0x{:016X}", size); - return ERR_INVALID_SIZE; - } - - const auto local_perms = static_cast<MemoryPermission>(local_permissions); - if (local_perms != MemoryPermission::Read && local_perms != MemoryPermission::ReadWrite) { - LOG_ERROR(Kernel_SVC, - "Invalid local memory permissions, expected Read or ReadWrite but got " - "local_permissions={}", - static_cast<u32>(local_permissions)); - return ERR_INVALID_MEMORY_PERMISSIONS; - } - - const auto remote_perms = static_cast<MemoryPermission>(remote_permissions); - if (remote_perms != MemoryPermission::Read && remote_perms != MemoryPermission::ReadWrite && - remote_perms != MemoryPermission::DontCare) { - LOG_ERROR(Kernel_SVC, - "Invalid remote memory permissions, expected Read, ReadWrite or DontCare but got " - "remote_permissions={}", - static_cast<u32>(remote_permissions)); - return ERR_INVALID_MEMORY_PERMISSIONS; - } - - auto& kernel = system.Kernel(); - auto process = kernel.CurrentProcess(); - auto& handle_table = process->GetHandleTable(); - auto shared_mem_handle = SharedMemory::Create(kernel, process, size, local_perms, remote_perms); - - CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle)); - return RESULT_SUCCESS; -} - static ResultCode CreateEvent(Core::System& system, Handle* 
write_handle, Handle* read_handle) { LOG_DEBUG(Kernel_SVC, "called"); @@ -2305,11 +1936,10 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes, } const auto& kernel = system.Kernel(); - const auto& vm_manager = kernel.CurrentProcess()->VMManager(); const auto total_copy_size = out_process_ids_size * sizeof(u64); - if (out_process_ids_size > 0 && - !vm_manager.IsWithinAddressSpace(out_process_ids, total_copy_size)) { + if (out_process_ids_size > 0 && !kernel.CurrentProcess()->PageTable().IsInsideAddressSpace( + out_process_ids, total_copy_size)) { LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", out_process_ids, out_process_ids + total_copy_size); return ERR_INVALID_ADDRESS_STATE; @@ -2345,11 +1975,10 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd } const auto* const current_process = system.Kernel().CurrentProcess(); - const auto& vm_manager = current_process->VMManager(); const auto total_copy_size = out_thread_ids_size * sizeof(u64); if (out_thread_ids_size > 0 && - !vm_manager.IsWithinAddressSpace(out_thread_ids, total_copy_size)) { + !current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) { LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", out_thread_ids, out_thread_ids + total_copy_size); return ERR_INVALID_ADDRESS_STATE; @@ -2510,7 +2139,7 @@ static const FunctionDef SVC_Table_32[] = { static const FunctionDef SVC_Table_64[] = { {0x00, nullptr, "Unknown"}, {0x01, SvcWrap64<SetHeapSize>, "SetHeapSize"}, - {0x02, SvcWrap64<SetMemoryPermission>, "SetMemoryPermission"}, + {0x02, nullptr, "SetMemoryPermission"}, {0x03, SvcWrap64<SetMemoryAttribute>, "SetMemoryAttribute"}, {0x04, SvcWrap64<MapMemory>, "MapMemory"}, {0x05, SvcWrap64<UnmapMemory>, "UnmapMemory"}, @@ -2528,7 +2157,7 @@ static const FunctionDef SVC_Table_64[] = { {0x11, SvcWrap64<SignalEvent>, "SignalEvent"}, {0x12, SvcWrap64<ClearEvent>, "ClearEvent"}, {0x13, SvcWrap64<MapSharedMemory>, "MapSharedMemory"}, - {0x14, SvcWrap64<UnmapSharedMemory>, "UnmapSharedMemory"}, + {0x14, nullptr, "UnmapSharedMemory"}, {0x15, SvcWrap64<CreateTransferMemory>, "CreateTransferMemory"}, {0x16, SvcWrap64<CloseHandle>, "CloseHandle"}, {0x17, SvcWrap64<ResetSignal>, "ResetSignal"}, @@ -2588,9 +2217,9 @@ static const FunctionDef SVC_Table_64[] = { {0x4D, nullptr, "SleepSystem"}, {0x4E, nullptr, "ReadWriteRegister"}, {0x4F, nullptr, "SetProcessActivity"}, - {0x50, SvcWrap64<CreateSharedMemory>, "CreateSharedMemory"}, - {0x51, SvcWrap64<MapTransferMemory>, "MapTransferMemory"}, - {0x52, SvcWrap64<UnmapTransferMemory>, "UnmapTransferMemory"}, + {0x50, nullptr, "CreateSharedMemory"}, + {0x51, nullptr, "MapTransferMemory"}, + {0x52, nullptr, "UnmapTransferMemory"}, {0x53, nullptr, "CreateInterruptEvent"}, {0x54, nullptr, "QueryPhysicalAddress"}, {0x55, nullptr, "QueryIoMapping"}, @@ -2627,8 +2256,8 @@ static const FunctionDef SVC_Table_64[] = { {0x74, nullptr, "MapProcessMemory"}, {0x75, nullptr, "UnmapProcessMemory"}, {0x76, SvcWrap64<QueryProcessMemory>, "QueryProcessMemory"}, - {0x77, SvcWrap64<MapProcessCodeMemory>, "MapProcessCodeMemory"}, - {0x78, SvcWrap64<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, + {0x77, nullptr, "MapProcessCodeMemory"}, + {0x78, nullptr, "UnmapProcessCodeMemory"}, {0x79, nullptr, "CreateProcess"}, {0x7A, nullptr, "StartProcess"}, {0x7B, nullptr, "TerminateProcess"}, @@ -2656,7 +2285,7 @@ static const FunctionDef* GetSVCInfo64(u32 
func_num) { MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); -void CallSVC(Core::System& system, u32 immediate) { +void Call(Core::System& system, u32 immediate) { MICROPROFILE_SCOPE(Kernel_SVC); // Lock the global kernel mutex when we enter the kernel HLE. @@ -2675,4 +2304,4 @@ void CallSVC(Core::System& system, u32 immediate) { } } -} // namespace Kernel +} // namespace Kernel::Svc diff --git a/src/core/hle/kernel/svc.h b/src/core/hle/kernel/svc.h index c5539ac1c..46e64277e 100644 --- a/src/core/hle/kernel/svc.h +++ b/src/core/hle/kernel/svc.h @@ -10,8 +10,8 @@ namespace Core { class System; } -namespace Kernel { +namespace Kernel::Svc { -void CallSVC(Core::System& system, u32 immediate); +void Call(Core::System& system, u32 immediate); -} // namespace Kernel +} // namespace Kernel::Svc diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h new file mode 100644 index 000000000..986724beb --- /dev/null +++ b/src/core/hle/kernel/svc_types.h @@ -0,0 +1,68 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Kernel::Svc { + +enum class MemoryState : u32 { + Free = 0x00, + Io = 0x01, + Static = 0x02, + Code = 0x03, + CodeData = 0x04, + Normal = 0x05, + Shared = 0x06, + Alias = 0x07, + AliasCode = 0x08, + AliasCodeData = 0x09, + Ipc = 0x0A, + Stack = 0x0B, + ThreadLocal = 0x0C, + Transfered = 0x0D, + SharedTransfered = 0x0E, + SharedCode = 0x0F, + Inaccessible = 0x10, + NonSecureIpc = 0x11, + NonDeviceIpc = 0x12, + Kernel = 0x13, + GeneratedCode = 0x14, + CodeOut = 0x15, +}; +DECLARE_ENUM_FLAG_OPERATORS(MemoryState); + +enum class MemoryAttribute : u32 { + Locked = (1 << 0), + IpcLocked = (1 << 1), + DeviceShared = (1 << 2), + Uncached = (1 << 3), +}; +DECLARE_ENUM_FLAG_OPERATORS(MemoryAttribute); + +enum class MemoryPermission : u32 { + None = (0 << 0), + Read = (1 << 0), + Write = (1 << 1), + Execute = (1 << 2), + ReadWrite = Read | Write, + ReadExecute = Read | Execute, + DontCare = (1 << 28), +}; +DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission); + +struct MemoryInfo { + u64 addr{}; + u64 size{}; + MemoryState state{}; + MemoryAttribute attr{}; + MemoryPermission perm{}; + u32 ipc_refcount{}; + u32 device_refcount{}; + u32 padding{}; +}; + +} // namespace Kernel::Svc diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 83e956036..4c0451c01 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -85,6 +85,7 @@ void Thread::ResumeFromWait() { ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects"); switch (status) { + case ThreadStatus::Paused: case ThreadStatus::WaitSynch: case ThreadStatus::WaitHLEEvent: case ThreadStatus::WaitSleep: @@ -92,6 +93,7 @@ void Thread::ResumeFromWait() { case ThreadStatus::WaitMutex: case ThreadStatus::WaitCondVar: case ThreadStatus::WaitArb: + case ThreadStatus::Dormant: break; case ThreadStatus::Ready: diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp index f2d3f8b49..765f408c3 100644 --- a/src/core/hle/kernel/transfer_memory.cpp +++ b/src/core/hle/kernel/transfer_memory.cpp @@ -2,17 +2,16 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
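// The Svc enums added in svc_types.h above are bit flags, and
// DECLARE_ENUM_FLAG_OPERATORS gives them the usual bitwise operators. A
// minimal usage sketch (assuming the common_funcs.h macro defines the
// operators as constexpr, which is typical):
//
//     using Kernel::Svc::MemoryAttribute;
//     constexpr auto attr = MemoryAttribute::Locked | MemoryAttribute::Uncached;
//     static_assert((attr & MemoryAttribute::Locked) == MemoryAttribute::Locked);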
-#include "core/hle/kernel/errors.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/transfer_memory.h" #include "core/hle/result.h" #include "core/memory.h" namespace Kernel { -TransferMemory::TransferMemory(KernelCore& kernel, Memory::Memory& memory) +TransferMemory::TransferMemory(KernelCore& kernel, Core::Memory::Memory& memory) : Object{kernel}, memory{memory} {} TransferMemory::~TransferMemory() { @@ -20,14 +19,15 @@ TransferMemory::~TransferMemory() { Reset(); } -std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, Memory::Memory& memory, - VAddr base_address, u64 size, - MemoryPermission permissions) { +std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, + Core::Memory::Memory& memory, + VAddr base_address, std::size_t size, + Memory::MemoryPermission permissions) { std::shared_ptr<TransferMemory> transfer_memory{ std::make_shared<TransferMemory>(kernel, memory)}; transfer_memory->base_address = base_address; - transfer_memory->memory_size = size; + transfer_memory->size = size; transfer_memory->owner_permissions = permissions; transfer_memory->owner_process = kernel.CurrentProcess(); @@ -38,98 +38,12 @@ const u8* TransferMemory::GetPointer() const { return memory.GetPointer(base_address); } -u64 TransferMemory::GetSize() const { - return memory_size; -} - -ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission permissions) { - if (memory_size != size) { - return ERR_INVALID_SIZE; - } - - if (owner_permissions != permissions) { - return ERR_INVALID_STATE; - } - - if (is_mapped) { - return ERR_INVALID_STATE; - } - - backing_block = std::make_shared<PhysicalMemory>(size); - - const auto map_state = owner_permissions == MemoryPermission::None - ? 
MemoryState::TransferMemoryIsolated - : MemoryState::TransferMemory; - auto& vm_manager = owner_process->VMManager(); - const auto map_result = vm_manager.MapMemoryBlock(address, backing_block, 0, size, map_state); - if (map_result.Failed()) { - return map_result.Code(); - } - - is_mapped = true; - return RESULT_SUCCESS; -} - ResultCode TransferMemory::Reserve() { - auto& vm_manager{owner_process->VMManager()}; - const auto check_range_result{vm_manager.CheckRangeState( - base_address, memory_size, MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, - MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, VMAPermission::All, - VMAPermission::ReadWrite, MemoryAttribute::Mask, MemoryAttribute::None, - MemoryAttribute::IpcAndDeviceMapped)}; - - if (check_range_result.Failed()) { - return check_range_result.Code(); - } - - auto [state_, permissions_, attribute] = *check_range_result; - - if (const auto result{vm_manager.ReprotectRange( - base_address, memory_size, SharedMemory::ConvertPermissions(owner_permissions))}; - result.IsError()) { - return result; - } - - return vm_manager.SetMemoryAttribute(base_address, memory_size, MemoryAttribute::Mask, - attribute | MemoryAttribute::Locked); + return owner_process->PageTable().ReserveTransferMemory(base_address, size, owner_permissions); } ResultCode TransferMemory::Reset() { - auto& vm_manager{owner_process->VMManager()}; - if (const auto result{vm_manager.CheckRangeState( - base_address, memory_size, - MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, - MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, VMAPermission::None, - VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::Locked, - MemoryAttribute::IpcAndDeviceMapped)}; - result.Failed()) { - return result.Code(); - } - - if (const auto result{ - vm_manager.ReprotectRange(base_address, memory_size, VMAPermission::ReadWrite)}; - result.IsError()) { - return result; - } - - return vm_manager.SetMemoryAttribute(base_address, memory_size, MemoryAttribute::Mask, - MemoryAttribute::None); -} - -ResultCode TransferMemory::UnmapMemory(VAddr address, u64 size) { - if (memory_size != size) { - return ERR_INVALID_SIZE; - } - - auto& vm_manager = owner_process->VMManager(); - const auto result = vm_manager.UnmapRange(address, size); - - if (result.IsError()) { - return result; - } - - is_mapped = false; - return RESULT_SUCCESS; + return owner_process->PageTable().ResetTransferMemory(base_address, size); } } // namespace Kernel diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h index 6e388536a..05e9f7464 100644 --- a/src/core/hle/kernel/transfer_memory.h +++ b/src/core/hle/kernel/transfer_memory.h @@ -6,12 +6,13 @@ #include <memory> +#include "core/hle/kernel/memory/memory_block.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/physical_memory.h" union ResultCode; -namespace Memory { +namespace Core::Memory { class Memory; } @@ -20,8 +21,6 @@ namespace Kernel { class KernelCore; class Process; -enum class MemoryPermission : u32; - /// Defines the interface for transfer memory objects. 
/// /// Transfer memory is typically used for the purpose of @@ -30,14 +29,14 @@ enum class MemoryPermission : u32; /// class TransferMemory final : public Object { public: - explicit TransferMemory(KernelCore& kernel, Memory::Memory& memory); + explicit TransferMemory(KernelCore& kernel, Core::Memory::Memory& memory); ~TransferMemory() override; static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory; - static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, Memory::Memory& memory, - VAddr base_address, u64 size, - MemoryPermission permissions); + static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, Core::Memory::Memory& memory, + VAddr base_address, std::size_t size, + Memory::MemoryPermission permissions); TransferMemory(const TransferMemory&) = delete; TransferMemory& operator=(const TransferMemory&) = delete; @@ -61,29 +60,9 @@ public: const u8* GetPointer() const; /// Gets the size of the memory backing this instance in bytes. - u64 GetSize() const; - - /// Attempts to map transfer memory with the given range and memory permissions. - /// - /// @param address The base address to being mapping memory at. - /// @param size The size of the memory to map, in bytes. - /// @param permissions The memory permissions to check against when mapping memory. - /// - /// @pre The given address, size, and memory permissions must all match - /// the same values that were given when creating the transfer memory - /// instance. - /// - ResultCode MapMemory(VAddr address, u64 size, MemoryPermission permissions); - - /// Unmaps the transfer memory with the given range - /// - /// @param address The base address to begin unmapping memory at. - /// @param size The size of the memory to unmap, in bytes. - /// - /// @pre The given address and size must be the same as the ones used - /// to create the transfer memory instance. - /// - ResultCode UnmapMemory(VAddr address, u64 size); + constexpr std::size_t GetSize() const { + return size; + } /// Reserves the region to be used for the transfer memory, called after the transfer memory is /// created. @@ -94,25 +73,19 @@ public: ResultCode Reset(); private: - /// Memory block backing this instance. - std::shared_ptr<PhysicalMemory> backing_block; - /// The base address for the memory managed by this instance. - VAddr base_address = 0; + VAddr base_address{}; /// Size of the memory, in bytes, that this instance manages. - u64 memory_size = 0; + std::size_t size{}; /// The memory permissions that are applied to this instance. - MemoryPermission owner_permissions{}; + Memory::MemoryPermission owner_permissions{}; /// The process that this transfer memory instance was created under. - Process* owner_process = nullptr; - - /// Whether or not this transfer memory instance has mapped memory. - bool is_mapped = false; + Process* owner_process{}; - Memory::Memory& memory; + Core::Memory::Memory& memory; }; } // namespace Kernel diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp deleted file mode 100644 index 024c22901..000000000 --- a/src/core/hle/kernel/vm_manager.cpp +++ /dev/null @@ -1,1175 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
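// The VMManager implementation deleted below is superseded by
// Kernel::Memory::PageTable. The hunks above already show the rough
// correspondence for the calls this diff rewrites (approximate, not
// one-to-one):
//
//     vm_manager.QueryMemory(addr)          -> page_table.QueryInfo(addr).GetSvcMemoryInfo()
//     vm_manager.IsWithinAddressSpace(a, s) -> page_table.IsInsideAddressSpace(a, s)
//     transfer_memory->MapMemory(...)       -> page_table.ReserveTransferMemory(base, size, perms)
//     transfer_memory->UnmapMemory(...)     -> page_table.ResetTransferMemory(base, size)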
- -#include <algorithm> -#include <cstring> -#include <iterator> -#include <utility> -#include "common/alignment.h" -#include "common/assert.h" -#include "common/logging/log.h" -#include "common/memory_hook.h" -#include "core/core.h" -#include "core/file_sys/program_metadata.h" -#include "core/hle/kernel/errors.h" -#include "core/hle/kernel/process.h" -#include "core/hle/kernel/resource_limit.h" -#include "core/hle/kernel/vm_manager.h" -#include "core/memory.h" - -namespace Kernel { -namespace { -const char* GetMemoryStateName(MemoryState state) { - static constexpr const char* names[] = { - "Unmapped", "Io", - "Normal", "Code", - "CodeData", "Heap", - "Shared", "Unknown1", - "ModuleCode", "ModuleCodeData", - "IpcBuffer0", "Stack", - "ThreadLocal", "TransferMemoryIsolated", - "TransferMemory", "ProcessMemory", - "Inaccessible", "IpcBuffer1", - "IpcBuffer3", "KernelStack", - }; - - return names[ToSvcMemoryState(state)]; -} - -// Checks if a given address range lies within a larger address range. -constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin, - VAddr address_range_end) { - const VAddr end_address = address + size - 1; - return address_range_begin <= address && end_address <= address_range_end - 1; -} -} // Anonymous namespace - -bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { - ASSERT(base + size == next.base); - if (permissions != next.permissions || state != next.state || attribute != next.attribute || - type != next.type) { - return false; - } - if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) { - // TODO: Can device mapped memory be merged sanely? - // Not merging it may cause inaccuracies versus hardware when memory layout is queried. - return false; - } - if (type == VMAType::AllocatedMemoryBlock) { - return true; - } - if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) { - return false; - } - if (type == VMAType::MMIO && paddr + size != next.paddr) { - return false; - } - return true; -} - -VMManager::VMManager(Core::System& system) : system{system} { - // Default to assuming a 39-bit address space. This way we have a sane - // starting point with executables that don't provide metadata. - Reset(FileSys::ProgramAddressSpaceType::Is39Bit); -} - -VMManager::~VMManager() = default; - -void VMManager::Reset(FileSys::ProgramAddressSpaceType type) { - Clear(); - - InitializeMemoryRegionRanges(type); - - page_table.Resize(address_space_width); - - // Initialize the map with a single free region covering the entire managed space. - VirtualMemoryArea initial_vma; - initial_vma.size = address_space_end; - vma_map.emplace(initial_vma.base, initial_vma); - - UpdatePageTableForVMA(initial_vma); -} - -VMManager::VMAHandle VMManager::FindVMA(VAddr target) const { - if (target >= address_space_end) { - return vma_map.end(); - } else { - return std::prev(vma_map.upper_bound(target)); - } -} - -bool VMManager::IsValidHandle(VMAHandle handle) const { - return handle != vma_map.cend(); -} - -ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, - std::shared_ptr<PhysicalMemory> block, - std::size_t offset, u64 size, - MemoryState state, VMAPermission perm) { - ASSERT(block != nullptr); - ASSERT(offset + size <= block->size()); - - // This is the appropriately sized VMA that will turn into our allocation. 
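//     A worked example of the CarveVMA call below, assuming a single free VMA
//     covering [0x0000, 0x10000) and a request for target=0x4000, size=0x2000:
//
//         before: { [0x0000, 0x10000) Free }
//         after:  { [0x0000, 0x4000) Free, [0x4000, 0x6000) Free, [0x6000, 0x10000) Free }
//
//     The iterator returned points at the middle entry, which the code below
//     turns into the allocation by filling in type, permissions, state and
//     backing block.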
- CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size)); - VirtualMemoryArea& final_vma = vma_handle->second; - ASSERT(final_vma.size == size); - - final_vma.type = VMAType::AllocatedMemoryBlock; - final_vma.permissions = perm; - final_vma.state = state; - final_vma.backing_block = std::move(block); - final_vma.offset = offset; - UpdatePageTableForVMA(final_vma); - - return MakeResult<VMAHandle>(MergeAdjacent(vma_handle)); -} - -ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* memory, u64 size, - MemoryState state) { - ASSERT(memory != nullptr); - - // This is the appropriately sized VMA that will turn into our allocation. - CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size)); - VirtualMemoryArea& final_vma = vma_handle->second; - ASSERT(final_vma.size == size); - - final_vma.type = VMAType::BackingMemory; - final_vma.permissions = VMAPermission::ReadWrite; - final_vma.state = state; - final_vma.backing_memory = memory; - UpdatePageTableForVMA(final_vma); - - return MakeResult<VMAHandle>(MergeAdjacent(vma_handle)); -} - -ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const { - return FindFreeRegion(GetASLRRegionBaseAddress(), GetASLRRegionEndAddress(), size); -} - -ResultVal<VAddr> VMManager::FindFreeRegion(VAddr begin, VAddr end, u64 size) const { - ASSERT(begin < end); - ASSERT(size <= end - begin); - - const VMAHandle vma_handle = - std::find_if(vma_map.begin(), vma_map.end(), [begin, end, size](const auto& vma) { - if (vma.second.type != VMAType::Free) { - return false; - } - const VAddr vma_base = vma.second.base; - const VAddr vma_end = vma_base + vma.second.size; - const VAddr assumed_base = (begin < vma_base) ? vma_base : begin; - const VAddr used_range = assumed_base + size; - - return vma_base <= assumed_base && assumed_base < used_range && used_range < end && - used_range <= vma_end; - }); - - if (vma_handle == vma_map.cend()) { - // TODO(Subv): Find the correct error code here. - return RESULT_UNKNOWN; - } - - const VAddr target = std::max(begin, vma_handle->second.base); - return MakeResult<VAddr>(target); -} - -ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size, - MemoryState state, - Common::MemoryHookPointer mmio_handler) { - // This is the appropriately sized VMA that will turn into our allocation. 
- CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size)); - VirtualMemoryArea& final_vma = vma_handle->second; - ASSERT(final_vma.size == size); - - final_vma.type = VMAType::MMIO; - final_vma.permissions = VMAPermission::ReadWrite; - final_vma.state = state; - final_vma.paddr = paddr; - final_vma.mmio_handler = std::move(mmio_handler); - UpdatePageTableForVMA(final_vma); - - return MakeResult<VMAHandle>(MergeAdjacent(vma_handle)); -} - -VMManager::VMAIter VMManager::Unmap(VMAIter vma_handle) { - VirtualMemoryArea& vma = vma_handle->second; - vma.type = VMAType::Free; - vma.permissions = VMAPermission::None; - vma.state = MemoryState::Unmapped; - vma.attribute = MemoryAttribute::None; - - vma.backing_block = nullptr; - vma.offset = 0; - vma.backing_memory = nullptr; - vma.paddr = 0; - - UpdatePageTableForVMA(vma); - - return MergeAdjacent(vma_handle); -} - -ResultCode VMManager::UnmapRange(VAddr target, u64 size) { - CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size)); - const VAddr target_end = target + size; - - const VMAIter end = vma_map.end(); - // The comparison against the end of the range must be done using addresses since VMAs can be - // merged during this process, causing invalidation of the iterators. - while (vma != end && vma->second.base < target_end) { - vma = std::next(Unmap(vma)); - } - - ASSERT(FindVMA(target)->second.size >= size); - - return RESULT_SUCCESS; -} - -VMManager::VMAHandle VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) { - VMAIter iter = StripIterConstness(vma_handle); - - VirtualMemoryArea& vma = iter->second; - vma.permissions = new_perms; - UpdatePageTableForVMA(vma); - - return MergeAdjacent(iter); -} - -ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_perms) { - CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size)); - const VAddr target_end = target + size; - - const VMAIter end = vma_map.end(); - // The comparison against the end of the range must be done using addresses since VMAs can be - // merged during this process, causing invalidation of the iterators. - while (vma != end && vma->second.base < target_end) { - vma = std::next(StripIterConstness(Reprotect(vma, new_perms))); - } - - return RESULT_SUCCESS; -} - -ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { - if (size > GetHeapRegionSize()) { - return ERR_OUT_OF_MEMORY; - } - - // No need to do any additional work if the heap is already the given size. - if (size == GetCurrentHeapSize()) { - return MakeResult(heap_region_base); - } - - if (heap_memory == nullptr) { - // Initialize heap - heap_memory = std::make_shared<PhysicalMemory>(size); - heap_end = heap_region_base + size; - } else { - UnmapRange(heap_region_base, GetCurrentHeapSize()); - } - - // If necessary, expand backing vector to cover new heap extents in - // the case of allocating. Otherwise, shrink the backing memory, - // if a smaller heap has been requested. - heap_memory->resize(size); - heap_memory->shrink_to_fit(); - RefreshMemoryBlockMappings(heap_memory.get()); - - heap_end = heap_region_base + size; - ASSERT(GetCurrentHeapSize() == heap_memory->size()); - - const auto mapping_result = - MapMemoryBlock(heap_region_base, heap_memory, 0, size, MemoryState::Heap); - if (mapping_result.Failed()) { - return mapping_result.Code(); - } - - return MakeResult<VAddr>(heap_region_base); -} - -ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { - // Check how much memory we've already mapped. 
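//     The admission check below reduces to this arithmetic (names here are
//     shorthand, not real API):
//
//         remaining = max(PhysicalMemory) - current(PhysicalMemory)  // resource limit
//         request   = size - mapped_size  // only the not-yet-mapped part is new
//         if (remaining < request) -> ERR_RESOURCE_LIMIT_EXCEEDED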
- const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size); - if (mapped_size_result.Failed()) { - return mapped_size_result.Code(); - } - - // If we've already mapped the desired amount, return early. - const std::size_t mapped_size = *mapped_size_result; - if (mapped_size == size) { - return RESULT_SUCCESS; - } - - // Check that we can map the memory we want. - const auto res_limit = system.CurrentProcess()->GetResourceLimit(); - const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) - - res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory); - if (physmem_remaining < (size - mapped_size)) { - return ERR_RESOURCE_LIMIT_EXCEEDED; - } - - // Keep track of the memory regions we unmap. - std::vector<std::pair<u64, u64>> mapped_regions; - ResultCode result = RESULT_SUCCESS; - - // Iterate, trying to map memory. - { - const auto end_addr = target + size; - const auto last_addr = end_addr - 1; - VAddr cur_addr = target; - - auto iter = FindVMA(target); - ASSERT(iter != vma_map.end()); - - while (true) { - const auto& vma = iter->second; - const auto vma_start = vma.base; - const auto vma_end = vma_start + vma.size; - const auto vma_last = vma_end - 1; - - // Map the memory block - const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr); - if (vma.state == MemoryState::Unmapped) { - const auto map_res = - MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size), 0, - map_size, MemoryState::Heap, VMAPermission::ReadWrite); - result = map_res.Code(); - if (result.IsError()) { - break; - } - - mapped_regions.emplace_back(cur_addr, map_size); - } - - // Break once we hit the end of the range. - if (last_addr <= vma_last) { - break; - } - - // Advance to the next block. - cur_addr = vma_end; - iter = FindVMA(cur_addr); - ASSERT(iter != vma_map.end()); - } - } - - // If we failed, unmap memory. - if (result.IsError()) { - for (const auto [unmap_address, unmap_size] : mapped_regions) { - ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(), - "Failed to unmap memory range."); - } - - return result; - } - - // Update amount of mapped physical memory. - physical_memory_mapped += size - mapped_size; - - return RESULT_SUCCESS; -} - -ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { - // Check how much memory is currently mapped. - const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size); - if (mapped_size_result.Failed()) { - return mapped_size_result.Code(); - } - - // If we've already unmapped all the memory, return early. - const std::size_t mapped_size = *mapped_size_result; - if (mapped_size == 0) { - return RESULT_SUCCESS; - } - - // Keep track of the memory regions we unmap. - std::vector<std::pair<u64, u64>> unmapped_regions; - ResultCode result = RESULT_SUCCESS; - - // Try to unmap regions. 
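// On failure, the recovery loop further below re-maps everything recorded in
// unmapped_regions, so the operation is all-or-nothing from the caller's view
// (though, per the TODO there, the original contents are not preserved).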
- { - const auto end_addr = target + size; - const auto last_addr = end_addr - 1; - VAddr cur_addr = target; - - auto iter = FindVMA(target); - ASSERT(iter != vma_map.end()); - - while (true) { - const auto& vma = iter->second; - const auto vma_start = vma.base; - const auto vma_end = vma_start + vma.size; - const auto vma_last = vma_end - 1; - - // Unmap the memory block - const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr); - if (vma.state == MemoryState::Heap) { - result = UnmapRange(cur_addr, unmap_size); - if (result.IsError()) { - break; - } - - unmapped_regions.emplace_back(cur_addr, unmap_size); - } - - // Break once we hit the end of the range. - if (last_addr <= vma_last) { - break; - } - - // Advance to the next block. - cur_addr = vma_end; - iter = FindVMA(cur_addr); - ASSERT(iter != vma_map.end()); - } - } - - // If we failed, re-map regions. - // TODO: Preserve memory contents? - if (result.IsError()) { - for (const auto [map_address, map_size] : unmapped_regions) { - const auto remap_res = - MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size), 0, map_size, - MemoryState::Heap, VMAPermission::None); - ASSERT_MSG(remap_res.Succeeded(), "Failed to remap a memory block."); - } - - return result; - } - - // Update mapped amount - physical_memory_mapped -= mapped_size; - - return RESULT_SUCCESS; -} - -ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { - constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; - const auto src_check_result = CheckRangeState( - src_address, size, MemoryState::All, MemoryState::Heap, VMAPermission::All, - VMAPermission::ReadWrite, MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute); - - if (src_check_result.Failed()) { - return src_check_result.Code(); - } - - const auto mirror_result = - MirrorMemory(dst_address, src_address, size, MemoryState::ModuleCode); - if (mirror_result.IsError()) { - return mirror_result; - } - - // Ensure we lock the source memory region. - const auto src_vma_result = CarveVMARange(src_address, size); - if (src_vma_result.Failed()) { - return src_vma_result.Code(); - } - auto src_vma_iter = *src_vma_result; - src_vma_iter->second.attribute = MemoryAttribute::Locked; - Reprotect(src_vma_iter, VMAPermission::Read); - - // The destination memory region is fine as is, however we need to make it read-only. - return ReprotectRange(dst_address, size, VMAPermission::Read); -} - -ResultCode VMManager::UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { - constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; - const auto src_check_result = CheckRangeState( - src_address, size, MemoryState::All, MemoryState::Heap, VMAPermission::None, - VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::Locked, ignore_attribute); - - if (src_check_result.Failed()) { - return src_check_result.Code(); - } - - // Yes, the kernel only checks the first page of the region. 
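// That is: the first destination probe below spans only Memory::PAGE_SIZE
// bytes, enough to learn which MemoryState the mirrored region was mapped
// with; the second check then requires the whole [dst_address, dst_address +
// size) range to share that state before it is unmapped.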
- const auto dst_check_result = - CheckRangeState(dst_address, Memory::PAGE_SIZE, MemoryState::FlagModule, - MemoryState::FlagModule, VMAPermission::None, VMAPermission::None, - MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute); - - if (dst_check_result.Failed()) { - return dst_check_result.Code(); - } - - const auto dst_memory_state = std::get<MemoryState>(*dst_check_result); - const auto dst_contiguous_check_result = CheckRangeState( - dst_address, size, MemoryState::All, dst_memory_state, VMAPermission::None, - VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute); - - if (dst_contiguous_check_result.Failed()) { - return dst_contiguous_check_result.Code(); - } - - const auto unmap_result = UnmapRange(dst_address, size); - if (unmap_result.IsError()) { - return unmap_result; - } - - // With the mirrored portion unmapped, restore the original region's traits. - const auto src_vma_result = CarveVMARange(src_address, size); - if (src_vma_result.Failed()) { - return src_vma_result.Code(); - } - auto src_vma_iter = *src_vma_result; - src_vma_iter->second.state = MemoryState::Heap; - src_vma_iter->second.attribute = MemoryAttribute::None; - Reprotect(src_vma_iter, VMAPermission::ReadWrite); - - if (dst_memory_state == MemoryState::ModuleCode) { - system.InvalidateCpuInstructionCaches(); - } - - return unmap_result; -} - -MemoryInfo VMManager::QueryMemory(VAddr address) const { - const auto vma = FindVMA(address); - MemoryInfo memory_info{}; - - if (IsValidHandle(vma)) { - memory_info.base_address = vma->second.base; - memory_info.attributes = ToSvcMemoryAttribute(vma->second.attribute); - memory_info.permission = static_cast<u32>(vma->second.permissions); - memory_info.size = vma->second.size; - memory_info.state = ToSvcMemoryState(vma->second.state); - } else { - memory_info.base_address = address_space_end; - memory_info.permission = static_cast<u32>(VMAPermission::None); - memory_info.size = 0 - address_space_end; - memory_info.state = static_cast<u32>(MemoryState::Inaccessible); - } - - return memory_info; -} - -ResultCode VMManager::SetMemoryAttribute(VAddr address, u64 size, MemoryAttribute mask, - MemoryAttribute attribute) { - constexpr auto ignore_mask = - MemoryAttribute::Uncached | MemoryAttribute::DeviceMapped | MemoryAttribute::Locked; - constexpr auto attribute_mask = ~ignore_mask; - - const auto result = CheckRangeState( - address, size, MemoryState::FlagUncached, MemoryState::FlagUncached, VMAPermission::None, - VMAPermission::None, attribute_mask, MemoryAttribute::None, ignore_mask); - - if (result.Failed()) { - return result.Code(); - } - - const auto [prev_state, prev_permissions, prev_attributes] = *result; - const auto new_attribute = (prev_attributes & ~mask) | (mask & attribute); - - const auto carve_result = CarveVMARange(address, size); - if (carve_result.Failed()) { - return carve_result.Code(); - } - - auto vma_iter = *carve_result; - vma_iter->second.attribute = new_attribute; - - MergeAdjacent(vma_iter); - return RESULT_SUCCESS; -} - -ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, MemoryState state) { - const auto vma = FindVMA(src_addr); - - ASSERT_MSG(vma != vma_map.end(), "Invalid memory address"); - ASSERT_MSG(vma->second.backing_block, "Backing block doesn't exist for address"); - - // The returned VMA might be a bigger one encompassing the desired address. 
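// For example, FindVMA(0x5000) may return a VMA spanning [0x4000, 0x8000);
// vma_offset below is then 0x1000, and the assert checks that the mirrored
// window of size bytes still fits inside that 0x4000-byte mapping.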
- const auto vma_offset = src_addr - vma->first; - ASSERT_MSG(vma_offset + size <= vma->second.size, - "Shared memory exceeds bounds of mapped block"); - - const std::shared_ptr<PhysicalMemory>& backing_block = vma->second.backing_block; - const std::size_t backing_block_offset = vma->second.offset + vma_offset; - - CASCADE_RESULT(auto new_vma, - MapMemoryBlock(dst_addr, backing_block, backing_block_offset, size, state)); - // Protect mirror with permissions from old region - Reprotect(new_vma, vma->second.permissions); - // Remove permissions from old region - ReprotectRange(src_addr, size, VMAPermission::None); - - return RESULT_SUCCESS; -} - -void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) { - // If this ever proves to have a noticeable performance impact, allow users of the function to - // specify a specific range of addresses to limit the scan to. - for (const auto& p : vma_map) { - const VirtualMemoryArea& vma = p.second; - if (block == vma.backing_block.get()) { - UpdatePageTableForVMA(vma); - } - } -} - -void VMManager::LogLayout() const { - for (const auto& p : vma_map) { - const VirtualMemoryArea& vma = p.second; - LOG_DEBUG(Kernel, "{:016X} - {:016X} size: {:016X} {}{}{} {}", vma.base, - vma.base + vma.size, vma.size, - (u8)vma.permissions & (u8)VMAPermission::Read ? 'R' : '-', - (u8)vma.permissions & (u8)VMAPermission::Write ? 'W' : '-', - (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-', - GetMemoryStateName(vma.state)); - } -} - -VMManager::VMAIter VMManager::StripIterConstness(const VMAHandle& iter) { - // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given - // non-const access to its container. - return vma_map.erase(iter, iter); // Erases an empty range of elements -} - -ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u64 size) { - ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x{:016X}", size); - ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x{:016X}", base); - - VMAIter vma_handle = StripIterConstness(FindVMA(base)); - if (vma_handle == vma_map.end()) { - // Target address is outside the range managed by the kernel - return ERR_INVALID_ADDRESS; - } - - const VirtualMemoryArea& vma = vma_handle->second; - if (vma.type != VMAType::Free) { - // Region is already allocated - return ERR_INVALID_ADDRESS_STATE; - } - - const VAddr start_in_vma = base - vma.base; - const VAddr end_in_vma = start_in_vma + size; - - if (end_in_vma > vma.size) { - // Requested allocation doesn't fit inside VMA - return ERR_INVALID_ADDRESS_STATE; - } - - if (end_in_vma != vma.size) { - // Split VMA at the end of the allocated region - SplitVMA(vma_handle, end_in_vma); - } - if (start_in_vma != 0) { - // Split VMA at the start of the allocated region - vma_handle = SplitVMA(vma_handle, start_in_vma); - } - - return MakeResult<VMAIter>(vma_handle); -} - -ResultVal<VMManager::VMAIter> VMManager::CarveVMARange(VAddr target, u64 size) { - ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x{:016X}", size); - ASSERT_MSG((target & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x{:016X}", target); - - const VAddr target_end = target + size; - ASSERT(target_end >= target); - ASSERT(target_end <= address_space_end); - ASSERT(size > 0); - - VMAIter begin_vma = StripIterConstness(FindVMA(target)); - const VMAIter i_end = vma_map.lower_bound(target_end); - if (std::any_of(begin_vma, i_end, - [](const auto& entry) { return entry.second.type == VMAType::Free; })) { - 
return ERR_INVALID_ADDRESS_STATE; - } - - if (target != begin_vma->second.base) { - begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base); - } - - VMAIter end_vma = StripIterConstness(FindVMA(target_end)); - if (end_vma != vma_map.end() && target_end != end_vma->second.base) { - end_vma = SplitVMA(end_vma, target_end - end_vma->second.base); - } - - return MakeResult<VMAIter>(begin_vma); -} - -VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) { - VirtualMemoryArea& old_vma = vma_handle->second; - VirtualMemoryArea new_vma = old_vma; // Make a copy of the VMA - - // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably - // a bug. This restriction might be removed later. - ASSERT(offset_in_vma < old_vma.size); - ASSERT(offset_in_vma > 0); - - old_vma.size = offset_in_vma; - new_vma.base += offset_in_vma; - new_vma.size -= offset_in_vma; - - switch (new_vma.type) { - case VMAType::Free: - break; - case VMAType::AllocatedMemoryBlock: - new_vma.offset += offset_in_vma; - break; - case VMAType::BackingMemory: - new_vma.backing_memory += offset_in_vma; - break; - case VMAType::MMIO: - new_vma.paddr += offset_in_vma; - break; - } - - ASSERT(old_vma.CanBeMergedWith(new_vma)); - - return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma); -} - -VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { - const VMAIter next_vma = std::next(iter); - if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { - MergeAdjacentVMA(iter->second, next_vma->second); - vma_map.erase(next_vma); - } - - if (iter != vma_map.begin()) { - VMAIter prev_vma = std::prev(iter); - if (prev_vma->second.CanBeMergedWith(iter->second)) { - MergeAdjacentVMA(prev_vma->second, iter->second); - vma_map.erase(iter); - iter = prev_vma; - } - } - - return iter; -} - -void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) { - ASSERT(left.CanBeMergedWith(right)); - - // Always merge allocated memory blocks, even when they don't share the same backing block. - if (left.type == VMAType::AllocatedMemoryBlock && - (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { - - // Check if we can save work. - if (left.offset == 0 && left.size == left.backing_block->size()) { - // Fast case: left is an entire backing block. - left.backing_block->resize(left.size + right.size); - std::memcpy(left.backing_block->data() + left.size, - right.backing_block->data() + right.offset, right.size); - } else { - // Slow case: make a new memory block for left and right. - auto new_memory = std::make_shared<PhysicalMemory>(); - new_memory->resize(left.size + right.size); - std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size); - std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset, - right.size); - - left.backing_block = std::move(new_memory); - left.offset = 0; - } - - // Page table update is needed, because backing memory changed. - left.size += right.size; - UpdatePageTableForVMA(left); - } else { - // Just update the size. 
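// (Safe to do without touching the page table: for the remaining mergeable
// types, CanBeMergedWith has already verified that the backing storage is
// contiguous.)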
- left.size += right.size; - } -} - -void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { - auto& memory = system.Memory(); - - switch (vma.type) { - case VMAType::Free: - memory.UnmapRegion(page_table, vma.base, vma.size); - break; - case VMAType::AllocatedMemoryBlock: - memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset); - break; - case VMAType::BackingMemory: - memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory); - break; - case VMAType::MMIO: - memory.MapIoRegion(page_table, vma.base, vma.size, vma.mmio_handler); - break; - } -} - -void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType type) { - u64 map_region_size = 0; - u64 heap_region_size = 0; - u64 stack_region_size = 0; - u64 tls_io_region_size = 0; - - u64 stack_and_tls_io_end = 0; - - switch (type) { - case FileSys::ProgramAddressSpaceType::Is32Bit: - case FileSys::ProgramAddressSpaceType::Is32BitNoMap: - address_space_width = 32; - code_region_base = 0x200000; - code_region_end = code_region_base + 0x3FE00000; - aslr_region_base = 0x200000; - aslr_region_end = aslr_region_base + 0xFFE00000; - if (type == FileSys::ProgramAddressSpaceType::Is32Bit) { - map_region_size = 0x40000000; - heap_region_size = 0x40000000; - } else { - map_region_size = 0; - heap_region_size = 0x80000000; - } - stack_and_tls_io_end = 0x40000000; - break; - case FileSys::ProgramAddressSpaceType::Is36Bit: - address_space_width = 36; - code_region_base = 0x8000000; - code_region_end = code_region_base + 0x78000000; - aslr_region_base = 0x8000000; - aslr_region_end = aslr_region_base + 0xFF8000000; - map_region_size = 0x180000000; - heap_region_size = 0x180000000; - stack_and_tls_io_end = 0x80000000; - break; - case FileSys::ProgramAddressSpaceType::Is39Bit: - address_space_width = 39; - code_region_base = 0x8000000; - code_region_end = code_region_base + 0x80000000; - aslr_region_base = 0x8000000; - aslr_region_end = aslr_region_base + 0x7FF8000000; - map_region_size = 0x1000000000; - heap_region_size = 0x180000000; - stack_region_size = 0x80000000; - tls_io_region_size = 0x1000000000; - break; - default: - UNREACHABLE_MSG("Invalid address space type specified: {}", static_cast<u32>(type)); - return; - } - - const u64 stack_and_tls_io_begin = aslr_region_base; - - address_space_base = 0; - address_space_end = 1ULL << address_space_width; - - map_region_base = code_region_end; - map_region_end = map_region_base + map_region_size; - - heap_region_base = map_region_end; - heap_region_end = heap_region_base + heap_region_size; - heap_end = heap_region_base; - - stack_region_base = heap_region_end; - stack_region_end = stack_region_base + stack_region_size; - - tls_io_region_base = stack_region_end; - tls_io_region_end = tls_io_region_base + tls_io_region_size; - - if (stack_region_size == 0) { - stack_region_base = stack_and_tls_io_begin; - stack_region_end = stack_and_tls_io_end; - } - - if (tls_io_region_size == 0) { - tls_io_region_base = stack_and_tls_io_begin; - tls_io_region_end = stack_and_tls_io_end; - } -} - -void VMManager::Clear() { - ClearVMAMap(); - ClearPageTable(); -} - -void VMManager::ClearVMAMap() { - vma_map.clear(); -} - -void VMManager::ClearPageTable() { - std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); - page_table.special_regions.clear(); - std::fill(page_table.attributes.begin(), page_table.attributes.end(), - Common::PageType::Unmapped); -} - -VMManager::CheckResults VMManager::CheckRangeState(VAddr address, 
u64 size, MemoryState state_mask, - MemoryState state, VMAPermission permission_mask, - VMAPermission permissions, - MemoryAttribute attribute_mask, - MemoryAttribute attribute, - MemoryAttribute ignore_mask) const { - auto iter = FindVMA(address); - - // If we don't have a valid VMA handle at this point, then it means this is - // being called with an address outside of the address space, which is definitely - // indicative of a bug, as this function only operates on mapped memory regions. - DEBUG_ASSERT(IsValidHandle(iter)); - - const VAddr end_address = address + size - 1; - const MemoryAttribute initial_attributes = iter->second.attribute; - const VMAPermission initial_permissions = iter->second.permissions; - const MemoryState initial_state = iter->second.state; - - while (true) { - // The iterator should be valid throughout the traversal. Hitting the end of - // the mapped VMA regions is unquestionably indicative of a bug. - DEBUG_ASSERT(IsValidHandle(iter)); - - const auto& vma = iter->second; - - if (vma.state != initial_state) { - return ERR_INVALID_ADDRESS_STATE; - } - - if ((vma.state & state_mask) != state) { - return ERR_INVALID_ADDRESS_STATE; - } - - if (vma.permissions != initial_permissions) { - return ERR_INVALID_ADDRESS_STATE; - } - - if ((vma.permissions & permission_mask) != permissions) { - return ERR_INVALID_ADDRESS_STATE; - } - - if ((vma.attribute | ignore_mask) != (initial_attributes | ignore_mask)) { - return ERR_INVALID_ADDRESS_STATE; - } - - if ((vma.attribute & attribute_mask) != attribute) { - return ERR_INVALID_ADDRESS_STATE; - } - - if (end_address <= vma.EndAddress()) { - break; - } - - ++iter; - } - - return MakeResult( - std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); -} - -ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address, - std::size_t size) const { - const VAddr end_addr = address + size; - const VAddr last_addr = end_addr - 1; - std::size_t mapped_size = 0; - - VAddr cur_addr = address; - auto iter = FindVMA(cur_addr); - ASSERT(iter != vma_map.end()); - - while (true) { - const auto& vma = iter->second; - const VAddr vma_start = vma.base; - const VAddr vma_end = vma_start + vma.size; - const VAddr vma_last = vma_end - 1; - - // Add size if relevant. - if (vma.state != MemoryState::Unmapped) { - mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr); - } - - // Break once we hit the end of the range. - if (last_addr <= vma_last) { - break; - } - - // Advance to the next block. - cur_addr = vma_end; - iter = std::next(iter); - ASSERT(iter != vma_map.end()); - } - - return MakeResult(mapped_size); -} - -ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address, - std::size_t size) const { - const VAddr end_addr = address + size; - const VAddr last_addr = end_addr - 1; - std::size_t mapped_size = 0; - - VAddr cur_addr = address; - auto iter = FindVMA(cur_addr); - ASSERT(iter != vma_map.end()); - - while (true) { - const auto& vma = iter->second; - const auto vma_start = vma.base; - const auto vma_end = vma_start + vma.size; - const auto vma_last = vma_end - 1; - const auto state = vma.state; - const auto attr = vma.attribute; - - // Memory within region must be free or mapped heap. - if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) || - (state == MemoryState::Unmapped))) { - return ERR_INVALID_ADDRESS_STATE; - } - - // Add size if relevant. 
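//     Each block contributes its overlap with the queried window:
//
//         contribution = min(end_addr, vma_end) - cur_addr
//
//     which is what the min() below computes, mirroring the walk in
//     SizeOfAllocatedVMAsInRange above.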
- if (state != MemoryState::Unmapped) { - mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr); - } - - // Break once we hit the end of the range. - if (last_addr <= vma_last) { - break; - } - - // Advance to the next block. - cur_addr = vma_end; - iter = std::next(iter); - ASSERT(iter != vma_map.end()); - } - - return MakeResult(mapped_size); -} - -u64 VMManager::GetTotalPhysicalMemoryAvailable() const { - LOG_WARNING(Kernel, "(STUBBED) called"); - return 0xF8000000; -} - -VAddr VMManager::GetAddressSpaceBaseAddress() const { - return address_space_base; -} - -VAddr VMManager::GetAddressSpaceEndAddress() const { - return address_space_end; -} - -u64 VMManager::GetAddressSpaceSize() const { - return address_space_end - address_space_base; -} - -u64 VMManager::GetAddressSpaceWidth() const { - return address_space_width; -} - -bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const { - return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(), - GetAddressSpaceEndAddress()); -} - -VAddr VMManager::GetASLRRegionBaseAddress() const { - return aslr_region_base; -} - -VAddr VMManager::GetASLRRegionEndAddress() const { - return aslr_region_end; -} - -u64 VMManager::GetASLRRegionSize() const { - return aslr_region_end - aslr_region_base; -} - -bool VMManager::IsWithinASLRRegion(VAddr begin, u64 size) const { - const VAddr range_end = begin + size; - const VAddr aslr_start = GetASLRRegionBaseAddress(); - const VAddr aslr_end = GetASLRRegionEndAddress(); - - if (aslr_start > begin || begin > range_end || range_end - 1 > aslr_end - 1) { - return false; - } - - if (range_end > heap_region_base && heap_region_end > begin) { - return false; - } - - if (range_end > map_region_base && map_region_end > begin) { - return false; - } - - return true; -} - -VAddr VMManager::GetCodeRegionBaseAddress() const { - return code_region_base; -} - -VAddr VMManager::GetCodeRegionEndAddress() const { - return code_region_end; -} - -u64 VMManager::GetCodeRegionSize() const { - return code_region_end - code_region_base; -} - -bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const { - return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(), - GetCodeRegionEndAddress()); -} - -VAddr VMManager::GetHeapRegionBaseAddress() const { - return heap_region_base; -} - -VAddr VMManager::GetHeapRegionEndAddress() const { - return heap_region_end; -} - -u64 VMManager::GetHeapRegionSize() const { - return heap_region_end - heap_region_base; -} - -u64 VMManager::GetCurrentHeapSize() const { - return heap_end - heap_region_base; -} - -bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const { - return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(), - GetHeapRegionEndAddress()); -} - -VAddr VMManager::GetMapRegionBaseAddress() const { - return map_region_base; -} - -VAddr VMManager::GetMapRegionEndAddress() const { - return map_region_end; -} - -u64 VMManager::GetMapRegionSize() const { - return map_region_end - map_region_base; -} - -bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const { - return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress()); -} - -VAddr VMManager::GetStackRegionBaseAddress() const { - return stack_region_base; -} - -VAddr VMManager::GetStackRegionEndAddress() const { - return stack_region_end; -} - -u64 VMManager::GetStackRegionSize() const { - return stack_region_end - stack_region_base; -} - -bool VMManager::IsWithinStackRegion(VAddr address, u64 size) const { - return 
IsInsideAddressRange(address, size, GetStackRegionBaseAddress(), - GetStackRegionEndAddress()); -} - -VAddr VMManager::GetTLSIORegionBaseAddress() const { - return tls_io_region_base; -} - -VAddr VMManager::GetTLSIORegionEndAddress() const { - return tls_io_region_end; -} - -u64 VMManager::GetTLSIORegionSize() const { - return tls_io_region_end - tls_io_region_base; -} - -bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const { - return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(), - GetTLSIORegionEndAddress()); -} - -} // namespace Kernel diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h deleted file mode 100644 index 90b4b006a..000000000 --- a/src/core/hle/kernel/vm_manager.h +++ /dev/null @@ -1,796 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <map> -#include <memory> -#include <tuple> -#include <vector> -#include "common/common_types.h" -#include "common/memory_hook.h" -#include "common/page_table.h" -#include "core/hle/kernel/physical_memory.h" -#include "core/hle/result.h" -#include "core/memory.h" - -namespace Core { -class System; -} - -namespace FileSys { -enum class ProgramAddressSpaceType : u8; -} - -namespace Kernel { - -enum class VMAType : u8 { - /// VMA represents an unmapped region of the address space. - Free, - /// VMA is backed by a ref-counted allocate memory block. - AllocatedMemoryBlock, - /// VMA is backed by a raw, unmanaged pointer. - BackingMemory, - /// VMA is mapped to MMIO registers at a fixed PAddr. - MMIO, - // TODO(yuriks): Implement MemoryAlias to support MAP/UNMAP -}; - -/// Permissions for mapped memory blocks -enum class VMAPermission : u8 { - None = 0, - Read = 1, - Write = 2, - Execute = 4, - - ReadWrite = Read | Write, - ReadExecute = Read | Execute, - WriteExecute = Write | Execute, - ReadWriteExecute = Read | Write | Execute, - - // Used as a wildcard when checking permissions across memory ranges - All = 0xFF, -}; - -constexpr VMAPermission operator|(VMAPermission lhs, VMAPermission rhs) { - return static_cast<VMAPermission>(u32(lhs) | u32(rhs)); -} - -constexpr VMAPermission operator&(VMAPermission lhs, VMAPermission rhs) { - return static_cast<VMAPermission>(u32(lhs) & u32(rhs)); -} - -constexpr VMAPermission operator^(VMAPermission lhs, VMAPermission rhs) { - return static_cast<VMAPermission>(u32(lhs) ^ u32(rhs)); -} - -constexpr VMAPermission operator~(VMAPermission permission) { - return static_cast<VMAPermission>(~u32(permission)); -} - -constexpr VMAPermission& operator|=(VMAPermission& lhs, VMAPermission rhs) { - lhs = lhs | rhs; - return lhs; -} - -constexpr VMAPermission& operator&=(VMAPermission& lhs, VMAPermission rhs) { - lhs = lhs & rhs; - return lhs; -} - -constexpr VMAPermission& operator^=(VMAPermission& lhs, VMAPermission rhs) { - lhs = lhs ^ rhs; - return lhs; -} - -/// Attribute flags that can be applied to a VMA -enum class MemoryAttribute : u32 { - Mask = 0xFF, - - /// No particular qualities - None = 0, - /// Memory locked/borrowed for use. e.g. This would be used by transfer memory. - Locked = 1, - /// Memory locked for use by IPC-related internals. - LockedForIPC = 2, - /// Mapped as part of the device address space. 
- DeviceMapped = 4, - /// Uncached memory - Uncached = 8, - - IpcAndDeviceMapped = LockedForIPC | DeviceMapped, -}; - -constexpr MemoryAttribute operator|(MemoryAttribute lhs, MemoryAttribute rhs) { - return static_cast<MemoryAttribute>(u32(lhs) | u32(rhs)); -} - -constexpr MemoryAttribute operator&(MemoryAttribute lhs, MemoryAttribute rhs) { - return static_cast<MemoryAttribute>(u32(lhs) & u32(rhs)); -} - -constexpr MemoryAttribute operator^(MemoryAttribute lhs, MemoryAttribute rhs) { - return static_cast<MemoryAttribute>(u32(lhs) ^ u32(rhs)); -} - -constexpr MemoryAttribute operator~(MemoryAttribute attribute) { - return static_cast<MemoryAttribute>(~u32(attribute)); -} - -constexpr MemoryAttribute& operator|=(MemoryAttribute& lhs, MemoryAttribute rhs) { - lhs = lhs | rhs; - return lhs; -} - -constexpr MemoryAttribute& operator&=(MemoryAttribute& lhs, MemoryAttribute rhs) { - lhs = lhs & rhs; - return lhs; -} - -constexpr MemoryAttribute& operator^=(MemoryAttribute& lhs, MemoryAttribute rhs) { - lhs = lhs ^ rhs; - return lhs; -} - -constexpr u32 ToSvcMemoryAttribute(MemoryAttribute attribute) { - return static_cast<u32>(attribute & MemoryAttribute::Mask); -} - -// clang-format off -/// Represents memory states and any relevant flags, as used by the kernel. -/// svcQueryMemory interprets these by masking away all but the first eight -/// bits when storing memory state into a MemoryInfo instance. -enum class MemoryState : u32 { - Mask = 0xFF, - FlagProtect = 1U << 8, - FlagDebug = 1U << 9, - FlagIPC0 = 1U << 10, - FlagIPC3 = 1U << 11, - FlagIPC1 = 1U << 12, - FlagMapped = 1U << 13, - FlagCode = 1U << 14, - FlagAlias = 1U << 15, - FlagModule = 1U << 16, - FlagTransfer = 1U << 17, - FlagQueryPhysicalAddressAllowed = 1U << 18, - FlagSharedDevice = 1U << 19, - FlagSharedDeviceAligned = 1U << 20, - FlagIPCBuffer = 1U << 21, - FlagMemoryPoolAllocated = 1U << 22, - FlagMapProcess = 1U << 23, - FlagUncached = 1U << 24, - FlagCodeMemory = 1U << 25, - - // Wildcard used in range checking to indicate all states. 
- All = 0xFFFFFFFF, - - // Convenience flag sets to reduce repetition - IPCFlags = FlagIPC0 | FlagIPC3 | FlagIPC1, - - CodeFlags = FlagDebug | IPCFlags | FlagMapped | FlagCode | FlagQueryPhysicalAddressAllowed | - FlagSharedDevice | FlagSharedDeviceAligned | FlagMemoryPoolAllocated, - - DataFlags = FlagProtect | IPCFlags | FlagMapped | FlagAlias | FlagTransfer | - FlagQueryPhysicalAddressAllowed | FlagSharedDevice | FlagSharedDeviceAligned | - FlagMemoryPoolAllocated | FlagIPCBuffer | FlagUncached, - - Unmapped = 0x00, - Io = 0x01 | FlagMapped, - Normal = 0x02 | FlagMapped | FlagQueryPhysicalAddressAllowed, - Code = 0x03 | CodeFlags | FlagMapProcess, - CodeData = 0x04 | DataFlags | FlagMapProcess | FlagCodeMemory, - Heap = 0x05 | DataFlags | FlagCodeMemory, - Shared = 0x06 | FlagMapped | FlagMemoryPoolAllocated, - ModuleCode = 0x08 | CodeFlags | FlagModule | FlagMapProcess, - ModuleCodeData = 0x09 | DataFlags | FlagModule | FlagMapProcess | FlagCodeMemory, - - IpcBuffer0 = 0x0A | FlagMapped | FlagQueryPhysicalAddressAllowed | FlagMemoryPoolAllocated | - IPCFlags | FlagSharedDevice | FlagSharedDeviceAligned, - - Stack = 0x0B | FlagMapped | IPCFlags | FlagQueryPhysicalAddressAllowed | - FlagSharedDevice | FlagSharedDeviceAligned | FlagMemoryPoolAllocated, - - ThreadLocal = 0x0C | FlagMapped | FlagMemoryPoolAllocated, - - TransferMemoryIsolated = 0x0D | IPCFlags | FlagMapped | FlagQueryPhysicalAddressAllowed | - FlagSharedDevice | FlagSharedDeviceAligned | FlagMemoryPoolAllocated | - FlagUncached, - - TransferMemory = 0x0E | FlagIPC3 | FlagIPC1 | FlagMapped | FlagQueryPhysicalAddressAllowed | - FlagSharedDevice | FlagSharedDeviceAligned | FlagMemoryPoolAllocated, - - ProcessMemory = 0x0F | FlagIPC3 | FlagIPC1 | FlagMapped | FlagMemoryPoolAllocated, - - // Used to signify an inaccessible or invalid memory region with memory queries - Inaccessible = 0x10, - - IpcBuffer1 = 0x11 | FlagIPC3 | FlagIPC1 | FlagMapped | FlagQueryPhysicalAddressAllowed | - FlagSharedDevice | FlagSharedDeviceAligned | FlagMemoryPoolAllocated, - - IpcBuffer3 = 0x12 | FlagIPC3 | FlagMapped | FlagQueryPhysicalAddressAllowed | - FlagSharedDeviceAligned | FlagMemoryPoolAllocated, - - KernelStack = 0x13 | FlagMapped, -}; -// clang-format on - -constexpr MemoryState operator|(MemoryState lhs, MemoryState rhs) { - return static_cast<MemoryState>(u32(lhs) | u32(rhs)); -} - -constexpr MemoryState operator&(MemoryState lhs, MemoryState rhs) { - return static_cast<MemoryState>(u32(lhs) & u32(rhs)); -} - -constexpr MemoryState operator^(MemoryState lhs, MemoryState rhs) { - return static_cast<MemoryState>(u32(lhs) ^ u32(rhs)); -} - -constexpr MemoryState operator~(MemoryState lhs) { - return static_cast<MemoryState>(~u32(lhs)); -} - -constexpr MemoryState& operator|=(MemoryState& lhs, MemoryState rhs) { - lhs = lhs | rhs; - return lhs; -} - -constexpr MemoryState& operator&=(MemoryState& lhs, MemoryState rhs) { - lhs = lhs & rhs; - return lhs; -} - -constexpr MemoryState& operator^=(MemoryState& lhs, MemoryState rhs) { - lhs = lhs ^ rhs; - return lhs; -} - -constexpr u32 ToSvcMemoryState(MemoryState state) { - return static_cast<u32>(state & MemoryState::Mask); -} - -struct MemoryInfo { - u64 base_address; - u64 size; - u32 state; - u32 attributes; - u32 permission; - u32 ipc_ref_count; - u32 device_ref_count; -}; -static_assert(sizeof(MemoryInfo) == 0x28, "MemoryInfo has incorrect size."); - -struct PageInfo { - u32 flags; -}; - -/** - * Represents a VMA in an address space. 
A VMA is a contiguous region of virtual addressing space - * with homogeneous attributes across its extents. In this particular implementation each VMA is - * also backed by a single host memory allocation. - */ -struct VirtualMemoryArea { - /// Gets the starting (base) address of this VMA. - VAddr StartAddress() const { - return base; - } - - /// Gets the ending address of this VMA. - VAddr EndAddress() const { - return base + size - 1; - } - - /// Virtual base address of the region. - VAddr base = 0; - /// Size of the region. - u64 size = 0; - - VMAType type = VMAType::Free; - VMAPermission permissions = VMAPermission::None; - MemoryState state = MemoryState::Unmapped; - MemoryAttribute attribute = MemoryAttribute::None; - - // Settings for type = AllocatedMemoryBlock - /// Memory block backing this VMA. - std::shared_ptr<PhysicalMemory> backing_block = nullptr; - /// Offset into the backing_memory the mapping starts from. - std::size_t offset = 0; - - // Settings for type = BackingMemory - /// Pointer backing this VMA. It will not be destroyed or freed when the VMA is removed. - u8* backing_memory = nullptr; - - // Settings for type = MMIO - /// Physical address of the register area this VMA maps to. - PAddr paddr = 0; - Common::MemoryHookPointer mmio_handler = nullptr; - - /// Tests if this area can be merged to the right with `next`. - bool CanBeMergedWith(const VirtualMemoryArea& next) const; -}; - -/** - * Manages a process' virtual addressing space. This class maintains a list of allocated and free - * regions in the address space, along with their attributes, and allows kernel clients to - * manipulate it, adjusting the page table to match. - * - * This is similar in idea and purpose to the VM manager present in operating system kernels, with - * the main difference being that it doesn't have to support swapping or memory mapping of files. - * The implementation is also simplified by not having to allocate page frames. See these articles - * about the Linux kernel for an explantion of the concept and implementation: - * - http://duartes.org/gustavo/blog/post/how-the-kernel-manages-your-memory/ - * - http://duartes.org/gustavo/blog/post/page-cache-the-affair-between-memory-and-files/ - */ -class VMManager final { - using VMAMap = std::map<VAddr, VirtualMemoryArea>; - -public: - using VMAHandle = VMAMap::const_iterator; - - explicit VMManager(Core::System& system); - ~VMManager(); - - /// Clears the address space map, re-initializing with a single free area. - void Reset(FileSys::ProgramAddressSpaceType type); - - /// Finds the VMA in which the given address is included in, or `vma_map.end()`. - VMAHandle FindVMA(VAddr target) const; - - /// Indicates whether or not the given handle is within the VMA map. - bool IsValidHandle(VMAHandle handle) const; - - // TODO(yuriks): Should these functions actually return the handle? - - /** - * Maps part of a ref-counted block of memory at a given address. - * - * @param target The guest address to start the mapping at. - * @param block The block to be mapped. - * @param offset Offset into `block` to map from. - * @param size Size of the mapping. - * @param state MemoryState tag to attach to the VMA. - */ - ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<PhysicalMemory> block, - std::size_t offset, u64 size, MemoryState state, - VMAPermission perm = VMAPermission::ReadWrite); - - /** - * Maps an unmanaged host memory pointer at a given address. - * - * @param target The guest address to start the mapping at. 
- * @param memory The memory to be mapped. - * @param size Size of the mapping. - * @param state MemoryState tag to attach to the VMA. - */ - ResultVal<VMAHandle> MapBackingMemory(VAddr target, u8* memory, u64 size, MemoryState state); - - /** - * Finds the first free memory region of the given size within - * the user-addressable ASLR memory region. - * - * @param size The size of the desired region in bytes. - * - * @returns If successful, the base address of the free region with - * the given size. - */ - ResultVal<VAddr> FindFreeRegion(u64 size) const; - - /** - * Finds the first free address range that can hold a region of the desired size - * - * @param begin The starting address of the range. - * This is treated as an inclusive beginning address. - * - * @param end The ending address of the range. - * This is treated as an exclusive ending address. - * - * @param size The size of the free region to attempt to locate, - * in bytes. - * - * @returns If successful, the base address of the free region with - * the given size. - * - * @returns If unsuccessful, a result containing an error code. - * - * @pre The starting address must be less than the ending address. - * @pre The size must not exceed the address range itself. - */ - ResultVal<VAddr> FindFreeRegion(VAddr begin, VAddr end, u64 size) const; - - /** - * Maps a memory-mapped IO region at a given address. - * - * @param target The guest address to start the mapping at. - * @param paddr The physical address where the registers are present. - * @param size Size of the mapping. - * @param state MemoryState tag to attach to the VMA. - * @param mmio_handler The handler that will implement read and write for this MMIO region. - */ - ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state, - Common::MemoryHookPointer mmio_handler); - - /// Unmaps a range of addresses, splitting VMAs as necessary. - ResultCode UnmapRange(VAddr target, u64 size); - - /// Changes the permissions of the given VMA. - VMAHandle Reprotect(VMAHandle vma, VMAPermission new_perms); - - /// Changes the permissions of a range of addresses, splitting VMAs as necessary. - ResultCode ReprotectRange(VAddr target, u64 size, VMAPermission new_perms); - - ResultCode MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, MemoryState state); - - /// Attempts to allocate a heap with the given size. - /// - /// @param size The size of the heap to allocate in bytes. - /// - /// @note If a heap is currently allocated, and this is called - /// with a size that is equal to the size of the current heap, - /// then this function will do nothing and return the current - /// heap's starting address, as there's no need to perform - /// any additional heap allocation work. - /// - /// @note If a heap is currently allocated, and this is called - /// with a size less than the current heap's size, then - /// this function will attempt to shrink the heap. - /// - /// @note If a heap is currently allocated, and this is called - /// with a size larger than the current heap's size, then - /// this function will attempt to extend the size of the heap. - /// - /// @returns A result indicating either success or failure. - /// <p> - /// If successful, this function will return a result - /// containing the starting address to the allocated heap. - /// <p> - /// If unsuccessful, this function will return a result - /// containing an error code. - /// - /// @pre The given size must lie within the allowable heap - /// memory region managed by this VMManager instance. 
- /// Failure to abide by this will result in ERR_OUT_OF_MEMORY - /// being returned as the result. - /// - ResultVal<VAddr> SetHeapSize(u64 size); - - /// Maps memory at a given address. - /// - /// @param target The virtual address to map memory at. - /// @param size The amount of memory to map. - /// - /// @note The destination address must lie within the Map region. - /// - /// @note This function requires that SystemResourceSize be non-zero, - /// however, this is just because if it were not then the - /// resulting page tables could be exploited on hardware by - /// a malicious program. SystemResource usage does not need - /// to be explicitly checked or updated here. - ResultCode MapPhysicalMemory(VAddr target, u64 size); - - /// Unmaps memory at a given address. - /// - /// @param target The virtual address to unmap memory at. - /// @param size The amount of memory to unmap. - /// - /// @note The destination address must lie within the Map region. - /// - /// @note This function requires that SystemResourceSize be non-zero, - /// however, this is just because if it were not then the - /// resulting page tables could be exploited on hardware by - /// a malicious program. SystemResource usage does not need - /// to be explicitly checked or updated here. - ResultCode UnmapPhysicalMemory(VAddr target, u64 size); - - /// Maps a region of memory as code memory. - /// - /// @param dst_address The base address of the region to create the aliasing memory region. - /// @param src_address The base address of the region to be aliased. - /// @param size The total amount of memory to map in bytes. - /// - /// @pre Both memory regions lie within the actual addressable address space. - /// - /// @post After this function finishes execution, assuming success, then the address range - /// [dst_address, dst_address+size) will alias the memory region, - /// [src_address, src_address+size). - /// <p> - /// What this also entails is as follows: - /// 1. The aliased region gains the Locked memory attribute. - /// 2. The aliased region becomes read-only. - /// 3. The aliasing region becomes read-only. - /// 4. The aliasing region is created with a memory state of MemoryState::CodeModule. - /// - ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size); - - /// Unmaps a region of memory designated as code module memory. - /// - /// @param dst_address The base address of the memory region aliasing the source memory region. - /// @param src_address The base address of the memory region being aliased. - /// @param size The size of the memory region to unmap in bytes. - /// - /// @pre Both memory ranges lie within the actual addressable address space. - /// - /// @pre The memory region being unmapped has been previously been mapped - /// by a call to MapCodeMemory. - /// - /// @post After execution of the function, if successful. the aliasing memory region - /// will be unmapped and the aliased region will have various traits about it - /// restored to what they were prior to the original mapping call preceding - /// this function call. - /// <p> - /// What this also entails is as follows: - /// 1. The state of the memory region will now indicate a general heap region. - /// 2. All memory attributes for the memory region are cleared. - /// 3. Memory permissions for the region are restored to user read/write. - /// - ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64 size); - - /// Queries the memory manager for information about the given address. 
- /// - /// @param address The address to query the memory manager about for information. - /// - /// @return A MemoryInfo instance containing information about the given address. - /// - MemoryInfo QueryMemory(VAddr address) const; - - /// Sets an attribute across the given address range. - /// - /// @param address The starting address - /// @param size The size of the range to set the attribute on. - /// @param mask The attribute mask - /// @param attribute The attribute to set across the given address range - /// - /// @returns RESULT_SUCCESS if successful - /// @returns ERR_INVALID_ADDRESS_STATE if the attribute could not be set. - /// - ResultCode SetMemoryAttribute(VAddr address, u64 size, MemoryAttribute mask, - MemoryAttribute attribute); - - /** - * Scans all VMAs and updates the page table range of any that use the given vector as backing - * memory. This should be called after any operation that causes reallocation of the vector. - */ - void RefreshMemoryBlockMappings(const PhysicalMemory* block); - - /// Dumps the address space layout to the log, for debugging - void LogLayout() const; - - /// Gets the total memory usage, used by svcGetInfo - u64 GetTotalPhysicalMemoryAvailable() const; - - /// Gets the address space base address - VAddr GetAddressSpaceBaseAddress() const; - - /// Gets the address space end address - VAddr GetAddressSpaceEndAddress() const; - - /// Gets the total address space address size in bytes - u64 GetAddressSpaceSize() const; - - /// Gets the address space width in bits. - u64 GetAddressSpaceWidth() const; - - /// Determines whether or not the given address range lies within the address space. - bool IsWithinAddressSpace(VAddr address, u64 size) const; - - /// Gets the base address of the ASLR region. - VAddr GetASLRRegionBaseAddress() const; - - /// Gets the end address of the ASLR region. - VAddr GetASLRRegionEndAddress() const; - - /// Gets the size of the ASLR region - u64 GetASLRRegionSize() const; - - /// Determines whether or not the specified address range is within the ASLR region. - bool IsWithinASLRRegion(VAddr address, u64 size) const; - - /// Gets the base address of the code region. - VAddr GetCodeRegionBaseAddress() const; - - /// Gets the end address of the code region. - VAddr GetCodeRegionEndAddress() const; - - /// Gets the total size of the code region in bytes. - u64 GetCodeRegionSize() const; - - /// Determines whether or not the specified range is within the code region. - bool IsWithinCodeRegion(VAddr address, u64 size) const; - - /// Gets the base address of the heap region. - VAddr GetHeapRegionBaseAddress() const; - - /// Gets the end address of the heap region; - VAddr GetHeapRegionEndAddress() const; - - /// Gets the total size of the heap region in bytes. - u64 GetHeapRegionSize() const; - - /// Gets the total size of the current heap in bytes. - /// - /// @note This is the current allocated heap size, not the size - /// of the region it's allowed to exist within. - /// - u64 GetCurrentHeapSize() const; - - /// Determines whether or not the specified range is within the heap region. - bool IsWithinHeapRegion(VAddr address, u64 size) const; - - /// Gets the base address of the map region. - VAddr GetMapRegionBaseAddress() const; - - /// Gets the end address of the map region. - VAddr GetMapRegionEndAddress() const; - - /// Gets the total size of the map region in bytes. - u64 GetMapRegionSize() const; - - /// Determines whether or not the specified range is within the map region. 
- bool IsWithinMapRegion(VAddr address, u64 size) const; - - /// Gets the base address of the stack region. - VAddr GetStackRegionBaseAddress() const; - - /// Gets the end address of the stack region. - VAddr GetStackRegionEndAddress() const; - - /// Gets the total size of the stack region in bytes. - u64 GetStackRegionSize() const; - - /// Determines whether or not the given address range is within the stack region - bool IsWithinStackRegion(VAddr address, u64 size) const; - - /// Gets the base address of the TLS IO region. - VAddr GetTLSIORegionBaseAddress() const; - - /// Gets the end address of the TLS IO region. - VAddr GetTLSIORegionEndAddress() const; - - /// Gets the total size of the TLS IO region in bytes. - u64 GetTLSIORegionSize() const; - - /// Determines if the given address range is within the TLS IO region. - bool IsWithinTLSIORegion(VAddr address, u64 size) const; - - /// Each VMManager has its own page table, which is set as the main one when the owning process - /// is scheduled. - Common::PageTable page_table{Memory::PAGE_BITS}; - - using CheckResults = ResultVal<std::tuple<MemoryState, VMAPermission, MemoryAttribute>>; - - /// Checks if an address range adheres to the specified states provided. - /// - /// @param address The starting address of the address range. - /// @param size The size of the address range. - /// @param state_mask The memory state mask. - /// @param state The state to compare the individual VMA states against, - /// which is done in the form of: (vma.state & state_mask) != state. - /// @param permission_mask The memory permissions mask. - /// @param permissions The permission to compare the individual VMA permissions against, - /// which is done in the form of: - /// (vma.permission & permission_mask) != permission. - /// @param attribute_mask The memory attribute mask. - /// @param attribute The memory attributes to compare the individual VMA attributes - /// against, which is done in the form of: - /// (vma.attributes & attribute_mask) != attribute. - /// @param ignore_mask The memory attributes to ignore during the check. - /// - /// @returns If successful, returns a tuple containing the memory attributes - /// (with ignored bits specified by ignore_mask unset), memory permissions, and - /// memory state across the memory range. - /// @returns If not successful, returns ERR_INVALID_ADDRESS_STATE. - /// - CheckResults CheckRangeState(VAddr address, u64 size, MemoryState state_mask, MemoryState state, - VMAPermission permission_mask, VMAPermission permissions, - MemoryAttribute attribute_mask, MemoryAttribute attribute, - MemoryAttribute ignore_mask) const; - -private: - using VMAIter = VMAMap::iterator; - - /// Converts a VMAHandle to a mutable VMAIter. - VMAIter StripIterConstness(const VMAHandle& iter); - - /// Unmaps the given VMA. - VMAIter Unmap(VMAIter vma); - - /** - * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing - * the appropriate error checking. - */ - ResultVal<VMAIter> CarveVMA(VAddr base, u64 size); - - /** - * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each - * end of the range. - */ - ResultVal<VMAIter> CarveVMARange(VAddr base, u64 size); - - /** - * Splits a VMA in two, at the specified offset. - * @returns the right side of the split, with the original iterator becoming the left side. - */ - VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma); - - /** - * Checks for and merges the specified VMA with adjacent ones if possible. 
- * @returns the merged VMA or the original if no merging was possible. - */ - VMAIter MergeAdjacent(VMAIter vma); - - /** - * Merges two adjacent VMAs. - */ - void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right); - - /// Updates the pages corresponding to this VMA so they match the VMA's attributes. - void UpdatePageTableForVMA(const VirtualMemoryArea& vma); - - /// Initializes memory region ranges to adhere to a given address space type. - void InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType type); - - /// Clears the underlying map and page table. - void Clear(); - - /// Clears out the VMA map, unmapping any previously mapped ranges. - void ClearVMAMap(); - - /// Clears out the page table - void ClearPageTable(); - - /// Gets the amount of memory currently mapped (state != Unmapped) in a range. - ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const; - - /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range. - ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address, - std::size_t size) const; - - /** - * A map covering the entirety of the managed address space, keyed by the `base` field of each - * VMA. It must always be modified by splitting or merging VMAs, so that the invariant - * `elem.base + elem.size == next.base` is preserved, and mergeable regions must always be - * merged when possible so that no two similar and adjacent regions exist that have not been - * merged. - */ - VMAMap vma_map; - - u32 address_space_width = 0; - VAddr address_space_base = 0; - VAddr address_space_end = 0; - - VAddr aslr_region_base = 0; - VAddr aslr_region_end = 0; - - VAddr code_region_base = 0; - VAddr code_region_end = 0; - - VAddr heap_region_base = 0; - VAddr heap_region_end = 0; - - VAddr map_region_base = 0; - VAddr map_region_end = 0; - - VAddr stack_region_base = 0; - VAddr stack_region_end = 0; - - VAddr tls_io_region_base = 0; - VAddr tls_io_region_end = 0; - - // Memory used to back the allocations in the regular heap. A single vector is used to cover - // the entire virtual address space extents that bound the allocations, including any holes. - // This makes deallocation and reallocation of holes fast and keeps process memory contiguous - // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. - std::shared_ptr<PhysicalMemory> heap_memory; - - // The end of the currently allocated heap. This is not an inclusive - // end of the range. This is essentially 'base_address + current_size'. - VAddr heap_end = 0; - - // The current amount of memory mapped via MapPhysicalMemory. - // This is used here (and in Nintendo's kernel) only for debugging, and does not impact - // any behavior. 
- u64 physical_memory_mapped = 0; - - Core::System& system; -}; -} // namespace Kernel diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 4fb2cbc4b..106e89743 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -210,7 +210,7 @@ private: /// This is the event handle used to check if the audio buffer was released Kernel::EventPair buffer_event; - Memory::Memory& main_memory; + Core::Memory::Memory& main_memory; }; AudOutU::AudOutU(Core::System& system_) : ServiceFramework("audout:u"), system{system_} { diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 82a5dbf14..175cabf45 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -129,7 +129,7 @@ private: LOG_DEBUG(Service_Audio, "called. rendering_time_limit_percent={}", rendering_time_limit_percent); - ASSERT(rendering_time_limit_percent >= 0 && rendering_time_limit_percent <= 100); + ASSERT(rendering_time_limit_percent <= 100); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp index 102017d73..cadc03805 100644 --- a/src/core/hle/service/filesystem/filesystem.cpp +++ b/src/core/hle/service/filesystem/filesystem.cpp @@ -451,7 +451,8 @@ FileSys::SaveDataSize FileSystemController::ReadSaveDataSize(FileSys::SaveDataTy if (res != Loader::ResultStatus::Success) { FileSys::PatchManager pm{system.CurrentProcess()->GetTitleID()}; - auto [nacp_unique, discard] = pm.GetControlMetadata(); + const auto metadata = pm.GetControlMetadata(); + const auto& nacp_unique = metadata.first; if (nacp_unique != nullptr) { new_size = {nacp_unique->GetDefaultNormalSaveSize(), diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index e6811d5b5..61045c75c 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -575,6 +575,7 @@ private: 0, user_id->GetSize(), {}, + {}, }); continue; @@ -595,6 +596,7 @@ private: stoull_be(title_id->GetName()), title_id->GetSize(), {}, + {}, }); } } @@ -619,6 +621,7 @@ private: stoull_be(title_id->GetName()), title_id->GetSize(), {}, + {}, }); } } diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp index 6aadb3ea8..7938b4b80 100644 --- a/src/core/hle/service/friend/friend.cpp +++ b/src/core/hle/service/friend/friend.cpp @@ -27,7 +27,7 @@ public: {10110, nullptr, "GetFriendProfileImage"}, {10200, nullptr, "SendFriendRequestForApplication"}, {10211, nullptr, "AddFacedFriendRequestForApplication"}, - {10400, nullptr, "GetBlockedUserListIds"}, + {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"}, {10500, nullptr, "GetProfileList"}, {10600, nullptr, "DeclareOpenOnlinePlaySession"}, {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"}, @@ -121,6 +121,15 @@ private: }; static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size"); + void GetBlockedUserListIds(Kernel::HLERequestContext& ctx) { + // This is safe to stub, as there should be no adverse consequences from reporting no + // blocked users. 
+ LOG_WARNING(Service_ACC, "(STUBBED) called"); + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(0); // Indicates there are no blocked users + } + void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) { // Stub used by Splatoon 2 LOG_WARNING(Service_ACC, "(STUBBED) called"); diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index d6ed5f304..d6031a987 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -14,6 +14,7 @@ #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/writable_event.h" @@ -53,9 +54,7 @@ IAppletResource::IAppletResource(Core::System& system) RegisterHandlers(functions); auto& kernel = system.Kernel(); - shared_mem = Kernel::SharedMemory::Create( - kernel, nullptr, SHARED_MEMORY_SIZE, Kernel::MemoryPermission::ReadWrite, - Kernel::MemoryPermission::Read, 0, Kernel::MemoryRegion::BASE, "HID:SharedMemory"); + shared_mem = SharedFrom(&kernel.GetHidSharedMem()); MakeController<Controller_DebugPad>(HidController::DebugPad); MakeController<Controller_Touchscreen>(HidController::Touchscreen); diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp index 5e79e2c1a..36ed6f7da 100644 --- a/src/core/hle/service/hid/irs.cpp +++ b/src/core/hle/service/hid/irs.cpp @@ -6,6 +6,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/hle/ipc_helpers.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/shared_memory.h" #include "core/hle/service/hid/irs.h" @@ -38,9 +39,8 @@ IRS::IRS(Core::System& system) : ServiceFramework{"irs"}, system(system) { RegisterHandlers(functions); auto& kernel = system.Kernel(); - shared_mem = Kernel::SharedMemory::Create( - kernel, nullptr, 0x8000, Kernel::MemoryPermission::ReadWrite, - Kernel::MemoryPermission::Read, 0, Kernel::MemoryRegion::BASE, "IRS:SharedMemory"); + + shared_mem = SharedFrom(&kernel.GetIrsSharedMem()); } void IRS::ActivateIrsensor(Kernel::HLERequestContext& ctx) { diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 647943020..0cde7a557 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp @@ -8,14 +8,21 @@ #include "common/alignment.h" #include "common/hex_util.h" +#include "common/scope_exit.h" #include "core/hle/ipc_helpers.h" +#include "core/hle/kernel/errors.h" +#include "core/hle/kernel/memory/page_table.h" +#include "core/hle/kernel/memory/system_control.h" #include "core/hle/kernel/process.h" #include "core/hle/service/ldr/ldr.h" #include "core/hle/service/service.h" #include "core/loader/nro.h" +#include "core/memory.h" namespace Service::LDR { +constexpr ResultCode ERROR_INSUFFICIENT_ADDRESS_SPACE{ErrorModule::RO, 2}; + constexpr ResultCode ERROR_INVALID_MEMORY_STATE{ErrorModule::Loader, 51}; constexpr ResultCode ERROR_INVALID_NRO{ErrorModule::Loader, 52}; constexpr ResultCode ERROR_INVALID_NRR{ErrorModule::Loader, 53}; @@ -29,7 +36,61 @@ constexpr ResultCode ERROR_INVALID_NRO_ADDRESS{ErrorModule::Loader, 84}; constexpr ResultCode ERROR_INVALID_NRR_ADDRESS{ErrorModule::Loader, 85}; constexpr ResultCode ERROR_NOT_INITIALIZED{ErrorModule::Loader, 87}; -constexpr u64 MAXIMUM_LOADED_RO = 0x40; +constexpr std::size_t MAXIMUM_LOADED_RO{0x40}; +constexpr std::size_t MAXIMUM_MAP_RETRIES{0x200}; + +struct 
NRRHeader { + u32_le magic; + INSERT_PADDING_BYTES(12); + u64_le title_id_mask; + u64_le title_id_pattern; + INSERT_PADDING_BYTES(16); + std::array<u8, 0x100> modulus; + std::array<u8, 0x100> signature_1; + std::array<u8, 0x100> signature_2; + u64_le title_id; + u32_le size; + INSERT_PADDING_BYTES(4); + u32_le hash_offset; + u32_le hash_count; + INSERT_PADDING_BYTES(8); +}; +static_assert(sizeof(NRRHeader) == 0x350, "NRRHeader has incorrect size."); + +struct NROHeader { + INSERT_PADDING_WORDS(1); + u32_le mod_offset; + INSERT_PADDING_WORDS(2); + u32_le magic; + u32_le version; + u32_le nro_size; + u32_le flags; + u32_le text_offset; + u32_le text_size; + u32_le ro_offset; + u32_le ro_size; + u32_le rw_offset; + u32_le rw_size; + u32_le bss_size; + INSERT_PADDING_WORDS(1); + std::array<u8, 0x20> build_id; + INSERT_PADDING_BYTES(0x20); +}; +static_assert(sizeof(NROHeader) == 0x80, "NROHeader has invalid size."); + +using SHA256Hash = std::array<u8, 0x20>; + +struct NROInfo { + SHA256Hash hash{}; + VAddr nro_address{}; + std::size_t nro_size{}; + VAddr bss_address{}; + std::size_t bss_size{}; + std::size_t text_size{}; + std::size_t ro_size{}; + std::size_t data_size{}; + VAddr src_addr{}; +}; class DebugMonitor final : public ServiceFramework<DebugMonitor> { public: @@ -84,7 +145,7 @@ public: {0, &RelocatableObject::LoadNro, "LoadNro"}, {1, &RelocatableObject::UnloadNro, "UnloadNro"}, {2, &RelocatableObject::LoadNrr, "LoadNrr"}, - {3, &RelocatableObject::UnloadNrr, "UnloadNrr"}, + {3, nullptr, "UnloadNrr"}, {4, &RelocatableObject::Initialize, "Initialize"}, {10, nullptr, "LoadNrrEx"}, }; @@ -190,46 +251,125 @@ public: rb.Push(RESULT_SUCCESS); } - void UnloadNrr(Kernel::HLERequestContext& ctx) { - if (!initialized) { - LOG_ERROR(Service_LDR, "LDR:RO not initialized before use!"); - IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(ERROR_NOT_INITIALIZED); - return; + bool ValidateRegionForMap(Kernel::Memory::PageTable& page_table, VAddr start, + std::size_t size) const { + constexpr std::size_t padding_size{4 * Kernel::Memory::PageSize}; + const auto start_info{page_table.QueryInfo(start - 1)}; + + if (start_info.state != Kernel::Memory::MemoryState::Free) { + return {}; } - struct Parameters { - u64_le process_id; - u64_le nrr_address; - }; + if (start_info.GetAddress() > (start - padding_size)) { + return {}; + } - IPC::RequestParser rp{ctx}; - const auto [process_id, nrr_address] = rp.PopRaw<Parameters>(); + const auto end_info{page_table.QueryInfo(start + size)}; - LOG_DEBUG(Service_LDR, "called with process_id={:016X}, nrr_addr={:016X}", process_id, - nrr_address); + if (end_info.state != Kernel::Memory::MemoryState::Free) { + return {}; + } - if (!Common::Is4KBAligned(nrr_address)) { - LOG_ERROR(Service_LDR, "NRR Address has invalid alignment (actual {:016X})!", - nrr_address); - IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(ERROR_INVALID_ALIGNMENT); - return; + return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize()); + } + + VAddr GetRandomMapRegion(const Kernel::Memory::PageTable& page_table, std::size_t size) const { + VAddr addr{}; + const std::size_t end_pages{(page_table.GetAliasCodeRegionSize() - size) >> + Kernel::Memory::PageBits}; + do { + addr = page_table.GetAliasCodeRegionStart() + + (Kernel::Memory::SystemControl::GenerateRandomRange(0, end_pages) + << Kernel::Memory::PageBits); + } while (!page_table.IsInsideAddressSpace(addr, size) || + page_table.IsInsideHeapRegion(addr, size) || + page_table.IsInsideAliasRegion(addr, size)); + return addr; + } + + 
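The two helpers above implement randomized (ASLR-style) placement: GetRandomMapRegion repeatedly draws a page-aligned candidate address from the alias-code region until the candidate avoids the heap and alias regions, and ValidateRegionForMap then requires a few pages of free padding on either side of the prospective mapping. The standalone sketch below illustrates the same pick-and-validate pattern under stated assumptions; PickRandomMapping, the is_free callback, and the constants are illustrative names for this note, not yuzu API:

    #include <cstdint>
    #include <functional>
    #include <optional>
    #include <random>

    // Illustrative constants; the real values come from the process page table.
    constexpr std::uint64_t kPageBits = 12;
    constexpr std::uint64_t kPageSize = 1ULL << kPageBits;
    constexpr std::uint64_t kGuardPages = 4; // mirrors the 4-page guard in ValidateRegionForMap

    // Draws random page-aligned candidates in [region_start, region_start + region_size - size]
    // and accepts the first whose extent, plus a guard band on both sides, is reported free.
    // Assumes region_start sits at least kGuardPages pages above the bottom of the address space.
    std::optional<std::uint64_t> PickRandomMapping(
        std::uint64_t region_start, std::uint64_t region_size, std::uint64_t size,
        const std::function<bool(std::uint64_t addr, std::uint64_t len)>& is_free,
        std::size_t max_retries = 0x200) {
        std::mt19937_64 rng{std::random_device{}()};
        std::uniform_int_distribution<std::uint64_t> dist(0, (region_size - size) >> kPageBits);

        for (std::size_t retry = 0; retry < max_retries; ++retry) {
            const std::uint64_t addr = region_start + (dist(rng) << kPageBits);
            const std::uint64_t guard = kGuardPages * kPageSize;
            if (is_free(addr - guard, size + 2 * guard)) {
                return addr; // candidate plus guard band is free
            }
        }
        return std::nullopt; // analogous to ERROR_INSUFFICIENT_ADDRESS_SPACE
    }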
+    ResultVal<VAddr> MapProcessCodeMemory(Kernel::Process* process, VAddr baseAddress,
+                                          u64 size) const {
+        for (std::size_t retry{}; retry < MAXIMUM_MAP_RETRIES; retry++) {
+            auto& page_table{process->PageTable()};
+            const VAddr addr{GetRandomMapRegion(page_table, size)};
+            const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)};
+
+            if (result == Kernel::ERR_INVALID_ADDRESS_STATE) {
+                continue;
+            }
+
+            CASCADE_CODE(result);
+
+            if (ValidateRegionForMap(page_table, addr, size)) {
+                return MakeResult<VAddr>(addr);
+            }
         }

-        const auto iter = nrr.find(nrr_address);
-        if (iter == nrr.end()) {
-            LOG_ERROR(Service_LDR,
-                      "Attempting to unload NRR which has not been loaded! (addr={:016X})",
-                      nrr_address);
-            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERROR_INVALID_NRR_ADDRESS);
-            return;
+        return ERROR_INSUFFICIENT_ADDRESS_SPACE;
+    }
+
+    ResultVal<VAddr> MapNro(Kernel::Process* process, VAddr nro_addr, std::size_t nro_size,
+                            VAddr bss_addr, std::size_t bss_size, std::size_t size) const {
+
+        for (std::size_t retry{}; retry < MAXIMUM_MAP_RETRIES; retry++) {
+            auto& page_table{process->PageTable()};
+            VAddr addr{};
+
+            CASCADE_RESULT(addr, MapProcessCodeMemory(process, nro_addr, nro_size));
+
+            if (bss_size) {
+                auto block_guard = detail::ScopeExit([&] {
+                    page_table.UnmapProcessCodeMemory(addr + nro_size, bss_addr, bss_size);
+                    page_table.UnmapProcessCodeMemory(addr, nro_addr, nro_size);
+                });
+
+                const ResultCode result{
+                    page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)};
+
+                if (result == Kernel::ERR_INVALID_ADDRESS_STATE) {
+                    continue;
+                }
+
+                if (result.IsError()) {
+                    return result;
+                }
+
+                block_guard.Cancel();
+            }
+
+            if (ValidateRegionForMap(page_table, addr, size)) {
+                return MakeResult<VAddr>(addr);
+            }
         }

-        nrr.erase(iter);
-        IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(RESULT_SUCCESS);
+        return ERROR_INSUFFICIENT_ADDRESS_SPACE;
+    }
+
+    ResultCode LoadNro(Kernel::Process* process, const NROHeader& nro_header, VAddr nro_addr,
+                       VAddr start) const {
+        const VAddr text_start{start + nro_header.text_offset};
+        const VAddr ro_start{start + nro_header.ro_offset};
+        const VAddr data_start{start + nro_header.rw_offset};
+        const VAddr bss_start{data_start + nro_header.rw_size};
+        const VAddr bss_end_addr{
+            Common::AlignUp(bss_start + nro_header.bss_size, Kernel::Memory::PageSize)};
+
+        auto CopyCode{[&](VAddr src_addr, VAddr dst_addr, u64 size) {
+            std::vector<u8> source_data(size);
+            system.Memory().ReadBlock(src_addr, source_data.data(), source_data.size());
+            system.Memory().WriteBlock(dst_addr, source_data.data(), source_data.size());
+        }};
+        CopyCode(nro_addr + nro_header.text_offset, text_start, nro_header.text_size);
+        CopyCode(nro_addr + nro_header.ro_offset, ro_start, nro_header.ro_size);
+        CopyCode(nro_addr + nro_header.rw_offset, data_start, nro_header.rw_size);
+
+        CASCADE_CODE(process->PageTable().SetCodeMemoryPermission(
+            text_start, ro_start - text_start, Kernel::Memory::MemoryPermission::ReadAndExecute));
+        CASCADE_CODE(process->PageTable().SetCodeMemoryPermission(
+            ro_start, data_start - ro_start, Kernel::Memory::MemoryPermission::Read));
+
+        return process->PageTable().SetCodeMemoryPermission(
+            data_start, bss_end_addr - data_start, Kernel::Memory::MemoryPermission::ReadAndWrite);
     }

     void LoadNro(Kernel::HLERequestContext& ctx) {
@@ -317,9 +457,9 @@ public:
             return;
         }

-        NROHeader header;
+        // Load and validate the NRO header
+        NROHeader header{};
         std::memcpy(&header, nro_data.data(), sizeof(NROHeader));
-
         if (!IsValidNRO(header, nro_size, bss_size)) {
             LOG_ERROR(Service_LDR, "NRO was invalid!");
             IPC::ResponseBuilder rb{ctx, 2};
@@ -327,62 +467,48 @@ public:
             return;
         }

-        // Load NRO as new executable module
-        auto* process = system.CurrentProcess();
-        auto& vm_manager = process->VMManager();
-        auto map_address = vm_manager.FindFreeRegion(nro_size + bss_size);
-
-        if (!map_address.Succeeded() ||
-            *map_address + nro_size + bss_size > vm_manager.GetAddressSpaceEndAddress()) {
-
-            LOG_ERROR(Service_LDR,
-                      "General error while allocation memory or no available memory to allocate!");
+        // Map memory for the NRO
+        const auto map_result{MapNro(system.CurrentProcess(), nro_address, nro_size, bss_address,
+                                     bss_size, nro_size + bss_size)};
+        if (map_result.Failed()) {
             IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERROR_INVALID_MEMORY_STATE);
-            return;
+            rb.Push(map_result.Code());
+            return;
         }

-        // Mark text and read-only region as ModuleCode
-        ASSERT(vm_manager
-                   .MirrorMemory(*map_address, nro_address, header.text_size + header.ro_size,
-                                 Kernel::MemoryState::ModuleCode)
-                   .IsSuccess());
-        // Mark read/write region as ModuleCodeData, which is necessary if this region is used for
-        // TransferMemory (e.g. Final Fantasy VIII Remastered does this)
-        ASSERT(vm_manager
-                   .MirrorMemory(*map_address + header.rw_offset, nro_address + header.rw_offset,
-                                 header.rw_size, Kernel::MemoryState::ModuleCodeData)
-                   .IsSuccess());
-        // Revoke permissions from the old memory region
-        ASSERT(vm_manager.ReprotectRange(nro_address, nro_size, Kernel::VMAPermission::None)
-                   .IsSuccess());
-
-        if (bss_size > 0) {
-            // Mark BSS region as ModuleCodeData, which is necessary if this region is used for
-            // TransferMemory (e.g. Final Fantasy VIII Remastered does this)
-            ASSERT(vm_manager
-                       .MirrorMemory(*map_address + nro_size, bss_address, bss_size,
-                                     Kernel::MemoryState::ModuleCodeData)
-                       .IsSuccess());
-            ASSERT(vm_manager.ReprotectRange(bss_address, bss_size, Kernel::VMAPermission::None)
-                       .IsSuccess());
+        // Load the NRO into the mapped memory
+        if (const auto result{LoadNro(system.CurrentProcess(), header, nro_address, *map_result)};
+            result.IsError()) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(result);
+            return;
         }

-        vm_manager.ReprotectRange(*map_address, header.text_size,
-                                  Kernel::VMAPermission::ReadExecute);
-        vm_manager.ReprotectRange(*map_address + header.ro_offset, header.ro_size,
-                                  Kernel::VMAPermission::Read);
-        vm_manager.ReprotectRange(*map_address + header.rw_offset, header.rw_size,
-                                  Kernel::VMAPermission::ReadWrite);
+        // Track the loaded NRO
+        nro.insert_or_assign(*map_result, NROInfo{hash, *map_result, nro_size, bss_address,
+                                                  bss_size, header.text_size, header.ro_size,
+                                                  header.rw_size, nro_address});

+        // Invalidate JIT caches for the newly mapped process code
         system.InvalidateCpuInstructionCaches();

-        nro.insert_or_assign(*map_address,
-                             NROInfo{hash, nro_address, nro_size, bss_address, bss_size});
-
         IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(RESULT_SUCCESS);
-        rb.Push(*map_address);
+        rb.Push(*map_result);
+    }
+
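UnmapNro, next, relies on the image having been mapped back-to-back as text | ro | data | bss and tears the regions down in reverse order, one at a time, so that each unmap call validates the memory state of exactly one region. A small sketch of the address arithmetic involved; NROLayout and its field names are stand-ins for this note, not the service's types:

    #include <cstdint>

    // Illustrative mirror of the per-NRO bookkeeping.
    struct NROLayout {
        std::uint64_t nro_address; // base of the mapped image
        std::uint64_t text_size;
        std::uint64_t ro_size;
        std::uint64_t data_size;
        std::uint64_t bss_size;
    };

    // The regions are contiguous, so each base is the running sum of the preceding
    // sizes; unmapping walks them from the last (bss) back to the first (text).
    constexpr std::uint64_t TextBase(const NROLayout& l) { return l.nro_address; }
    constexpr std::uint64_t RoBase(const NROLayout& l)   { return TextBase(l) + l.text_size; }
    constexpr std::uint64_t DataBase(const NROLayout& l) { return RoBase(l) + l.ro_size; }
    constexpr std::uint64_t BssBase(const NROLayout& l)  { return DataBase(l) + l.data_size; }

+    ResultCode UnmapNro(const NROInfo& info) {
+        // Each region must be unmapped separately to validate memory state
+        auto& page_table{system.CurrentProcess()->PageTable()};
+        CASCADE_CODE(page_table.UnmapProcessCodeMemory(info.nro_address + info.text_size +
+                                                           info.ro_size + info.data_size,
+                                                       info.bss_address, info.bss_size));
+        CASCADE_CODE(page_table.UnmapProcessCodeMemory(
+            info.nro_address + info.text_size + info.ro_size,
+            info.src_addr + info.text_size + info.ro_size, info.data_size));
+        CASCADE_CODE(page_table.UnmapProcessCodeMemory(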
info.nro_address + info.text_size, info.src_addr + info.text_size, info.ro_size)); + CASCADE_CODE( + page_table.UnmapProcessCodeMemory(info.nro_address, info.src_addr, info.text_size)); + return RESULT_SUCCESS; } void UnloadNro(Kernel::HLERequestContext& ctx) { @@ -422,30 +548,15 @@ public: return; } - auto& vm_manager = system.CurrentProcess()->VMManager(); - const auto& nro_info = iter->second; - - // Unmap the mirrored memory - ASSERT( - vm_manager.UnmapRange(nro_address, nro_info.nro_size + nro_info.bss_size).IsSuccess()); - - // Reprotect the source memory - ASSERT(vm_manager - .ReprotectRange(nro_info.nro_address, nro_info.nro_size, - Kernel::VMAPermission::ReadWrite) - .IsSuccess()); - if (nro_info.bss_size > 0) { - ASSERT(vm_manager - .ReprotectRange(nro_info.bss_address, nro_info.bss_size, - Kernel::VMAPermission::ReadWrite) - .IsSuccess()); - } + const auto result{UnmapNro(iter->second)}; system.InvalidateCpuInstructionCaches(); nro.erase(iter); + IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(RESULT_SUCCESS); + + rb.Push(result); } void Initialize(Kernel::HLERequestContext& ctx) { @@ -458,56 +569,7 @@ public: } private: - using SHA256Hash = std::array<u8, 0x20>; - - struct NROHeader { - INSERT_PADDING_WORDS(1); - u32_le mod_offset; - INSERT_PADDING_WORDS(2); - u32_le magic; - u32_le version; - u32_le nro_size; - u32_le flags; - u32_le text_offset; - u32_le text_size; - u32_le ro_offset; - u32_le ro_size; - u32_le rw_offset; - u32_le rw_size; - u32_le bss_size; - INSERT_PADDING_WORDS(1); - std::array<u8, 0x20> build_id; - INSERT_PADDING_BYTES(0x20); - }; - static_assert(sizeof(NROHeader) == 0x80, "NROHeader has invalid size."); - - struct NRRHeader { - u32_le magic; - INSERT_PADDING_BYTES(12); - u64_le title_id_mask; - u64_le title_id_pattern; - INSERT_PADDING_BYTES(16); - std::array<u8, 0x100> modulus; - std::array<u8, 0x100> signature_1; - std::array<u8, 0x100> signature_2; - u64_le title_id; - u32_le size; - INSERT_PADDING_BYTES(4); - u32_le hash_offset; - u32_le hash_count; - INSERT_PADDING_BYTES(8); - }; - static_assert(sizeof(NRRHeader) == 0x350, "NRRHeader has incorrect size."); - - struct NROInfo { - SHA256Hash hash; - VAddr nro_address; - u64 nro_size; - VAddr bss_address; - u64 bss_size; - }; - - bool initialized = false; + bool initialized{}; std::map<VAddr, NROInfo> nro; std::map<VAddr, std::vector<SHA256Hash>> nrr; diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp index 346c8f899..dec96b771 100644 --- a/src/core/hle/service/lm/lm.cpp +++ b/src/core/hle/service/lm/lm.cpp @@ -17,7 +17,7 @@ namespace Service::LM { class ILogger final : public ServiceFramework<ILogger> { public: - explicit ILogger(Manager& manager_, Memory::Memory& memory_) + explicit ILogger(Manager& manager_, Core::Memory::Memory& memory_) : ServiceFramework("ILogger"), manager{manager_}, memory{memory_} { static const FunctionInfo functions[] = { {0, &ILogger::Log, "Log"}, @@ -75,12 +75,12 @@ private: } Manager& manager; - Memory::Memory& memory; + Core::Memory::Memory& memory; }; class LM final : public ServiceFramework<LM> { public: - explicit LM(Manager& manager_, Memory::Memory& memory_) + explicit LM(Manager& manager_, Core::Memory::Memory& memory_) : ServiceFramework{"lm"}, manager{manager_}, memory{memory_} { // clang-format off static const FunctionInfo functions[] = { @@ -101,7 +101,7 @@ private: } Manager& manager; - Memory::Memory& memory; + Core::Memory::Memory& memory; }; void InstallInterfaces(Core::System& system) { diff --git a/src/core/hle/service/ns/pl_u.cpp 
b/src/core/hle/service/ns/pl_u.cpp index 8da4e52c5..ab1746d28 100644 --- a/src/core/hle/service/ns/pl_u.cpp +++ b/src/core/hle/service/ns/pl_u.cpp @@ -19,6 +19,7 @@ #include "core/file_sys/romfs.h" #include "core/file_sys/system_archive/system_archive.h" #include "core/hle/ipc_helpers.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/physical_memory.h" #include "core/hle/kernel/shared_memory.h" #include "core/hle/service/filesystem/filesystem.h" @@ -265,16 +266,13 @@ void PL_U::GetSharedMemoryAddressOffset(Kernel::HLERequestContext& ctx) { void PL_U::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx) { // Map backing memory for the font data LOG_DEBUG(Service_NS, "called"); - system.CurrentProcess()->VMManager().MapMemoryBlock(SHARED_FONT_MEM_VADDR, impl->shared_font, 0, - SHARED_FONT_MEM_SIZE, - Kernel::MemoryState::Shared); // Create shared font memory object auto& kernel = system.Kernel(); - impl->shared_font_mem = Kernel::SharedMemory::Create( - kernel, system.CurrentProcess(), SHARED_FONT_MEM_SIZE, Kernel::MemoryPermission::ReadWrite, - Kernel::MemoryPermission::Read, SHARED_FONT_MEM_VADDR, Kernel::MemoryRegion::BASE, - "PL_U:shared_font_mem"); + impl->shared_font_mem = SharedFrom(&kernel.GetFontSharedMem()); + + std::memcpy(impl->shared_font_mem->GetPointer(), impl->shared_font->data(), + impl->shared_font->size()); IPC::ResponseBuilder rb{ctx, 2, 1}; rb.Push(RESULT_SUCCESS); diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 32b6f4b27..f1e3d832a 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -28,6 +28,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) buffer.slot = slot; buffer.igbp_buffer = igbp_buffer; buffer.status = Buffer::Status::Free; + free_buffers.push_back(slot); queue.emplace_back(buffer); buffer_wait_event.writable->Signal(); @@ -35,16 +36,37 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, u32 height) { - auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { - // Only consider free buffers. Buffers become free once again after they've been Acquired - // and Released by the compositor, see the NVFlinger::Compose method. - if (buffer.status != Buffer::Status::Free) { - return false; - } - // Make sure that the parameters match. - return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height; - }); + if (free_buffers.empty()) { + return {}; + } + + auto f_itr = free_buffers.begin(); + auto itr = queue.end(); + + while (f_itr != free_buffers.end()) { + auto slot = *f_itr; + itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { + // Only consider free buffers. Buffers become free once again after they've been + // Acquired and Released by the compositor, see the NVFlinger::Compose method. + if (buffer.status != Buffer::Status::Free) { + return false; + } + + if (buffer.slot != slot) { + return false; + } + + // Make sure that the parameters match. 
+ return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height; + }); + + if (itr != queue.end()) { + free_buffers.erase(f_itr); + break; + } + ++f_itr; + } if (itr == queue.end()) { return {}; @@ -99,10 +121,18 @@ void BufferQueue::ReleaseBuffer(u32 slot) { ASSERT(itr != queue.end()); ASSERT(itr->status == Buffer::Status::Acquired); itr->status = Buffer::Status::Free; + free_buffers.push_back(slot); buffer_wait_event.writable->Signal(); } +void BufferQueue::Disconnect() { + queue.clear(); + queue_sequence.clear(); + id = 1; + layer_id = 1; +} + u32 BufferQueue::Query(QueryType type) { LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type)); diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index f4bbfd945..d5f31e567 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -87,6 +87,7 @@ public: Service::Nvidia::MultiFence& multi_fence); std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); void ReleaseBuffer(u32 slot); + void Disconnect(); u32 Query(QueryType type); u32 GetId() const { @@ -101,6 +102,7 @@ private: u32 id; u64 layer_id; + std::list<u32> free_buffers; std::vector<Buffer> queue; std::list<u32> queue_sequence; Kernel::EventPair buffer_wait_event; diff --git a/src/core/hle/service/time/interface.cpp b/src/core/hle/service/time/interface.cpp index f509653a3..ba8fd6152 100644 --- a/src/core/hle/service/time/interface.cpp +++ b/src/core/hle/service/time/interface.cpp @@ -29,7 +29,7 @@ Time::Time(std::shared_ptr<Module> module, Core::System& system, const char* nam {300, &Time::CalculateMonotonicSystemClockBaseTimePoint, "CalculateMonotonicSystemClockBaseTimePoint"}, {400, &Time::GetClockSnapshot, "GetClockSnapshot"}, {401, &Time::GetClockSnapshotFromSystemClockContext, "GetClockSnapshotFromSystemClockContext"}, - {500, nullptr, "CalculateStandardUserSystemClockDifferenceByUser"}, + {500, &Time::CalculateStandardUserSystemClockDifferenceByUser, "CalculateStandardUserSystemClockDifferenceByUser"}, {501, &Time::CalculateSpanBetween, "CalculateSpanBetween"}, }; // clang-format on diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index ce859f18d..e722886de 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -308,6 +308,29 @@ void Module::Interface::GetClockSnapshotFromSystemClockContext(Kernel::HLEReques ctx.WriteBuffer(&clock_snapshot, sizeof(Clock::ClockSnapshot)); } +void Module::Interface::CalculateStandardUserSystemClockDifferenceByUser( + Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_Time, "called"); + + IPC::RequestParser rp{ctx}; + const auto snapshot_a = rp.PopRaw<Clock::ClockSnapshot>(); + const auto snapshot_b = rp.PopRaw<Clock::ClockSnapshot>(); + + auto time_span_type{Clock::TimeSpanType::FromSeconds(snapshot_b.user_context.offset - + snapshot_a.user_context.offset)}; + + if ((snapshot_b.user_context.steady_time_point.clock_source_id != + snapshot_a.user_context.steady_time_point.clock_source_id) || + (snapshot_b.is_automatic_correction_enabled && + snapshot_a.is_automatic_correction_enabled)) { + time_span_type.nanoseconds = 0; + } + + IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; + rb.Push(RESULT_SUCCESS); + rb.PushRaw(time_span_type.nanoseconds); +} + void Module::Interface::CalculateSpanBetween(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Time, "called"); diff --git a/src/core/hle/service/time/time.h 
b/src/core/hle/service/time/time.h index 351988468..41f3002e9 100644 --- a/src/core/hle/service/time/time.h +++ b/src/core/hle/service/time/time.h @@ -32,6 +32,7 @@ public: void CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERequestContext& ctx); void GetClockSnapshot(Kernel::HLERequestContext& ctx); void GetClockSnapshotFromSystemClockContext(Kernel::HLERequestContext& ctx); + void CalculateStandardUserSystemClockDifferenceByUser(Kernel::HLERequestContext& ctx); void CalculateSpanBetween(Kernel::HLERequestContext& ctx); void GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx); diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index fdaef233f..999ec1e51 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp @@ -6,6 +6,7 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hardware_properties.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/service/time/clock_types.h" #include "core/hle/service/time/steady_clock_core.h" #include "core/hle/service/time/time_sharedmemory.h" @@ -15,9 +16,7 @@ namespace Service::Time { static constexpr std::size_t SHARED_MEMORY_SIZE{0x1000}; SharedMemory::SharedMemory(Core::System& system) : system(system) { - shared_memory_holder = Kernel::SharedMemory::Create( - system.Kernel(), nullptr, SHARED_MEMORY_SIZE, Kernel::MemoryPermission::ReadWrite, - Kernel::MemoryPermission::Read, 0, Kernel::MemoryRegion::BASE, "Time:SharedMemory"); + shared_memory_holder = SharedFrom(&system.Kernel().GetTimeSharedMem()); std::memset(shared_memory_holder->GetPointer(), 0, SHARED_MEMORY_SIZE); } diff --git a/src/core/hle/service/time/time_zone_manager.cpp b/src/core/hle/service/time/time_zone_manager.cpp index 07b553a43..c8159bcd5 100644 --- a/src/core/hle/service/time/time_zone_manager.cpp +++ b/src/core/hle/service/time/time_zone_manager.cpp @@ -309,7 +309,7 @@ static bool ParsePosixName(const char* name, TimeZoneRule& rule) { offset = GetTZName(name, offset); std_len = offset; } - if (!std_len) { + if (std_len == 0) { return {}; } if (!GetOffset(name, offset, std_offset)) { @@ -320,7 +320,7 @@ static bool ParsePosixName(const char* name, TimeZoneRule& rule) { int dest_len{}; int dest_offset{}; const char* dest_name{name + offset}; - if (rule.chars.size() < char_count) { + if (rule.chars.size() < std::size_t(char_count)) { return {}; } @@ -343,7 +343,7 @@ static bool ParsePosixName(const char* name, TimeZoneRule& rule) { return {}; } char_count += dest_len + 1; - if (rule.chars.size() < char_count) { + if (rule.chars.size() < std::size_t(char_count)) { return {}; } if (name[offset] != '\0' && name[offset] != ',' && name[offset] != ';') { @@ -414,7 +414,7 @@ static bool ParsePosixName(const char* name, TimeZoneRule& rule) { if (is_reversed || (start_time < end_time && (end_time - start_time < (year_seconds + (std_offset - dest_offset))))) { - if (rule.ats.size() - 2 < time_count) { + if (rule.ats.size() - 2 < std::size_t(time_count)) { break; } @@ -609,7 +609,7 @@ static bool ParseTimeZoneBinary(TimeZoneRule& time_zone_rule, FileSys::VirtualFi } const u64 position{(read_offset - sizeof(TzifHeader))}; - const std::size_t bytes_read{vfs_file->GetSize() - sizeof(TzifHeader) - position}; + const s64 bytes_read = s64(vfs_file->GetSize() - sizeof(TzifHeader) - position); if (bytes_read < 0) { return {}; } @@ -621,11 +621,11 @@ static bool ParseTimeZoneBinary(TimeZoneRule& time_zone_rule, FileSys::VirtualFi 
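
The std::size_t casts running through these time_zone_manager hunks make the signed/unsigned conversions explicit before signed counters are compared against container sizes, presumably to satisfy stricter compiler warnings. A minimal illustration of the pattern; FitsInBuffer is a hypothetical helper, not code from this change:

#include <array>
#include <cstddef>

// char_count is a signed int in the parser, while size() yields std::size_t;
// comparing the two directly mixes signedness.
bool FitsInBuffer(int char_count, const std::array<char, 512>& chars) {
    if (char_count < 0) {
        return false; // guard first: casting a negative value would wrap around
    }
    return std::size_t(char_count) <= chars.size();
}
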
std::array<char, time_zone_name_max + 1> temp_name{}; vfs_file->ReadArray(temp_name.data(), bytes_read, read_offset); if (bytes_read > 2 && temp_name[0] == '\n' && temp_name[bytes_read - 1] == '\n' && - time_zone_rule.type_count + 2 <= time_zone_rule.ttis.size()) { + std::size_t(time_zone_rule.type_count) + 2 <= time_zone_rule.ttis.size()) { temp_name[bytes_read - 1] = '\0'; std::array<char, time_zone_name_max> name{}; - std::memcpy(name.data(), temp_name.data() + 1, bytes_read - 1); + std::memcpy(name.data(), temp_name.data() + 1, std::size_t(bytes_read - 1)); TimeZoneRule temp_rule; if (ParsePosixName(name.data(), temp_rule)) { diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 519da74e0..7f109f4eb 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -101,8 +101,8 @@ public: } std::u16string ReadInterfaceToken() { - u32 unknown = Read<u32_le>(); - u32 length = Read<u32_le>(); + [[maybe_unused]] const u32 unknown = Read<u32_le>(); + const u32 length = Read<u32_le>(); std::u16string token{}; @@ -513,7 +513,8 @@ private: auto& buffer_queue = nv_flinger->FindBufferQueue(id); - if (transaction == TransactionId::Connect) { + switch (transaction) { + case TransactionId::Connect: { IGBPConnectRequestParcel request{ctx.ReadBuffer()}; IGBPConnectResponseParcel response{ static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) * @@ -521,14 +522,18 @@ private: static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * Settings::values.resolution_factor)}; ctx.WriteBuffer(response.Serialize()); - } else if (transaction == TransactionId::SetPreallocatedBuffer) { + break; + } + case TransactionId::SetPreallocatedBuffer: { IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer); IGBPSetPreallocatedBufferResponseParcel response{}; ctx.WriteBuffer(response.Serialize()); - } else if (transaction == TransactionId::DequeueBuffer) { + break; + } + case TransactionId::DequeueBuffer: { IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; const u32 width{request.data.width}; const u32 height{request.data.height}; @@ -556,14 +561,18 @@ private: }, buffer_queue.GetWritableBufferWaitEvent()); } - } else if (transaction == TransactionId::RequestBuffer) { + break; + } + case TransactionId::RequestBuffer: { IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; auto& buffer = buffer_queue.RequestBuffer(request.slot); IGBPRequestBufferResponseParcel response{buffer}; ctx.WriteBuffer(response.Serialize()); - } else if (transaction == TransactionId::QueueBuffer) { + break; + } + case TransactionId::QueueBuffer: { IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; buffer_queue.QueueBuffer(request.data.slot, request.data.transform, @@ -572,7 +581,9 @@ private: IGBPQueueBufferResponseParcel response{1280, 720}; ctx.WriteBuffer(response.Serialize()); - } else if (transaction == TransactionId::Query) { + break; + } + case TransactionId::Query: { IGBPQueryRequestParcel request{ctx.ReadBuffer()}; const u32 value = @@ -580,15 +591,30 @@ private: IGBPQueryResponseParcel response{value}; ctx.WriteBuffer(response.Serialize()); - } else if (transaction == TransactionId::CancelBuffer) { + break; + } + case TransactionId::CancelBuffer: { LOG_CRITICAL(Service_VI, "(STUBBED) called, transaction=CancelBuffer"); - } else if (transaction == TransactionId::Disconnect || - transaction == TransactionId::DetachBuffer) { + break; + } + case TransactionId::Disconnect: { 
+ LOG_WARNING(Service_VI, "(STUBBED) called, transaction=Disconnect"); + const auto buffer = ctx.ReadBuffer(); + + buffer_queue.Disconnect(); + + IGBPEmptyResponseParcel response{}; + ctx.WriteBuffer(response.Serialize()); + break; + } + case TransactionId::DetachBuffer: { const auto buffer = ctx.ReadBuffer(); IGBPEmptyResponseParcel response{}; ctx.WriteBuffer(response.Serialize()); - } else { + break; + } + default: ASSERT_MSG(false, "Unimplemented"); } diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 53559e8b1..134e83412 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -14,6 +14,7 @@ #include "core/file_sys/romfs_factory.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/loader/deconstructed_rom_directory.h" @@ -129,27 +130,47 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect } metadata.Print(); - if (process.LoadFromMetadata(metadata).IsError()) { - return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; + const auto static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", "subsdk3", + "subsdk4", "subsdk5", "subsdk6", "subsdk7", "sdk"}; + + // Use the NSO module loader to figure out the code layout + std::size_t code_size{}; + for (const auto& module : static_modules) { + const FileSys::VirtualFile module_file{dir->GetFile(module)}; + if (!module_file) { + continue; + } + + const bool should_pass_arguments{std::strcmp(module, "rtld") == 0}; + const auto tentative_next_load_addr{AppLoader_NSO::LoadModule( + process, *module_file, code_size, should_pass_arguments, false)}; + if (!tentative_next_load_addr) { + return {ResultStatus::ErrorLoadingNSO, {}}; + } + + code_size = *tentative_next_load_addr; } - const FileSys::PatchManager pm(metadata.GetTitleID()); + // Setup the process code layout + if (process.LoadFromMetadata(metadata, code_size).IsError()) { + return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; + } // Load NSO modules modules.clear(); - const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); - VAddr next_load_addr = base_address; - for (const auto& module : {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", "subsdk3", - "subsdk4", "subsdk5", "subsdk6", "subsdk7", "sdk"}) { - const FileSys::VirtualFile module_file = dir->GetFile(module); - if (module_file == nullptr) { + const VAddr base_address{process.PageTable().GetCodeRegionStart()}; + VAddr next_load_addr{base_address}; + const FileSys::PatchManager pm{metadata.GetTitleID()}; + for (const auto& module : static_modules) { + const FileSys::VirtualFile module_file{dir->GetFile(module)}; + if (!module_file) { continue; } - const VAddr load_addr = next_load_addr; - const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; - const auto tentative_next_load_addr = - AppLoader_NSO::LoadModule(process, *module_file, load_addr, should_pass_arguments, pm); + const VAddr load_addr{next_load_addr}; + const bool should_pass_arguments{std::strcmp(module, "rtld") == 0}; + const auto tentative_next_load_addr{AppLoader_NSO::LoadModule( + process, *module_file, load_addr, should_pass_arguments, true, pm)}; if (!tentative_next_load_addr) { return {ResultStatus::ErrorLoadingNSO, {}}; } diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp 
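
The deconstructed-ROM loader hunk above now calls AppLoader_NSO::LoadModule twice: a dry-run pass with load_into_process set to false that only accumulates the code size, then a real pass that maps each module once the process has reserved its code region. A self-contained sketch of that two-pass shape; module_sizes stands in for the per-module sizes the dry run reports:

#include <cstdint>
#include <numeric>
#include <vector>

using VAddr = std::uint64_t;

VAddr LayOutAndMapModules(const std::vector<std::uint64_t>& module_sizes, VAddr code_base) {
    // Pass 1: dry run. Only the total size is needed; nothing is mapped yet.
    const std::uint64_t code_size =
        std::accumulate(module_sizes.begin(), module_sizes.end(), std::uint64_t{0});
    static_cast<void>(code_size); // the real code hands this to LoadFromMetadata

    // Pass 2: map every module at the next free address inside the reservation.
    VAddr next_load_addr = code_base;
    for (const std::uint64_t size : module_sizes) {
        // ... actually map the module at next_load_addr here ...
        next_load_addr += size;
    }
    return next_load_addr;
}
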
index 8908e5328..1e9ed2837 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -10,8 +10,8 @@ #include "common/file_util.h" #include "common/logging/log.h" #include "core/hle/kernel/code_set.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/loader/elf.h" #include "core/memory.h" @@ -393,7 +393,7 @@ AppLoader_ELF::LoadResult AppLoader_ELF::Load(Kernel::Process& process) { return {ResultStatus::ErrorIncorrectELFFileSize, {}}; } - const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); + const VAddr base_address = process.PageTable().GetCodeRegionStart(); ElfReader elf_reader(&buffer[0]); Kernel::CodeSet codeset = elf_reader.LoadInto(base_address); const VAddr entry_point = codeset.entrypoint; @@ -401,7 +401,7 @@ AppLoader_ELF::LoadResult AppLoader_ELF::Load(Kernel::Process& process) { process.LoadModule(std::move(codeset), entry_point); is_loaded = true; - return {ResultStatus::Success, LoadParameters{48, Memory::DEFAULT_STACK_SIZE}}; + return {ResultStatus::Success, LoadParameters{48, Core::Memory::DEFAULT_STACK_SIZE}}; } } // namespace Loader diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index 092103abe..40fa03ad1 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp @@ -7,14 +7,16 @@ #include "core/file_sys/program_metadata.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/code_set.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" #include "core/loader/kip.h" +#include "core/memory.h" namespace Loader { namespace { constexpr u32 PageAlignSize(u32 size) { - return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK; + return (size + Core::Memory::PAGE_MASK) & ~Core::Memory::PAGE_MASK; } } // Anonymous namespace @@ -68,7 +70,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) { kip->GetMainThreadCpuCore(), kip->GetMainThreadStackSize(), kip->GetTitleID(), 0xFFFFFFFFFFFFFFFF, kip->GetKernelCapabilities()); - const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); + const VAddr base_address = process.PageTable().GetCodeRegionStart(); Kernel::CodeSet codeset; Kernel::PhysicalMemory program_image; diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 175898b91..5d7e8136e 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -16,8 +16,8 @@ #include "core/file_sys/vfs_offset.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/code_set.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/loader/nro.h" #include "core/loader/nso.h" @@ -127,7 +127,7 @@ FileType AppLoader_NRO::IdentifyType(const FileSys::VirtualFile& file) { } static constexpr u32 PageAlignSize(u32 size) { - return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK; + return (size + Core::Memory::PAGE_MASK) & ~Core::Memory::PAGE_MASK; } static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data, @@ -208,7 +208,7 @@ AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) { } // Load NRO - const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); + const VAddr base_address = process.PageTable().GetCodeRegionStart(); if (!LoadNro(process, *file, base_address)) { return {ResultStatus::ErrorLoadingNRO, {}}; @@ -221,7 +221,7 @@ AppLoader_NRO::LoadResult 
AppLoader_NRO::Load(Kernel::Process& process) { is_loaded = true; return {ResultStatus::Success, - LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}}; + LoadParameters{Kernel::THREADPRIO_DEFAULT, Core::Memory::DEFAULT_STACK_SIZE}}; } ResultStatus AppLoader_NRO::ReadIcon(std::vector<u8>& buffer) { diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 044067a5b..612ff9bf6 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -16,8 +16,8 @@ #include "core/file_sys/patch_manager.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/code_set.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/loader/nso.h" #include "core/memory.h" #include "core/settings.h" @@ -47,7 +47,7 @@ std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data, } constexpr u32 PageAlignSize(u32 size) { - return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK; + return (size + Core::Memory::PAGE_MASK) & ~Core::Memory::PAGE_MASK; } } // Anonymous namespace @@ -73,7 +73,7 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& file) { std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, const FileSys::VfsFile& file, VAddr load_base, - bool should_pass_arguments, + bool should_pass_arguments, bool load_into_process, std::optional<FileSys::PatchManager> pm) { if (file.GetSize() < sizeof(NSOHeader)) { return {}; @@ -97,21 +97,17 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, if (nso_header.IsSegmentCompressed(i)) { data = DecompressSegment(data, nso_header.segments[i]); } - program_image.resize(nso_header.segments[i].location + - PageAlignSize(static_cast<u32>(data.size()))); + program_image.resize(nso_header.segments[i].location + static_cast<u32>(data.size())); std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(), data.size()); codeset.segments[i].addr = nso_header.segments[i].location; codeset.segments[i].offset = nso_header.segments[i].location; - codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size())); + codeset.segments[i].size = nso_header.segments[i].size; } - if (should_pass_arguments) { - std::vector<u8> arg_data{Settings::values.program_args.begin(), - Settings::values.program_args.end()}; - if (arg_data.empty()) { - arg_data.resize(NSO_ARGUMENT_DEFAULT_SIZE); - } + if (should_pass_arguments && !Settings::values.program_args.empty()) { + const auto arg_data{Settings::values.program_args}; + codeset.DataSegment().size += NSO_ARGUMENT_DATA_ALLOCATION_SIZE; NSOArgumentHeader args_header{ NSO_ARGUMENT_DATA_ALLOCATION_SIZE, static_cast<u32_le>(arg_data.size()), {}}; @@ -123,24 +119,15 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, arg_data.size()); } - // MOD header pointer is at .text offset + 4 - u32 module_offset; - std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32)); - - // Read MOD header - MODHeader mod_header{}; - // Default .bss to size in segment header if MOD0 section doesn't exist - u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)}; - std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(MODHeader)); - const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')}; - if (has_mod_header) { - // Resize program image to include .bss section and page align each section - bss_size = PageAlignSize(mod_header.bss_end_offset - mod_header.bss_start_offset); - } - 
codeset.DataSegment().size += bss_size; - const u32 image_size{PageAlignSize(static_cast<u32>(program_image.size()) + bss_size)}; + codeset.DataSegment().size += nso_header.segments[2].bss_size; + const u32 image_size{ + PageAlignSize(static_cast<u32>(program_image.size()) + nso_header.segments[2].bss_size)}; program_image.resize(image_size); + for (std::size_t i = 0; i < nso_header.segments.size(); ++i) { + codeset.segments[i].size = PageAlignSize(codeset.segments[i].size); + } + // Apply patches if necessary if (pm && (pm->HasNSOPatch(nso_header.build_id) || Settings::values.dump_nso)) { std::vector<u8> pi_header; @@ -154,6 +141,11 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data()); } + // If we aren't actually loading (i.e. just computing the process code layout), we are done + if (!load_into_process) { + return load_base + image_size; + } + // Apply cheats if they exist and the program has a valid title ID if (pm) { auto& system = Core::System::GetInstance(); @@ -182,8 +174,8 @@ AppLoader_NSO::LoadResult AppLoader_NSO::Load(Kernel::Process& process) { modules.clear(); // Load module - const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); - if (!LoadModule(process, *file, base_address, true)) { + const VAddr base_address = process.PageTable().GetCodeRegionStart(); + if (!LoadModule(process, *file, base_address, true, true)) { return {ResultStatus::ErrorLoadingNSO, {}}; } @@ -192,7 +184,7 @@ AppLoader_NSO::LoadResult AppLoader_NSO::Load(Kernel::Process& process) { is_loaded = true; return {ResultStatus::Success, - LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}}; + LoadParameters{Kernel::THREADPRIO_DEFAULT, Core::Memory::DEFAULT_STACK_SIZE}}; } ResultStatus AppLoader_NSO::ReadNSOModules(Modules& modules) { diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h index d2d600cd9..b210830f0 100644 --- a/src/core/loader/nso.h +++ b/src/core/loader/nso.h @@ -56,8 +56,6 @@ static_assert(sizeof(NSOHeader) == 0x100, "NSOHeader has incorrect size."); static_assert(std::is_trivially_copyable_v<NSOHeader>, "NSOHeader must be trivially copyable."); constexpr u64 NSO_ARGUMENT_DATA_ALLOCATION_SIZE = 0x9000; -// NOTE: Official software default argument state is unverified. -constexpr u64 NSO_ARGUMENT_DEFAULT_SIZE = 1; struct NSOArgumentHeader { u32_le allocated_size; @@ -84,6 +82,7 @@ public: static std::optional<VAddr> LoadModule(Kernel::Process& process, const FileSys::VfsFile& file, VAddr load_base, bool should_pass_arguments, + bool load_into_process, std::optional<FileSys::PatchManager> pm = {}); LoadResult Load(Kernel::Process& process) override; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f0888327f..9d87045a0 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -14,13 +14,14 @@ #include "common/swap.h" #include "core/arm/arm_interface.h" #include "core/core.h" +#include "core/device_memory.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/physical_memory.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "video_core/gpu.h" -namespace Memory { +namespace Core::Memory { // Implementation class used to keep the specifics of the memory subsystem hidden // from outside classes. 
This also allows modification to the internals of the memory @@ -29,9 +30,9 @@ struct Memory::Impl { explicit Impl(Core::System& system_) : system{system_} {} void SetCurrentPageTable(Kernel::Process& process) { - current_page_table = &process.VMManager().page_table; + current_page_table = &process.PageTable().PageTableImpl(); - const std::size_t address_space_width = process.VMManager().GetAddressSpaceWidth(); + const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth(); system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width); system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width); @@ -39,12 +40,7 @@ struct Memory::Impl { system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width); } - void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, - Kernel::PhysicalMemory& memory, VAddr offset) { - MapMemoryRegion(page_table, base, size, memory.data() + offset); - } - - void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) { + void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory); @@ -52,46 +48,27 @@ struct Memory::Impl { void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size, Common::MemoryHookPointer mmio_handler) { - ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); - ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); - MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, - Common::PageType::Special); - - const auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); - const Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, - std::move(mmio_handler)}; - page_table.special_regions.add( - std::make_pair(interval, std::set<Common::SpecialRegion>{region})); + UNIMPLEMENTED(); } void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); - MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, - Common::PageType::Unmapped); - - const auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); - page_table.special_regions.erase(interval); + MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped); } void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size, Common::MemoryHookPointer hook) { - const auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); - const Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)}; - page_table.special_regions.add( - std::make_pair(interval, std::set<Common::SpecialRegion>{region})); + UNIMPLEMENTED(); } void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size, Common::MemoryHookPointer hook) { - const auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); - const Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)}; - page_table.special_regions.subtract( - std::make_pair(interval, std::set<Common::SpecialRegion>{region})); + UNIMPLEMENTED(); 
} bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const { - const auto& page_table = process.VMManager().page_table; + const auto& page_table = process.PageTable().PageTableImpl(); const u8* const page_pointer = page_table.pointers[vaddr >> PAGE_BITS]; if (page_pointer != nullptr) { @@ -113,55 +90,28 @@ struct Memory::Impl { return IsValidVirtualAddress(*system.CurrentProcess(), vaddr); } - /** - * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned) - * using a VMA from the current process - */ - u8* GetPointerFromVMA(const Kernel::Process& process, VAddr vaddr) { - const auto& vm_manager = process.VMManager(); - - const auto it = vm_manager.FindVMA(vaddr); - DEBUG_ASSERT(vm_manager.IsValidHandle(it)); + u8* GetPointerFromRasterizerCachedMemory(VAddr vaddr) const { + const PAddr paddr{current_page_table->backing_addr[vaddr >> PAGE_BITS]}; - u8* direct_pointer = nullptr; - const auto& vma = it->second; - switch (vma.type) { - case Kernel::VMAType::AllocatedMemoryBlock: - direct_pointer = vma.backing_block->data() + vma.offset; - break; - case Kernel::VMAType::BackingMemory: - direct_pointer = vma.backing_memory; - break; - case Kernel::VMAType::Free: - return nullptr; - default: - UNREACHABLE(); + if (!paddr) { + return {}; } - return direct_pointer + (vaddr - vma.base); - } - - /** - * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned) - * using a VMA from the current process. - */ - u8* GetPointerFromVMA(VAddr vaddr) { - return GetPointerFromVMA(*system.CurrentProcess(), vaddr); + return system.DeviceMemory().GetPointer(paddr) + vaddr; } - u8* GetPointer(const VAddr vaddr) { - u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; - if (page_pointer != nullptr) { + u8* GetPointer(const VAddr vaddr) const { + u8* const page_pointer{current_page_table->pointers[vaddr >> PAGE_BITS]}; + if (page_pointer) { return page_pointer + vaddr; } if (current_page_table->attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory) { - return GetPointerFromVMA(vaddr); + return GetPointerFromRasterizerCachedMemory(vaddr); } - LOG_ERROR(HW_Memory, "Unknown GetPointer @ 0x{:016X}", vaddr); - return nullptr; + return {}; } u8 Read8(const VAddr addr) { @@ -169,15 +119,33 @@ struct Memory::Impl { } u16 Read16(const VAddr addr) { - return Read<u16_le>(addr); + if ((addr & 1) == 0) { + return Read<u16_le>(addr); + } else { + const u8 a{Read<u8>(addr)}; + const u8 b{Read<u8>(addr + sizeof(u8))}; + return (static_cast<u16>(b) << 8) | a; + } } u32 Read32(const VAddr addr) { - return Read<u32_le>(addr); + if ((addr & 3) == 0) { + return Read<u32_le>(addr); + } else { + const u16 a{Read16(addr)}; + const u16 b{Read16(addr + sizeof(u16))}; + return (static_cast<u32>(b) << 16) | a; + } } u64 Read64(const VAddr addr) { - return Read<u64_le>(addr); + if ((addr & 7) == 0) { + return Read<u64_le>(addr); + } else { + const u32 a{Read32(addr)}; + const u32 b{Read32(addr + sizeof(u32))}; + return (static_cast<u64>(b) << 32) | a; + } } void Write8(const VAddr addr, const u8 data) { @@ -185,15 +153,30 @@ struct Memory::Impl { } void Write16(const VAddr addr, const u16 data) { - Write<u16_le>(addr, data); + if ((addr & 1) == 0) { + Write<u16_le>(addr, data); + } else { + Write<u8>(addr, static_cast<u8>(data)); + Write<u8>(addr + sizeof(u8), static_cast<u8>(data >> 8)); + } } void Write32(const VAddr addr, const u32 data) { - Write<u32_le>(addr, data); + if ((addr & 3) == 0) { + Write<u32_le>(addr, data); 
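
The Read16/Read32/Read64 changes above split a misaligned access into two narrower accesses and recombine them in little-endian order, and the Write paths mirror the split. A small worked example of the 32-bit recombination:

#include <cstdint>

// The misaligned Read32 path takes the low half from addr and the high half
// from addr + 2, then recombines them little-endian, exactly as sketched here.
std::uint32_t Combine32(std::uint16_t low, std::uint16_t high) {
    return (static_cast<std::uint32_t>(high) << 16) | low;
}

// Example: bytes {0x44, 0x33, 0x22, 0x11} starting at a misaligned address read
// back as low = 0x3344 and high = 0x1122, so Combine32 yields 0x11223344.
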
+ } else { + Write16(addr, static_cast<u16>(data)); + Write16(addr + sizeof(u16), static_cast<u16>(data >> 16)); + } } void Write64(const VAddr addr, const u64 data) { - Write<u64_le>(addr, data); + if ((addr & 7) == 0) { + Write<u64_le>(addr, data); + } else { + Write32(addr, static_cast<u32>(data)); + Write32(addr + sizeof(u32), static_cast<u32>(data >> 32)); + } } std::string ReadCString(VAddr vaddr, std::size_t max_length) { @@ -213,7 +196,53 @@ struct Memory::Impl { void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer, const std::size_t size) { - const auto& page_table = process.VMManager().page_table; + const auto& page_table = process.PageTable().PageTableImpl(); + + std::size_t remaining_size = size; + std::size_t page_index = src_addr >> PAGE_BITS; + std::size_t page_offset = src_addr & PAGE_MASK; + + while (remaining_size > 0) { + const std::size_t copy_amount = + std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); + const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); + + switch (page_table.attributes[page_index]) { + case Common::PageType::Unmapped: { + LOG_ERROR(HW_Memory, + "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", + current_vaddr, src_addr, size); + std::memset(dest_buffer, 0, copy_amount); + break; + } + case Common::PageType::Memory: { + DEBUG_ASSERT(page_table.pointers[page_index]); + + const u8* const src_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); + std::memcpy(dest_buffer, src_ptr, copy_amount); + break; + } + case Common::PageType::RasterizerCachedMemory: { + const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; + system.GPU().FlushRegion(current_vaddr, copy_amount); + std::memcpy(dest_buffer, host_ptr, copy_amount); + break; + } + default: + UNREACHABLE(); + } + + page_index++; + page_offset = 0; + dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; + remaining_size -= copy_amount; + } + } + + void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer, + const std::size_t size) { + const auto& page_table = process.PageTable().PageTableImpl(); std::size_t remaining_size = size; std::size_t page_index = src_addr >> PAGE_BITS; @@ -241,8 +270,7 @@ struct Memory::Impl { break; } case Common::PageType::RasterizerCachedMemory: { - const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); - system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); + const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; std::memcpy(dest_buffer, host_ptr, copy_amount); break; } @@ -261,9 +289,13 @@ struct Memory::Impl { ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size); } + void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) { + ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size); + } + void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer, const std::size_t size) { - const auto& page_table = process.VMManager().page_table; + const auto& page_table = process.PageTable().PageTableImpl(); std::size_t remaining_size = size; std::size_t page_index = dest_addr >> PAGE_BITS; std::size_t page_offset = dest_addr & PAGE_MASK; @@ -289,8 +321,51 @@ struct Memory::Impl { break; } case Common::PageType::RasterizerCachedMemory: { - u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); - 
system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); + u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; + system.GPU().InvalidateRegion(current_vaddr, copy_amount); + std::memcpy(host_ptr, src_buffer, copy_amount); + break; + } + default: + UNREACHABLE(); + } + + page_index++; + page_offset = 0; + src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; + remaining_size -= copy_amount; + } + } + + void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr, + const void* src_buffer, const std::size_t size) { + const auto& page_table = process.PageTable().PageTableImpl(); + std::size_t remaining_size = size; + std::size_t page_index = dest_addr >> PAGE_BITS; + std::size_t page_offset = dest_addr & PAGE_MASK; + + while (remaining_size > 0) { + const std::size_t copy_amount = + std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); + const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); + + switch (page_table.attributes[page_index]) { + case Common::PageType::Unmapped: { + LOG_ERROR(HW_Memory, + "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", + current_vaddr, dest_addr, size); + break; + } + case Common::PageType::Memory: { + DEBUG_ASSERT(page_table.pointers[page_index]); + + u8* const dest_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); + std::memcpy(dest_ptr, src_buffer, copy_amount); + break; + } + case Common::PageType::RasterizerCachedMemory: { + u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; std::memcpy(host_ptr, src_buffer, copy_amount); break; } @@ -309,8 +384,12 @@ struct Memory::Impl { WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size); } + void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) { + WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size); + } + void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) { - const auto& page_table = process.VMManager().page_table; + const auto& page_table = process.PageTable().PageTableImpl(); std::size_t remaining_size = size; std::size_t page_index = dest_addr >> PAGE_BITS; std::size_t page_offset = dest_addr & PAGE_MASK; @@ -336,8 +415,8 @@ struct Memory::Impl { break; } case Common::PageType::RasterizerCachedMemory: { - u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); - system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); + u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; + system.GPU().InvalidateRegion(current_vaddr, copy_amount); std::memset(host_ptr, 0, copy_amount); break; } @@ -357,7 +436,7 @@ struct Memory::Impl { void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr, const std::size_t size) { - const auto& page_table = process.VMManager().page_table; + const auto& page_table = process.PageTable().PageTableImpl(); std::size_t remaining_size = size; std::size_t page_index = src_addr >> PAGE_BITS; std::size_t page_offset = src_addr & PAGE_MASK; @@ -383,8 +462,8 @@ struct Memory::Impl { break; } case Common::PageType::RasterizerCachedMemory: { - const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); - system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); + const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; + system.GPU().FlushRegion(current_vaddr, copy_amount); WriteBlock(process, dest_addr, host_ptr, 
copy_amount); break; } @@ -416,7 +495,7 @@ struct Memory::Impl { u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1; for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { - Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + Common::PageType& page_type{current_page_table->attributes[vaddr >> PAGE_BITS]}; if (cached) { // Switch page type to cached if now cached @@ -448,7 +527,7 @@ struct Memory::Impl { // that this area is already unmarked as cached. break; case Common::PageType::RasterizerCachedMemory: { - u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); + u8* pointer{GetPointerFromRasterizerCachedMemory(vaddr & ~PAGE_MASK)}; if (pointer == nullptr) { // It's possible that this function has been called while updating the // pagetable after unmapping a VMA. In that case the underlying VMA will no @@ -477,9 +556,9 @@ struct Memory::Impl { * @param memory The memory to map. * @param type The page type to map the memory as. */ - void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory, + void MapPages(Common::PageTable& page_table, VAddr base, u64 size, PAddr target, Common::PageType type) { - LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, + LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", target, base * PAGE_SIZE, (base + size) * PAGE_SIZE); // During boot, current_page_table might not be set yet, in which case we need not flush @@ -497,19 +576,26 @@ struct Memory::Impl { ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", base + page_table.pointers.size()); - std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type); + if (!target) { + while (base != end) { + page_table.pointers[base] = nullptr; + page_table.attributes[base] = type; + page_table.backing_addr[base] = 0; - if (memory == nullptr) { - std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, - memory); + base += 1; + } } else { while (base != end) { - page_table.pointers[base] = memory - (base << PAGE_BITS); + page_table.pointers[base] = + system.DeviceMemory().GetPointer(target) - (base << PAGE_BITS); + page_table.attributes[base] = type; + page_table.backing_addr[base] = target - (base << PAGE_BITS); + ASSERT_MSG(page_table.pointers[base], "memory mapping base yield a nullptr within the table"); base += 1; - memory += PAGE_SIZE; + target += PAGE_SIZE; } } } @@ -544,8 +630,8 @@ struct Memory::Impl { ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); break; case Common::PageType::RasterizerCachedMemory: { - const u8* const host_ptr = GetPointerFromVMA(vaddr); - system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T)); + const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; + system.GPU().FlushRegion(vaddr, sizeof(T)); T value; std::memcpy(&value, host_ptr, sizeof(T)); return value; @@ -586,8 +672,8 @@ struct Memory::Impl { ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); break; case Common::PageType::RasterizerCachedMemory: { - u8* const host_ptr{GetPointerFromVMA(vaddr)}; - system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T)); + u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; + system.GPU().InvalidateRegion(vaddr, sizeof(T)); std::memcpy(host_ptr, &data, sizeof(T)); break; } @@ -607,12 +693,7 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) { impl->SetCurrentPageTable(process); } -void 
Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, - Kernel::PhysicalMemory& memory, VAddr offset) { - impl->MapMemoryRegion(page_table, base, size, memory, offset); -} - -void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) { +void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { impl->MapMemoryRegion(page_table, base, size, target); } @@ -696,6 +777,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_ impl->ReadBlock(src_addr, dest_buffer, size); } +void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, + void* dest_buffer, const std::size_t size) { + impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size); +} + +void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) { + impl->ReadBlockUnsafe(src_addr, dest_buffer, size); +} + void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer, std::size_t size) { impl->WriteBlock(process, dest_addr, src_buffer, size); @@ -705,6 +795,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std impl->WriteBlock(dest_addr, src_buffer, size); } +void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, + const void* src_buffer, std::size_t size) { + impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size); +} + +void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, + const std::size_t size) { + impl->WriteBlockUnsafe(dest_addr, src_buffer, size); +} + void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) { impl->ZeroBlock(process, dest_addr, size); } @@ -730,4 +830,4 @@ bool IsKernelVirtualAddress(const VAddr vaddr) { return KERNEL_REGION_VADDR <= vaddr && vaddr < KERNEL_REGION_END; } -} // namespace Memory +} // namespace Core::Memory diff --git a/src/core/memory.h b/src/core/memory.h index 8913a9da4..9292f3b0a 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -23,7 +23,7 @@ class PhysicalMemory; class Process; } // namespace Kernel -namespace Memory { +namespace Core::Memory { /** * Page size used by the ARM architecture. This is the smallest granularity with which memory can @@ -67,19 +67,6 @@ public: void SetCurrentPageTable(Kernel::Process& process); /** - * Maps an physical buffer onto a region of the emulated process address space. - * - * @param page_table The page table of the emulated process. - * @param base The address to start mapping at. Must be page-aligned. - * @param size The amount of bytes to map. Must be page-aligned. - * @param memory Physical buffer with the memory backing the mapping. Must be of length - * at least `size + offset`. - * @param offset The offset within the physical memory. Must be page-aligned. - */ - void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, - Kernel::PhysicalMemory& memory, VAddr offset); - - /** * Maps an allocated buffer onto a region of the emulated process address space. * * @param page_table The page table of the emulated process. @@ -88,7 +75,7 @@ public: * @param target Buffer with the memory backing the mapping. Must be of length at least * `size`. */ - void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target); + void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target); /** * Maps a region of the emulated process address space as a IO region. 
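
The ReadBlockUnsafe/WriteBlockUnsafe forwarders added here, documented in the header hunks below, copy exactly like their checked counterparts but skip the GPU flush or invalidate. A hedged usage sketch; ReadHeader and header_addr are hypothetical, only the Memory methods come from this change:

#include <array>
#include <cstdint>

#include "core/memory.h"

// Skipping the GPU flush is only sound when the range is known not to be
// rasterizer-cached, for example a header the CPU itself just wrote.
std::array<std::uint8_t, 0x40> ReadHeader(Core::Memory::Memory& memory, VAddr header_addr) {
    std::array<std::uint8_t, 0x40> header{};
    memory.ReadBlockUnsafe(header_addr, header.data(), header.size()); // no GPU flush
    // When in doubt, ReadBlock keeps the GPU caches coherent instead.
    return header;
}
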
@@ -295,6 +282,27 @@ public: std::size_t size); /** + * Reads a contiguous block of bytes from a specified process' address space. + * This unsafe version does not trigger GPU flushing. + * + * @param process The process to read the data from. + * @param src_addr The virtual address to begin reading from. + * @param dest_buffer The buffer to place the read bytes into. + * @param size The amount of data to read, in bytes. + * + * @note If a size of 0 is specified, then this function reads nothing and + * no attempts to access memory are made at all. + * + * @pre dest_buffer must be at least size bytes in length, otherwise a + * buffer overrun will occur. + * + * @post The range [dest_buffer, size) contains the read bytes from the + * process' address space. + */ + void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer, + std::size_t size); + + /** * Reads a contiguous block of bytes from the current process' address space. * * @param src_addr The virtual address to begin reading from. @@ -313,6 +321,25 @@ public: void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size); /** + * Reads a contiguous block of bytes from the current process' address space. + * This unsafe version does not trigger GPU flushing. + * + * @param src_addr The virtual address to begin reading from. + * @param dest_buffer The buffer to place the read bytes into. + * @param size The amount of data to read, in bytes. + * + * @note If a size of 0 is specified, then this function reads nothing and + * no attempts to access memory are made at all. + * + * @pre dest_buffer must be at least size bytes in length, otherwise a + * buffer overrun will occur. + * + * @post The range [dest_buffer, size) contains the read bytes from the + * current process' address space. + */ + void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size); + + /** * Writes a range of bytes into a given process' address space at the specified * virtual address. * @@ -336,6 +363,26 @@ public: std::size_t size); /** + * Writes a range of bytes into a given process' address space at the specified + * virtual address. + * This unsafe version does not invalidate GPU Memory. + * + * @param process The process to write data into the address space of. + * @param dest_addr The destination virtual address to begin writing the data at. + * @param src_buffer The data to write into the process' address space. + * @param size The size of the data to write, in bytes. + * + * @post The address range [dest_addr, size) in the process' address space + * contains the data that was within src_buffer. + * + * @post If an attempt is made to write into an unmapped region of memory, the writes + * will be ignored and an error will be logged. + * + */ + void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer, + std::size_t size); + + /** * Writes a range of bytes into the current process' address space at the specified * virtual address. * @@ -357,6 +404,24 @@ public: void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size); /** + * Writes a range of bytes into the current process' address space at the specified + * virtual address. + * This unsafe version does not invalidate GPU Memory. + * + * @param dest_addr The destination virtual address to begin writing the data at. + * @param src_buffer The data to write into the current process' address space. + * @param size The size of the data to write, in bytes. 
+ * + * @post The address range [dest_addr, size) in the current process' address space + * contains the data that was within src_buffer. + * + * @post If an attempt is made to write into an unmapped region of memory, the writes + * will be ignored and an error will be logged. + * + */ + void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size); + + /** * Fills the specified address range within a process' address space with zeroes. * * @param process The process that will have a portion of its memory zeroed out. @@ -425,4 +490,4 @@ private: /// Determines if the given VAddr is a kernel address bool IsKernelVirtualAddress(VAddr vaddr); -} // namespace Memory +} // namespace Core::Memory diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index 4472500d2..b139e8465 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -10,13 +10,15 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hardware_properties.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" #include "core/hle/service/hid/controllers/npad.h" #include "core/hle/service/hid/hid.h" #include "core/hle/service/sm/sm.h" +#include "core/memory.h" #include "core/memory/cheat_engine.h" -namespace Memory { +namespace Core::Memory { constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12); constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; @@ -194,11 +196,12 @@ void CheatEngine::Initialize() { metadata.process_id = system.CurrentProcess()->GetProcessID(); metadata.title_id = system.CurrentProcess()->GetTitleID(); - const auto& vm_manager = system.CurrentProcess()->VMManager(); - metadata.heap_extents = {vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionSize()}; - metadata.address_space_extents = {vm_manager.GetAddressSpaceBaseAddress(), - vm_manager.GetAddressSpaceSize()}; - metadata.alias_extents = {vm_manager.GetMapRegionBaseAddress(), vm_manager.GetMapRegionSize()}; + const auto& page_table = system.CurrentProcess()->PageTable(); + metadata.heap_extents = {page_table.GetHeapRegionStart(), page_table.GetHeapRegionSize()}; + metadata.address_space_extents = {page_table.GetAddressSpaceStart(), + page_table.GetAddressSpaceSize()}; + metadata.alias_extents = {page_table.GetAliasCodeRegionStart(), + page_table.GetAliasCodeRegionSize()}; is_pending_reload.exchange(true); } @@ -230,4 +233,4 @@ void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) { core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event); } -} // namespace Memory +} // namespace Core::Memory diff --git a/src/core/memory/cheat_engine.h b/src/core/memory/cheat_engine.h index 3d6b2298a..2649423f8 100644 --- a/src/core/memory/cheat_engine.h +++ b/src/core/memory/cheat_engine.h @@ -20,7 +20,7 @@ class CoreTiming; struct EventType; } // namespace Core::Timing -namespace Memory { +namespace Core::Memory { class StandardVmCallbacks : public DmntCheatVm::Callbacks { public: @@ -84,4 +84,4 @@ private: Core::System& system; }; -} // namespace Memory +} // namespace Core::Memory diff --git a/src/core/memory/dmnt_cheat_types.h b/src/core/memory/dmnt_cheat_types.h index bf68fa0fe..5e60733dc 100644 --- a/src/core/memory/dmnt_cheat_types.h +++ b/src/core/memory/dmnt_cheat_types.h @@ -26,7 +26,7 @@ #include "common/common_types.h" -namespace Memory { +namespace Core::Memory { struct MemoryRegionExtents { u64 base{}; @@ -55,4 +55,4 @@ struct CheatEntry { CheatDefinition definition{}; }; -} 
// namespace Memory +} // namespace Core::Memory diff --git a/src/core/memory/dmnt_cheat_vm.cpp b/src/core/memory/dmnt_cheat_vm.cpp index 4f4fa5099..fb9f36bfd 100644 --- a/src/core/memory/dmnt_cheat_vm.cpp +++ b/src/core/memory/dmnt_cheat_vm.cpp @@ -27,7 +27,7 @@ #include "core/memory/dmnt_cheat_types.h" #include "core/memory/dmnt_cheat_vm.h" -namespace Memory { +namespace Core::Memory { DmntCheatVm::DmntCheatVm(std::unique_ptr<Callbacks> callbacks) : callbacks(std::move(callbacks)) {} @@ -55,7 +55,7 @@ void DmntCheatVm::LogOpcode(const CheatVmOpcode& opcode) { fmt::format("Cond Type: {:X}", static_cast<u32>(begin_cond->cond_type))); callbacks->CommandLog(fmt::format("Rel Addr: {:X}", begin_cond->rel_address)); callbacks->CommandLog(fmt::format("Value: {:X}", begin_cond->value.bit64)); - } else if (auto end_cond = std::get_if<EndConditionalOpcode>(&opcode.opcode)) { + } else if (std::holds_alternative<EndConditionalOpcode>(opcode.opcode)) { callbacks->CommandLog("Opcode: End Conditional"); } else if (auto ctrl_loop = std::get_if<ControlLoopOpcode>(&opcode.opcode)) { if (ctrl_loop->start_loop) { @@ -399,6 +399,7 @@ bool DmntCheatVm::DecodeNextOpcode(CheatVmOpcode& out) { // 8kkkkkkk // Just parse the mask. begin_keypress_cond.key_mask = first_dword & 0x0FFFFFFF; + opcode.opcode = begin_keypress_cond; } break; case CheatVmOpcodeType::PerformArithmeticRegister: { PerformArithmeticRegisterOpcode perform_math_reg{}; @@ -779,7 +780,7 @@ void DmntCheatVm::Execute(const CheatProcessMetadata& metadata) { if (!cond_met) { SkipConditionalBlock(); } - } else if (auto end_cond = std::get_if<EndConditionalOpcode>(&cur_opcode.opcode)) { + } else if (std::holds_alternative<EndConditionalOpcode>(cur_opcode.opcode)) { // Decrement the condition depth. // We will assume, graciously, that mismatched conditional block ends are a nop. if (condition_depth > 0) { @@ -1209,4 +1210,4 @@ void DmntCheatVm::Execute(const CheatProcessMetadata& metadata) { } } -} // namespace Memory +} // namespace Core::Memory diff --git a/src/core/memory/dmnt_cheat_vm.h b/src/core/memory/dmnt_cheat_vm.h index c36212cf1..8351fd798 100644 --- a/src/core/memory/dmnt_cheat_vm.h +++ b/src/core/memory/dmnt_cheat_vm.h @@ -30,7 +30,7 @@ #include "common/common_types.h" #include "core/memory/dmnt_cheat_types.h" -namespace Memory { +namespace Core::Memory { enum class CheatVmOpcodeType : u32 { StoreStatic = 0, @@ -318,4 +318,4 @@ private: MemoryAccessType mem_type, u64 rel_address); }; -}; // namespace Memory +}; // namespace Core::Memory diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index 85ac81ef7..558cbe6d7 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -16,9 +16,11 @@ #include "core/arm/arm_interface.h" #include "core/core.h" #include "core/hle/kernel/hle_ipc.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" #include "core/hle/result.h" #include "core/hle/service/lm/manager.h" +#include "core/memory.h" #include "core/reporter.h" #include "core/settings.h" @@ -108,14 +110,13 @@ json GetProcessorStateData(const std::string& architecture, u64 entry_point, u64 json GetProcessorStateDataAuto(Core::System& system) { const auto* process{system.CurrentProcess()}; - const auto& vm_manager{process->VMManager()}; auto& arm{system.CurrentArmInterface()}; Core::ARM_Interface::ThreadContext64 context{}; arm.SaveContext(context); return GetProcessorStateData(process->Is64BitProcess() ? 
"AArch64" : "AArch32", - vm_manager.GetCodeRegionBaseAddress(), context.sp, context.pc, + process->PageTable().GetCodeRegionStart(), context.sp, context.pc, context.pstate, context.cpu_registers); } @@ -147,7 +148,8 @@ json GetFullDataAuto(const std::string& timestamp, u64 title_id, Core::System& s } template <bool read_value, typename DescriptorType> -json GetHLEBufferDescriptorData(const std::vector<DescriptorType>& buffer, Memory::Memory& memory) { +json GetHLEBufferDescriptorData(const std::vector<DescriptorType>& buffer, + Core::Memory::Memory& memory) { auto buffer_out = json::array(); for (const auto& desc : buffer) { auto entry = json{ @@ -167,7 +169,7 @@ json GetHLEBufferDescriptorData(const std::vector<DescriptorType>& buffer, Memor return buffer_out; } -json GetHLERequestContextData(Kernel::HLERequestContext& ctx, Memory::Memory& memory) { +json GetHLERequestContextData(Kernel::HLERequestContext& ctx, Core::Memory::Memory& memory) { json out; auto cmd_buf = json::array(); diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 0f3685d1c..fd5a3ee9f 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -153,9 +153,9 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { app_loader.ReadTitle(name); if (name.empty()) { - auto [nacp, icon_file] = FileSys::PatchManager(program_id).GetControlMetadata(); - if (nacp != nullptr) { - name = nacp->GetApplicationName(); + const auto metadata = FileSys::PatchManager(program_id).GetControlMetadata(); + if (metadata.first != nullptr) { + name = metadata.first->GetApplicationName(); } } diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp index 1e060f009..b2c6c537e 100644 --- a/src/core/tools/freezer.cpp +++ b/src/core/tools/freezer.cpp @@ -16,7 +16,7 @@ namespace { constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); -u64 MemoryReadWidth(Memory::Memory& memory, u32 width, VAddr addr) { +u64 MemoryReadWidth(Core::Memory::Memory& memory, u32 width, VAddr addr) { switch (width) { case 1: return memory.Read8(addr); @@ -32,7 +32,7 @@ u64 MemoryReadWidth(Memory::Memory& memory, u32 width, VAddr addr) { } } -void MemoryWriteWidth(Memory::Memory& memory, u32 width, VAddr addr, u64 value) { +void MemoryWriteWidth(Core::Memory::Memory& memory, u32 width, VAddr addr, u64 value) { switch (width) { case 1: memory.Write8(addr, static_cast<u8>(value)); @@ -53,7 +53,7 @@ void MemoryWriteWidth(Memory::Memory& memory, u32 width, VAddr addr, u64 value) } // Anonymous namespace -Freezer::Freezer(Core::Timing::CoreTiming& core_timing_, Memory::Memory& memory_) +Freezer::Freezer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_) : core_timing{core_timing_}, memory{memory_} { event = Core::Timing::CreateEvent( "MemoryFreezer::FrameCallback", diff --git a/src/core/tools/freezer.h b/src/core/tools/freezer.h index 916339c6c..62fc6aa6c 100644 --- a/src/core/tools/freezer.h +++ b/src/core/tools/freezer.h @@ -16,7 +16,7 @@ class CoreTiming; struct EventType; } // namespace Core::Timing -namespace Memory { +namespace Core::Memory { class Memory; } @@ -38,7 +38,7 @@ public: u64 value; }; - explicit Freezer(Core::Timing::CoreTiming& core_timing_, Memory::Memory& memory_); + explicit Freezer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_); ~Freezer(); // Enables or disables the entire memory freezer. 
@@ -82,7 +82,7 @@ private: std::shared_ptr<Core::Timing::EventType> event; Core::Timing::CoreTiming& core_timing; - Memory::Memory& memory; + Core::Memory::Memory& memory; }; } // namespace Tools diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt index 2520ba321..a9c2392b1 100644 --- a/src/input_common/CMakeLists.txt +++ b/src/input_common/CMakeLists.txt @@ -27,4 +27,4 @@ if(SDL2_FOUND) endif() create_target_directory_groups(input_common) -target_link_libraries(input_common PUBLIC core PRIVATE common ${Boost_LIBRARIES}) +target_link_libraries(input_common PUBLIC core PRIVATE common Boost::boost) diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp index a2e0c0bd2..675b477fa 100644 --- a/src/input_common/sdl/sdl_impl.cpp +++ b/src/input_common/sdl/sdl_impl.cpp @@ -603,6 +603,7 @@ public: if (std::abs(event.jaxis.value / 32767.0) < 0.5) { break; } + [[fallthrough]]; case SDL_JOYBUTTONUP: case SDL_JOYHATMOTION: return SDLEventToButtonParamPackage(state, event); diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index 17043346b..e54674d11 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp @@ -6,6 +6,7 @@ #include "common/page_table.h" #include "core/core.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" #include "core/memory.h" #include "tests/core/arm/arm_test_common.h" @@ -18,12 +19,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_) auto& system = Core::System::GetInstance(); auto process = Kernel::Process::Create(system, "", Kernel::Process::ProcessType::Userland); - page_table = &process->VMManager().page_table; - - std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); - page_table->special_regions.clear(); - std::fill(page_table->attributes.begin(), page_table->attributes.end(), - Common::PageType::Unmapped); + page_table = &process->PageTable().PageTableImpl(); system.Memory().MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); system.Memory().MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index effe76a63..258d58eba 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -148,6 +148,7 @@ add_library(video_core STATIC textures/convert.h textures/decoders.cpp textures/decoders.h + textures/texture.cpp textures/texture.h video_core.cpp video_core.h @@ -155,7 +156,6 @@ add_library(video_core STATIC if (ENABLE_VULKAN) target_sources(video_core PRIVATE - renderer_vulkan/declarations.h renderer_vulkan/fixed_pipeline_state.cpp renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index 4b9193182..e35ee0b67 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h @@ -15,37 +15,29 @@ namespace VideoCommon { class BufferBlock { public: - bool Overlaps(const CacheAddr start, const CacheAddr end) const { - return (cache_addr < end) && (cache_addr_end > start); + bool Overlaps(const VAddr start, const VAddr end) const { + return (cpu_addr < end) && (cpu_addr_end > start); } - bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { - return cache_addr <= other_start && other_end <= cache_addr_end; + bool IsInside(const VAddr other_start, const VAddr other_end) const { + return 
cpu_addr <= other_start && other_end <= cpu_addr_end; } - u8* GetWritableHostPtr() const { - return FromCacheAddr(cache_addr); + std::size_t GetOffset(const VAddr in_addr) { + return static_cast<std::size_t>(in_addr - cpu_addr); } - u8* GetWritableHostPtr(std::size_t offset) const { - return FromCacheAddr(cache_addr + offset); + VAddr GetCpuAddr() const { + return cpu_addr; } - std::size_t GetOffset(const CacheAddr in_addr) { - return static_cast<std::size_t>(in_addr - cache_addr); + VAddr GetCpuAddrEnd() const { + return cpu_addr_end; } - CacheAddr GetCacheAddr() const { - return cache_addr; - } - - CacheAddr GetCacheAddrEnd() const { - return cache_addr_end; - } - - void SetCacheAddr(const CacheAddr new_addr) { - cache_addr = new_addr; - cache_addr_end = new_addr + size; + void SetCpuAddr(const VAddr new_addr) { + cpu_addr = new_addr; + cpu_addr_end = new_addr + size; } std::size_t GetSize() const { @@ -61,14 +53,14 @@ public: } protected: - explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { - SetCacheAddr(cache_addr); + explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} { + SetCpuAddr(cpu_addr); } ~BufferBlock() = default; private: - CacheAddr cache_addr{}; - CacheAddr cache_addr_end{}; + VAddr cpu_addr{}; + VAddr cpu_addr_end{}; std::size_t size{}; u64 epoch{}; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 186aca61d..83e7a1cde 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -19,6 +19,7 @@ #include "common/alignment.h" #include "common/common_types.h" #include "core/core.h" +#include "core/memory.h" #include "video_core/buffer_cache/buffer_block.h" #include "video_core/buffer_cache/map_interval.h" #include "video_core/memory_manager.h" @@ -28,37 +29,54 @@ namespace VideoCommon { using MapInterval = std::shared_ptr<MapIntervalBase>; -template <typename TBuffer, typename TBufferType, typename StreamBuffer> +template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> class BufferCache { public: - using BufferInfo = std::pair<const TBufferType*, u64>; + using BufferInfo = std::pair<BufferType, u64>; BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool is_written = false, bool use_fast_cbuf = false) { std::lock_guard lock{mutex}; - auto& memory_manager = system.GPU().MemoryManager(); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - if (!host_ptr) { + const std::optional<VAddr> cpu_addr_opt = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + + if (!cpu_addr_opt) { return {GetEmptyBuffer(size), 0}; } - const auto cache_addr = ToCacheAddr(host_ptr); + + VAddr cpu_addr = *cpu_addr_opt; // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. 
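
The fast-path condition that follows is easy to misread, so here is the policy in isolation: small uploads (and fast constant-buffer uploads) bypass block-cache bookkeeping and go through the stream buffer, but only when nothing in the region has been written. A simplified sketch with illustrative names; the real check also consults the written-interval map:

#include <cstddef>

enum class UploadPath {
    StreamBuffer, // transient copy, no interval tracking
    BlockCache,   // registered map interval, flushable and invalidatable
};

// Mirrors `use_fast_cbuf || size < max_stream_size`, guarded by the
// written-region checks that protect the stream fast path.
UploadPath ChooseUploadPath(std::size_t size, bool is_written, bool use_fast_cbuf,
                            bool overlaps_written_region) {
    constexpr std::size_t max_stream_size = 0x800;
    if ((use_fast_cbuf || size < max_stream_size) && !is_written && !overlaps_written_region) {
        return UploadPath::StreamBuffer;
    }
    return UploadPath::BlockCache;
}
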
constexpr std::size_t max_stream_size = 0x800; if (use_fast_cbuf || size < max_stream_size) { - if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { + if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { + auto& memory_manager = system.GPU().MemoryManager(); if (use_fast_cbuf) { - return ConstBufferUpload(host_ptr, size); + if (memory_manager.IsGranularRange(gpu_addr, size)) { + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + return ConstBufferUpload(host_ptr, size); + } else { + staging_buffer.resize(size); + memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + return ConstBufferUpload(staging_buffer.data(), size); + } } else { - return StreamBufferUpload(host_ptr, size, alignment); + if (memory_manager.IsGranularRange(gpu_addr, size)) { + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + return StreamBufferUpload(host_ptr, size, alignment); + } else { + staging_buffer.resize(size); + memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + return StreamBufferUpload(staging_buffer.data(), size, alignment); + } } } } - auto block = GetBlock(cache_addr, size); - auto map = MapAddress(block, gpu_addr, cache_addr, size); + auto block = GetBlock(cpu_addr, size); + auto map = MapAddress(block, gpu_addr, cpu_addr, size); if (is_written) { map->MarkAsModified(true, GetModifiedTicks()); if (!map->IsWritten()) { @@ -71,9 +89,7 @@ public: } } - const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); - - return {ToHandle(block), offset}; + return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))}; } /// Uploads from host memory. Returns the OpenGL buffer where it's located and its offset. @@ -112,7 +128,7 @@ } /// Write any cached resources overlapping the specified region back to memory - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; std::vector<MapInterval> objects = GetMapsInRange(addr, size); @@ -127,7 +143,7 @@ } /// Mark the specified region as being invalidated - void InvalidateRegion(CacheAddr addr, u64 size) { + void InvalidateRegion(VAddr addr, u64 size) { std::lock_guard lock{mutex}; std::vector<MapInterval> objects = GetMapsInRange(addr, size); @@ -138,7 +154,7 @@ } } - virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; + virtual BufferType GetEmptyBuffer(std::size_t size) = 0; protected: explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, @@ -148,19 +164,19 @@ protected: ~BufferCache() = default; - virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; + virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; virtual void WriteBarrier() = 0; - virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; + virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; - virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, + virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, const u8* data) = 0; - virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, + virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, u8* data) = 0; - virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, + virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset, std::size_t
dst_offset, std::size_t size) = 0; virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { @@ -169,20 +185,17 @@ protected: /// Register an object into the cache void Register(const MapInterval& new_map, bool inherit_written = false) { - const CacheAddr cache_ptr = new_map->GetStart(); - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); - if (!cache_ptr || !cpu_addr) { + const VAddr cpu_addr = new_map->GetStart(); + if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", new_map->GetGpuAddress()); return; } const std::size_t size = new_map->GetEnd() - new_map->GetStart(); - new_map->SetCpuAddress(*cpu_addr); new_map->MarkAsRegistered(true); const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; mapped_addresses.insert({interval, new_map}); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); if (inherit_written) { MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); new_map->MarkAsWritten(true); @@ -192,7 +205,7 @@ protected: /// Unregisters an object from the cache void Unregister(MapInterval& map) { const std::size_t size = map->GetEnd() - map->GetStart(); - rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); + rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); map->MarkAsRegistered(false); if (map->IsWritten()) { UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); @@ -202,32 +215,38 @@ protected: } private: - MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { + MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) { return std::make_shared<MapIntervalBase>(start, end, gpu_addr); } - MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, - const CacheAddr cache_addr, const std::size_t size) { - - std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); + MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, + const std::size_t size) { + std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { - const CacheAddr cache_addr_end = cache_addr + size; - MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); - u8* host_ptr = FromCacheAddr(cache_addr); - UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); + auto& memory_manager = system.GPU().MemoryManager(); + const VAddr cpu_addr_end = cpu_addr + size; + MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); + if (memory_manager.IsGranularRange(gpu_addr, size)) { + u8* host_ptr = memory_manager.GetPointer(gpu_addr); + UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); + } else { + staging_buffer.resize(size); + memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); + } Register(new_map); return new_map; } - const CacheAddr cache_addr_end = cache_addr + size; + const VAddr cpu_addr_end = cpu_addr + size; if (overlaps.size() == 1) { MapInterval& current_map = overlaps[0]; - if (current_map->IsInside(cache_addr, cache_addr_end)) { + if (current_map->IsInside(cpu_addr, cpu_addr_end)) { return current_map; } } - CacheAddr new_start = cache_addr; - CacheAddr new_end = cache_addr_end; + VAddr new_start = cpu_addr; + VAddr new_end = cpu_addr_end; bool write_inheritance = 
false; bool modified_inheritance = false; // Calculate new buffer parameters @@ -237,7 +256,7 @@ private: write_inheritance |= overlap->IsWritten(); modified_inheritance |= overlap->IsModified(); } - GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; + GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; for (auto& overlap : overlaps) { Unregister(overlap); } @@ -250,7 +269,7 @@ private: return new_map; } - void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, + void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, std::vector<MapInterval>& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; @@ -262,13 +281,15 @@ private: for (auto& interval : interval_set) { std::size_t size = interval.upper() - interval.lower(); if (size > 0) { - u8* host_ptr = FromCacheAddr(interval.lower()); - UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); + staging_buffer.resize(size); + system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); + UploadBlockData(block, block->GetOffset(interval.lower()), size, + staging_buffer.data()); } } } - std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { + std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) { if (size == 0) { return {}; } @@ -289,9 +310,10 @@ private: void FlushMap(MapInterval map) { std::size_t size = map->GetEnd() - map->GetStart(); - TBuffer block = blocks[map->GetStart() >> block_page_bits]; - u8* host_ptr = FromCacheAddr(map->GetStart()); - DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); + OwnerBuffer block = blocks[map->GetStart() >> block_page_bits]; + staging_buffer.resize(size); + DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); + system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -303,7 +325,7 @@ private: buffer_ptr += size; buffer_offset += size; - return {&stream_buffer_handle, uploaded_offset}; + return {stream_buffer_handle, uploaded_offset}; } void AlignBuffer(std::size_t alignment) { @@ -313,17 +335,17 @@ private: buffer_offset = offset_aligned; } - TBuffer EnlargeBlock(TBuffer buffer) { + OwnerBuffer EnlargeBlock(OwnerBuffer buffer) { const std::size_t old_size = buffer->GetSize(); const std::size_t new_size = old_size + block_page_size; - const CacheAddr cache_addr = buffer->GetCacheAddr(); - TBuffer new_buffer = CreateBlock(cache_addr, new_size); + const VAddr cpu_addr = buffer->GetCpuAddr(); + OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size); CopyBlock(buffer, new_buffer, 0, 0, old_size); buffer->SetEpoch(epoch); pending_destruction.push_back(buffer); - const CacheAddr cache_addr_end = cache_addr + new_size - 1; - u64 page_start = cache_addr >> block_page_bits; - const u64 page_end = cache_addr_end >> block_page_bits; + const VAddr cpu_addr_end = cpu_addr + new_size - 1; + u64 page_start = cpu_addr >> block_page_bits; + const u64 page_end = cpu_addr_end >> block_page_bits; while (page_start <= page_end) { blocks[page_start] = new_buffer; ++page_start; @@ -331,23 +353,23 @@ private: return new_buffer; } - TBuffer MergeBlocks(TBuffer first, TBuffer second) { + OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) { const std::size_t size_1 = first->GetSize(); const std::size_t size_2 = second->GetSize(); - const CacheAddr first_addr = first->GetCacheAddr(); - const CacheAddr second_addr = second->GetCacheAddr(); - const 
CacheAddr new_addr = std::min(first_addr, second_addr); + const VAddr first_addr = first->GetCpuAddr(); + const VAddr second_addr = second->GetCpuAddr(); + const VAddr new_addr = std::min(first_addr, second_addr); const std::size_t new_size = size_1 + size_2; - TBuffer new_buffer = CreateBlock(new_addr, new_size); + OwnerBuffer new_buffer = CreateBlock(new_addr, new_size); CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); first->SetEpoch(epoch); second->SetEpoch(epoch); pending_destruction.push_back(first); pending_destruction.push_back(second); - const CacheAddr cache_addr_end = new_addr + new_size - 1; + const VAddr cpu_addr_end = new_addr + new_size - 1; u64 page_start = new_addr >> block_page_bits; - const u64 page_end = cache_addr_end >> block_page_bits; + const u64 page_end = cpu_addr_end >> block_page_bits; while (page_start <= page_end) { blocks[page_start] = new_buffer; ++page_start; @@ -355,18 +377,18 @@ private: return new_buffer; } - TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { - TBuffer found{}; - const CacheAddr cache_addr_end = cache_addr + size - 1; - u64 page_start = cache_addr >> block_page_bits; - const u64 page_end = cache_addr_end >> block_page_bits; + OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { + OwnerBuffer found; + const VAddr cpu_addr_end = cpu_addr + size - 1; + u64 page_start = cpu_addr >> block_page_bits; + const u64 page_end = cpu_addr_end >> block_page_bits; while (page_start <= page_end) { auto it = blocks.find(page_start); if (it == blocks.end()) { if (found) { found = EnlargeBlock(found); } else { - const CacheAddr start_addr = (page_start << block_page_bits); + const VAddr start_addr = (page_start << block_page_bits); found = CreateBlock(start_addr, block_page_size); blocks[page_start] = found; } @@ -386,7 +408,7 @@ private: return found; } - void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { + void MarkRegionAsWritten(const VAddr start, const VAddr end) { u64 page_start = start >> write_page_bit; const u64 page_end = end >> write_page_bit; while (page_start <= page_end) { @@ -400,7 +422,7 @@ private: } } - void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { + void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { u64 page_start = start >> write_page_bit; const u64 page_end = end >> write_page_bit; while (page_start <= page_end) { @@ -416,7 +438,7 @@ private: } } - bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { + bool IsRegionWritten(const VAddr start, const VAddr end) const { u64 page_start = start >> write_page_bit; const u64 page_end = end >> write_page_bit; while (page_start <= page_end) { @@ -432,7 +454,7 @@ private: Core::System& system; std::unique_ptr<StreamBuffer> stream_buffer; - TBufferType stream_buffer_handle{}; + BufferType stream_buffer_handle{}; bool invalidated = false; @@ -440,8 +462,8 @@ private: u64 buffer_offset = 0; u64 buffer_offset_base = 0; - using IntervalSet = boost::icl::interval_set<CacheAddr>; - using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; + using IntervalSet = boost::icl::interval_set<VAddr>; + using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>; using IntervalType = typename IntervalCache::interval_type; IntervalCache mapped_addresses; @@ -450,12 +472,14 @@ private: static constexpr u64 block_page_bits = 21; static constexpr u64 block_page_size = 1ULL << 
block_page_bits; - std::unordered_map<u64, TBuffer> blocks; + std::unordered_map<u64, OwnerBuffer> blocks; - std::list<TBuffer> pending_destruction; + std::list<OwnerBuffer> pending_destruction; u64 epoch = 0; u64 modified_ticks = 0; + std::vector<u8> staging_buffer; + std::recursive_mutex mutex; }; diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 3a104d5cd..b0956029d 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -11,7 +11,7 @@ namespace VideoCommon { class MapIntervalBase { public: - MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) + MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) : start{start}, end{end}, gpu_addr{gpu_addr} {} void SetCpuAddress(VAddr new_cpu_addr) { @@ -26,7 +26,7 @@ public: return gpu_addr; } - bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { + bool IsInside(const VAddr other_start, const VAddr other_end) const { return (start <= other_start && other_end <= end); } @@ -46,11 +46,11 @@ public: return is_registered; } - CacheAddr GetStart() const { + VAddr GetStart() const { return start; } - CacheAddr GetEnd() const { + VAddr GetEnd() const { return end; } @@ -76,8 +76,8 @@ public: } private: - CacheAddr start; - CacheAddr end; + VAddr start; + VAddr end; GPUVAddr gpu_addr; VAddr cpu_addr{}; bool is_written{}; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d24c9f657..5cf6a4cc3 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -303,6 +303,10 @@ public: return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } + bool IsConstant() const { + return constant; + } + bool IsValid() const { return size != Size::Invalid; } @@ -312,6 +316,35 @@ public: } }; + struct MsaaSampleLocation { + union { + BitField<0, 4, u32> x0; + BitField<4, 4, u32> y0; + BitField<8, 4, u32> x1; + BitField<12, 4, u32> y1; + BitField<16, 4, u32> x2; + BitField<20, 4, u32> y2; + BitField<24, 4, u32> x3; + BitField<28, 4, u32> y3; + }; + + constexpr std::pair<u32, u32> Location(int index) const { + switch (index) { + case 0: + return {x0, y0}; + case 1: + return {x1, y1}; + case 2: + return {x2, y2}; + case 3: + return {x3, y3}; + default: + UNREACHABLE(); + return {0, 0}; + } + } + }; + enum class DepthMode : u32 { MinusOneToOne = 0, ZeroToOne = 1, @@ -793,7 +826,13 @@ public: u32 rt_separate_frag_data; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x1); + + u32 multisample_raster_enable; + u32 multisample_raster_samples; + std::array<u32, 4> multisample_sample_mask; + + INSERT_UNION_PADDING_WORDS(0x5); struct { u32 address_high; @@ -830,7 +869,16 @@ public: std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; - INSERT_UNION_PADDING_WORDS(0xF); + std::array<MsaaSampleLocation, 4> multisample_sample_locations; + + INSERT_UNION_PADDING_WORDS(0x2); + + union { + BitField<0, 1, u32> enable; + BitField<4, 3, u32> target; + } multisample_coverage_to_color; + + INSERT_UNION_PADDING_WORDS(0x8); struct { union { @@ -922,7 +970,10 @@ public: BitField<4, 1, u32> triangle_rast_flip; } screen_y_control; - INSERT_UNION_PADDING_WORDS(0x21); + float line_width_smooth; + float line_width_aliased; + + INSERT_UNION_PADDING_WORDS(0x1F); u32 vb_element_base; u32 vb_base_instance; @@ -943,7 +994,7 @@ public: CounterReset counter_reset; - INSERT_UNION_PADDING_WORDS(0x1); + u32 
multisample_enable; u32 zeta_enable; @@ -980,7 +1031,7 @@ public: float polygon_offset_factor; - INSERT_UNION_PADDING_WORDS(0x1); + u32 line_smooth_enable; struct { u32 tic_address_high; @@ -1007,7 +1058,11 @@ public: float polygon_offset_units; - INSERT_UNION_PADDING_WORDS(0x11); + INSERT_UNION_PADDING_WORDS(0x4); + + Tegra::Texture::MsaaMode multisample_mode; + + INSERT_UNION_PADDING_WORDS(0xC); union { BitField<2, 1, u32> coord_origin; @@ -1507,12 +1562,17 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); ASSERT_REG_POSITION(color_mask_common, 0x3E4); -ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); ASSERT_REG_POSITION(depth_bounds, 0x3E7); +ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); +ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED); +ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); +ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); +ASSERT_REG_POSITION(multisample_sample_locations, 0x478); +ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); @@ -1538,6 +1598,8 @@ ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6); ASSERT_REG_POSITION(stencil_front_mask, 0x4E7); ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); ASSERT_REG_POSITION(screen_y_control, 0x4EB); +ASSERT_REG_POSITION(line_width_smooth, 0x4EC); +ASSERT_REG_POSITION(line_width_aliased, 0x4ED); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); @@ -1545,11 +1607,13 @@ ASSERT_REG_POSITION(samplecnt_enable, 0x545); ASSERT_REG_POSITION(point_size, 0x546); ASSERT_REG_POSITION(point_sprite_enable, 0x548); ASSERT_REG_POSITION(counter_reset, 0x54C); +ASSERT_REG_POSITION(multisample_enable, 0x54D); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); ASSERT_REG_POSITION(tsc, 0x557); -ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); +ASSERT_REG_POSITION(polygon_offset_factor, 0x55B); +ASSERT_REG_POSITION(line_smooth_enable, 0x55C); ASSERT_REG_POSITION(tic, 0x55D); ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); @@ -1558,6 +1622,7 @@ ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); ASSERT_REG_POSITION(stencil_back_func_func, 0x569); ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); ASSERT_REG_POSITION(polygon_offset_units, 0x56F); +ASSERT_REG_POSITION(multisample_mode, 0x574); ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 498936f0c..7231597d4 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -290,6 +290,23 @@ enum class VmadShr : u64 { Shr15 = 2, }; +enum class VmnmxType : u64 { + Bits8, + Bits16, + Bits32, +}; + +enum class VmnmxOperation : u64 { + Mrg_16H = 0, + Mrg_16L = 1, + Mrg_8B0 = 2, + Mrg_8B2 = 3, + Acc = 4, + Min = 5, + Max = 6, + Nop = 7, +}; + enum class XmadMode : u64 { None = 0, CLo = 1, @@ -989,6 +1006,12 @@ union Instruction { } stg; union { + BitField<23, 3, 
AtomicOp> operation; + BitField<48, 1, u64> extended; + BitField<20, 3, GlobalAtomicType> type; + } red; + + union { BitField<52, 4, AtomicOp> operation; BitField<49, 3, GlobalAtomicType> type; BitField<28, 20, s64> offset; @@ -1484,7 +1507,7 @@ union Instruction { TextureType GetTextureType() const { // The TLDS instruction has a weird encoding for the texture type. - if (texture_info >= 0 && texture_info <= 1) { + if (texture_info <= 1) { return TextureType::Texture1D; } if (texture_info == 2 || texture_info == 8 || texture_info == 12 || @@ -1651,6 +1674,42 @@ union Instruction { } vmad; union { + BitField<54, 1, u64> is_dest_signed; + BitField<48, 1, u64> is_src_a_signed; + BitField<49, 1, u64> is_src_b_signed; + BitField<37, 2, u64> src_format_a; + BitField<29, 2, u64> src_format_b; + BitField<56, 1, u64> mx; + BitField<55, 1, u64> sat; + BitField<36, 2, u64> selector_a; + BitField<28, 2, u64> selector_b; + BitField<50, 1, u64> is_op_b_register; + BitField<51, 3, VmnmxOperation> operation; + + VmnmxType SourceFormatA() const { + switch (src_format_a) { + case 0b11: + return VmnmxType::Bits32; + case 0b10: + return VmnmxType::Bits16; + default: + return VmnmxType::Bits8; + } + } + + VmnmxType SourceFormatB() const { + switch (src_format_b) { + case 0b11: + return VmnmxType::Bits32; + case 0b10: + return VmnmxType::Bits16; + default: + return VmnmxType::Bits8; + } + } + } vmnmx; + + union { BitField<20, 16, u64> imm20_16; BitField<35, 1, u64> high_b_rr; // used on RR BitField<36, 1, u64> product_shift_left; @@ -1734,6 +1793,7 @@ public: ST_S, ST, // Store in generic memory STG, // Store in global memory + RED, // Reduction operation ATOM, // Atomic operation on global memory ATOMS, // Atomic operation on shared memory AL2P, // Transforms attribute memory into physical memory @@ -1763,6 +1823,7 @@ public: MEMBAR, VMAD, VSETP, + VMNMX, FFMA_IMM, // Fused Multiply and Add FFMA_CR, FFMA_RC, @@ -1817,7 +1878,8 @@ public: ICMP_R, ICMP_CR, ICMP_IMM, - FCMP_R, + FCMP_RR, + FCMP_RC, MUFU, // Multi-Function Operator RRO_C, // Range Reduction Operator RRO_R, @@ -2042,6 +2104,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("1110101111111---", Id::RED, Type::Memory, "RED"), INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), @@ -2070,6 +2133,7 @@ private: INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), + INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), @@ -2124,7 +2188,8 @@ private: INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), - INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"), + INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), + INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, 
"RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), @@ -2170,7 +2235,7 @@ private: INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), + INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ced9d7e28..1a2d747be 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -270,13 +270,13 @@ public: virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; protected: virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 925be8d7b..20e73a37e 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -12,8 +12,9 @@ namespace VideoCommon { GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, std::unique_ptr<Core::Frontend::GraphicsContext>&& context) - : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)), - cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {} + : GPU(system, std::move(renderer_), true), gpu_thread{system}, + cpu_context(renderer->GetRenderWindow().CreateSharedContext()), + gpu_context(std::move(context)) {} GPUAsynch::~GPUAsynch() = default; @@ -30,15 +31,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { gpu_thread.SwapBuffers(framebuffer); } -void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { +void GPUAsynch::FlushRegion(VAddr addr, u64 size) { gpu_thread.FlushRegion(addr, size); } -void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) { +void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { gpu_thread.InvalidateRegion(addr, size); } -void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { gpu_thread.FlushAndInvalidateRegion(addr, size); } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 265c62758..03fd0eef0 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -27,9 +27,9 @@ public: void Start() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void 
FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitIdle() const override; protected: diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index bd5278a5c..6f38a672a 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { renderer->SwapBuffers(framebuffer); } -void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { +void GPUSynch::FlushRegion(VAddr addr, u64 size) { renderer->Rasterizer().FlushRegion(addr, size); } -void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { +void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { renderer->Rasterizer().InvalidateRegion(addr, size); } -void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); } diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 866a94c8c..4a6e9a01d 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -26,9 +26,9 @@ public: void Start() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitIdle() const override {} protected: diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 270c7ae0d..10cda686b 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { PushCommand(SwapBuffersCommand(framebuffer ? 
std::make_optional(*framebuffer) : std::nullopt)); } -void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { +void ThreadManager::FlushRegion(VAddr addr, u64 size) { PushCommand(FlushRegionCommand(addr, size)); } -void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { +void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { system.Renderer().Rasterizer().InvalidateRegion(addr, size); } -void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important InvalidateRegion(addr, size); } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index be36c580e..cd74ad330 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -47,26 +47,26 @@ struct SwapBuffersCommand final { /// Command to signal to the GPU thread to flush a region struct FlushRegionCommand final { - explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} + explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} - CacheAddr addr; + VAddr addr; u64 size; }; /// Command to signal to the GPU thread to invalidate a region struct InvalidateRegionCommand final { - explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} + explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} - CacheAddr addr; + VAddr addr; u64 size; }; /// Command to signal to the GPU thread to flush and invalidate a region struct FlushAndInvalidateRegionCommand final { - explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size) + explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} - CacheAddr addr; + VAddr addr; u64 size; }; @@ -111,13 +111,13 @@ public: void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - void FlushRegion(CacheAddr addr, u64 size); + void FlushRegion(VAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be invalidated - void InvalidateRegion(CacheAddr addr, u64 size); + void InvalidateRegion(VAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - void FlushAndInvalidateRegion(CacheAddr addr, u64 size); + void FlushAndInvalidateRegion(VAddr addr, u64 size); // Wait until the gpu thread is idle. 
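
The command structs above all funnel into one queue that the GPU thread drains in order. A minimal sketch of that dispatch, assuming a plain std::queue of std::variant commands (the real ThreadManager uses a dedicated synchronized queue and more command types):

#include <cstdint>
#include <cstdio>
#include <queue>
#include <variant>

using VAddr = std::uint64_t;

struct FlushRegionCommand {
    VAddr addr;
    std::uint64_t size;
};

struct InvalidateRegionCommand {
    VAddr addr;
    std::uint64_t size;
};

using CommandData = std::variant<FlushRegionCommand, InvalidateRegionCommand>;

// Drain the queue, dispatching on whichever alternative each command holds.
void RunCommands(std::queue<CommandData>& queue) {
    while (!queue.empty()) {
        const CommandData command = queue.front();
        queue.pop();
        if (const auto* flush = std::get_if<FlushRegionCommand>(&command)) {
            std::printf("flush 0x%016llx size %llu\n",
                        static_cast<unsigned long long>(flush->addr),
                        static_cast<unsigned long long>(flush->size));
        } else if (const auto* inv = std::get_if<InvalidateRegionCommand>(&command)) {
            std::printf("invalidate 0x%016llx size %llu\n",
                        static_cast<unsigned long long>(inv->addr),
                        static_cast<unsigned long long>(inv->size));
        }
    }
}
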
void WaitIdle() const; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index f5d33f27a..fd49bc2a9 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -6,8 +6,8 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -17,10 +17,7 @@ namespace Tegra { MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer}, system{system} { - std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); - std::fill(page_table.attributes.begin(), page_table.attributes.end(), - Common::PageType::Unmapped); - page_table.Resize(address_space_width); + page_table.Resize(address_space_width, page_bits, false); // Initialize the map with a single free region covering the entire managed space. VirtualMemoryArea initial_vma; @@ -55,9 +52,9 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::DeviceMapped) + ->PageTable() + .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::DeviceShared) .IsSuccess()); return gpu_addr; @@ -70,9 +67,9 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::DeviceMapped) + ->PageTable() + .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::DeviceShared) .IsSuccess()); return gpu_addr; } @@ -81,18 +78,18 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { ASSERT((gpu_addr & page_mask) == 0); const u64 aligned_size{Common::AlignUp(size, page_size)}; - const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; const auto cpu_addr = GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); // Flush and invalidate through the GPU interface, to be asynchronous if possible. 
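
Both mapping paths above first round the size up to a whole number of GPU pages. For reference, a sketch of that alignment arithmetic; for power-of-two alignments this bit trick matches what Common::AlignUp computes:

#include <cstdint>

// Round value up to the next multiple of align; align must be a power of two.
constexpr std::uint64_t AlignUpPow2(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) & ~(align - 1);
}

static_assert(AlignUpPow2(0x1001, 0x1000) == 0x2000, "partial page rounds up");
static_assert(AlignUpPow2(0x1000, 0x1000) == 0x1000, "aligned size is unchanged");
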
- system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); + system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::None) + ->PageTable() + .SetMemoryAttribute(cpu_addr.value(), size, + Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::None) .IsSuccess()); return gpu_addr; @@ -140,24 +137,16 @@ T MemoryManager::Read(GPUVAddr addr) const { return {}; } - const u8* page_pointer{page_table.pointers[addr >> page_bits]}; + const u8* page_pointer{GetPointer(addr)}; if (page_pointer) { // NOTE: Avoid adding any extra logic to this fast-path block T value; - std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T)); + std::memcpy(&value, page_pointer, sizeof(T)); return value; } - switch (page_table.attributes[addr >> page_bits]) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr); - return 0; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr); - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); + return {}; } @@ -167,24 +156,14 @@ void MemoryManager::Write(GPUVAddr addr, T data) { return; } - u8* page_pointer{page_table.pointers[addr >> page_bits]}; + u8* page_pointer{GetPointer(addr)}; if (page_pointer) { // NOTE: Avoid adding any extra logic to this fast-path block - std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T)); + std::memcpy(page_pointer, &data, sizeof(T)); return; } - switch (page_table.attributes[addr >> page_bits]) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, - static_cast<u32>(data), addr); - return; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr); - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); } template u8 MemoryManager::Read<u8>(GPUVAddr addr) const; @@ -201,9 +180,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) { return {}; } - u8* const page_pointer{page_table.pointers[addr >> page_bits]}; - if (page_pointer != nullptr) { - return page_pointer + (addr & page_mask); + auto& memory = system.Memory(); + + const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; + + if (page_addr != 0) { + return memory.GetPointer(page_addr + (addr & page_mask)); } LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); @@ -215,9 +197,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { return {}; } - const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; - if (page_pointer != nullptr) { - return page_pointer + (addr & page_mask); + const auto& memory = system.Memory(); + + const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; + + if (page_addr != 0) { + return memory.GetPointer(page_addr + (addr & page_mask)); } LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); @@ -238,22 +223,17 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - const u8* 
src_ptr{page_table.pointers[page_index] + page_offset}; - // Flush must happen on the rasterizer interface, such that memory is always synchronous - // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. - rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); - std::memcpy(dest_buffer, src_ptr, copy_amount); - break; - } - default: - UNREACHABLE(); - } + const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; + // Flush must happen on the rasterizer interface, such that memory is always synchronous + // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. + rasterizer.FlushRegion(src_addr, copy_amount); + memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); page_index++; page_offset = 0; @@ -268,13 +248,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; const u8* page_pointer = page_table.pointers[page_index]; if (page_pointer) { - const u8* src_ptr{page_pointer + page_offset}; - std::memcpy(dest_buffer, src_ptr, copy_amount); + const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; + memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); } else { std::memset(dest_buffer, 0, copy_amount); } @@ -290,22 +272,17 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t page_index{dest_addr >> page_bits}; std::size_t page_offset{dest_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - u8* dest_ptr{page_table.pointers[page_index] + page_offset}; - // Invalidate must happen on the rasterizer interface, such that memory is always - // synchronous when it is written (even when in asynchronous GPU mode). - rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); - std::memcpy(dest_ptr, src_buffer, copy_amount); - break; - } - default: - UNREACHABLE(); - } + const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; + // Invalidate must happen on the rasterizer interface, such that memory is always + // synchronous when it is written (even when in asynchronous GPU mode). 
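
Each of these block transfers walks the request page by page, clamping every chunk to the end of the current page before translating and copying it. A standalone sketch of that chunking loop (page_bits = 16 is illustrative; it matches this memory manager's page size, but nothing below depends on it):

#include <algorithm>
#include <cstddef>
#include <cstdint>

constexpr std::uint64_t page_bits = 16;
constexpr std::uint64_t page_size = 1ULL << page_bits;
constexpr std::uint64_t page_mask = page_size - 1;

// Invokes chunk(page_index, page_offset, copy_amount) once per page touched by
// a transfer of size bytes starting at addr, exactly like the loops above.
template <typename Fn>
void ForEachPageChunk(std::uint64_t addr, std::size_t size, Fn&& chunk) {
    std::size_t remaining_size = size;
    std::uint64_t page_index = addr >> page_bits;
    std::size_t page_offset = static_cast<std::size_t>(addr & page_mask);
    while (remaining_size > 0) {
        const std::size_t copy_amount =
            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size);
        chunk(page_index, page_offset, copy_amount);
        ++page_index;
        page_offset = 0;
        remaining_size -= copy_amount;
    }
}
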
+ rasterizer.InvalidateRegion(dest_addr, copy_amount); + memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); page_index++; page_offset = 0; @@ -320,13 +297,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t page_index{dest_addr >> page_bits}; std::size_t page_offset{dest_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; u8* page_pointer = page_table.pointers[page_index]; if (page_pointer) { - u8* dest_ptr{page_pointer + page_offset}; - std::memcpy(dest_ptr, src_buffer, copy_amount); + const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; + memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); } page_index++; page_offset = 0; @@ -336,33 +315,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, } void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { - std::size_t remaining_size{size}; - std::size_t page_index{src_addr >> page_bits}; - std::size_t page_offset{src_addr & page_mask}; - - while (remaining_size > 0) { - const std::size_t copy_amount{ - std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - // Flush must happen on the rasterizer interface, such that memory is always synchronous - // when it is copied (even when in asynchronous GPU mode). - const u8* src_ptr{page_table.pointers[page_index] + page_offset}; - rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); - WriteBlock(dest_addr, src_ptr, copy_amount); - break; - } - default: - UNREACHABLE(); - } - - page_index++; - page_offset = 0; - dest_addr += static_cast<VAddr>(copy_amount); - src_addr += static_cast<VAddr>(copy_amount); - remaining_size -= copy_amount; - } + std::vector<u8> tmp_buffer(size); + ReadBlock(src_addr, tmp_buffer.data(), size); + WriteBlock(dest_addr, tmp_buffer.data(), size); } void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { @@ -371,6 +326,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); } +bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { + const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; + const std::size_t page = (addr & Core::Memory::PAGE_MASK) + size; + return page <= Core::Memory::PAGE_SIZE; +} + void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, VAddr backing_addr) { LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, @@ -380,12 +341,13 @@ void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageTy ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", base + page_table.pointers.size()); - std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type); - if (memory == nullptr) { - std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory); - std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end, - backing_addr); + while (base != end) { + page_table.pointers[base] = nullptr; + page_table.backing_addr[base] = 0; + + base += 1; + } } else { while (base != end) { page_table.pointers[base] = memory; diff 
--git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 073bdb491..0ddd52d5a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -97,6 +97,11 @@ public: void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); + /** + * IsGranularRange checks whether a GPU memory region can be read through a single host pointer, i.e. it does not cross a page boundary + */ + bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); + private: using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; using VMAHandle = VMAMap::const_iterator; @@ -174,7 +179,7 @@ private: /// End of address space, based on address space in bits. static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; - Common::BackingPageTable page_table{page_bits}; + Common::PageTable page_table; VMAMap vma_map; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index e66054ed0..5ea2b01f2 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -98,12 +98,12 @@ public: static_cast<QueryCache&>(*this), VideoCore::QueryType::SamplesPassed}}} {} - void InvalidateRegion(CacheAddr addr, std::size_t size) { + void InvalidateRegion(VAddr addr, std::size_t size) { std::unique_lock lock{mutex}; FlushAndRemoveRegion(addr, size); } - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::unique_lock lock{mutex}; FlushAndRemoveRegion(addr, size); } @@ -117,14 +117,16 @@ public: void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { std::unique_lock lock{mutex}; auto& memory_manager = system.GPU().MemoryManager(); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); + const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr_opt); + VAddr cpu_addr = *cpu_addr_opt; - CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); + CachedQuery* query = TryGet(cpu_addr); if (!query) { - const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); - ASSERT_OR_EXECUTE(cpu_addr, return;); + ASSERT_OR_EXECUTE(cpu_addr_opt, return;); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); - query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); + query = Register(type, cpu_addr, host_ptr, timestamp.has_value()); } query->BindCounter(Stream(type).Current(), timestamp); @@ -173,11 +175,11 @@ protected: private:
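
The IsGranularRange helper declared above formalizes when a GPU range can be serviced through a single host pointer: the backing CPU range must not cross a page boundary. A self-contained sketch with a toy 4 KiB page (yuzu takes PAGE_MASK and PAGE_SIZE from Core::Memory):

#include <cstddef>
#include <cstdint>

constexpr std::uint64_t PAGE_BITS = 12; // illustrative 4 KiB page
constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;
constexpr std::uint64_t PAGE_MASK = PAGE_SIZE - 1;

// A range is "granular" when its offset within the page plus its size stays
// inside one page, so one pointer into backing memory covers all of it.
constexpr bool IsGranular(std::uint64_t cpu_addr, std::size_t size) {
    return (cpu_addr & PAGE_MASK) + size <= PAGE_SIZE;
}

static_assert(IsGranular(0x0FF0, 0x10), "ends exactly on the page boundary");
static_assert(!IsGranular(0x0FF0, 0x11), "spills into the next page");

Non-granular ranges fall back to ReadBlockUnsafe/WriteBlockUnsafe through a staging buffer, as seen in the buffer cache changes earlier.
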
/// Flushes a memory range to guest memory and removes it from the cache. - void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { + void FlushAndRemoveRegion(VAddr addr, std::size_t size) { const u64 addr_begin = static_cast<u64>(addr); const u64 addr_end = addr_begin + static_cast<u64>(size); const auto in_range = [addr_begin, addr_end](CachedQuery& query) { - const u64 cache_begin = query.GetCacheAddr(); + const u64 cache_begin = query.GetCpuAddr(); const u64 cache_end = cache_begin + query.SizeInBytes(); return cache_begin < addr_end && addr_begin < cache_end; }; @@ -193,7 +195,7 @@ private: if (!in_range(query)) { continue; } - rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); + rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); query.Flush(); } contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), @@ -204,22 +206,21 @@ private: /// Registers the passed parameters as cached and returns a pointer to the stored cached query. CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); - const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; + const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT; return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, host_ptr); } /// Tries to get a cached query. Returns nullptr on failure. - CachedQuery* TryGet(CacheAddr addr) { + CachedQuery* TryGet(VAddr addr) { const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; const auto it = cached_queries.find(page); if (it == std::end(cached_queries)) { return nullptr; } auto& contents = it->second; - const auto found = - std::find_if(std::begin(contents), std::end(contents), - [addr](auto& query) { return query.GetCacheAddr() == addr; }); + const auto found = std::find_if(std::begin(contents), std::end(contents), + [addr](auto& query) { return query.GetCpuAddr() == addr; }); return found != std::end(contents) ? &*found : nullptr; }
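
TryGet above is a two-level lookup: a hash map keyed by page index narrows the search, then the bucket is scanned for an exact CPU address. A simplified, self-contained version of the same shape (Query and QueryMap are illustrative stand-ins for CachedQuery and cached_queries):

#include <cstdint>
#include <unordered_map>
#include <vector>

constexpr std::uint64_t PAGE_SHIFT = 12; // illustrative; matches a 4 KiB page
using VAddr = std::uint64_t;

struct Query {
    VAddr cpu_addr;
    int payload;
};

using QueryMap = std::unordered_map<std::uint64_t, std::vector<Query>>;

// Bucket by page, then match the exact address within the bucket.
Query* TryGet(QueryMap& cached, VAddr addr) {
    const auto it = cached.find(addr >> PAGE_SHIFT);
    if (it == cached.end()) {
        return nullptr;
    }
    for (Query& query : it->second) {
        if (query.cpu_addr == addr) {
            return &query;
        }
    }
    return nullptr;
}
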
@@ -323,14 +324,10 @@ public: timestamp = timestamp_; } - VAddr CpuAddr() const noexcept { + VAddr GetCpuAddr() const noexcept { return cpu_addr; } - CacheAddr GetCacheAddr() const noexcept { - return ToCacheAddr(host_ptr); - } - u64 SizeInBytes() const noexcept { return SizeInBytes(timestamp.has_value()); } diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index d01db97da..53622ca05 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -23,15 +23,15 @@ constexpr auto RangeFromInterval(Map& map, const Interval& interval) { } // Anonymous namespace -RasterizerAccelerated::RasterizerAccelerated(Memory::Memory& cpu_memory_) +RasterizerAccelerated::RasterizerAccelerated(Core::Memory::Memory& cpu_memory_) : cpu_memory{cpu_memory_} {} RasterizerAccelerated::~RasterizerAccelerated() = default; void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { std::lock_guard lock{pages_mutex}; - const u64 page_start{addr >> Memory::PAGE_BITS}; - const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; + const u64 page_start{addr >> Core::Memory::PAGE_BITS}; + const u64 page_end{(addr + size + Core::Memory::PAGE_SIZE - 1) >> Core::Memory::PAGE_BITS}; // Interval maps will erase segments if count reaches 0, so if delta is negative we have to // subtract after iterating @@ -44,8 +44,8 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del const auto interval = pair.first & pages_interval; const int count = pair.second; - const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; - const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const VAddr interval_start_addr = boost::icl::first(interval) << Core::Memory::PAGE_BITS; + const VAddr interval_end_addr = boost::icl::last_next(interval) << Core::Memory::PAGE_BITS; const u64 interval_size = interval_end_addr - interval_start_addr; if (delta > 0 && count == delta) { diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index 315798e7c..91866d7dd 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "video_core/rasterizer_interface.h" -namespace Memory { +namespace Core::Memory { class Memory; } @@ -20,7 +20,7 @@ namespace VideoCore { /// Implements the part of RasterizerInterface that is shared by all GPU-accelerated rasterizers.
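
UpdatePagesCachedCount first widens the byte range to whole pages; the half-open page interval is computed as below (toy 4 KiB pages, mirroring the page_start/page_end expressions above):

#include <cstdint>

constexpr std::uint64_t PAGE_BITS = 12;
constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;

// First page touched by [addr, addr + size).
constexpr std::uint64_t PageStart(std::uint64_t addr) {
    return addr >> PAGE_BITS;
}

// One past the last page touched: (addr + size) rounded up to a page index.
constexpr std::uint64_t PageEnd(std::uint64_t addr, std::uint64_t size) {
    return (addr + size + PAGE_SIZE - 1) >> PAGE_BITS;
}

static_assert(PageStart(0x1800) == 1, "starts inside page 1");
static_assert(PageEnd(0x1800, 0x10) == 2, "16 bytes inside page 1 cover pages [1, 2)");

The interval map then keeps a per-page reference count, so a page only stops being treated as cached when the last overlapping object is unregistered.
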
class RasterizerAccelerated : public RasterizerInterface { public: - explicit RasterizerAccelerated(Memory::Memory& cpu_memory_); + explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); ~RasterizerAccelerated() override; void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; @@ -30,7 +30,7 @@ private: CachedPageMap cached_pages; std::mutex pages_mutex; - Memory::Memory& cpu_memory; + Core::Memory::Memory& cpu_memory; }; } // namespace VideoCore diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 6de1597a2..22987751e 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -18,22 +18,14 @@ class RasterizerCacheObject { public: - explicit RasterizerCacheObject(const u8* host_ptr) - : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {} + explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {} virtual ~RasterizerCacheObject(); - CacheAddr GetCacheAddr() const { - return cache_addr; + VAddr GetCpuAddr() const { + return cpu_addr; } - const u8* GetHostPtr() const { - return host_ptr; - } - - /// Gets the address of the shader in guest memory, required for cache management - virtual VAddr GetCpuAddr() const = 0; - /// Gets the size of the shader in guest memory, required for cache management virtual std::size_t GetSizeInBytes() const = 0; @@ -68,8 +60,7 @@ private: bool is_registered{}; ///< Whether the object is currently registered with the cache bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing - const u8* host_ptr{}; ///< Pointer to the memory backing this cached region - CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space + VAddr cpu_addr{}; ///< Guest CPU address of this object, used as the cache key }; template <class T> @@ -80,7 +71,7 @@ public: explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} /// Write any cached resources overlapping the specified region back to memory - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; const auto& objects{GetSortedObjectsFromRegion(addr, size)}; @@ -90,7 +81,7 @@ public: } /// Mark the specified region as being invalidated - void InvalidateRegion(CacheAddr addr, u64 size) { + void InvalidateRegion(VAddr addr, u64 size) { std::lock_guard lock{mutex}; const auto& objects{GetSortedObjectsFromRegion(addr, size)}; @@ -114,27 +105,20 @@ public: protected: /// Tries to get an object from the cache with the specified CPU address - T TryGet(CacheAddr addr) const { + T TryGet(VAddr addr) const { const auto iter = map_cache.find(addr); if (iter != map_cache.end()) return iter->second; return nullptr; } - T TryGet(const void* addr) const { - const auto iter = map_cache.find(ToCacheAddr(addr)); - if (iter != map_cache.end()) - return iter->second; - return nullptr; - } - /// Register an object into the cache virtual void Register(const T& object) { std::lock_guard lock{mutex}; object->SetIsRegistered(true); interval_cache.add({GetInterval(object), ObjectSet{object}}); - map_cache.insert({object->GetCacheAddr(), object}); + map_cache.insert({object->GetCpuAddr(), object}); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); } @@ -144,7 +128,7 @@ protected: object->SetIsRegistered(false);
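// Drop the page references first, then remove the object from both the interval and the address indexes.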
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); - const CacheAddr addr = object->GetCacheAddr(); + const VAddr addr = object->GetCpuAddr(); interval_cache.subtract({GetInterval(object), ObjectSet{object}}); map_cache.erase(addr); } @@ -173,7 +157,7 @@ protected: private: /// Returns a list of cached objects from the specified memory region, ordered by access time - std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { + std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { if (size == 0) { return {}; } @@ -197,13 +181,13 @@ private: } using ObjectSet = std::set<T>; - using ObjectCache = std::unordered_map<CacheAddr, T>; - using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; + using ObjectCache = std::unordered_map<VAddr, T>; + using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; using ObjectInterval = typename IntervalCache::interval_type; static auto GetInterval(const T& object) { - return ObjectInterval::right_open(object->GetCacheAddr(), - object->GetCacheAddr() + object->GetSizeInBytes()); + return ObjectInterval::right_open(object->GetCpuAddr(), + object->GetCpuAddr() + object->GetSizeInBytes()); } ObjectCache map_cache; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1a68e3caa..8ae5b9c4e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -53,14 +53,14 @@ public: virtual void FlushAll() = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated - virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; /// Notify the rasterizer to send all written commands to the host GPU. 
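These region hooks all take half-open [addr, addr + size) guest ranges; a cached object overlaps a flushed or invalidated region exactly when each range begins before the other ends, which is the same test the query cache's in_range lambda performs earlier in this change. A self-contained sketch of the predicate (names are illustrative):

    #include <cassert>
    #include <cstdint>

    using VAddr = std::uint64_t;

    // Half-open ranges [a_begin, a_end) and [b_begin, b_end) overlap iff each
    // one begins strictly before the other ends.
    constexpr bool RangesOverlap(VAddr a_begin, VAddr a_end, VAddr b_begin, VAddr b_end) {
        return a_begin < b_end && b_begin < a_end;
    }

    int main() {
        constexpr VAddr obj = 0x1000;
        constexpr VAddr obj_end = obj + 0x100;
        static_assert(RangesOverlap(obj, obj_end, 0x10ff, 0x1200));  // shares the last byte
        static_assert(!RangesOverlap(obj, obj_end, 0x1100, 0x1200)); // merely adjacent
        static_assert(!RangesOverlap(obj, obj_end, 0x0f00, 0x1000)); // ends where object begins
    }

Using strict less-than on both sides keeps adjacent ranges from flushing each other, which matters once regions are tracked at byte rather than page granularity.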
virtual void FlushCommands() = 0; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 0375fca17..cb5792407 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) - : VideoCommon::BufferBlock{cache_addr, size} { +CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); } @@ -47,41 +47,39 @@ OGLBufferCache::~OGLBufferCache() { glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); } -Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { - return std::make_shared<CachedBufferBlock>(cache_addr, size); +Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared<CachedBufferBlock>(cpu_addr, size); } void OGLBufferCache::WriteBarrier() { glMemoryBarrier(GL_ALL_BARRIER_BITS); } -const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) { +GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { return buffer->GetHandle(); } -const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { - static const GLuint null_buffer = 0; - return &null_buffer; +GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { + return 0; } void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) { - glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), + glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), data); } void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, u8* data) { MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), + glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), data); } void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) { - glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(), - static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset), - static_cast<GLsizeiptr>(size)); + glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset), + static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); } OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, @@ -89,7 +87,7 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); const GLuint& cbuf = cbufs[cbuf_cursor++]; glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); - return {&cbuf, 0}; + return {cbuf, 0}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 8c7145443..a74817857 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -31,15 +31,15 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, 
GLuint, OGLStreamBuf class CachedBufferBlock : public VideoCommon::BufferBlock { public: - explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); + explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); ~CachedBufferBlock(); - const GLuint* GetHandle() const { - return &gl_buffer.handle; + GLuint GetHandle() const { + return gl_buffer.handle; } private: - OGLBuffer gl_buffer{}; + OGLBuffer gl_buffer; }; class OGLBufferCache final : public GenericBufferCache { @@ -48,18 +48,18 @@ public: const Device& device, std::size_t stream_size); ~OGLBufferCache(); - const GLuint* GetEmptyBuffer(std::size_t) override; + GLuint GetEmptyBuffer(std::size_t) override; void Acquire() noexcept { cbuf_cursor = 0; } protected: - Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; + Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; - void WriteBarrier() override; + GLuint ToHandle(const Buffer& buffer) override; - const GLuint* ToHandle(const Buffer& buffer) override; + void WriteBarrier() override; void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c286502ba..d83dca25a 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -87,7 +87,7 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; - static std::array<std::size_t, 5> stage_swizzle = {0, 1, 2, 3, 4}; + static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4}; const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS); const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index f12e9f55f..d7ba57aca 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -94,9 +94,9 @@ CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { - VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs)); cache = rhs.cache; type = rhs.type; + CachedQueryBase<HostCounter>::operator=(std::move(rhs)); return *this; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 346feeb2f..175374f0d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -140,8 +140,8 @@ void RasterizerOpenGL::SetupVertexFormat() { const auto attrib = gpu.regs.vertex_attrib_format[index]; const auto gl_index = static_cast<GLuint>(index); - // Ignore invalid attributes. - if (!attrib.IsValid()) { + // Disable constant attributes. 
+ if (attrib.IsConstant()) { glDisableVertexAttribArray(gl_index); continue; } @@ -188,10 +188,8 @@ void RasterizerOpenGL::SetupVertexBuffer() { ASSERT(end > start); const u64 size = end - start + 1; const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); - - // Bind the vertex array to the buffer at the current offset. - vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer, - vertex_buffer_offset, vertex_array.stride); + glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, + vertex_array.stride); } } @@ -222,7 +220,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - vertex_array_pushbuffer.SetIndexBuffer(buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); return offset; } @@ -345,7 +343,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { texture_cache.GuardRenderTargets(true); - View depth_surface = texture_cache.GetDepthBufferSurface(true); + View depth_surface = texture_cache.GetDepthBufferSurface(); const auto& regs = gpu.regs; UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); @@ -354,7 +352,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { FramebufferCacheKey key; const auto colors_count = static_cast<std::size_t>(regs.rt_control.count); for (std::size_t index = 0; index < colors_count; ++index) { - View color_surface{texture_cache.GetColorBufferSurface(index, true)}; + View color_surface{texture_cache.GetColorBufferSurface(index)}; if (!color_surface) { continue; } @@ -387,12 +385,12 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using View color_surface; if (using_color_fb) { const std::size_t index = regs.clear_buffers.RT; - color_surface = texture_cache.GetColorBufferSurface(index, true); + color_surface = texture_cache.GetColorBufferSurface(index); texture_cache.MarkColorBufferInUse(index); } View depth_surface; if (using_depth_fb || using_stencil_fb) { - depth_surface = texture_cache.GetDepthBufferSurface(true); + depth_surface = texture_cache.GetDepthBufferSurface(); texture_cache.MarkDepthBufferInUse(); } texture_cache.GuardRenderTargets(false); @@ -496,6 +494,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncPrimitiveRestart(); SyncScissorTest(); SyncPointState(); + SyncLineState(); SyncPolygonOffset(); SyncAlphaTest(); SyncFramebufferSRGB(); @@ -523,7 +522,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Prepare vertex array format. SetupVertexFormat(); - vertex_array_pushbuffer.Setup(); // Upload vertex and index data. SetupVertexBuffer(); @@ -533,17 +531,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { index_buffer_offset = SetupIndexBuffer(); } - // Prepare packed bindings. - bind_ubo_pushbuffer.Setup(); - bind_ssbo_pushbuffer.Setup(); - // Setup emulation uniform buffer. GLShader::MaxwellUniformData ubo; ubo.SetFromRegs(gpu); const auto [buffer, offset] = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset, - static_cast<GLsizeiptr>(sizeof(ubo))); + glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, + static_cast<GLsizeiptr>(sizeof(ubo))); // Setup shaders and their used resources. 
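The hunks above drop the deferred Setup()/Push()/Bind() push-buffer round trip in favour of immediate GL calls, presumably viable because the buffer cache now hands back plain GLuint handles and offsets that are already final when UploadMemory/UploadHostMemory return. A minimal sketch of the direct-bind pattern; the binding value of 0 is an assumption for illustration (EmulationUniformBlockBinding appears in the diff, but its value does not):

    #include <glad/glad.h>

    // Assumed value; in the change above the constant is EmulationUniformBlockBinding.
    constexpr GLuint kEmulationUniformBlockBinding = 0;

    // Bind a sub-range of `buffer` to a uniform block binding point in one call,
    // with no intermediate queue of pending binds. Requires a current GL context.
    void BindEmulationUbo(GLuint buffer, GLintptr offset, GLsizeiptr size) {
        glBindBufferRange(GL_UNIFORM_BUFFER, kEmulationUniformBlockBinding, buffer, offset, size);
    }

One visible enabler in the hunks above is that ToHandle and GetEmptyBuffer now return GLuint by value rather than const GLuint*, so there is no longer a pointer whose target might only be filled in later.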
texture_cache.GuardSamplers(true); @@ -556,11 +550,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Signal the buffer cache that we are not going to upload more things. buffer_cache.Unmap(); - // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. - vertex_array_pushbuffer.Bind(); - bind_ubo_pushbuffer.Bind(); - bind_ssbo_pushbuffer.Bind(); - program_manager.BindGraphicsPipeline(); if (texture_cache.TextureBarrier()) { @@ -629,17 +618,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); buffer_cache.Map(buffer_size); - bind_ubo_pushbuffer.Setup(); - bind_ssbo_pushbuffer.Setup(); - SetupComputeConstBuffers(kernel); SetupComputeGlobalMemory(kernel); buffer_cache.Unmap(); - bind_ubo_pushbuffer.Bind(); - bind_ssbo_pushbuffer.Bind(); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; @@ -656,9 +639,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, void RasterizerOpenGL::FlushAll() {} -void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - if (!addr || !size) { + if (addr == 0 || size == 0) { return; } texture_cache.FlushRegion(addr, size); @@ -666,9 +649,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { query_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - if (!addr || !size) { + if (addr == 0 || size == 0) { return; } texture_cache.InvalidateRegion(addr, size); @@ -677,7 +660,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { query_cache.InvalidateRegion(addr, size); } -void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { if (Settings::values.use_accurate_gpu_emulation) { FlushRegion(addr, size); } @@ -716,8 +699,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, MICROPROFILE_SCOPE(OpenGL_CacheManagement); - const auto surface{ - texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))}; + const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; if (!surface) { return {}; } @@ -771,8 +753,8 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const const ConstBufferEntry& entry) { if (!buffer.enabled) { // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, - sizeof(float)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, + sizeof(float)); return; } @@ -783,7 +765,7 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const const auto alignment = device.GetUniformBufferAlignment(); const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, device.HasFastBufferSubData()); - bind_ubo_pushbuffer.Push(binding, cbuf, offset, size); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { @@ -819,7 
+801,8 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto [ssbo, buffer_offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); - bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, + static_cast<GLsizeiptr>(size)); } void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { @@ -1312,6 +1295,19 @@ void RasterizerOpenGL::SyncPointState() { glDisable(GL_PROGRAM_POINT_SIZE); } +void RasterizerOpenGL::SyncLineState() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::LineWidth]) { + return; + } + flags[Dirty::LineWidth] = false; + + const auto& regs = gpu.regs; + oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable); + glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased); +} + void RasterizerOpenGL::SyncPolygonOffset() { auto& gpu = system.GPU().Maxwell3D(); auto& flags = gpu.dirty.flags; @@ -1419,7 +1415,7 @@ void RasterizerOpenGL::EndTransformFeedback() { const GPUVAddr gpu_addr = binding.Address(); const std::size_t size = binding.buffer_size; const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); + glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2d3be2437..caea174d2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -65,9 +65,9 @@ public: void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushCommands() override; void TickFrame() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, @@ -171,6 +171,9 @@ private: /// Syncs the point state to match the guest state void SyncPointState(); + /// Syncs the line state to match the guest state + void SyncLineState(); + /// Syncs the rasterizer enable state to match the guest state void SyncRasterizeEnable(); @@ -228,9 +231,7 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker}; - BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; - BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; + GLint vertex_binding = 0; std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> transform_feedback_buffers; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 046ee55a5..6d2ff20f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,11 
+214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() { } // Anonymous namespace -CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, +CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, std::shared_ptr<OGLProgram> program) - : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, - cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} + : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, + size_in_bytes{size_in_bytes}, program{std::move(program)} {} CachedShader::~CachedShader() = default; @@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); - return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, - size_in_bytes, std::move(registry), - MakeEntries(ir), std::move(program))); + return std::shared_ptr<CachedShader>(new CachedShader( + params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { @@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); - return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, - size_in_bytes, std::move(registry), - MakeEntries(ir), std::move(program))); + return std::shared_ptr<CachedShader>(new CachedShader( + params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateFromCache(const ShaderParameters& params, const PrecompiledShader& precompiled_shader, std::size_t size_in_bytes) { - return std::shared_ptr<CachedShader>(new CachedShader( - params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, - precompiled_shader.entries, precompiled_shader.program)); + return std::shared_ptr<CachedShader>( + new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry, + precompiled_shader.entries, precompiled_shader.program)); } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, @@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const GPUVAddr address{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto host_ptr{memory_manager.GetPointer(address)}; - Shader shader{TryGet(host_ptr)}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; + Shader shader{cpu_addr ? 
TryGet(*cpu_addr) : nullptr}; if (shader) { return last_shaders[static_cast<std::size_t>(program)] = shader; } + const auto host_ptr{memory_manager.GetPointer(address)}; + // No shader found - create a new one ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; ProgramCode code_b; @@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const auto unique_identifier = GetUniqueIdentifier( GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; - const ShaderParameters params{system, disk_cache, device, - cpu_addr, host_ptr, unique_identifier}; + + const ShaderParameters params{system, disk_cache, device, + *cpu_addr, host_ptr, unique_identifier}; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { @@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { auto& memory_manager{system.GPU().MemoryManager()}; - const auto host_ptr{memory_manager.GetPointer(code_addr)}; - auto kernel = TryGet(host_ptr); + const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; + + auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; if (kernel) { return kernel; } + const auto host_ptr{memory_manager.GetPointer(code_addr)}; // No kernel found, create a new one auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; - const ShaderParameters params{system, disk_cache, device, - cpu_addr, host_ptr, unique_identifier}; + + const ShaderParameters params{system, disk_cache, device, + *cpu_addr, host_ptr, unique_identifier}; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 4935019fc..c836df5bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -65,11 +65,6 @@ public: /// Gets the GL program handle for the shader GLuint GetHandle() const; - /// Returns the guest CPU address of the shader - VAddr GetCpuAddr() const override { - return cpu_addr; - } - /// Returns the size in bytes of the shader std::size_t GetSizeInBytes() const override { return size_in_bytes; @@ -90,13 +85,12 @@ public: std::size_t size_in_bytes); private: - explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, + explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, std::shared_ptr<OGLProgram> program); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; - VAddr cpu_addr = 0; std::size_t size_in_bytes = 0; std::shared_ptr<OGLProgram> program; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 160ae4340..9495f48a2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -835,7 +835,8 @@ private: void DeclareConstantBuffers() { u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, cbuf] : ir.GetConstantBuffers()) { + for (const auto& 
buffers : ir.GetConstantBuffers()) { + const auto index = buffers.first; code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, GetConstBufferBlock(index)); code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); @@ -1819,15 +1820,17 @@ private: } Expression HMergeH0(Operation operation) { - std::string dest = VisitOperand(operation, 0).AsUint(); - std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint}; + const std::string dest = VisitOperand(operation, 0).AsUint(); + const std::string src = VisitOperand(operation, 1).AsUint(); + return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), + Type::HalfFloat}; } Expression HMergeH1(Operation operation) { - std::string dest = VisitOperand(operation, 0).AsUint(); - std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint}; + const std::string dest = VisitOperand(operation, 0).AsUint(); + const std::string src = VisitOperand(operation, 1).AsUint(); + return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), + Type::HalfFloat}; } Expression HPack2(Operation operation) { @@ -2117,8 +2120,14 @@ private: return {}; } return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), - Visit(operation[1]).As(type)), - type}; + Visit(operation[1]).AsUint()), + Type::Uint}; + } + + template <const std::string_view& opname, Type type> + Expression Reduce(Operation operation) { + code.AddLine("{};", Atomic<opname, type>(operation).GetCode()); + return {}; } Expression Branch(Operation operation) { @@ -2477,6 +2486,20 @@ private: &GLSLDecompiler::Atomic<Func::Or, Type::Int>, &GLSLDecompiler::Atomic<Func::Xor, Type::Int>, + &GLSLDecompiler::Reduce<Func::Add, Type::Uint>, + &GLSLDecompiler::Reduce<Func::Min, Type::Uint>, + &GLSLDecompiler::Reduce<Func::Max, Type::Uint>, + &GLSLDecompiler::Reduce<Func::And, Type::Uint>, + &GLSLDecompiler::Reduce<Func::Or, Type::Uint>, + &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>, + + &GLSLDecompiler::Reduce<Func::Add, Type::Int>, + &GLSLDecompiler::Reduce<Func::Min, Type::Int>, + &GLSLDecompiler::Reduce<Func::Max, Type::Int>, + &GLSLDecompiler::Reduce<Func::And, Type::Int>, + &GLSLDecompiler::Reduce<Func::Or, Type::Int>, + &GLSLDecompiler::Reduce<Func::Xor, Type::Int>, + &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, &GLSLDecompiler::PushFlowStack, diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 255ac3147..d24fad3de 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -185,6 +185,12 @@ void SetupDirtyPointSize(Tables& tables) { tables[0][OFF(point_sprite_enable)] = PointSize; } +void SetupDirtyLineWidth(Tables& tables) { + tables[0][OFF(line_width_smooth)] = LineWidth; + tables[0][OFF(line_width_aliased)] = LineWidth; + tables[0][OFF(line_smooth_enable)] = LineWidth; +} + void SetupDirtyClipControl(Tables& tables) { auto& table = tables[0]; table[OFF(screen_y_control)] = ClipControl; @@ -233,6 +239,7 @@ void StateTracker::Initialize() { SetupDirtyLogicOp(tables); SetupDirtyFragmentClampColor(tables); SetupDirtyPointSize(tables); + SetupDirtyLineWidth(tables); SetupDirtyClipControl(tables); SetupDirtyDepthClampEnabled(tables); SetupDirtyMisc(tables); diff --git 
a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index b882d75c3..0f823288e 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -78,6 +78,7 @@ enum : u8 { LogicOp, FragmentClampColor, PointSize, + LineWidth, ClipControl, DepthClampEnabled, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 36590a6d0..2729d1265 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -411,14 +411,13 @@ CachedSurfaceView::~CachedSurfaceView() = default; void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { ASSERT(params.num_levels == 1); - const GLuint texture = surface.GetTexture(); if (params.num_layers > 1) { // Layered framebuffer attachments UNIMPLEMENTED_IF(params.base_layer != 0); switch (params.target) { case SurfaceTarget::Texture2DArray: - glFramebufferTexture(target, attachment, texture, params.base_level); + glFramebufferTexture(target, attachment, GetTexture(), 0); break; default: UNIMPLEMENTED(); @@ -427,6 +426,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { } const GLenum view_target = surface.GetTarget(); + const GLuint texture = surface.GetTexture(); switch (surface.GetSurfaceParams().target) { case SurfaceTarget::Texture1D: glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 89f0e04ef..2c0c77c28 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -191,6 +191,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::Linear: return GL_LINEAR_MIPMAP_LINEAR; } + break; } case Tegra::Texture::TextureFilter::Nearest: { switch (mip_filter_mode) { @@ -201,6 +202,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::Linear: return GL_NEAREST_MIPMAP_LINEAR; } + break; } } LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index f1a28cc21..b2a179746 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -315,8 +315,8 @@ public: RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, Core::Frontend::GraphicsContext& context) - : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, - frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {} + : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, + has_debug_tool{HasDebugTool()} {} RendererOpenGL::~RendererOpenGL() = default; diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index b751086fa..6d7bb16b2 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -14,68 +14,6 @@ namespace OpenGL { -struct VertexArrayPushBuffer::Entry { - GLuint binding_index{}; - const GLuint* buffer{}; - GLintptr offset{}; - GLsizei stride{}; -}; - -VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& 
state_tracker) - : state_tracker{state_tracker} {} - -VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; - -void VertexArrayPushBuffer::Setup() { - index_buffer = nullptr; - vertex_buffers.clear(); -} - -void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) { - index_buffer = buffer; -} - -void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer, - GLintptr offset, GLsizei stride) { - vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride}); -} - -void VertexArrayPushBuffer::Bind() { - if (index_buffer) { - state_tracker.BindIndexBuffer(*index_buffer); - } - - for (const auto& entry : vertex_buffers) { - glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride); - } -} - -struct BindBuffersRangePushBuffer::Entry { - GLuint binding; - const GLuint* buffer; - GLintptr offset; - GLsizeiptr size; -}; - -BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} - -BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; - -void BindBuffersRangePushBuffer::Setup() { - entries.clear(); -} - -void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset, - GLsizeiptr size) { - entries.push_back(Entry{binding, buffer, offset, size}); -} - -void BindBuffersRangePushBuffer::Bind() { - for (const Entry& entry : entries) { - glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size); - } -} - void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { if (!GLAD_GL_KHR_debug) { // We don't need to throw an error as this is just for debugging diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 47ee3177b..9c09ee12c 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -11,49 +11,6 @@ namespace OpenGL { -class StateTracker; - -class VertexArrayPushBuffer final { -public: - explicit VertexArrayPushBuffer(StateTracker& state_tracker); - ~VertexArrayPushBuffer(); - - void Setup(); - - void SetIndexBuffer(const GLuint* buffer); - - void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset, - GLsizei stride); - - void Bind(); - -private: - struct Entry; - - StateTracker& state_tracker; - - const GLuint* index_buffer{}; - std::vector<Entry> vertex_buffers; -}; - -class BindBuffersRangePushBuffer final { -public: - explicit BindBuffersRangePushBuffer(GLenum target); - ~BindBuffersRangePushBuffer(); - - void Setup(); - - void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size); - - void Bind(); - -private: - struct Entry; - - GLenum target; - std::vector<Entry> entries; -}; - void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h deleted file mode 100644 index 323bf6b39..000000000 --- a/src/video_core/renderer_vulkan/declarations.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -namespace vk { -class DispatchLoaderDynamic; -} - -namespace Vulkan { -constexpr vk::DispatchLoaderDynamic* dont_use_me_dld = nullptr; -} - -#define VULKAN_HPP_DEFAULT_DISPATCHER (*::Vulkan::dont_use_me_dld) -#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0 -#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 -#include <vulkan/vulkan.hpp> - -namespace Vulkan { - -// vulkan.hpp unique handlers use DispatchLoaderStatic -template <typename T> -using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>; - -using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>; -using UniqueBuffer = UniqueHandle<vk::Buffer>; -using UniqueBufferView = UniqueHandle<vk::BufferView>; -using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>; -using UniqueCommandPool = UniqueHandle<vk::CommandPool>; -using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>; -using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>; -using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>; -using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>; -using UniqueDevice = UniqueHandle<vk::Device>; -using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>; -using UniqueEvent = UniqueHandle<vk::Event>; -using UniqueFence = UniqueHandle<vk::Fence>; -using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>; -using UniqueImage = UniqueHandle<vk::Image>; -using UniqueImageView = UniqueHandle<vk::ImageView>; -using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>; -using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>; -using UniquePipeline = UniqueHandle<vk::Pipeline>; -using UniquePipelineCache = UniqueHandle<vk::PipelineCache>; -using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>; -using UniqueQueryPool = UniqueHandle<vk::QueryPool>; -using UniqueRenderPass = UniqueHandle<vk::RenderPass>; -using UniqueSampler = UniqueHandle<vk::Sampler>; -using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>; -using UniqueSemaphore = UniqueHandle<vk::Semaphore>; -using UniqueShaderModule = UniqueHandle<vk::ShaderModule>; -using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>; -using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>; -using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>; -using UniqueDebugUtilsMessengerEXT = UniqueHandle<vk::DebugUtilsMessengerEXT>; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 7480cb7c3..8681b821f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -2,13 +2,15 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
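The deleted declarations.h existed only to force vulkan.hpp into fully dynamic dispatch through a shared vk::DispatchLoaderDynamic; the wrapper.h include added just below takes over that role with plain C types. The core of any such replacement is a hand-loaded dispatch table. A minimal sketch, under the assumption that entry points are resolved once via vkGetInstanceProcAddr (wrapper.h's real layout is not shown in this diff):

    #include <vulkan/vulkan_core.h>

    // A tiny dispatch table in the spirit of a hand-rolled wrapper. Only two
    // entry points are loaded here; a real table covers every call the renderer uses.
    struct InstanceDispatch {
        PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = nullptr;
        PFN_vkDestroyInstance vkDestroyInstance = nullptr;
        PFN_vkCreateDevice vkCreateDevice = nullptr;
    };

    // Resolve instance-level entry points; vkGetInstanceProcAddr itself must
    // have been obtained from the Vulkan loader beforehand.
    bool LoadInstanceDispatch(VkInstance instance, InstanceDispatch& dld) {
        const auto load = [&](const char* name) {
            return dld.vkGetInstanceProcAddr(instance, name);
        };
        dld.vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(load("vkDestroyInstance"));
        dld.vkCreateDevice = reinterpret_cast<PFN_vkCreateDevice>(load("vkCreateDevice"));
        return dld.vkDestroyInstance != nullptr && dld.vkCreateDevice != nullptr;
    }

Compared with vulkan.hpp's DispatchLoaderDynamic the cost is spelling the members out by hand; the benefit is plain C types and no dependency on the large vulkan.hpp header, which is consistent with the enum migration in the hunks that follow.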
+#include <iterator> + #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" namespace Vulkan::MaxwellToVK { @@ -17,88 +19,88 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; namespace Sampler { -vk::Filter Filter(Tegra::Texture::TextureFilter filter) { +VkFilter Filter(Tegra::Texture::TextureFilter filter) { switch (filter) { case Tegra::Texture::TextureFilter::Linear: - return vk::Filter::eLinear; + return VK_FILTER_LINEAR; case Tegra::Texture::TextureFilter::Nearest: - return vk::Filter::eNearest; + return VK_FILTER_NEAREST; } UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); return {}; } -vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { +VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { switch (mipmap_filter) { case Tegra::Texture::TextureMipmapFilter::None: // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to // use an image view with a single mipmap level to emulate this. - return vk::SamplerMipmapMode::eLinear; + return VK_SAMPLER_MIPMAP_MODE_LINEAR; case Tegra::Texture::TextureMipmapFilter::Linear: - return vk::SamplerMipmapMode::eLinear; + return VK_SAMPLER_MIPMAP_MODE_LINEAR; case Tegra::Texture::TextureMipmapFilter::Nearest: - return vk::SamplerMipmapMode::eNearest; + return VK_SAMPLER_MIPMAP_MODE_NEAREST; } UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); return {}; } -vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, - Tegra::Texture::TextureFilter filter) { +VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, + Tegra::Texture::TextureFilter filter) { switch (wrap_mode) { case Tegra::Texture::WrapMode::Wrap: - return vk::SamplerAddressMode::eRepeat; + return VK_SAMPLER_ADDRESS_MODE_REPEAT; case Tegra::Texture::WrapMode::Mirror: - return vk::SamplerAddressMode::eMirroredRepeat; + return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; case Tegra::Texture::WrapMode::ClampToEdge: - return vk::SamplerAddressMode::eClampToEdge; + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::Border: - return vk::SamplerAddressMode::eClampToBorder; + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; case Tegra::Texture::WrapMode::Clamp: - if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { + if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this // by sending an invalid enumeration.
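// (0xcafe is just a recognizable out-of-range value; per the comment above, any invalid enumeration triggers the driver's GL_CLAMP fallback.)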
- return static_cast<vk::SamplerAddressMode>(0xcafe); + return static_cast<VkSamplerAddressMode>(0xcafe); } // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors switch (filter) { case Tegra::Texture::TextureFilter::Nearest: - return vk::SamplerAddressMode::eClampToEdge; + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; case Tegra::Texture::TextureFilter::Linear: - return vk::SamplerAddressMode::eClampToBorder; + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; } UNREACHABLE(); - return vk::SamplerAddressMode::eClampToEdge; + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: - return vk::SamplerAddressMode::eMirrorClampToEdge; + return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::MirrorOnceBorder: UNIMPLEMENTED(); - return vk::SamplerAddressMode::eMirrorClampToEdge; + return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; default: UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); return {}; } } -vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { +VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { switch (depth_compare_func) { case Tegra::Texture::DepthCompareFunc::Never: - return vk::CompareOp::eNever; + return VK_COMPARE_OP_NEVER; case Tegra::Texture::DepthCompareFunc::Less: - return vk::CompareOp::eLess; + return VK_COMPARE_OP_LESS; case Tegra::Texture::DepthCompareFunc::LessEqual: - return vk::CompareOp::eLessOrEqual; + return VK_COMPARE_OP_LESS_OR_EQUAL; case Tegra::Texture::DepthCompareFunc::Equal: - return vk::CompareOp::eEqual; + return VK_COMPARE_OP_EQUAL; case Tegra::Texture::DepthCompareFunc::NotEqual: - return vk::CompareOp::eNotEqual; + return VK_COMPARE_OP_NOT_EQUAL; case Tegra::Texture::DepthCompareFunc::Greater: - return vk::CompareOp::eGreater; + return VK_COMPARE_OP_GREATER; case Tegra::Texture::DepthCompareFunc::GreaterEqual: - return vk::CompareOp::eGreaterOrEqual; + return VK_COMPARE_OP_GREATER_OR_EQUAL; case Tegra::Texture::DepthCompareFunc::Always: - return vk::CompareOp::eAlways; + return VK_COMPARE_OP_ALWAYS; } UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", static_cast<u32>(depth_compare_func)); @@ -112,92 +115,92 @@ namespace { enum : u32 { Attachable = 1, Storage = 2 }; struct FormatTuple { - vk::Format format; ///< Vulkan format - int usage; ///< Describes image format usage + VkFormat format; ///< Vulkan format + int usage = 0; ///< Describes image format usage } constexpr tex_format_tuples[] = { - {vk::Format::eA8B8G8R8UnormPack32, Attachable | Storage}, // ABGR8U - {vk::Format::eA8B8G8R8SnormPack32, Attachable | Storage}, // ABGR8S - {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI - {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U - {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U - {vk::Format::eA1R5G5B5UnormPack16, Attachable}, // A1B5G5R5U (flipped with swizzle) - {vk::Format::eR8Unorm, Attachable | Storage}, // R8U - {vk::Format::eR8Uint, Attachable | Storage}, // R8UI - {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F - {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U - {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S - {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI - {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F - {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI - 
{vk::Format::eBc1RgbaUnormBlock, {}}, // DXT1 - {vk::Format::eBc2UnormBlock, {}}, // DXT23 - {vk::Format::eBc3UnormBlock, {}}, // DXT45 - {vk::Format::eBc4UnormBlock, {}}, // DXN1 - {vk::Format::eBc5UnormBlock, {}}, // DXN2UNORM - {vk::Format::eBc5SnormBlock, {}}, // DXN2SNORM - {vk::Format::eBc7UnormBlock, {}}, // BC7U - {vk::Format::eBc6HUfloatBlock, {}}, // BC6H_UF16 - {vk::Format::eBc6HSfloatBlock, {}}, // BC6H_SF16 - {vk::Format::eAstc4x4UnormBlock, {}}, // ASTC_2D_4X4 - {vk::Format::eB8G8R8A8Unorm, {}}, // BGRA8 - {vk::Format::eR32G32B32A32Sfloat, Attachable | Storage}, // RGBA32F - {vk::Format::eR32G32Sfloat, Attachable | Storage}, // RG32F - {vk::Format::eR32Sfloat, Attachable | Storage}, // R32F - {vk::Format::eR16Sfloat, Attachable | Storage}, // R16F - {vk::Format::eR16Unorm, Attachable | Storage}, // R16U - {vk::Format::eUndefined, {}}, // R16S - {vk::Format::eUndefined, {}}, // R16UI - {vk::Format::eUndefined, {}}, // R16I - {vk::Format::eR16G16Unorm, Attachable | Storage}, // RG16 - {vk::Format::eR16G16Sfloat, Attachable | Storage}, // RG16F - {vk::Format::eUndefined, {}}, // RG16UI - {vk::Format::eUndefined, {}}, // RG16I - {vk::Format::eR16G16Snorm, Attachable | Storage}, // RG16S - {vk::Format::eUndefined, {}}, // RGB32F - {vk::Format::eR8G8B8A8Srgb, Attachable}, // RGBA8_SRGB - {vk::Format::eR8G8Unorm, Attachable | Storage}, // RG8U - {vk::Format::eR8G8Snorm, Attachable | Storage}, // RG8S - {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI - {vk::Format::eUndefined, {}}, // RGBX16F - {vk::Format::eR32Uint, Attachable | Storage}, // R32UI - {vk::Format::eR32Sint, Attachable | Storage}, // R32I - {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8 - {vk::Format::eUndefined, {}}, // ASTC_2D_8X5 - {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 - {vk::Format::eUndefined, {}}, // BGRA8_SRGB - {vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB - {vk::Format::eBc2SrgbBlock, {}}, // DXT23_SRGB - {vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB - {vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB - {vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U - {vk::Format::eAstc4x4SrgbBlock, {}}, // ASTC_2D_4X4_SRGB - {vk::Format::eAstc8x8SrgbBlock, {}}, // ASTC_2D_8X8_SRGB - {vk::Format::eAstc8x5SrgbBlock, {}}, // ASTC_2D_8X5_SRGB - {vk::Format::eAstc5x4SrgbBlock, {}}, // ASTC_2D_5X4_SRGB - {vk::Format::eAstc5x5UnormBlock, {}}, // ASTC_2D_5X5 - {vk::Format::eAstc5x5SrgbBlock, {}}, // ASTC_2D_5X5_SRGB - {vk::Format::eAstc10x8UnormBlock, {}}, // ASTC_2D_10X8 - {vk::Format::eAstc10x8SrgbBlock, {}}, // ASTC_2D_10X8_SRGB - {vk::Format::eAstc6x6UnormBlock, {}}, // ASTC_2D_6X6 - {vk::Format::eAstc6x6SrgbBlock, {}}, // ASTC_2D_6X6_SRGB - {vk::Format::eAstc10x10UnormBlock, {}}, // ASTC_2D_10X10 - {vk::Format::eAstc10x10SrgbBlock, {}}, // ASTC_2D_10X10_SRGB - {vk::Format::eAstc12x12UnormBlock, {}}, // ASTC_2D_12X12 - {vk::Format::eAstc12x12SrgbBlock, {}}, // ASTC_2D_12X12_SRGB - {vk::Format::eAstc8x6UnormBlock, {}}, // ASTC_2D_8X6 - {vk::Format::eAstc8x6SrgbBlock, {}}, // ASTC_2D_8X6_SRGB - {vk::Format::eAstc6x5UnormBlock, {}}, // ASTC_2D_6X5 - {vk::Format::eAstc6x5SrgbBlock, {}}, // ASTC_2D_6X5_SRGB - {vk::Format::eE5B9G9R9UfloatPack32, {}}, // E5B9G9R9F + {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // ABGR8U + {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // ABGR8S + {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // ABGR8UI + {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5U + {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U + 
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5U (flipped with swizzle) + {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8U + {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8UI + {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // RGBA16F + {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // RGBA16U + {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // RGBA16S + {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // RGBA16UI + {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // R11FG11FB10F + {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // RGBA32UI + {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // DXT1 + {VK_FORMAT_BC2_UNORM_BLOCK}, // DXT23 + {VK_FORMAT_BC3_UNORM_BLOCK}, // DXT45 + {VK_FORMAT_BC4_UNORM_BLOCK}, // DXN1 + {VK_FORMAT_BC5_UNORM_BLOCK}, // DXN2UNORM + {VK_FORMAT_BC5_SNORM_BLOCK}, // DXN2SNORM + {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7U + {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 + {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 + {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 + {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8 + {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F + {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F + {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F + {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F + {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U + {VK_FORMAT_UNDEFINED}, // R16S + {VK_FORMAT_UNDEFINED}, // R16UI + {VK_FORMAT_UNDEFINED}, // R16I + {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 + {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F + {VK_FORMAT_UNDEFINED}, // RG16UI + {VK_FORMAT_UNDEFINED}, // RG16I + {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // RG16S + {VK_FORMAT_UNDEFINED}, // RGB32F + {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB + {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U + {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S + {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI + {VK_FORMAT_UNDEFINED}, // RGBX16F + {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI + {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32I + {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 + {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 + {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 + {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB + {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB + {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB + {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB + {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7U_SRGB + {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // R4G4B4A4U + {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB + {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB + {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB + {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB + {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5 + {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB + {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8 + {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB + {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6 + {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB + {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10 + {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB + {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12 + {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB + {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6 + {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB + {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5 + {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB + {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // 
E5B9G9R9F // Depth formats - {vk::Format::eD32Sfloat, Attachable}, // Z32F - {vk::Format::eD16Unorm, Attachable}, // Z16 + {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F + {VK_FORMAT_D16_UNORM, Attachable}, // Z16 // DepthStencil formats - {vk::Format::eD24UnormS8Uint, Attachable}, // Z24S8 - {vk::Format::eD24UnormS8Uint, Attachable}, // S8Z24 (emulated) - {vk::Format::eD32SfloatS8Uint, Attachable}, // Z32FS8 + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // Z24S8 + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8Z24 (emulated) + {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8 }; static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat); @@ -212,106 +215,106 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples)); auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; - if (tuple.format == vk::Format::eUndefined) { + if (tuple.format == VK_FORMAT_UNDEFINED) { UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", static_cast<u32>(pixel_format)); - return {vk::Format::eA8B8G8R8UnormPack32, true, true}; + return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; } // Use ABGR8 on hardware that doesn't support ASTC natively if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) - ? vk::Format::eA8B8G8R8SrgbPack32 - : vk::Format::eA8B8G8R8UnormPack32; + ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 + : VK_FORMAT_A8B8G8R8_UNORM_PACK32; } const bool attachable = tuple.usage & Attachable; const bool storage = tuple.usage & Storage; - vk::FormatFeatureFlags usage; + VkFormatFeatureFlags usage; if (format_type == FormatType::Buffer) { - usage = vk::FormatFeatureFlagBits::eStorageTexelBuffer | - vk::FormatFeatureFlagBits::eUniformTexelBuffer; + usage = + VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; } else { - usage = vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eTransferDst | - vk::FormatFeatureFlagBits::eTransferSrc; + usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | + VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; if (attachable) { - usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment - : vk::FormatFeatureFlagBits::eColorAttachment; + usage |= IsZetaFormat(pixel_format) ? 
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT + : VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; } if (storage) { - usage |= vk::FormatFeatureFlagBits::eStorageImage; + usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; } } return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } -vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { +VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { switch (stage) { case Tegra::Engines::ShaderType::Vertex: - return vk::ShaderStageFlagBits::eVertex; + return VK_SHADER_STAGE_VERTEX_BIT; case Tegra::Engines::ShaderType::TesselationControl: - return vk::ShaderStageFlagBits::eTessellationControl; + return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; case Tegra::Engines::ShaderType::TesselationEval: - return vk::ShaderStageFlagBits::eTessellationEvaluation; + return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; case Tegra::Engines::ShaderType::Geometry: - return vk::ShaderStageFlagBits::eGeometry; + return VK_SHADER_STAGE_GEOMETRY_BIT; case Tegra::Engines::ShaderType::Fragment: - return vk::ShaderStageFlagBits::eFragment; + return VK_SHADER_STAGE_FRAGMENT_BIT; case Tegra::Engines::ShaderType::Compute: - return vk::ShaderStageFlagBits::eCompute; + return VK_SHADER_STAGE_COMPUTE_BIT; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); return {}; } -vk::PrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, - Maxwell::PrimitiveTopology topology) { +VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, + Maxwell::PrimitiveTopology topology) { switch (topology) { case Maxwell::PrimitiveTopology::Points: - return vk::PrimitiveTopology::ePointList; + return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; case Maxwell::PrimitiveTopology::Lines: - return vk::PrimitiveTopology::eLineList; + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; case Maxwell::PrimitiveTopology::LineStrip: - return vk::PrimitiveTopology::eLineStrip; + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; case Maxwell::PrimitiveTopology::Triangles: - return vk::PrimitiveTopology::eTriangleList; + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; case Maxwell::PrimitiveTopology::TriangleStrip: - return vk::PrimitiveTopology::eTriangleStrip; + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; case Maxwell::PrimitiveTopology::TriangleFan: - return vk::PrimitiveTopology::eTriangleFan; + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; case Maxwell::PrimitiveTopology::Quads: // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases - return vk::PrimitiveTopology::eTriangleList; + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; case Maxwell::PrimitiveTopology::Patches: - return vk::PrimitiveTopology::ePatchList; + return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; default: UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); return {}; } } -vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { +VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { switch (type) { case Maxwell::VertexAttribute::Type::SignedNorm: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Snorm; + return VK_FORMAT_R8_SNORM; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Snorm; + return VK_FORMAT_R8G8_SNORM; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Snorm; + return VK_FORMAT_R8G8B8_SNORM; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - 
return vk::Format::eR8G8B8A8Snorm; + return VK_FORMAT_R8G8B8A8_SNORM; case Maxwell::VertexAttribute::Size::Size_16: - return vk::Format::eR16Snorm; + return VK_FORMAT_R16_SNORM; case Maxwell::VertexAttribute::Size::Size_16_16: - return vk::Format::eR16G16Snorm; + return VK_FORMAT_R16G16_SNORM; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return vk::Format::eR16G16B16Snorm; + return VK_FORMAT_R16G16B16_SNORM; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Snorm; + return VK_FORMAT_R16G16B16A16_SNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: - return vk::Format::eA2B10G10R10SnormPack32; + return VK_FORMAT_A2B10G10R10_SNORM_PACK32; default: break; } @@ -319,23 +322,23 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr case Maxwell::VertexAttribute::Type::UnsignedNorm: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Unorm; + return VK_FORMAT_R8_UNORM; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Unorm; + return VK_FORMAT_R8G8_UNORM; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Unorm; + return VK_FORMAT_R8G8B8_UNORM; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Unorm; + return VK_FORMAT_R8G8B8A8_UNORM; case Maxwell::VertexAttribute::Size::Size_16: - return vk::Format::eR16Unorm; + return VK_FORMAT_R16_UNORM; case Maxwell::VertexAttribute::Size::Size_16_16: - return vk::Format::eR16G16Unorm; + return VK_FORMAT_R16G16_UNORM; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return vk::Format::eR16G16B16Unorm; + return VK_FORMAT_R16G16B16_UNORM; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Unorm; + return VK_FORMAT_R16G16B16A16_UNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: - return vk::Format::eA2B10G10R10UnormPack32; + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; default: break; } @@ -343,59 +346,69 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr case Maxwell::VertexAttribute::Type::SignedInt: switch (size) { case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Sint; + return VK_FORMAT_R16G16B16A16_SINT; case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Sint; + return VK_FORMAT_R8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Sint; + return VK_FORMAT_R8G8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Sint; + return VK_FORMAT_R8G8B8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Sint; + return VK_FORMAT_R8G8B8A8_SINT; case Maxwell::VertexAttribute::Size::Size_32: - return vk::Format::eR32Sint; + return VK_FORMAT_R32_SINT; default: break; } + break; case Maxwell::VertexAttribute::Type::UnsignedInt: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Uint; + return VK_FORMAT_R8_UINT; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Uint; + return VK_FORMAT_R8G8_UINT; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Uint; + return VK_FORMAT_R8G8B8_UINT; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Uint; + return VK_FORMAT_R8G8B8A8_UINT; + case Maxwell::VertexAttribute::Size::Size_16: + return VK_FORMAT_R16_UINT; + case Maxwell::VertexAttribute::Size::Size_16_16: + return VK_FORMAT_R16G16_UINT; + 
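
The break statements added after the inner switches here (after SignedInt above, and after UnsignedInt just below) are not cosmetic: without them, a size that misses every inner case falls out of the inner switch and control continues into the next outer case, translating the attribute as the wrong type. A reduced, self-contained C++ sketch of the hazard, using hypothetical Type/Size enums rather than the project's types:

#include <cstdio>

enum class Type { SignedInt, UnsignedInt };
enum class Size { S8, S32 };

// Reduced version of a VertexFormat-style translator. Without the marked
// break, an unhandled SignedInt size falls through into the UnsignedInt case.
int Translate(Type type, Size size) {
    switch (type) {
    case Type::SignedInt:
        switch (size) {
        case Size::S8:
            return 1; // e.g. VK_FORMAT_R8_SINT
        default:
            break;
        }
        break; // the added statement; removing it leaks into UnsignedInt
    case Type::UnsignedInt:
        switch (size) {
        case Size::S32:
            return 2; // e.g. VK_FORMAT_R32_UINT
        default:
            break;
        }
        break;
    }
    return 0; // unimplemented combination
}

int main() {
    std::printf("%d\n", Translate(Type::SignedInt, Size::S32)); // prints 0, not 2
}
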
case Maxwell::VertexAttribute::Size::Size_16_16_16: + return VK_FORMAT_R16G16B16_UINT; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return VK_FORMAT_R16G16B16A16_UINT; case Maxwell::VertexAttribute::Size::Size_32: - return vk::Format::eR32Uint; + return VK_FORMAT_R32_UINT; case Maxwell::VertexAttribute::Size::Size_32_32: - return vk::Format::eR32G32Uint; + return VK_FORMAT_R32G32_UINT; case Maxwell::VertexAttribute::Size::Size_32_32_32: - return vk::Format::eR32G32B32Uint; + return VK_FORMAT_R32G32B32_UINT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: - return vk::Format::eR32G32B32A32Uint; + return VK_FORMAT_R32G32B32A32_UINT; default: break; } + break; case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Uscaled; + return VK_FORMAT_R8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Uscaled; + return VK_FORMAT_R8G8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Uscaled; + return VK_FORMAT_R8G8B8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Uscaled; + return VK_FORMAT_R8G8B8A8_USCALED; case Maxwell::VertexAttribute::Size::Size_16: - return vk::Format::eR16Uscaled; + return VK_FORMAT_R16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return vk::Format::eR16G16Uscaled; + return VK_FORMAT_R16G16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return vk::Format::eR16G16B16Uscaled; + return VK_FORMAT_R16G16B16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Uscaled; + return VK_FORMAT_R16G16B16A16_USCALED; default: break; } @@ -403,21 +416,21 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr case Maxwell::VertexAttribute::Type::SignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Sscaled; + return VK_FORMAT_R8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Sscaled; + return VK_FORMAT_R8G8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Sscaled; + return VK_FORMAT_R8G8B8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Sscaled; + return VK_FORMAT_R8G8B8A8_SSCALED; case Maxwell::VertexAttribute::Size::Size_16: - return vk::Format::eR16Sscaled; + return VK_FORMAT_R16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return vk::Format::eR16G16Sscaled; + return VK_FORMAT_R16G16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return vk::Format::eR16G16B16Sscaled; + return VK_FORMAT_R16G16B16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Sscaled; + return VK_FORMAT_R16G16B16A16_SSCALED; default: break; } @@ -425,21 +438,21 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr case Maxwell::VertexAttribute::Type::Float: switch (size) { case Maxwell::VertexAttribute::Size::Size_32: - return vk::Format::eR32Sfloat; + return VK_FORMAT_R32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32: - return vk::Format::eR32G32Sfloat; + return VK_FORMAT_R32G32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32: - return vk::Format::eR32G32B32Sfloat; + return VK_FORMAT_R32G32B32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: - return vk::Format::eR32G32B32A32Sfloat; + return 
VK_FORMAT_R32G32B32A32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16: - return vk::Format::eR16Sfloat; + return VK_FORMAT_R16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16: - return vk::Format::eR16G16Sfloat; + return VK_FORMAT_R16G16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return vk::Format::eR16G16B16Sfloat; + return VK_FORMAT_R16G16B16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Sfloat; + return VK_FORMAT_R16G16B16A16_SFLOAT; default: break; } @@ -450,210 +463,210 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr return {}; } -vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { +VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { switch (comparison) { case Maxwell::ComparisonOp::Never: case Maxwell::ComparisonOp::NeverOld: - return vk::CompareOp::eNever; + return VK_COMPARE_OP_NEVER; case Maxwell::ComparisonOp::Less: case Maxwell::ComparisonOp::LessOld: - return vk::CompareOp::eLess; + return VK_COMPARE_OP_LESS; case Maxwell::ComparisonOp::Equal: case Maxwell::ComparisonOp::EqualOld: - return vk::CompareOp::eEqual; + return VK_COMPARE_OP_EQUAL; case Maxwell::ComparisonOp::LessEqual: case Maxwell::ComparisonOp::LessEqualOld: - return vk::CompareOp::eLessOrEqual; + return VK_COMPARE_OP_LESS_OR_EQUAL; case Maxwell::ComparisonOp::Greater: case Maxwell::ComparisonOp::GreaterOld: - return vk::CompareOp::eGreater; + return VK_COMPARE_OP_GREATER; case Maxwell::ComparisonOp::NotEqual: case Maxwell::ComparisonOp::NotEqualOld: - return vk::CompareOp::eNotEqual; + return VK_COMPARE_OP_NOT_EQUAL; case Maxwell::ComparisonOp::GreaterEqual: case Maxwell::ComparisonOp::GreaterEqualOld: - return vk::CompareOp::eGreaterOrEqual; + return VK_COMPARE_OP_GREATER_OR_EQUAL; case Maxwell::ComparisonOp::Always: case Maxwell::ComparisonOp::AlwaysOld: - return vk::CompareOp::eAlways; + return VK_COMPARE_OP_ALWAYS; } UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); return {}; } -vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { +VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { switch (index_format) { case Maxwell::IndexFormat::UnsignedByte: if (!device.IsExtIndexTypeUint8Supported()) { UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); - return vk::IndexType::eUint16; + return VK_INDEX_TYPE_UINT16; } - return vk::IndexType::eUint8EXT; + return VK_INDEX_TYPE_UINT8_EXT; case Maxwell::IndexFormat::UnsignedShort: - return vk::IndexType::eUint16; + return VK_INDEX_TYPE_UINT16; case Maxwell::IndexFormat::UnsignedInt: - return vk::IndexType::eUint32; + return VK_INDEX_TYPE_UINT32; } UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); return {}; } -vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) { +VkStencilOp StencilOp(Maxwell::StencilOp stencil_op) { switch (stencil_op) { case Maxwell::StencilOp::Keep: case Maxwell::StencilOp::KeepOGL: - return vk::StencilOp::eKeep; + return VK_STENCIL_OP_KEEP; case Maxwell::StencilOp::Zero: case Maxwell::StencilOp::ZeroOGL: - return vk::StencilOp::eZero; + return VK_STENCIL_OP_ZERO; case Maxwell::StencilOp::Replace: case Maxwell::StencilOp::ReplaceOGL: - return vk::StencilOp::eReplace; + return VK_STENCIL_OP_REPLACE; case Maxwell::StencilOp::Incr: case Maxwell::StencilOp::IncrOGL: - return vk::StencilOp::eIncrementAndClamp; + return VK_STENCIL_OP_INCREMENT_AND_CLAMP; case 
Maxwell::StencilOp::Decr: case Maxwell::StencilOp::DecrOGL: - return vk::StencilOp::eDecrementAndClamp; + return VK_STENCIL_OP_DECREMENT_AND_CLAMP; case Maxwell::StencilOp::Invert: case Maxwell::StencilOp::InvertOGL: - return vk::StencilOp::eInvert; + return VK_STENCIL_OP_INVERT; case Maxwell::StencilOp::IncrWrap: case Maxwell::StencilOp::IncrWrapOGL: - return vk::StencilOp::eIncrementAndWrap; + return VK_STENCIL_OP_INCREMENT_AND_WRAP; case Maxwell::StencilOp::DecrWrap: case Maxwell::StencilOp::DecrWrapOGL: - return vk::StencilOp::eDecrementAndWrap; + return VK_STENCIL_OP_DECREMENT_AND_WRAP; } UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); return {}; } -vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) { +VkBlendOp BlendEquation(Maxwell::Blend::Equation equation) { switch (equation) { case Maxwell::Blend::Equation::Add: case Maxwell::Blend::Equation::AddGL: - return vk::BlendOp::eAdd; + return VK_BLEND_OP_ADD; case Maxwell::Blend::Equation::Subtract: case Maxwell::Blend::Equation::SubtractGL: - return vk::BlendOp::eSubtract; + return VK_BLEND_OP_SUBTRACT; case Maxwell::Blend::Equation::ReverseSubtract: case Maxwell::Blend::Equation::ReverseSubtractGL: - return vk::BlendOp::eReverseSubtract; + return VK_BLEND_OP_REVERSE_SUBTRACT; case Maxwell::Blend::Equation::Min: case Maxwell::Blend::Equation::MinGL: - return vk::BlendOp::eMin; + return VK_BLEND_OP_MIN; case Maxwell::Blend::Equation::Max: case Maxwell::Blend::Equation::MaxGL: - return vk::BlendOp::eMax; + return VK_BLEND_OP_MAX; } UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); return {}; } -vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) { +VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor) { switch (factor) { case Maxwell::Blend::Factor::Zero: case Maxwell::Blend::Factor::ZeroGL: - return vk::BlendFactor::eZero; + return VK_BLEND_FACTOR_ZERO; case Maxwell::Blend::Factor::One: case Maxwell::Blend::Factor::OneGL: - return vk::BlendFactor::eOne; + return VK_BLEND_FACTOR_ONE; case Maxwell::Blend::Factor::SourceColor: case Maxwell::Blend::Factor::SourceColorGL: - return vk::BlendFactor::eSrcColor; + return VK_BLEND_FACTOR_SRC_COLOR; case Maxwell::Blend::Factor::OneMinusSourceColor: case Maxwell::Blend::Factor::OneMinusSourceColorGL: - return vk::BlendFactor::eOneMinusSrcColor; + return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; case Maxwell::Blend::Factor::SourceAlpha: case Maxwell::Blend::Factor::SourceAlphaGL: - return vk::BlendFactor::eSrcAlpha; + return VK_BLEND_FACTOR_SRC_ALPHA; case Maxwell::Blend::Factor::OneMinusSourceAlpha: case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: - return vk::BlendFactor::eOneMinusSrcAlpha; + return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; case Maxwell::Blend::Factor::DestAlpha: case Maxwell::Blend::Factor::DestAlphaGL: - return vk::BlendFactor::eDstAlpha; + return VK_BLEND_FACTOR_DST_ALPHA; case Maxwell::Blend::Factor::OneMinusDestAlpha: case Maxwell::Blend::Factor::OneMinusDestAlphaGL: - return vk::BlendFactor::eOneMinusDstAlpha; + return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; case Maxwell::Blend::Factor::DestColor: case Maxwell::Blend::Factor::DestColorGL: - return vk::BlendFactor::eDstColor; + return VK_BLEND_FACTOR_DST_COLOR; case Maxwell::Blend::Factor::OneMinusDestColor: case Maxwell::Blend::Factor::OneMinusDestColorGL: - return vk::BlendFactor::eOneMinusDstColor; + return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; case Maxwell::Blend::Factor::SourceAlphaSaturate: case Maxwell::Blend::Factor::SourceAlphaSaturateGL: - 
return vk::BlendFactor::eSrcAlphaSaturate; + return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE; case Maxwell::Blend::Factor::Source1Color: case Maxwell::Blend::Factor::Source1ColorGL: - return vk::BlendFactor::eSrc1Color; + return VK_BLEND_FACTOR_SRC1_COLOR; case Maxwell::Blend::Factor::OneMinusSource1Color: case Maxwell::Blend::Factor::OneMinusSource1ColorGL: - return vk::BlendFactor::eOneMinusSrc1Color; + return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR; case Maxwell::Blend::Factor::Source1Alpha: case Maxwell::Blend::Factor::Source1AlphaGL: - return vk::BlendFactor::eSrc1Alpha; + return VK_BLEND_FACTOR_SRC1_ALPHA; case Maxwell::Blend::Factor::OneMinusSource1Alpha: case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: - return vk::BlendFactor::eOneMinusSrc1Alpha; + return VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA; case Maxwell::Blend::Factor::ConstantColor: case Maxwell::Blend::Factor::ConstantColorGL: - return vk::BlendFactor::eConstantColor; + return VK_BLEND_FACTOR_CONSTANT_COLOR; case Maxwell::Blend::Factor::OneMinusConstantColor: case Maxwell::Blend::Factor::OneMinusConstantColorGL: - return vk::BlendFactor::eOneMinusConstantColor; + return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; case Maxwell::Blend::Factor::ConstantAlpha: case Maxwell::Blend::Factor::ConstantAlphaGL: - return vk::BlendFactor::eConstantAlpha; + return VK_BLEND_FACTOR_CONSTANT_ALPHA; case Maxwell::Blend::Factor::OneMinusConstantAlpha: case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: - return vk::BlendFactor::eOneMinusConstantAlpha; + return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; } UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); return {}; } -vk::FrontFace FrontFace(Maxwell::FrontFace front_face) { +VkFrontFace FrontFace(Maxwell::FrontFace front_face) { switch (front_face) { case Maxwell::FrontFace::ClockWise: - return vk::FrontFace::eClockwise; + return VK_FRONT_FACE_CLOCKWISE; case Maxwell::FrontFace::CounterClockWise: - return vk::FrontFace::eCounterClockwise; + return VK_FRONT_FACE_COUNTER_CLOCKWISE; } UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); return {}; } -vk::CullModeFlags CullFace(Maxwell::CullFace cull_face) { +VkCullModeFlags CullFace(Maxwell::CullFace cull_face) { switch (cull_face) { case Maxwell::CullFace::Front: - return vk::CullModeFlagBits::eFront; + return VK_CULL_MODE_FRONT_BIT; case Maxwell::CullFace::Back: - return vk::CullModeFlagBits::eBack; + return VK_CULL_MODE_BACK_BIT; case Maxwell::CullFace::FrontAndBack: - return vk::CullModeFlagBits::eFrontAndBack; + return VK_CULL_MODE_FRONT_AND_BACK; } UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); return {}; } -vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { +VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { switch (swizzle) { case Tegra::Texture::SwizzleSource::Zero: - return vk::ComponentSwizzle::eZero; + return VK_COMPONENT_SWIZZLE_ZERO; case Tegra::Texture::SwizzleSource::R: - return vk::ComponentSwizzle::eR; + return VK_COMPONENT_SWIZZLE_R; case Tegra::Texture::SwizzleSource::G: - return vk::ComponentSwizzle::eG; + return VK_COMPONENT_SWIZZLE_G; case Tegra::Texture::SwizzleSource::B: - return vk::ComponentSwizzle::eB; + return VK_COMPONENT_SWIZZLE_B; case Tegra::Texture::SwizzleSource::A: - return vk::ComponentSwizzle::eA; + return VK_COMPONENT_SWIZZLE_A; case Tegra::Texture::SwizzleSource::OneInt: case Tegra::Texture::SwizzleSource::OneFloat: - return vk::ComponentSwizzle::eOne; + return 
VK_COMPONENT_SWIZZLE_ONE;
     }
     UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
     return {};
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 24f6ab544..81bce4c6c 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -6,8 +6,8 @@
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/surface.h"
 #include "video_core/textures/texture.h"
@@ -18,46 +18,45 @@ using PixelFormat = VideoCore::Surface::PixelFormat;

 namespace Sampler {

-vk::Filter Filter(Tegra::Texture::TextureFilter filter);
+VkFilter Filter(Tegra::Texture::TextureFilter filter);

-vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
+VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);

-vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
-                                Tegra::Texture::TextureFilter filter);
+VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
+                              Tegra::Texture::TextureFilter filter);

-vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
+VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);

 } // namespace Sampler

 struct FormatInfo {
-    vk::Format format;
+    VkFormat format;
     bool attachable;
     bool storage;
 };

 FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format);

-vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
+VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);

-vk::PrimitiveTopology PrimitiveTopology(const VKDevice& device,
-                                        Maxwell::PrimitiveTopology topology);
+VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology);

-vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
+VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);

-vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
+VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);

-vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format);
+VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format);

-vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
+VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);

-vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
+VkBlendOp BlendEquation(Maxwell::Blend::Equation equation);

-vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
+VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor);

-vk::FrontFace FrontFace(Maxwell::FrontFace front_face);
+VkFrontFace FrontFace(Maxwell::FrontFace front_face);

-vk::CullModeFlags CullFace(Maxwell::CullFace cull_face);
+VkCullModeFlags CullFace(Maxwell::CullFace cull_face);

-vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
+VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);

 } // namespace Vulkan::MaxwellToVK
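
With the header now trafficking in plain VkFilter, VkFormat and friends, callers can feed the results straight into raw create-info structs. A hedged usage sketch follows; the TSC type and its field names are assumptions for illustration, not code from this patch:

// Hypothetical glue: 'TSC' stands in for a Tegra sampler descriptor.
VkSamplerCreateInfo MakeSamplerCreateInfo(const VKDevice& device, const TSC& tsc) {
    VkSamplerCreateInfo ci{};
    ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
    ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter);
    ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter);
    ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter);
    ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter);
    ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter);
    ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
    return ci;
}
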
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 6953aaafe..dd590c38b 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -2,13 +2,18 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <algorithm>
+#include <array>
+#include <cstring>
 #include <memory>
 #include <optional>
+#include <string>
 #include <vector>

 #include <fmt/format.h>

 #include "common/assert.h"
+#include "common/dynamic_library.h"
 #include "common/logging/log.h"
 #include "common/telemetry.h"
 #include "core/core.h"
@@ -19,7 +24,6 @@
 #include "core/settings.h"
 #include "core/telemetry_session.h"
 #include "video_core/gpu.h"
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
 #include "video_core/renderer_vulkan/vk_device.h"
@@ -29,30 +33,145 @@
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 #include "video_core/renderer_vulkan/vk_swapchain.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+// Include these late to avoid polluting previous headers
+#ifdef _WIN32
+#include <windows.h>
+// ensure include order
+#include <vulkan/vulkan_win32.h>
+#endif
+
+#ifdef __linux__
+#include <X11/Xlib.h>
+#include <vulkan/vulkan_wayland.h>
+#include <vulkan/vulkan_xlib.h>
+#endif

 namespace Vulkan {

 namespace {

-VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
+using Core::Frontend::WindowSystemType;
+
+VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                        VkDebugUtilsMessageTypeFlagsEXT type,
                        const VkDebugUtilsMessengerCallbackDataEXT* data,
                        [[maybe_unused]] void* user_data) {
-    const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_};
     const char* message{data->pMessage};

-    if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) {
+    if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
         LOG_CRITICAL(Render_Vulkan, "{}", message);
-    } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) {
+    } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
         LOG_WARNING(Render_Vulkan, "{}", message);
-    } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo) {
+    } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
         LOG_INFO(Render_Vulkan, "{}", message);
-    } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose) {
+    } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
         LOG_DEBUG(Render_Vulkan, "{}", message);
     }
     return VK_FALSE;
 }

+Common::DynamicLibrary OpenVulkanLibrary() {
+    Common::DynamicLibrary library;
+#ifdef __APPLE__
+    // Check if a path to a specific Vulkan library has been specified.
+    char* libvulkan_env = getenv("LIBVULKAN_PATH");
+    if (!libvulkan_env || !library.Open(libvulkan_env)) {
+        // Use the libvulkan.dylib from the application bundle.
+        std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+        library.Open(filename.c_str());
+    }
+#else
+    std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
+    if (!library.Open(filename.c_str())) {
+        // Android devices may not have libvulkan.so.1, only libvulkan.so.
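
For reference, the versioned-then-bare fallback above can be reproduced with nothing but the platform loader. A minimal POSIX-only sketch (dlopen/dlsym stand in for what Common::DynamicLibrary wraps; this is an illustration, not the project's implementation):

#include <cstdio>
#include <dlfcn.h> // POSIX dynamic loading

#include <vulkan/vulkan.h>

int main() {
    // Try the versioned SONAME first, then the bare name (e.g. on Android).
    void* handle = dlopen("libvulkan.so.1", RTLD_NOW);
    if (!handle) {
        handle = dlopen("libvulkan.so", RTLD_NOW);
    }
    if (!handle) {
        std::fputs("Vulkan library not available\n", stderr);
        return 1;
    }
    // Everything else in the API is derived from this single exported symbol.
    const auto vkGetInstanceProcAddr =
        reinterpret_cast<PFN_vkGetInstanceProcAddr>(dlsym(handle, "vkGetInstanceProcAddr"));
    if (!vkGetInstanceProcAddr) {
        std::fputs("vkGetInstanceProcAddr not present in Vulkan\n", stderr);
        dlclose(handle);
        return 1;
    }
    std::puts("loader bootstrapped");
    dlclose(handle);
    return 0;
}
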
+ filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); + library.Open(filename.c_str()); + } +#endif + return library; +} + +vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatch& dld, + WindowSystemType window_type = WindowSystemType::Headless, + bool enable_layers = false) { + if (!library.IsOpen()) { + LOG_ERROR(Render_Vulkan, "Vulkan library not available"); + return {}; + } + if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) { + LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); + return {}; + } + if (!vk::Load(dld)) { + LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); + return {}; + } + + std::vector<const char*> extensions; + extensions.reserve(6); + switch (window_type) { + case Core::Frontend::WindowSystemType::Headless: + break; +#ifdef _WIN32 + case Core::Frontend::WindowSystemType::Windows: + extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); + break; +#endif +#ifdef __linux__ + case Core::Frontend::WindowSystemType::X11: + extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); + break; + case Core::Frontend::WindowSystemType::Wayland: + extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); + break; +#endif + default: + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + break; + } + if (window_type != Core::Frontend::WindowSystemType::Headless) { + extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + } + if (enable_layers) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + + const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); + if (!properties) { + LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); + return {}; + } + + for (const char* extension : extensions) { + const auto it = + std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) { + return !std::strcmp(extension, prop.extensionName); + }); + if (it == properties->end()) { + LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); + return {}; + } + } + + static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; + vk::Span<const char*> layers = layers_data; + if (!enable_layers) { + layers = {}; + } + vk::Instance instance = vk::Instance::Create(layers, extensions, dld); + if (!instance) { + LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); + return {}; + } + if (!vk::Load(*instance, dld)) { + LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); + } + return instance; +} + std::string GetReadableVersion(u32 version) { return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), VK_VERSION_PATCH(version)); @@ -63,14 +182,14 @@ std::string GetDriverVersion(const VKDevice& device) { // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 const u32 version = device.GetDriverVersion(); - if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { + if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { const u32 major = (version >> 22) & 0x3ff; const u32 minor = (version >> 14) & 0x0ff; const u32 secondary = (version >> 6) & 0x0ff; const u32 tertiary = version & 0x003f; return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary); } - if (device.GetDriverID() == vk::DriverIdKHR::eIntelProprietaryWindows) { 
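
CreateInstance above illustrates the two-phase bootstrap that the wrapper's vk::Load calls perform: before an instance exists, vkGetInstanceProcAddr can only resolve global entry points against a null instance, and instance-level pointers must be re-queried once the instance is created. A reduced sketch under those assumptions; Dispatch is a stand-in for the project's vk::InstanceDispatch:

#include <vulkan/vulkan.h>

struct Dispatch {
    PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = nullptr;
    PFN_vkCreateInstance vkCreateInstance = nullptr;
    PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices = nullptr;
};

// Phase 1: with a null instance only global functions are available.
bool LoadGlobal(Dispatch& dld) {
    dld.vkCreateInstance = reinterpret_cast<PFN_vkCreateInstance>(
        dld.vkGetInstanceProcAddr(nullptr, "vkCreateInstance"));
    return dld.vkCreateInstance != nullptr;
}

// Phase 2: instance-level functions must be queried against the new instance.
bool LoadInstance(VkInstance instance, Dispatch& dld) {
    dld.vkEnumeratePhysicalDevices = reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
        dld.vkGetInstanceProcAddr(instance, "vkEnumeratePhysicalDevices"));
    return dld.vkEnumeratePhysicalDevices != nullptr;
}
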
+ if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { const u32 major = version >> 14; const u32 minor = version & 0x3fff; return fmt::format("{}.{}", major, minor); @@ -147,27 +266,12 @@ bool RendererVulkan::TryPresent(int /*timeout_ms*/) { } bool RendererVulkan::Init() { - PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; - render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); - const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr); - - std::optional<vk::DebugUtilsMessengerEXT> callback; - if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) { - callback = CreateDebugCallback(dldi); - if (!callback) { - return false; - } - } - - if (!PickDevices(dldi)) { - if (callback) { - instance.destroy(*callback, nullptr, dldi); - } + library = OpenVulkanLibrary(); + instance = CreateInstance(library, dld, render_window.GetWindowInfo().type, + Settings::values.renderer_debug); + if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { return false; } - debug_callback = UniqueDebugUtilsMessengerEXT( - *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>( - instance, nullptr, device->GetDispatchLoader())); Report(); @@ -176,7 +280,7 @@ bool RendererVulkan::Init() { resource_manager = std::make_unique<VKResourceManager>(*device); const auto& framebuffer = render_window.GetFramebufferLayout(); - swapchain = std::make_unique<VKSwapchain>(surface, *device); + swapchain = std::make_unique<VKSwapchain>(*surface, *device); swapchain->Create(framebuffer.width, framebuffer.height, false); state_tracker = std::make_unique<StateTracker>(system); @@ -198,10 +302,8 @@ void RendererVulkan::ShutDown() { if (!device) { return; } - const auto dev = device->GetLogical(); - const auto& dld = device->GetDispatchLoader(); - if (dev && dld.vkDeviceWaitIdle) { - dev.waitIdle(dld); + if (const auto& dev = device->GetLogical()) { + dev.WaitIdle(); } rasterizer.reset(); @@ -213,44 +315,94 @@ void RendererVulkan::ShutDown() { device.reset(); } -std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback( - const vk::DispatchLoaderDynamic& dldi) { - const vk::DebugUtilsMessengerCreateInfoEXT callback_ci( - {}, - vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | - vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning | - vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo | - vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose, - vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral | - vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | - vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance, - &DebugCallback, nullptr); - vk::DebugUtilsMessengerEXT callback; - if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) != - vk::Result::eSuccess) { +bool RendererVulkan::CreateDebugCallback() { + if (!Settings::values.renderer_debug) { + return true; + } + debug_callback = instance.TryCreateDebugCallback(DebugCallback); + if (!debug_callback) { LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); - return {}; + return false; } - return callback; + return true; } -bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) { - const auto devices = instance.enumeratePhysicalDevices(dldi); +bool RendererVulkan::CreateSurface() { + [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo(); + VkSurfaceKHR unsafe_surface = nullptr; + +#ifdef _WIN32 + if (window_info.type == Core::Frontend::WindowSystemType::Windows) { + const HWND hWnd = 
static_cast<HWND>(window_info.render_surface); + const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, + nullptr, 0, nullptr, hWnd}; + const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); + if (!vkCreateWin32SurfaceKHR || + vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); + return false; + } + } +#endif +#ifdef __linux__ + if (window_info.type == Core::Frontend::WindowSystemType::X11) { + const VkXlibSurfaceCreateInfoKHR xlib_ci{ + VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast<Display*>(window_info.display_connection), + reinterpret_cast<Window>(window_info.render_surface)}; + const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); + if (!vkCreateXlibSurfaceKHR || + vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); + return false; + } + } + if (window_info.type == Core::Frontend::WindowSystemType::Wayland) { + const VkWaylandSurfaceCreateInfoKHR wayland_ci{ + VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast<wl_display*>(window_info.display_connection), + static_cast<wl_surface*>(window_info.render_surface)}; + const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); + if (!vkCreateWaylandSurfaceKHR || + vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) != + VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); + return false; + } + } +#endif + if (!unsafe_surface) { + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + return false; + } + + surface = vk::SurfaceKHR(unsafe_surface, *instance, dld); + return true; +} + +bool RendererVulkan::PickDevices() { + const auto devices = instance.EnumeratePhysicalDevices(); + if (!devices) { + LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices"); + return false; + } - // TODO(Rodrigo): Choose device from config file const s32 device_index = Settings::values.vulkan_device; - if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { + if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) { LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); return false; } - const vk::PhysicalDevice physical_device = devices[device_index]; - - if (!VKDevice::IsSuitable(dldi, physical_device, surface)) { + const vk::PhysicalDevice physical_device((*devices)[static_cast<std::size_t>(device_index)], + dld); + if (!VKDevice::IsSuitable(physical_device, *surface)) { return false; } - device = std::make_unique<VKDevice>(dldi, physical_device, surface); - return device->Create(dldi, instance); + device = std::make_unique<VKDevice>(*instance, physical_device, *surface, dld); + return device->Create(); } void RendererVulkan::Report() const { @@ -276,4 +428,25 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } +std::vector<std::string> RendererVulkan::EnumerateDevices() { + vk::InstanceDispatch dld; + Common::DynamicLibrary library = OpenVulkanLibrary(); + vk::Instance instance = 
CreateInstance(library, dld); + if (!instance) { + return {}; + } + + const std::optional physical_devices = instance.EnumeratePhysicalDevices(); + if (!physical_devices) { + return {}; + } + + std::vector<std::string> names; + names.reserve(physical_devices->size()); + for (const auto& device : *physical_devices) { + names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); + } + return names; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index d14384e79..18270909b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -6,10 +6,13 @@ #include <memory> #include <optional> +#include <string> #include <vector> +#include "common/dynamic_library.h" + #include "video_core/renderer_base.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Core { class System; @@ -44,22 +47,28 @@ public: void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; bool TryPresent(int timeout_ms) override; + static std::vector<std::string> EnumerateDevices(); + private: - std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( - const vk::DispatchLoaderDynamic& dldi); + bool CreateDebugCallback(); - bool PickDevices(const vk::DispatchLoaderDynamic& dldi); + bool CreateSurface(); + + bool PickDevices(); void Report() const; Core::System& system; + Common::DynamicLibrary library; + vk::InstanceDispatch dld; + vk::Instance instance; vk::SurfaceKHR surface; VKScreenInfo screen_info; - UniqueDebugUtilsMessengerEXT debug_callback; + vk::DebugCallback debug_callback; std::unique_ptr<VKDevice> device; std::unique_ptr<VKSwapchain> swapchain; std::unique_ptr<VKMemoryManager> memory_manager; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 855cfc883..fbd406f2b 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -20,7 +20,6 @@ #include "video_core/gpu.h" #include "video_core/morton.h" #include "video_core/rasterizer_interface.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_device.h" @@ -30,6 +29,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" namespace Vulkan { @@ -140,16 +140,25 @@ struct ScreenRectVertex { std::array<f32, 2> position; std::array<f32, 2> tex_coord; - static vk::VertexInputBindingDescription GetDescription() { - return vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertex), - vk::VertexInputRate::eVertex); + static VkVertexInputBindingDescription GetDescription() { + VkVertexInputBindingDescription description; + description.binding = 0; + description.stride = sizeof(ScreenRectVertex); + description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + return description; } - static std::array<vk::VertexInputAttributeDescription, 2> GetAttributes() { - return {vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32Sfloat, - offsetof(ScreenRectVertex, position)), - vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32Sfloat, - offsetof(ScreenRectVertex, 
tex_coord))}; + static std::array<VkVertexInputAttributeDescription, 2> GetAttributes() { + std::array<VkVertexInputAttributeDescription, 2> attributes; + attributes[0].location = 0; + attributes[0].binding = 0; + attributes[0].format = VK_FORMAT_R32G32_SFLOAT; + attributes[0].offset = offsetof(ScreenRectVertex, position); + attributes[1].location = 1; + attributes[1].binding = 0; + attributes[1].format = VK_FORMAT_R32G32_SFLOAT; + attributes[1].offset = offsetof(ScreenRectVertex, tex_coord); + return attributes; } }; @@ -172,16 +181,16 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer); } -vk::Format GetFormat(const Tegra::FramebufferConfig& framebuffer) { +VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { switch (framebuffer.pixel_format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return vk::Format::eA8B8G8R8UnormPack32; + return VK_FORMAT_A8B8G8R8_UNORM_PACK32; case Tegra::FramebufferConfig::PixelFormat::RGB565: - return vk::Format::eR5G6B5UnormPack16; + return VK_FORMAT_R5G6B5_UNORM_PACK16; default: UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", static_cast<u32>(framebuffer.pixel_format)); - return vk::Format::eA8B8G8R8UnormPack32; + return VK_FORMAT_A8B8G8R8_UNORM_PACK32; } } @@ -219,8 +228,8 @@ void VKBlitScreen::Recreate() { CreateDynamicResources(); } -std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated) { +std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, + bool use_accelerated) { RefreshResources(framebuffer); // Finish any pending renderpass @@ -255,46 +264,76 @@ std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferC framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, map.GetAddress() + image_offset, host_ptr); - blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferWrite, - vk::ImageLayout::eTransferDstOptimal); - - const vk::BufferImageCopy copy(image_offset, 0, 0, - {vk::ImageAspectFlagBits::eColor, 0, 0, 1}, {0, 0, 0}, - {framebuffer.width, framebuffer.height, 1}); - scheduler.Record([buffer_handle = *buffer, image = blit_image->GetHandle(), - copy](auto cmdbuf, auto& dld) { - cmdbuf.copyBufferToImage(buffer_handle, image, vk::ImageLayout::eTransferDstOptimal, - {copy}, dld); - }); + blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + VkBufferImageCopy copy; + copy.bufferOffset = image_offset; + copy.bufferRowLength = 0; + copy.bufferImageHeight = 0; + copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy.imageSubresource.mipLevel = 0; + copy.imageSubresource.baseArrayLayer = 0; + copy.imageSubresource.layerCount = 1; + copy.imageOffset.x = 0; + copy.imageOffset.y = 0; + copy.imageOffset.z = 0; + copy.imageExtent.width = framebuffer.width; + copy.imageExtent.height = framebuffer.height; + copy.imageExtent.depth = 1; + scheduler.Record( + [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); + }); } map.Release(); - blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eFragmentShader, - vk::AccessFlagBits::eShaderRead, - vk::ImageLayout::eShaderReadOnlyOptimal); + blit_image->Transition(0, 1, 0, 1, 
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], descriptor_set = descriptor_sets[image_index], buffer = *buffer, size = swapchain.GetSize(), pipeline = *pipeline, - layout = *pipeline_layout](auto cmdbuf, auto& dld) { - const vk::ClearValue clear_color{std::array{0.0f, 0.0f, 0.0f, 1.0f}}; - const vk::RenderPassBeginInfo renderpass_bi(renderpass, framebuffer, {{0, 0}, size}, 1, - &clear_color); - - cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld); - cmdbuf.setViewport( - 0, - {{0.0f, 0.0f, static_cast<f32>(size.width), static_cast<f32>(size.height), 0.0f, 1.0f}}, - dld); - cmdbuf.setScissor(0, {{{0, 0}, size}}, dld); - - cmdbuf.bindVertexBuffers(0, {buffer}, {offsetof(BufferData, vertices)}, dld); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, {descriptor_set}, {}, - dld); - cmdbuf.draw(4, 1, 0, 0, dld); - cmdbuf.endRenderPass(dld); + layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + VkClearValue clear_color; + clear_color.color.float32[0] = 0.0f; + clear_color.color.float32[1] = 0.0f; + clear_color.color.float32[2] = 0.0f; + clear_color.color.float32[3] = 0.0f; + + VkRenderPassBeginInfo renderpass_bi; + renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + renderpass_bi.pNext = nullptr; + renderpass_bi.renderPass = renderpass; + renderpass_bi.framebuffer = framebuffer; + renderpass_bi.renderArea.offset.x = 0; + renderpass_bi.renderArea.offset.y = 0; + renderpass_bi.renderArea.extent = size; + renderpass_bi.clearValueCount = 1; + renderpass_bi.pClearValues = &clear_color; + + VkViewport viewport; + viewport.x = 0.0f; + viewport.y = 0.0f; + viewport.width = static_cast<float>(size.width); + viewport.height = static_cast<float>(size.height); + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + + VkRect2D scissor; + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent = size; + + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + + cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); + cmdbuf.Draw(4, 1, 0, 0); + cmdbuf.EndRenderPass(); }); return {scheduler.GetFence(), *semaphores[image_index]}; @@ -334,165 +373,297 @@ void VKBlitScreen::CreateShaders() { } void VKBlitScreen::CreateSemaphores() { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - semaphores.resize(image_count); - for (std::size_t i = 0; i < image_count; ++i) { - semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld); - } + std::generate(semaphores.begin(), semaphores.end(), + [this] { return device.GetLogical().CreateSemaphore(); }); } void VKBlitScreen::CreateDescriptorPool() { - const std::array<vk::DescriptorPoolSize, 2> pool_sizes{ - vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, static_cast<u32>(image_count)}, - vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler, - static_cast<u32>(image_count)}}; - const vk::DescriptorPoolCreateInfo pool_ci( - {}, static_cast<u32>(image_count), static_cast<u32>(pool_sizes.size()), pool_sizes.data()); - const auto dev = device.GetLogical(); - descriptor_pool = 
dev.createDescriptorPoolUnique(pool_ci, nullptr, device.GetDispatchLoader()); + std::array<VkDescriptorPoolSize, 2> pool_sizes; + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + pool_sizes[0].descriptorCount = static_cast<u32>(image_count); + pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + pool_sizes[1].descriptorCount = static_cast<u32>(image_count); + + VkDescriptorPoolCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + ci.maxSets = static_cast<u32>(image_count); + ci.poolSizeCount = static_cast<u32>(pool_sizes.size()); + ci.pPoolSizes = pool_sizes.data(); + descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); } void VKBlitScreen::CreateRenderPass() { - const vk::AttachmentDescription color_attachment( - {}, swapchain.GetImageFormat(), vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eClear, - vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, - vk::AttachmentStoreOp::eDontCare, vk::ImageLayout::eUndefined, - vk::ImageLayout::ePresentSrcKHR); - - const vk::AttachmentReference color_attachment_ref(0, vk::ImageLayout::eColorAttachmentOptimal); - - const vk::SubpassDescription subpass_description({}, vk::PipelineBindPoint::eGraphics, 0, - nullptr, 1, &color_attachment_ref, nullptr, - nullptr, 0, nullptr); - - const vk::SubpassDependency dependency( - VK_SUBPASS_EXTERNAL, 0, vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::PipelineStageFlagBits::eColorAttachmentOutput, {}, - vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite, {}); - - const vk::RenderPassCreateInfo renderpass_ci({}, 1, &color_attachment, 1, &subpass_description, - 1, &dependency); - - const auto dev = device.GetLogical(); - renderpass = dev.createRenderPassUnique(renderpass_ci, nullptr, device.GetDispatchLoader()); + VkAttachmentDescription color_attachment; + color_attachment.flags = 0; + color_attachment.format = swapchain.GetImageFormat(); + color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + VkAttachmentReference color_attachment_ref; + color_attachment_ref.attachment = 0; + color_attachment_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkSubpassDescription subpass_description; + subpass_description.flags = 0; + subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass_description.inputAttachmentCount = 0; + subpass_description.pInputAttachments = nullptr; + subpass_description.colorAttachmentCount = 1; + subpass_description.pColorAttachments = &color_attachment_ref; + subpass_description.pResolveAttachments = nullptr; + subpass_description.pDepthStencilAttachment = nullptr; + subpass_description.preserveAttachmentCount = 0; + subpass_description.pPreserveAttachments = nullptr; + + VkSubpassDependency dependency; + dependency.srcSubpass = VK_SUBPASS_EXTERNAL; + dependency.dstSubpass = 0; + dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.srcAccessMask = 0; + dependency.dstAccessMask = + 
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dependency.dependencyFlags = 0; + + VkRenderPassCreateInfo renderpass_ci; + renderpass_ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + renderpass_ci.pNext = nullptr; + renderpass_ci.flags = 0; + renderpass_ci.attachmentCount = 1; + renderpass_ci.pAttachments = &color_attachment; + renderpass_ci.subpassCount = 1; + renderpass_ci.pSubpasses = &subpass_description; + renderpass_ci.dependencyCount = 1; + renderpass_ci.pDependencies = &dependency; + + renderpass = device.GetLogical().CreateRenderPass(renderpass_ci); } void VKBlitScreen::CreateDescriptorSetLayout() { - const std::array<vk::DescriptorSetLayoutBinding, 2> layout_bindings{ - vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eUniformBuffer, 1, - vk::ShaderStageFlagBits::eVertex, nullptr), - vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eCombinedImageSampler, 1, - vk::ShaderStageFlagBits::eFragment, nullptr)}; - const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci( - {}, static_cast<u32>(layout_bindings.size()), layout_bindings.data()); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld); + std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings; + layout_bindings[0].binding = 0; + layout_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + layout_bindings[0].descriptorCount = 1; + layout_bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + layout_bindings[0].pImmutableSamplers = nullptr; + layout_bindings[1].binding = 1; + layout_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + layout_bindings[1].descriptorCount = 1; + layout_bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + layout_bindings[1].pImmutableSamplers = nullptr; + + VkDescriptorSetLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.bindingCount = static_cast<u32>(layout_bindings.size()); + ci.pBindings = layout_bindings.data(); + + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); } void VKBlitScreen::CreateDescriptorSets() { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - - descriptor_sets.resize(image_count); - for (std::size_t i = 0; i < image_count; ++i) { - const vk::DescriptorSetLayout layout = *descriptor_set_layout; - const vk::DescriptorSetAllocateInfo descriptor_set_ai(*descriptor_pool, 1, &layout); - const vk::Result result = - dev.allocateDescriptorSets(&descriptor_set_ai, &descriptor_sets[i], dld); - ASSERT(result == vk::Result::eSuccess); - } + const std::vector layouts(image_count, *descriptor_set_layout); + + VkDescriptorSetAllocateInfo ai; + ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + ai.pNext = nullptr; + ai.descriptorPool = *descriptor_pool; + ai.descriptorSetCount = static_cast<u32>(image_count); + ai.pSetLayouts = layouts.data(); + descriptor_sets = descriptor_pool.Allocate(ai); } void VKBlitScreen::CreatePipelineLayout() { - const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &descriptor_set_layout.get(), 0, - nullptr); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - pipeline_layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); + VkPipelineLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + ci.pNext = 
nullptr; + ci.flags = 0; + ci.setLayoutCount = 1; + ci.pSetLayouts = descriptor_set_layout.address(); + ci.pushConstantRangeCount = 0; + ci.pPushConstantRanges = nullptr; + pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); } void VKBlitScreen::CreateGraphicsPipeline() { - const std::array shader_stages = { - vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eVertex, *vertex_shader, - "main", nullptr), - vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eFragment, *fragment_shader, - "main", nullptr)}; + std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages; + shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[0].pNext = nullptr; + shader_stages[0].flags = 0; + shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shader_stages[0].module = *vertex_shader; + shader_stages[0].pName = "main"; + shader_stages[0].pSpecializationInfo = nullptr; + shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[1].pNext = nullptr; + shader_stages[1].flags = 0; + shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + shader_stages[1].module = *fragment_shader; + shader_stages[1].pName = "main"; + shader_stages[1].pSpecializationInfo = nullptr; const auto vertex_binding_description = ScreenRectVertex::GetDescription(); const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); - const vk::PipelineVertexInputStateCreateInfo vertex_input( - {}, 1, &vertex_binding_description, static_cast<u32>(vertex_attrs_description.size()), - vertex_attrs_description.data()); - - const vk::PipelineInputAssemblyStateCreateInfo input_assembly( - {}, vk::PrimitiveTopology::eTriangleStrip, false); - - // Set a dummy viewport, it's going to be replaced by dynamic states. 
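
Every replacement struct in these hunks is filled member by member, starting with sType and pNext: the raw C structs have no constructors, so a skipped member is left uninitialized on the stack. A hypothetical helper (not part of this patch) shows the common alternative of value-initializing first; the patch instead assigns every field explicitly, which avoids zeroing memory that is about to be overwritten:

#include <vulkan/vulkan.h>

// Zero-initialize pNext, flags and all other members, then set sType.
template <typename T>
T MakeStruct(VkStructureType stype) {
    T result{};
    result.sType = stype;
    return result;
}

// Usage sketch:
//   auto ci = MakeStruct<VkPipelineLayoutCreateInfo>(
//       VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
//   ci.setLayoutCount = 1;
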
- const vk::Viewport viewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f); - const vk::Rect2D scissor({0, 0}, {1, 1}); - const vk::PipelineViewportStateCreateInfo viewport_state({}, 1, &viewport, 1, &scissor); - - const vk::PipelineRasterizationStateCreateInfo rasterizer( - {}, false, false, vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone, - vk::FrontFace::eClockwise, false, 0.0f, 0.0f, 0.0f, 1.0f); - - const vk::PipelineMultisampleStateCreateInfo multisampling({}, vk::SampleCountFlagBits::e1, - false, 0.0f, nullptr, false, false); - - const vk::PipelineColorBlendAttachmentState color_blend_attachment( - false, vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd, - vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd, - vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | - vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA); - - const vk::PipelineColorBlendStateCreateInfo color_blending( - {}, false, vk::LogicOp::eCopy, 1, &color_blend_attachment, {0.0f, 0.0f, 0.0f, 0.0f}); - - const std::array<vk::DynamicState, 2> dynamic_states = {vk::DynamicState::eViewport, - vk::DynamicState::eScissor}; - - const vk::PipelineDynamicStateCreateInfo dynamic_state( - {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data()); - - const vk::GraphicsPipelineCreateInfo pipeline_ci( - {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input, - &input_assembly, nullptr, &viewport_state, &rasterizer, &multisampling, nullptr, - &color_blending, &dynamic_state, *pipeline_layout, *renderpass, 0, nullptr, 0); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - pipeline = dev.createGraphicsPipelineUnique({}, pipeline_ci, nullptr, dld); + VkPipelineVertexInputStateCreateInfo vertex_input_ci; + vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_input_ci.pNext = nullptr; + vertex_input_ci.flags = 0; + vertex_input_ci.vertexBindingDescriptionCount = 1; + vertex_input_ci.pVertexBindingDescriptions = &vertex_binding_description; + vertex_input_ci.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}; + vertex_input_ci.pVertexAttributeDescriptions = vertex_attrs_description.data(); + + VkPipelineInputAssemblyStateCreateInfo input_assembly_ci; + input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly_ci.pNext = nullptr; + input_assembly_ci.flags = 0; + input_assembly_ci.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + input_assembly_ci.primitiveRestartEnable = VK_FALSE; + + VkPipelineViewportStateCreateInfo viewport_state_ci; + viewport_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state_ci.pNext = nullptr; + viewport_state_ci.flags = 0; + viewport_state_ci.viewportCount = 1; + viewport_state_ci.pViewports = nullptr; + viewport_state_ci.scissorCount = 1; + viewport_state_ci.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo rasterization_ci; + rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_ci.pNext = nullptr; + rasterization_ci.flags = 0; + rasterization_ci.depthClampEnable = VK_FALSE; + rasterization_ci.rasterizerDiscardEnable = VK_FALSE; + rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; + rasterization_ci.cullMode = VK_CULL_MODE_NONE; + rasterization_ci.frontFace = VK_FRONT_FACE_CLOCKWISE; + rasterization_ci.depthBiasEnable = VK_FALSE; + rasterization_ci.depthBiasConstantFactor = 0.0f; + 
rasterization_ci.depthBiasClamp = 0.0f; + rasterization_ci.depthBiasSlopeFactor = 0.0f; + rasterization_ci.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo multisampling_ci; + multisampling_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisampling_ci.pNext = nullptr; + multisampling_ci.flags = 0; + multisampling_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + multisampling_ci.sampleShadingEnable = VK_FALSE; + multisampling_ci.minSampleShading = 0.0f; + multisampling_ci.pSampleMask = nullptr; + multisampling_ci.alphaToCoverageEnable = VK_FALSE; + multisampling_ci.alphaToOneEnable = VK_FALSE; + + VkPipelineColorBlendAttachmentState color_blend_attachment; + color_blend_attachment.blendEnable = VK_FALSE; + color_blend_attachment.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO; + color_blend_attachment.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + color_blend_attachment.colorBlendOp = VK_BLEND_OP_ADD; + color_blend_attachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + color_blend_attachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + color_blend_attachment.alphaBlendOp = VK_BLEND_OP_ADD; + color_blend_attachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendStateCreateInfo color_blend_ci; + color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend_ci.flags = 0; + color_blend_ci.pNext = nullptr; + color_blend_ci.logicOpEnable = VK_FALSE; + color_blend_ci.logicOp = VK_LOGIC_OP_COPY; + color_blend_ci.attachmentCount = 1; + color_blend_ci.pAttachments = &color_blend_attachment; + color_blend_ci.blendConstants[0] = 0.0f; + color_blend_ci.blendConstants[1] = 0.0f; + color_blend_ci.blendConstants[2] = 0.0f; + color_blend_ci.blendConstants[3] = 0.0f; + + static constexpr std::array dynamic_states = {VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamic_state_ci; + dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state_ci.pNext = nullptr; + dynamic_state_ci.flags = 0; + dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size()); + dynamic_state_ci.pDynamicStates = dynamic_states.data(); + + VkGraphicsPipelineCreateInfo pipeline_ci; + pipeline_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_ci.pNext = nullptr; + pipeline_ci.flags = 0; + pipeline_ci.stageCount = static_cast<u32>(shader_stages.size()); + pipeline_ci.pStages = shader_stages.data(); + pipeline_ci.pVertexInputState = &vertex_input_ci; + pipeline_ci.pInputAssemblyState = &input_assembly_ci; + pipeline_ci.pTessellationState = nullptr; + pipeline_ci.pViewportState = &viewport_state_ci; + pipeline_ci.pRasterizationState = &rasterization_ci; + pipeline_ci.pMultisampleState = &multisampling_ci; + pipeline_ci.pDepthStencilState = nullptr; + pipeline_ci.pColorBlendState = &color_blend_ci; + pipeline_ci.pDynamicState = &dynamic_state_ci; + pipeline_ci.layout = *pipeline_layout; + pipeline_ci.renderPass = *renderpass; + pipeline_ci.subpass = 0; + pipeline_ci.basePipelineHandle = 0; + pipeline_ci.basePipelineIndex = 0; + + pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci); } void VKBlitScreen::CreateSampler() { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - const vk::SamplerCreateInfo sampler_ci( - {}, vk::Filter::eLinear, vk::Filter::eLinear, vk::SamplerMipmapMode::eLinear, - 
vk::SamplerAddressMode::eClampToBorder, vk::SamplerAddressMode::eClampToBorder, - vk::SamplerAddressMode::eClampToBorder, 0.0f, false, 0.0f, false, vk::CompareOp::eNever, - 0.0f, 0.0f, vk::BorderColor::eFloatOpaqueBlack, false); - sampler = dev.createSamplerUnique(sampler_ci, nullptr, dld); + VkSamplerCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.magFilter = VK_FILTER_LINEAR; + ci.minFilter = VK_FILTER_NEAREST; + ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + ci.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + ci.mipLodBias = 0.0f; + ci.anisotropyEnable = VK_FALSE; + ci.maxAnisotropy = 0.0f; + ci.compareEnable = VK_FALSE; + ci.compareOp = VK_COMPARE_OP_NEVER; + ci.minLod = 0.0f; + ci.maxLod = 0.0f; + ci.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + ci.unnormalizedCoordinates = VK_FALSE; + + sampler = device.GetLogical().CreateSampler(ci); } void VKBlitScreen::CreateFramebuffers() { - const vk::Extent2D size{swapchain.GetSize()}; - framebuffers.clear(); + const VkExtent2D size{swapchain.GetSize()}; framebuffers.resize(image_count); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); + VkFramebufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.renderPass = *renderpass; + ci.attachmentCount = 1; + ci.width = size.width; + ci.height = size.height; + ci.layers = 1; for (std::size_t i = 0; i < image_count; ++i) { - const vk::ImageView image_view{swapchain.GetImageViewIndex(i)}; - const vk::FramebufferCreateInfo framebuffer_ci({}, *renderpass, 1, &image_view, size.width, - size.height, 1); - framebuffers[i] = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); + const VkImageView image_view{swapchain.GetImageViewIndex(i)}; + ci.pAttachments = &image_view; + framebuffers[i] = device.GetLogical().CreateFramebuffer(ci); } } @@ -507,54 +678,86 @@ void VKBlitScreen::ReleaseRawImages() { } void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - - const vk::BufferCreateInfo buffer_ci({}, CalculateBufferSize(framebuffer), - vk::BufferUsageFlagBits::eTransferSrc | - vk::BufferUsageFlagBits::eVertexBuffer | - vk::BufferUsageFlagBits::eUniformBuffer, - vk::SharingMode::eExclusive, 0, nullptr); - buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); - buffer_commit = memory_manager.Commit(*buffer, true); + VkBufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.size = CalculateBufferSize(framebuffer); + ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + + buffer = device.GetLogical().CreateBuffer(ci); + buffer_commit = memory_manager.Commit(buffer, true); } void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { raw_images.resize(image_count); raw_buffer_commits.resize(image_count); - const auto format = GetFormat(framebuffer); + VkImageCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.imageType = VK_IMAGE_TYPE_2D; + 
ci.format = GetFormat(framebuffer); + ci.extent.width = framebuffer.width; + ci.extent.height = framebuffer.height; + ci.extent.depth = 1; + ci.mipLevels = 1; + ci.arrayLayers = 1; + ci.samples = VK_SAMPLE_COUNT_1_BIT; + ci.tiling = VK_IMAGE_TILING_LINEAR; + ci.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + for (std::size_t i = 0; i < image_count; ++i) { - const vk::ImageCreateInfo image_ci( - {}, vk::ImageType::e2D, format, {framebuffer.width, framebuffer.height, 1}, 1, 1, - vk::SampleCountFlagBits::e1, vk::ImageTiling::eOptimal, - vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled, - vk::SharingMode::eExclusive, 0, nullptr, vk::ImageLayout::eUndefined); - - raw_images[i] = - std::make_unique<VKImage>(device, scheduler, image_ci, vk::ImageAspectFlagBits::eColor); + raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); } } -void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const { - const vk::DescriptorSet descriptor_set = descriptor_sets[image_index]; - - const vk::DescriptorBufferInfo buffer_info(*buffer, offsetof(BufferData, uniform), - sizeof(BufferData::uniform)); - const vk::WriteDescriptorSet ubo_write(descriptor_set, 0, 0, 1, - vk::DescriptorType::eUniformBuffer, nullptr, - &buffer_info, nullptr); - - const vk::DescriptorImageInfo image_info(*sampler, image_view, - vk::ImageLayout::eShaderReadOnlyOptimal); - const vk::WriteDescriptorSet sampler_write(descriptor_set, 1, 0, 1, - vk::DescriptorType::eCombinedImageSampler, - &image_info, nullptr, nullptr); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - dev.updateDescriptorSets({ubo_write, sampler_write}, {}, dld); +void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { + VkDescriptorBufferInfo buffer_info; + buffer_info.buffer = *buffer; + buffer_info.offset = offsetof(BufferData, uniform); + buffer_info.range = sizeof(BufferData::uniform); + + VkWriteDescriptorSet ubo_write; + ubo_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + ubo_write.pNext = nullptr; + ubo_write.dstSet = descriptor_sets[image_index]; + ubo_write.dstBinding = 0; + ubo_write.dstArrayElement = 0; + ubo_write.descriptorCount = 1; + ubo_write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + ubo_write.pImageInfo = nullptr; + ubo_write.pBufferInfo = &buffer_info; + ubo_write.pTexelBufferView = nullptr; + + VkDescriptorImageInfo image_info; + image_info.sampler = *sampler; + image_info.imageView = image_view; + image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + VkWriteDescriptorSet sampler_write; + sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + sampler_write.pNext = nullptr; + sampler_write.dstSet = descriptor_sets[image_index]; + sampler_write.dstBinding = 1; + sampler_write.dstArrayElement = 0; + sampler_write.descriptorCount = 1; + sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + sampler_write.pImageInfo = &image_info; + sampler_write.pBufferInfo = nullptr; + sampler_write.pTexelBufferView = nullptr; + + device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {}); } void VKBlitScreen::SetUniformData(BufferData& data, diff 
--git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index ea680b3f5..5eb544aea 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -8,9 +8,9 @@ #include <memory> #include <tuple> -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Core { class System; @@ -49,8 +49,8 @@ public: void Recreate(); - std::tuple<VKFence&, vk::Semaphore> Draw(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated); + std::tuple<VKFence&, VkSemaphore> Draw(const Tegra::FramebufferConfig& framebuffer, + bool use_accelerated); private: struct BufferData; @@ -74,7 +74,7 @@ private: void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); - void UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const; + void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; @@ -93,23 +93,23 @@ private: const std::size_t image_count; const VKScreenInfo& screen_info; - UniqueShaderModule vertex_shader; - UniqueShaderModule fragment_shader; - UniqueDescriptorPool descriptor_pool; - UniqueDescriptorSetLayout descriptor_set_layout; - UniquePipelineLayout pipeline_layout; - UniquePipeline pipeline; - UniqueRenderPass renderpass; - std::vector<UniqueFramebuffer> framebuffers; - std::vector<vk::DescriptorSet> descriptor_sets; - UniqueSampler sampler; - - UniqueBuffer buffer; + vk::ShaderModule vertex_shader; + vk::ShaderModule fragment_shader; + vk::DescriptorPool descriptor_pool; + vk::DescriptorSetLayout descriptor_set_layout; + vk::PipelineLayout pipeline_layout; + vk::Pipeline pipeline; + vk::RenderPass renderpass; + std::vector<vk::Framebuffer> framebuffers; + vk::DescriptorSets descriptor_sets; + vk::Sampler sampler; + + vk::Buffer buffer; VKMemoryCommit buffer_commit; std::vector<std::unique_ptr<VKFenceWatch>> watches; - std::vector<UniqueSemaphore> semaphores; + std::vector<vk::Semaphore> semaphores; std::vector<std::unique_ptr<VKImage>> raw_images; std::vector<VKMemoryCommit> raw_buffer_commits; u32 raw_width = 0; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1ba544943..81e1de2be 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -11,48 +11,50 @@ #include "common/assert.h" #include "common/bit_util.h" #include "core/core.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { namespace { -const auto BufferUsage = - vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | - vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; +constexpr VkBufferUsageFlags BUFFER_USAGE = + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + 
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; -const auto UploadPipelineStage = - vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput | - vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | - vk::PipelineStageFlagBits::eComputeShader; +constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = + VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; -const auto UploadAccessBarriers = - vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead | - vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead | - vk::AccessFlagBits::eIndexRead; +constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; -auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { - return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage); +std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { + return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE); } } // Anonymous namespace CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, - CacheAddr cache_addr, std::size_t size) - : VideoCommon::BufferBlock{cache_addr, size} { - const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), - BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | - vk::BufferUsageFlagBits::eTransferDst, - vk::SharingMode::eExclusive, 0, nullptr); - - const auto& dld{device.GetDispatchLoader()}; - const auto dev{device.GetLogical()}; - buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld); - buffer.commit = memory_manager.Commit(*buffer.handle, false); + VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { + VkBufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.size = static_cast<VkDeviceSize>(size); + ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + + buffer.handle = device.GetLogical().CreateBuffer(ci); + buffer.commit = memory_manager.Commit(buffer.handle, false); } CachedBufferBlock::~CachedBufferBlock() = default; @@ -60,30 +62,30 @@ CachedBufferBlock::~CachedBufferBlock() = default; VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool) - : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system, - CreateStreamBuffer(device, - scheduler)}, + : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, + CreateStreamBuffer(device, + scheduler)}, device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ staging_pool} {} VKBufferCache::~VKBufferCache() = default; -Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { - return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); +Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return 
std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); } -const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { +VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) { return buffer->GetHandle(); } -const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { +VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { size = std::max(size, std::size_t(4)); const auto& empty = staging_pool.GetUnusedBuffer(size, false); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) { - cmdbuf.fillBuffer(buffer, 0, size, 0, dld); + scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { + cmdbuf.FillBuffer(buffer, 0, size, 0); }); - return &*empty.handle; + return *empty.handle; } void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, @@ -92,15 +94,22 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st std::memcpy(staging.commit->Map(size), data, size); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, - size](auto cmdbuf, auto& dld) { - cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld); - cmdbuf.pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, - {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, - offset, size)}, - {}, dld); + scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, + size](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = offset; + barrier.size = size; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, + barrier, {}); }); } @@ -108,17 +117,24 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, u8* data) { const auto& staging = staging_pool.GetUnusedBuffer(size, true); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, - size](auto cmdbuf, auto& dld) { - cmdbuf.pipelineBarrier( - vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | - vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer, {}, {}, - {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite, - vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)}, - {}, dld); - cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld); + scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, + size](vk::CommandBuffer cmdbuf) { + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = offset; + barrier.size = size; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); + cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); }); scheduler.Finish(); @@ -128,18 +144,31 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) { scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, - dst_offset, size](auto cmdbuf, auto& dld) { - cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld); - cmdbuf.pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, - {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead, - vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size), - vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer, - dst_offset, size)}, - {}, dld); + scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset, + dst_offset, size](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); + + std::array<VkBufferMemoryBarrier, 2> barriers; + barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barriers[0].pNext = nullptr; + barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].buffer = src_buffer; + barriers[0].offset = src_offset; + barriers[0].size = size; + barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barriers[1].pNext = nullptr; + barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].buffer = dst_buffer; + barriers[1].offset = dst_offset; + barriers[1].size = size; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, + barriers, {}); }); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3f38eed0c..3cd2e2774 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -11,11 +11,11 @@ #include "common/common_types.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/rasterizer_cache.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Core { class System; @@ -30,11 +30,11 @@ class VKScheduler; class CachedBufferBlock final : public VideoCommon::BufferBlock { public: explicit CachedBufferBlock(const VKDevice& 
device, VKMemoryManager& memory_manager, - CacheAddr cache_addr, std::size_t size); + VAddr cpu_addr, std::size_t size); ~CachedBufferBlock(); - const vk::Buffer* GetHandle() const { - return &*buffer.handle; + VkBuffer GetHandle() const { + return *buffer.handle; } private: @@ -43,21 +43,21 @@ private: using Buffer = std::shared_ptr<CachedBufferBlock>; -class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> { +class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { public: explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); - const vk::Buffer* GetEmptyBuffer(std::size_t size) override; + VkBuffer GetEmptyBuffer(std::size_t size) override; protected: - void WriteBarrier() override {} + VkBuffer ToHandle(const Buffer& buffer) override; - Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; + void WriteBarrier() override {} - const vk::Buffer* ToHandle(const Buffer& buffer) override; + Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 7bdda3d79..878a78755 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -10,13 +10,13 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -114,6 +114,35 @@ constexpr u8 quad_array[] = { 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; +VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { + VkDescriptorSetLayoutBinding binding; + binding.binding = 0; + binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + binding.pImmutableSamplers = nullptr; + return binding; +} + +VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { + VkDescriptorUpdateTemplateEntryKHR entry; + entry.dstBinding = 0; + entry.dstArrayElement = 0; + entry.descriptorCount = 1; + entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + entry.offset = 0; + entry.stride = sizeof(DescriptorUpdateEntry); + return entry; +} + +VkPushConstantRange BuildQuadArrayPassPushConstantRange() { + VkPushConstantRange range; + range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + range.offset = 0; + range.size = sizeof(u32); + return range; +} + // Uint8 SPIR-V module. Generated from the "shaders/" directory. 
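The Build*DescriptorUpdateTemplateEntry helpers above describe, once at pass-creation time, how a packed array of DescriptorUpdateEntry values maps onto the bindings of a descriptor set: with offset 0 and stride sizeof(DescriptorUpdateEntry), consecutive payload slots feed consecutive descriptors. A sketch of the consuming side, assuming DescriptorUpdateEntry is the union of descriptor info structs that this stride suggests (its real definition lives in vk_update_descriptor.h, outside this excerpt):

    #include <vulkan/vulkan.h>

    // Assumed payload slot layout; the real DescriptorUpdateEntry is not shown
    // in this diff.
    union DescriptorUpdateEntry {
        VkDescriptorBufferInfo buffer;
        VkDescriptorImageInfo image;
        VkBufferView texel_buffer;
    };

    // With dstBinding = 0, offset = 0 and stride = sizeof(DescriptorUpdateEntry),
    // the driver reads payload[0..descriptorCount-1] and writes one descriptor
    // per slot, replacing one VkWriteDescriptorSet per binding with a single call.
    void UpdateWithTemplate(PFN_vkUpdateDescriptorSetWithTemplateKHR update_pfn,
                            VkDevice device, VkDescriptorSet set,
                            VkDescriptorUpdateTemplateKHR update_template,
                            const DescriptorUpdateEntry* payload) {
        // The KHR entry point is fetched with vkGetDeviceProcAddr; the loader
        // does not export it statically.
        update_pfn(device, set, update_template, payload);
    }

One template update per set is presumably why the update descriptor queue can prepare entries with plain writes and submit them in bulk each dispatch.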
constexpr u8 uint8_pass[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, @@ -191,53 +220,111 @@ constexpr u8 uint8_pass[] = { 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; +std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() { + std::array<VkDescriptorSetLayoutBinding, 2> bindings; + bindings[0].binding = 0; + bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[0].descriptorCount = 1; + bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[0].pImmutableSamplers = nullptr; + bindings[1].binding = 1; + bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[1].descriptorCount = 1; + bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[1].pImmutableSamplers = nullptr; + return bindings; +} + +VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() { + VkDescriptorUpdateTemplateEntryKHR entry; + entry.dstBinding = 0; + entry.dstArrayElement = 0; + entry.descriptorCount = 2; + entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + entry.offset = 0; + entry.stride = sizeof(DescriptorUpdateEntry); + return entry; +} + } // Anonymous namespace VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, - const std::vector<vk::DescriptorSetLayoutBinding>& bindings, - const std::vector<vk::DescriptorUpdateTemplateEntry>& templates, - const std::vector<vk::PushConstantRange> push_constants, - std::size_t code_size, const u8* code) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - - const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci( - {}, static_cast<u32>(bindings.size()), bindings.data()); - descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld); - - const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, - static_cast<u32>(push_constants.size()), - push_constants.data()); - layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); + vk::Span<VkDescriptorSetLayoutBinding> bindings, + vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, + vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, + const u8* code) { + VkDescriptorSetLayoutCreateInfo descriptor_layout_ci; + descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_layout_ci.pNext = nullptr; + descriptor_layout_ci.flags = 0; + descriptor_layout_ci.bindingCount = bindings.size(); + descriptor_layout_ci.pBindings = bindings.data(); + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci); + + VkPipelineLayoutCreateInfo pipeline_layout_ci; + pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_ci.pNext = nullptr; + pipeline_layout_ci.flags = 0; + pipeline_layout_ci.setLayoutCount = 1; + pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address(); + pipeline_layout_ci.pushConstantRangeCount = push_constants.size(); + pipeline_layout_ci.pPushConstantRanges = push_constants.data(); + layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci); if (!templates.empty()) { - const vk::DescriptorUpdateTemplateCreateInfo template_ci( - {}, static_cast<u32>(templates.size()), templates.data(), - vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, - 
vk::PipelineBindPoint::eGraphics, *layout, 0); - descriptor_template = dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); + VkDescriptorUpdateTemplateCreateInfoKHR template_ci; + template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; + template_ci.pNext = nullptr; + template_ci.flags = 0; + template_ci.descriptorUpdateEntryCount = templates.size(); + template_ci.pDescriptorUpdateEntries = templates.data(); + template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + template_ci.descriptorSetLayout = *descriptor_set_layout; + template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + template_ci.pipelineLayout = *layout; + template_ci.set = 0; + descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci); descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); } auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); std::memcpy(code_copy.get(), code, code_size); - const vk::ShaderModuleCreateInfo module_ci({}, code_size, code_copy.get()); - module = dev.createShaderModuleUnique(module_ci, nullptr, dld); - const vk::PipelineShaderStageCreateInfo stage_ci({}, vk::ShaderStageFlagBits::eCompute, *module, - "main", nullptr); + VkShaderModuleCreateInfo module_ci; + module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + module_ci.pNext = nullptr; + module_ci.flags = 0; + module_ci.codeSize = code_size; + module_ci.pCode = code_copy.get(); + module = device.GetLogical().CreateShaderModule(module_ci); + + VkComputePipelineCreateInfo pipeline_ci; + pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipeline_ci.pNext = nullptr; + pipeline_ci.flags = 0; + pipeline_ci.layout = *layout; + pipeline_ci.basePipelineHandle = nullptr; + pipeline_ci.basePipelineIndex = 0; - const vk::ComputePipelineCreateInfo pipeline_ci({}, stage_ci, *layout, nullptr, 0); - pipeline = dev.createComputePipelineUnique(nullptr, pipeline_ci, nullptr, dld); + VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage; + stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage_ci.pNext = nullptr; + stage_ci.flags = 0; + stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; + stage_ci.module = *module; + stage_ci.pName = "main"; + stage_ci.pSpecializationInfo = nullptr; + + pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); } VKComputePass::~VKComputePass() = default; -vk::DescriptorSet VKComputePass::CommitDescriptorSet( - VKUpdateDescriptorQueue& update_descriptor_queue, VKFence& fence) { +VkDescriptorSet VKComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, + VKFence& fence) { if (!descriptor_template) { - return {}; + return nullptr; } const auto set = descriptor_allocator->Commit(fence); update_descriptor_queue.Send(*descriptor_template, set); @@ -248,25 +335,21 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, - {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1, - vk::ShaderStageFlagBits::eCompute, nullptr)}, - {vk::DescriptorUpdateTemplateEntry(0, 0, 1, vk::DescriptorType::eStorageBuffer, - 0, sizeof(DescriptorUpdateEntry))}, - {vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32))}, - std::size(quad_array), quad_array), + : VKComputePass(device, descriptor_pool, 
BuildQuadArrayPassDescriptorSetLayoutBinding(), + BuildQuadArrayPassDescriptorUpdateTemplateEntry(), + BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array), scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, update_descriptor_queue{update_descriptor_queue} {} QuadArrayPass::~QuadArrayPass() = default; -std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { +std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { const u32 num_triangle_vertices = num_vertices * 6 / 4; const std::size_t staging_size = num_triangle_vertices * sizeof(u32); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size); + update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); scheduler.RequestOutsideRenderPassOperationContext(); @@ -274,20 +357,25 @@ std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_ver ASSERT(num_vertices % 4 == 0); const u32 num_quads = num_vertices / 4; scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads, - first, set](auto cmdbuf, auto& dld) { + first, set](vk::CommandBuffer cmdbuf) { constexpr u32 dispatch_size = 1024; - cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld); - cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(first), &first, - dld); - cmdbuf.dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1, dld); - - const vk::BufferMemoryBarrier barrier( - vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0, - static_cast<vk::DeviceSize>(num_quads) * 6 * sizeof(u32)); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first); + cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = 0; + barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); }); return {*buffer.handle, 0}; } @@ -295,45 +383,46 @@ std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_ver Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, - {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1, - 
vk::ShaderStageFlagBits::eCompute, nullptr), - vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1, - vk::ShaderStageFlagBits::eCompute, nullptr)}, - {vk::DescriptorUpdateTemplateEntry(0, 0, 2, vk::DescriptorType::eStorageBuffer, - 0, sizeof(DescriptorUpdateEntry))}, - {}, std::size(uint8_pass), uint8_pass), + : VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(), + BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass), + uint8_pass), scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, update_descriptor_queue{update_descriptor_queue} {} Uint8Pass::~Uint8Pass() = default; -std::pair<const vk::Buffer*, u64> Uint8Pass::Assemble(u32 num_vertices, vk::Buffer src_buffer, - u64 src_offset) { +std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, + u64 src_offset) { const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16)); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices); - update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size); + update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); + update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, - num_vertices](auto cmdbuf, auto& dld) { + num_vertices](vk::CommandBuffer cmdbuf) { constexpr u32 dispatch_size = 1024; - cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld); - cmdbuf.dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1, dld); - - const vk::BufferMemoryBarrier barrier( - vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0, - static_cast<vk::DeviceSize>(num_vertices) * sizeof(u16)); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = 0; + barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16)); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); }); - return {&*buffer.handle, 0}; + return {*buffer.handle, 0}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 7057eb837..ec80c8683 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -8,8 +8,8 @@ #include <utility> #include 
<vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -22,24 +22,24 @@ class VKUpdateDescriptorQueue; class VKComputePass { public: explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, - const std::vector<vk::DescriptorSetLayoutBinding>& bindings, - const std::vector<vk::DescriptorUpdateTemplateEntry>& templates, - const std::vector<vk::PushConstantRange> push_constants, - std::size_t code_size, const u8* code); + vk::Span<VkDescriptorSetLayoutBinding> bindings, + vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, + vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, + const u8* code); ~VKComputePass(); protected: - vk::DescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, - VKFence& fence); + VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, + VKFence& fence); - UniqueDescriptorUpdateTemplate descriptor_template; - UniquePipelineLayout layout; - UniquePipeline pipeline; + vk::DescriptorUpdateTemplateKHR descriptor_template; + vk::PipelineLayout layout; + vk::Pipeline pipeline; private: - UniqueDescriptorSetLayout descriptor_set_layout; + vk::DescriptorSetLayout descriptor_set_layout; std::optional<DescriptorAllocator> descriptor_allocator; - UniqueShaderModule module; + vk::ShaderModule module; }; class QuadArrayPass final : public VKComputePass { @@ -50,7 +50,7 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~QuadArrayPass(); - std::pair<const vk::Buffer&, vk::DeviceSize> Assemble(u32 num_vertices, u32 first); + std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first); private: VKScheduler& scheduler; @@ -65,8 +65,7 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~Uint8Pass(); - std::pair<const vk::Buffer*, u64> Assemble(u32 num_vertices, vk::Buffer src_buffer, - u64 src_offset); + std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); private: VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 60f57d83e..23beafa4f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -5,7 +5,6 @@ #include <memory> #include <vector> -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" @@ -14,6 +13,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -30,7 +30,7 @@ VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& schedu VKComputePipeline::~VKComputePipeline() = default; -vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() { +VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { if (!descriptor_template) { return {}; } @@ -39,74 +39,109 @@ vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() { return set; } -UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { - std::vector<vk::DescriptorSetLayoutBinding> bindings; 
+vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { + std::vector<VkDescriptorSetLayoutBinding> bindings; u32 binding = 0; - const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { + const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { // TODO(Rodrigo): Maybe make individual bindings here? for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { - bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute, - nullptr); + VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); + entry.binding = binding++; + entry.descriptorType = descriptor_type; + entry.descriptorCount = 1; + entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + entry.pImmutableSamplers = nullptr; } }; - AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); - AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); - AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); - AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); - AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); - - const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( - {}, static_cast<u32>(bindings.size()), bindings.data()); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); + add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); + add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); + add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size()); + add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); + add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); + + VkDescriptorSetLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.bindingCount = static_cast<u32>(bindings.size()); + ci.pBindings = bindings.data(); + return device.GetLogical().CreateDescriptorSetLayout(ci); } -UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const { - const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr); - const auto dev = device.GetLogical(); - return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader()); +vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { + VkPipelineLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.setLayoutCount = 1; + ci.pSetLayouts = descriptor_set_layout.address(); + ci.pushConstantRangeCount = 0; + ci.pPushConstantRanges = nullptr; + return device.GetLogical().CreatePipelineLayout(ci); } -UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const { - std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; +vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { + std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; u32 binding = 0; u32 offset = 0; FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); if (template_entries.empty()) { // If the shader doesn't use descriptor sets, skip template creation. 
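VKComputePass's new constructor takes vk::Span<T> where the old one took const std::vector<T>&, letting call sites pass a single binding, a std::array, or an empty brace list without a heap allocation. wrapper.h is not shown here, so the following is a hypothetical reconstruction of such a parameter-only view, inferred from those call sites:

    #include <cstddef>
    #include <cstdint>
    #include <iterator>

    namespace vk {

    // Non-owning view in the spirit of Vulkan-Hpp's ArrayProxy. It stores a
    // pointer into the caller's data, so it must never outlive the call it is
    // passed to.
    template <typename T>
    class Span {
    public:
        constexpr Span() noexcept = default;

        // A single element, e.g. BuildQuadArrayPassDescriptorSetLayoutBinding().
        constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {}

        // Any contiguous range, e.g. the std::array returned by
        // BuildUint8PassDescriptorSetBindings().
        template <typename Range>
        constexpr Span(const Range& range) : ptr{std::data(range)}, num{std::size(range)} {}

        constexpr const T* data() const noexcept {
            return ptr;
        }

        // Vulkan create infos take 32-bit counts, hence the cast here.
        constexpr std::uint32_t size() const noexcept {
            return static_cast<std::uint32_t>(num);
        }

        constexpr bool empty() const noexcept {
            return num == 0;
        }

    private:
        const T* ptr = nullptr;
        std::size_t num = 0;
    };

    } // namespace vk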
- return UniqueDescriptorUpdateTemplate{}; + return {}; } - const vk::DescriptorUpdateTemplateCreateInfo template_ci( - {}, static_cast<u32>(template_entries.size()), template_entries.data(), - vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, - vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); + VkDescriptorUpdateTemplateCreateInfoKHR ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; + ci.pNext = nullptr; + ci.flags = 0; + ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); + ci.pDescriptorUpdateEntries = template_entries.data(); + ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + ci.descriptorSetLayout = *descriptor_set_layout; + ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + ci.pipelineLayout = *layout; + ci.set = DESCRIPTOR_SET; + return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); } -UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { - const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data()); - const auto dev = device.GetLogical(); - return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader()); +vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { + VkShaderModuleCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.codeSize = code.size() * sizeof(u32); + ci.pCode = code.data(); + return device.GetLogical().CreateShaderModule(ci); } -UniquePipeline VKComputePipeline::CreatePipeline() const { - vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute, - *shader_module, "main", nullptr); - vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; +vk::Pipeline VKComputePipeline::CreatePipeline() const { + VkComputePipelineCreateInfo ci; + VkPipelineShaderStageCreateInfo& stage_ci = ci.stage; + stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage_ci.pNext = nullptr; + stage_ci.flags = 0; + stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; + stage_ci.module = *shader_module; + stage_ci.pName = "main"; + stage_ci.pSpecializationInfo = nullptr; + + VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; + subgroup_size_ci.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; + subgroup_size_ci.pNext = nullptr; subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; - if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) { - shader_stage_ci.pNext = &subgroup_size_ci; + + if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { + stage_ci.pNext = &subgroup_size_ci; } - const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0); - const auto dev = device.GetLogical(); - return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader()); + ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.layout = *layout; + ci.basePipelineHandle = nullptr; + ci.basePipelineIndex = 0; + return device.GetLogical().CreateComputePipeline(ci); } } // namespace Vulkan diff --git 
a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 22235c6c9..33b9af29e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -7,9 +7,9 @@ #include <memory> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -25,42 +25,42 @@ public: const SPIRVShader& shader); ~VKComputePipeline(); - vk::DescriptorSet CommitDescriptorSet(); + VkDescriptorSet CommitDescriptorSet(); - vk::Pipeline GetHandle() const { + VkPipeline GetHandle() const { return *pipeline; } - vk::PipelineLayout GetLayout() const { + VkPipelineLayout GetLayout() const { return *layout; } - const ShaderEntries& GetEntries() { + const ShaderEntries& GetEntries() const { return entries; } private: - UniqueDescriptorSetLayout CreateDescriptorSetLayout() const; + vk::DescriptorSetLayout CreateDescriptorSetLayout() const; - UniquePipelineLayout CreatePipelineLayout() const; + vk::PipelineLayout CreatePipelineLayout() const; - UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const; + vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; - UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const; + vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; - UniquePipeline CreatePipeline() const; + vk::Pipeline CreatePipeline() const; const VKDevice& device; VKScheduler& scheduler; ShaderEntries entries; - UniqueDescriptorSetLayout descriptor_set_layout; + vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; VKUpdateDescriptorQueue& update_descriptor_queue; - UniquePipelineLayout layout; - UniqueDescriptorUpdateTemplate descriptor_template; - UniqueShaderModule shader_module; - UniquePipeline pipeline; + vk::PipelineLayout layout; + vk::DescriptorUpdateTemplateKHR descriptor_template; + vk::ShaderModule shader_module; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index cc7c281a0..e9d528aa6 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -6,10 +6,10 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -17,19 +17,18 @@ namespace Vulkan { constexpr std::size_t SETS_GROW_RATE = 0x20; DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, - vk::DescriptorSetLayout layout) + VkDescriptorSetLayout layout) : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {} DescriptorAllocator::~DescriptorAllocator() = default; -vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) { - return *descriptors[CommitResource(fence)]; +VkDescriptorSet DescriptorAllocator::Commit(VKFence& fence) { + const std::size_t index = CommitResource(fence); + return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; } void 
DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin); - descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()), - std::make_move_iterator(new_sets.end())); + descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); } VKDescriptorPool::VKDescriptorPool(const VKDevice& device) @@ -37,53 +36,50 @@ VKDescriptorPool::VKDescriptorPool(const VKDevice& device) VKDescriptorPool::~VKDescriptorPool() = default; -vk::DescriptorPool VKDescriptorPool::AllocateNewPool() { +vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { static constexpr u32 num_sets = 0x20000; - static constexpr vk::DescriptorPoolSize pool_sizes[] = { - {vk::DescriptorType::eUniformBuffer, num_sets * 90}, - {vk::DescriptorType::eStorageBuffer, num_sets * 60}, - {vk::DescriptorType::eUniformTexelBuffer, num_sets * 64}, - {vk::DescriptorType::eCombinedImageSampler, num_sets * 64}, - {vk::DescriptorType::eStorageImage, num_sets * 40}}; - - const vk::DescriptorPoolCreateInfo create_info( - vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets, - static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes)); - const auto dev = device.GetLogical(); - return *pools.emplace_back( - dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader())); + static constexpr VkDescriptorPoolSize pool_sizes[] = { + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, + {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, + {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; + + VkDescriptorPoolCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + ci.maxSets = num_sets; + ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes)); + ci.pPoolSizes = std::data(pool_sizes); + return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); } -std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors( - vk::DescriptorSetLayout layout, std::size_t count) { - std::vector layout_copies(count, layout); - vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count), - layout_copies.data()); - - std::vector<vk::DescriptorSet> sets(count); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) { - case vk::Result::eSuccess: - break; - case vk::Result::eErrorOutOfPoolMemory: - active_pool = AllocateNewPool(); - allocate_info.descriptorPool = active_pool; - if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) { - break; - } - [[fallthrough]]; - default: - vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique"); +vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, + std::size_t count) { + const std::vector layout_copies(count, layout); + VkDescriptorSetAllocateInfo ai; + ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + ai.pNext = nullptr; + ai.descriptorPool = **active_pool; + ai.descriptorSetCount = static_cast<u32>(count); + ai.pSetLayouts = layout_copies.data(); + + vk::DescriptorSets sets = active_pool->Allocate(ai); + if (!sets.IsOutOfPoolMemory()) { + return sets; } - vk::PoolFree 
deleter(dev, active_pool, dld); - std::vector<UniqueDescriptorSet> unique_sets; - unique_sets.reserve(count); - for (const auto set : sets) { - unique_sets.push_back(UniqueDescriptorSet{set, deleter}); + // Our current pool is out of memory. Allocate a new one and retry + active_pool = AllocateNewPool(); + ai.descriptorPool = **active_pool; + sets = active_pool->Allocate(ai); + if (!sets.IsOutOfPoolMemory()) { + return sets; } - return unique_sets; + + // After allocating a new pool, we are out of memory again. We can't handle this from here. + throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index a441dbc0f..ab40c70f0 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -8,8 +8,8 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -17,21 +17,21 @@ class VKDescriptorPool; class DescriptorAllocator final : public VKFencedPool { public: - explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout); + explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); ~DescriptorAllocator() override; DescriptorAllocator(const DescriptorAllocator&) = delete; - vk::DescriptorSet Commit(VKFence& fence); + VkDescriptorSet Commit(VKFence& fence); protected: void Allocate(std::size_t begin, std::size_t end) override; private: VKDescriptorPool& descriptor_pool; - const vk::DescriptorSetLayout layout; + const VkDescriptorSetLayout layout; - std::vector<UniqueDescriptorSet> descriptors; + std::vector<vk::DescriptorSets> descriptors_allocations; }; class VKDescriptorPool final { @@ -42,15 +42,14 @@ public: ~VKDescriptorPool(); private: - vk::DescriptorPool AllocateNewPool(); + vk::DescriptorPool* AllocateNewPool(); - std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout, - std::size_t count); + vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); const VKDevice& device; - std::vector<UniqueDescriptorPool> pools; - vk::DescriptorPool active_pool; + std::vector<vk::DescriptorPool> pools; + vk::DescriptorPool* active_pool; }; } // namespace Vulkan
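For reference, a condensed sketch of the allocate-and-retry flow implemented above, written against the raw Vulkan C API instead of the project's vk:: wrapper (CreateNewPool is a hypothetical helper, and the caller is assumed to size sets to ai.descriptorSetCount):

    #include <vector>
    #include <vulkan/vulkan.h>

    VkResult AllocateSets(VkDevice device, VkDescriptorSetAllocateInfo& ai,
                          std::vector<VkDescriptorSet>& sets) {
        VkResult result = vkAllocateDescriptorSets(device, &ai, sets.data());
        if (result != VK_ERROR_OUT_OF_POOL_MEMORY) {
            return result; // success, or an error a retry cannot fix
        }
        // The active pool is exhausted: switch to a fresh pool and retry once.
        ai.descriptorPool = CreateNewPool(device); // hypothetical helper
        return vkAllocateDescriptorSets(device, &ai, sets.data());
    }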
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 7aafb5e59..52d29e49d 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -6,14 +6,15 @@ #include <chrono> #include <cstdlib> #include <optional> -#include <set> #include <string_view> #include <thread> +#include <unordered_set> #include <vector> + #include "common/assert.h" #include "core/settings.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -21,49 +22,43 @@ namespace { namespace Alternatives { -constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint, - vk::Format::eD16UnormS8Uint, vk::Format{}}; -constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint, - vk::Format::eD32SfloatS8Uint, vk::Format{}}; +constexpr std::array Depth24UnormS8_UINT = {VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_D16_UNORM_S8_UINT, VkFormat{}}; +constexpr std::array Depth16UnormS8_UINT = {VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, VkFormat{}}; } // namespace Alternatives +constexpr std::array REQUIRED_EXTENSIONS = { + VK_KHR_SWAPCHAIN_EXTENSION_NAME, + VK_KHR_16BIT_STORAGE_EXTENSION_NAME, + VK_KHR_8BIT_STORAGE_EXTENSION_NAME, + VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, + VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, + VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, + VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, + VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, +}; + template <typename T> void SetNext(void**& next, T& data) { *next = &data; next = &data.pNext; } -template <typename T> -T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) { - vk::PhysicalDeviceFeatures2 features; - T extension_features; - features.pNext = &extension_features; - physical.getFeatures2(&features, dldi); - return extension_features; -} - -template <typename T> -T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) { - vk::PhysicalDeviceProperties2 properties; - T extension_properties; - properties.pNext = &extension_properties; - physical.getProperties2(&properties, dldi); - return extension_properties; -} - -constexpr const vk::Format* GetFormatAlternatives(vk::Format format) { +constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { switch (format) { - case vk::Format::eD24UnormS8Uint: - return Alternatives::Depth24UnormS8Uint.data(); - case vk::Format::eD16UnormS8Uint: - return Alternatives::Depth16UnormS8Uint.data(); + case VK_FORMAT_D24_UNORM_S8_UINT: + return Alternatives::Depth24UnormS8_UINT.data(); + case VK_FORMAT_D16_UNORM_S8_UINT: + return Alternatives::Depth16UnormS8_UINT.data(); default: return nullptr; } } -vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, FormatType format_type) { +VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType format_type) { switch (format_type) { case FormatType::Linear: return properties.linearTilingFeatures; @@ -76,79 +71,220 @@ vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, Format } } +std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( + vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { + static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32, + VK_FORMAT_A8B8G8R8_UINT_PACK32, + 
VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_SRGB_PACK32, + VK_FORMAT_B5G6R5_UNORM_PACK16, + VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R32G32B32A32_UINT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32_UINT, + VK_FORMAT_R16G16B16A16_UINT, + VK_FORMAT_R16G16B16A16_SNORM, + VK_FORMAT_R16G16B16A16_UNORM, + VK_FORMAT_R16G16_UNORM, + VK_FORMAT_R16G16_SNORM, + VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16_UNORM, + VK_FORMAT_R8G8B8A8_SRGB, + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_UINT, + VK_FORMAT_B10G11R11_UFLOAT_PACK32, + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32_UINT, + VK_FORMAT_R32_SINT, + VK_FORMAT_R16_SFLOAT, + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_D16_UNORM, + VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC7_UNORM_BLOCK, + VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC6H_SFLOAT_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, + VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, + VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, + VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, + VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, + VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}; + std::unordered_map<VkFormat, VkFormatProperties> format_properties; + for (const auto format : formats) { + format_properties.emplace(format, physical.GetFormatProperties(format)); + } + return format_properties; +} + } // Anonymous namespace -VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, - vk::SurfaceKHR surface) - : physical{physical}, properties{physical.getProperties(dldi)}, - format_properties{GetFormatProperties(dldi, physical)} { - SetupFamilies(dldi, surface); - SetupFeatures(dldi); +VKDevice::VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld) + : dld{dld}, physical{physical}, properties{physical.GetProperties()}, + format_properties{GetFormatProperties(physical, dld)} { + SetupFamilies(surface); + SetupFeatures(); } VKDevice::~VKDevice() = default; -bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { +bool VKDevice::Create() { const auto queue_cis = GetDeviceQueueCreateInfos(); - const std::vector extensions = LoadExtensions(dldi); + const std::vector extensions = LoadExtensions(); - vk::PhysicalDeviceFeatures2 features2; + VkPhysicalDeviceFeatures2 features2; + features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + features2.pNext = nullptr; void** next = &features2.pNext; + auto& features = features2.features; - features.vertexPipelineStoresAndAtomics = true; + features.robustBufferAccess = false; + features.fullDrawIndexUint32 = false; + features.imageCubeArray 
= false; features.independentBlend = true; + features.geometryShader = true; + features.tessellationShader = true; + features.sampleRateShading = false; + features.dualSrcBlend = false; + features.logicOp = false; + features.multiDrawIndirect = false; + features.drawIndirectFirstInstance = false; features.depthClamp = true; - features.samplerAnisotropy = true; + features.depthBiasClamp = true; + features.fillModeNonSolid = false; + features.depthBounds = false; + features.wideLines = false; features.largePoints = true; + features.alphaToOne = false; features.multiViewport = true; - features.depthBiasClamp = true; - features.geometryShader = true; - features.tessellationShader = true; + features.samplerAnisotropy = true; + features.textureCompressionETC2 = false; + features.textureCompressionASTC_LDR = is_optimal_astc_supported; + features.textureCompressionBC = false; features.occlusionQueryPrecise = true; + features.pipelineStatisticsQuery = false; + features.vertexPipelineStoresAndAtomics = true; features.fragmentStoresAndAtomics = true; + features.shaderTessellationAndGeometryPointSize = false; features.shaderImageGatherExtended = true; + features.shaderStorageImageExtendedFormats = false; + features.shaderStorageImageMultisample = false; features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; features.shaderStorageImageWriteWithoutFormat = true; - features.textureCompressionASTC_LDR = is_optimal_astc_supported; - - vk::PhysicalDevice16BitStorageFeaturesKHR bit16_storage; + features.shaderUniformBufferArrayDynamicIndexing = false; + features.shaderSampledImageArrayDynamicIndexing = false; + features.shaderStorageBufferArrayDynamicIndexing = false; + features.shaderStorageImageArrayDynamicIndexing = false; + features.shaderClipDistance = false; + features.shaderCullDistance = false; + features.shaderFloat64 = false; + features.shaderInt64 = false; + features.shaderInt16 = false; + features.shaderResourceResidency = false; + features.shaderResourceMinLod = false; + features.sparseBinding = false; + features.sparseResidencyBuffer = false; + features.sparseResidencyImage2D = false; + features.sparseResidencyImage3D = false; + features.sparseResidency2Samples = false; + features.sparseResidency4Samples = false; + features.sparseResidency8Samples = false; + features.sparseResidency16Samples = false; + features.sparseResidencyAliased = false; + features.variableMultisampleRate = false; + features.inheritedQueries = false; + + VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage; + bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR; + bit16_storage.pNext = nullptr; + bit16_storage.storageBuffer16BitAccess = false; bit16_storage.uniformAndStorageBuffer16BitAccess = true; + bit16_storage.storagePushConstant16 = false; + bit16_storage.storageInputOutput16 = false; SetNext(next, bit16_storage); - vk::PhysicalDevice8BitStorageFeaturesKHR bit8_storage; + VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage; + bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR; + bit8_storage.pNext = nullptr; + bit8_storage.storageBuffer8BitAccess = false; bit8_storage.uniformAndStorageBuffer8BitAccess = true; + bit8_storage.storagePushConstant8 = false; SetNext(next, bit8_storage); - vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; + VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; + host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT; host_query_reset.hostQueryReset = 
true; SetNext(next, host_query_reset); - vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { + float16_int8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; + float16_int8.pNext = nullptr; float16_int8.shaderFloat16 = true; + float16_int8.shaderInt8 = false; SetNext(next, float16_int8); } else { LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); } - vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { + std430_layout.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR; + std430_layout.pNext = nullptr; std430_layout.uniformBufferStandardLayout = true; SetNext(next, std430_layout); } else { LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); } - vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; + VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; if (ext_index_type_uint8) { + index_type_uint8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT; + index_type_uint8.pNext = nullptr; index_type_uint8.indexTypeUint8 = true; SetNext(next, index_type_uint8); } else { LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); } - vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; + VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; if (ext_transform_feedback) { + transform_feedback.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; + transform_feedback.pNext = nullptr; transform_feedback.transformFeedback = true; transform_feedback.geometryStreams = true; SetNext(next, transform_feedback); @@ -160,62 +296,48 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } - vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0, - nullptr, static_cast<u32>(extensions.size()), extensions.data(), - nullptr); - device_ci.pNext = &features2; - - vk::Device dummy_logical; - if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { - LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); + logical = vk::Device::Create(physical, queue_cis, extensions, features2, dld); + if (!logical) { + LOG_ERROR(Render_Vulkan, "Failed to create logical device"); return false; } - dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr); - logical = UniqueDevice( - dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); - CollectTelemetryParameters(); - graphics_queue = logical->getQueue(graphics_family, 0, dld); - present_queue = logical->getQueue(present_family, 0, dld); + graphics_queue = logical.GetQueue(graphics_family); + present_queue = logical.GetQueue(present_family); return true; } -vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, - vk::FormatFeatureFlags wanted_usage, - FormatType format_type) const { +VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const { if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { return wanted_format; } // The wanted format is not supported by hardware, search for alternatives - const vk::Format* alternatives = 
GetFormatAlternatives(wanted_format); + const VkFormat* alternatives = GetFormatAlternatives(wanted_format); if (alternatives == nullptr) { UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host " "hardware does not support it", - vk::to_string(wanted_format), vk::to_string(wanted_usage), - static_cast<u32>(format_type)); + wanted_format, wanted_usage, format_type); return wanted_format; } std::size_t i = 0; - for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; - alternative = alternatives[++i]) { + for (VkFormat alternative = *alternatives; alternative; alternative = alternatives[++i]) { if (!IsFormatSupported(alternative, wanted_usage, format_type)) { continue; } LOG_WARNING(Render_Vulkan, "Emulating format={} with alternative format={} with usage={} and type={}", - static_cast<u32>(wanted_format), static_cast<u32>(alternative), - static_cast<u32>(wanted_usage), static_cast<u32>(format_type)); + wanted_format, alternative, wanted_usage, format_type); return alternative; } // No alternatives found, panic UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and " "doesn't support any of the alternatives", - static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), - static_cast<u32>(format_type)); + wanted_format, wanted_usage, format_type); return wanted_format; } @@ -229,38 +351,39 @@ void VKDevice::ReportLoss() const { return; } - [[maybe_unused]] const std::vector data = graphics_queue.getCheckpointDataNV(dld); + [[maybe_unused]] const std::vector data = graphics_queue.GetCheckpointDataNV(dld); // Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be // executed. It can be done on a debugger by evaluating the expression: // *(VKGraphicsPipeline*)data[0] } -bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, - const vk::DispatchLoaderDynamic& dldi) const { +bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { + // Disable for now to avoid converting ASTC twice. 
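A note on the fallback walk above: the alternative tables in Alternatives:: are zero-terminated, and VkFormat{} equals VK_FORMAT_UNDEFINED (0), so the loop can test the element directly. A sketch of the same walk, assuming a hypothetical IsSupported query:

    #include <vulkan/vulkan.h>

    constexpr VkFormat kDepth24Alternatives[] = {
        VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D16_UNORM_S8_UINT, VkFormat{}};

    VkFormat PickAlternative(const VkFormat* alternatives) {
        for (const VkFormat* it = alternatives; *it != VkFormat{}; ++it) {
            if (IsSupported(*it)) { // hypothetical support query
                return *it;
            }
        }
        return VK_FORMAT_UNDEFINED; // no alternative is supported
    }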
static constexpr std::array astc_formats = { - vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, - vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock, - vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock, - vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock, - vk::Format::eAstc8x5UnormBlock, vk::Format::eAstc8x5SrgbBlock, - vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc8x8UnormBlock, vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc10x5UnormBlock, vk::Format::eAstc10x5SrgbBlock, - vk::Format::eAstc10x6UnormBlock, vk::Format::eAstc10x6SrgbBlock, - vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock, - vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, - vk::Format::eAstc12x10UnormBlock, vk::Format::eAstc12x10SrgbBlock, - vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock}; + VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK, VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK, VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK, VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK, VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + }; if (!features.textureCompressionASTC_LDR) { return false; } const auto format_feature_usage{ - vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | - vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | - vk::FormatFeatureFlagBits::eTransferDst}; + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT}; for (const auto format : astc_formats) { - const auto format_properties{physical.getFormatProperties(format, dldi)}; + const auto format_properties{physical.GetFormatProperties(format)}; if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { return false; } @@ -268,62 +391,49 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features return true; } -bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, +bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const { const auto it = format_properties.find(wanted_format); if (it == format_properties.end()) { - UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format)); + UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format); return true; } const auto supported_usage = GetFormatFeatures(it->second, format_type); return (supported_usage & wanted_usage) == wanted_usage; } -bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, - vk::SurfaceKHR surface) { +bool 
VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { bool is_suitable = true; + std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; - constexpr std::array required_extensions = { - VK_KHR_SWAPCHAIN_EXTENSION_NAME, - VK_KHR_16BIT_STORAGE_EXTENSION_NAME, - VK_KHR_8BIT_STORAGE_EXTENSION_NAME, - VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, - VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, - VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, - VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, - VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, - }; - std::bitset<required_extensions.size()> available_extensions{}; - - for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { - for (std::size_t i = 0; i < required_extensions.size(); ++i) { + for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) { + for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { if (available_extensions[i]) { continue; } - available_extensions[i] = - required_extensions[i] == std::string_view{prop.extensionName}; + const std::string_view name{prop.extensionName}; + available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; } } if (!available_extensions.all()) { - for (std::size_t i = 0; i < required_extensions.size(); ++i) { + for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { if (available_extensions[i]) { continue; } - LOG_ERROR(Render_Vulkan, "Missing required extension: {}", required_extensions[i]); + LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); is_suitable = false; } } bool has_graphics{}, has_present{}; - const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { const auto& family = queue_family_properties[i]; if (family.queueCount == 0) { continue; } - has_graphics |= - (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); - has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; + has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT; + has_present |= physical.GetSurfaceSupportKHR(i, surface); } if (!has_graphics || !has_present) { LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue"); @@ -331,7 +441,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } // TODO(Rodrigo): Check if the device matches all requeriments. 
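The extension scan in IsSuitable above reduces to: flip one bit per required extension while enumerating, then demand all bits. A condensed sketch, where device_extensions stands in for the enumerated VkExtensionProperties list and REQUIRED_EXTENSIONS is the array defined earlier in this diff:

    #include <bitset>
    #include <cstddef>
    #include <string_view>

    std::bitset<REQUIRED_EXTENSIONS.size()> available;
    for (const VkExtensionProperties& prop : device_extensions) {
        const std::string_view name{prop.extensionName};
        for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
            if (name == REQUIRED_EXTENSIONS[i]) {
                available[i] = true;
            }
        }
    }
    const bool all_extensions_present = available.all();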
- const auto properties{physical.getProperties(dldi)}; + const auto properties{physical.GetProperties()}; const auto& limits{properties.limits}; constexpr u32 required_ubo_size = 65536; @@ -348,7 +458,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev is_suitable = false; } - const auto features{physical.getFeatures(dldi)}; + const auto features{physical.GetFeatures()}; const std::array feature_report = { std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), std::make_pair(features.independentBlend, "independentBlend"), @@ -380,9 +490,9 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev return is_suitable; } -std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { +std::vector<const char*> VKDevice::LoadExtensions() { std::vector<const char*> extensions; - const auto Test = [&](const vk::ExtensionProperties& extension, + const auto Test = [&](const VkExtensionProperties& extension, std::optional<std::reference_wrapper<bool>> status, const char* name, bool push) { if (extension.extensionName != std::string_view(name)) { @@ -396,22 +506,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } }; - extensions.reserve(15); - extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); - extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); - extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); - extensions.push_back(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME); - extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); - extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); - extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); - extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); - - [[maybe_unused]] const bool nsight = - std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); + extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); + extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); + bool has_khr_shader_float16_int8{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; - for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) { Test(extension, khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, @@ -431,38 +532,67 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } } + VkPhysicalDeviceFeatures2KHR features; + features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; + + VkPhysicalDeviceProperties2KHR properties; + properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + if (has_khr_shader_float16_int8) { - is_float16_supported = - GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features; + float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; + float16_int8_features.pNext = nullptr; + features.pNext = &float16_int8_features; + + physical.GetFeatures2KHR(features); + is_float16_supported = float16_int8_features.shaderFloat16; extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } if (has_ext_subgroup_size_control) { - const auto features = - 
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); - const auto properties = - GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi); - - is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize; - - if (features.subgroupSizeControl && properties.minSubgroupSize <= GuestWarpSize && - properties.maxSubgroupSize >= GuestWarpSize) { + VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features; + subgroup_features.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT; + subgroup_features.pNext = nullptr; + features.pNext = &subgroup_features; + physical.GetFeatures2KHR(features); + + VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_properties; + subgroup_properties.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT; + subgroup_properties.pNext = nullptr; + properties.pNext = &subgroup_properties; + physical.GetProperties2KHR(properties); + + is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; + + if (subgroup_features.subgroupSizeControl && + subgroup_properties.minSubgroupSize <= GuestWarpSize && + subgroup_properties.maxSubgroupSize >= GuestWarpSize) { extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); - guest_warp_stages = properties.requiredSubgroupSizeStages; + guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; } } else { is_warp_potentially_bigger = true; } if (has_ext_transform_feedback) { - const auto features = - GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); - const auto properties = - GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); - - if (features.transformFeedback && features.geometryStreams && - properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && - properties.transformFeedbackQueries && properties.transformFeedbackDraw) { + VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; + tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; + tfb_features.pNext = nullptr; + features.pNext = &tfb_features; + physical.GetFeatures2KHR(features); + + VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; + tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; + tfb_properties.pNext = nullptr; + properties.pNext = &tfb_properties; + physical.GetProperties2KHR(properties); + + if (tfb_features.transformFeedback && tfb_features.geometryStreams && + tfb_properties.maxTransformFeedbackStreams >= 4 && + tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries && + tfb_properties.transformFeedbackDraw) { extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); ext_transform_feedback = true; } @@ -471,10 +601,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami return extensions; } -void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { +void VKDevice::SetupFamilies(VkSurfaceKHR surface) { std::optional<u32> graphics_family_, present_family_; - const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { if (graphics_family_ && present_family_) break; @@ -483,10 +613,12 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& 
dldi, vk::SurfaceK if (queue_family.queueCount == 0) continue; - if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) + if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { graphics_family_ = i; - if (physical.getSurfaceSupportKHR(i, surface, dldi)) + } + if (physical.GetSurfaceSupportKHR(i, surface)) { present_family_ = i; + } } ASSERT(graphics_family_ && present_family_); @@ -494,121 +626,49 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK present_family = *present_family_; } -void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { - const auto supported_features{physical.getFeatures(dldi)}; +void VKDevice::SetupFeatures() { + const auto supported_features{physical.GetFeatures()}; is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); + is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); } void VKDevice::CollectTelemetryParameters() { - const auto driver = GetProperties<vk::PhysicalDeviceDriverPropertiesKHR>(physical, dld); + VkPhysicalDeviceDriverPropertiesKHR driver; + driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR; + driver.pNext = nullptr; + + VkPhysicalDeviceProperties2KHR properties; + properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties.pNext = &driver; + physical.GetProperties2KHR(properties); + driver_id = driver.driverID; vendor_name = driver.driverName; - const auto extensions = physical.enumerateDeviceExtensionProperties(nullptr, dld); + const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); reported_extensions.reserve(std::size(extensions)); for (const auto& extension : extensions) { reported_extensions.push_back(extension.extensionName); } } -std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { - static const float QUEUE_PRIORITY = 1.0f; +std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { + static constexpr float QUEUE_PRIORITY = 1.0f; - std::set<u32> unique_queue_families = {graphics_family, present_family}; - std::vector<vk::DeviceQueueCreateInfo> queue_cis; + std::unordered_set<u32> unique_queue_families = {graphics_family, present_family}; + std::vector<VkDeviceQueueCreateInfo> queue_cis; - for (u32 queue_family : unique_queue_families) - queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY}); + for (const u32 queue_family : unique_queue_families) { + VkDeviceQueueCreateInfo& ci = queue_cis.emplace_back(); + ci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.queueFamilyIndex = queue_family; + ci.queueCount = 1; + ci.pQueuePriorities = &QUEUE_PRIORITY; + } return queue_cis; } -std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( - const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { - static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, - vk::Format::eA8B8G8R8UintPack32, - vk::Format::eA8B8G8R8SnormPack32, - vk::Format::eA8B8G8R8SrgbPack32, - vk::Format::eB5G6R5UnormPack16, - vk::Format::eA2B10G10R10UnormPack32, - vk::Format::eA1R5G5B5UnormPack16, - vk::Format::eR32G32B32A32Sfloat, - vk::Format::eR32G32B32A32Uint, - vk::Format::eR32G32Sfloat, - vk::Format::eR32G32Uint, - vk::Format::eR16G16B16A16Uint, - vk::Format::eR16G16B16A16Snorm, - vk::Format::eR16G16B16A16Unorm, - vk::Format::eR16G16Unorm, - vk::Format::eR16G16Snorm, - 
vk::Format::eR16G16Sfloat, - vk::Format::eR16Unorm, - vk::Format::eR8G8B8A8Srgb, - vk::Format::eR8G8Unorm, - vk::Format::eR8G8Snorm, - vk::Format::eR8Unorm, - vk::Format::eR8Uint, - vk::Format::eB10G11R11UfloatPack32, - vk::Format::eR32Sfloat, - vk::Format::eR32Uint, - vk::Format::eR32Sint, - vk::Format::eR16Sfloat, - vk::Format::eR16G16B16A16Sfloat, - vk::Format::eB8G8R8A8Unorm, - vk::Format::eR4G4B4A4UnormPack16, - vk::Format::eD32Sfloat, - vk::Format::eD16Unorm, - vk::Format::eD16UnormS8Uint, - vk::Format::eD24UnormS8Uint, - vk::Format::eD32SfloatS8Uint, - vk::Format::eBc1RgbaUnormBlock, - vk::Format::eBc2UnormBlock, - vk::Format::eBc3UnormBlock, - vk::Format::eBc4UnormBlock, - vk::Format::eBc5UnormBlock, - vk::Format::eBc5SnormBlock, - vk::Format::eBc7UnormBlock, - vk::Format::eBc6HUfloatBlock, - vk::Format::eBc6HSfloatBlock, - vk::Format::eBc1RgbaSrgbBlock, - vk::Format::eBc2SrgbBlock, - vk::Format::eBc3SrgbBlock, - vk::Format::eBc7SrgbBlock, - vk::Format::eAstc4x4UnormBlock, - vk::Format::eAstc4x4SrgbBlock, - vk::Format::eAstc5x4UnormBlock, - vk::Format::eAstc5x4SrgbBlock, - vk::Format::eAstc5x5UnormBlock, - vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc6x5UnormBlock, - vk::Format::eAstc6x5SrgbBlock, - vk::Format::eAstc6x6UnormBlock, - vk::Format::eAstc6x6SrgbBlock, - vk::Format::eAstc8x5UnormBlock, - vk::Format::eAstc8x5SrgbBlock, - vk::Format::eAstc8x6UnormBlock, - vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc8x8UnormBlock, - vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc10x5UnormBlock, - vk::Format::eAstc10x5SrgbBlock, - vk::Format::eAstc10x6UnormBlock, - vk::Format::eAstc10x6SrgbBlock, - vk::Format::eAstc10x8UnormBlock, - vk::Format::eAstc10x8SrgbBlock, - vk::Format::eAstc10x10UnormBlock, - vk::Format::eAstc10x10SrgbBlock, - vk::Format::eAstc12x10UnormBlock, - vk::Format::eAstc12x10SrgbBlock, - vk::Format::eAstc12x12UnormBlock, - vk::Format::eAstc12x12SrgbBlock, - vk::Format::eE5B9G9R9UfloatPack32}; - std::unordered_map<vk::Format, vk::FormatProperties> format_properties; - for (const auto format : formats) { - format_properties.emplace(format, physical.getFormatProperties(format, dldi)); - } - return format_properties; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 6e656517f..60d64572a 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -8,8 +8,9 @@ #include <string_view> #include <unordered_map> #include <vector> + #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -22,12 +23,12 @@ const u32 GuestWarpSize = 32; /// Handles data specific to a physical device. class VKDevice final { public: - explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, - vk::SurfaceKHR surface); + explicit VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld); ~VKDevice(); /// Initializes the device. Returns true on success. - bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance); + bool Create(); /** * Returns a format supported by the device for the passed requeriments. @@ -36,20 +37,20 @@ public: * @param format_type Format type usage. * @returns A format supported by the device. 
*/ - vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, - FormatType format_type) const; + VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const; /// Reports a device loss. void ReportLoss() const; /// Returns the dispatch loader with direct function pointers of the device. - const vk::DispatchLoaderDynamic& GetDispatchLoader() const { + const vk::DeviceDispatch& GetDispatchLoader() const { return dld; } /// Returns the logical device. - vk::Device GetLogical() const { - return logical.get(); + const vk::Device& GetLogical() const { + return logical; } /// Returns the physical device. @@ -79,7 +80,7 @@ public: /// Returns true if the device is integrated with the host CPU. bool IsIntegrated() const { - return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu; + return properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; } /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. @@ -98,27 +99,27 @@ public: } /// Returns the driver ID. - vk::DriverIdKHR GetDriverID() const { + VkDriverIdKHR GetDriverID() const { return driver_id; } /// Returns uniform buffer alignment requeriment. - vk::DeviceSize GetUniformBufferAlignment() const { + VkDeviceSize GetUniformBufferAlignment() const { return properties.limits.minUniformBufferOffsetAlignment; } /// Returns storage alignment requeriment. - vk::DeviceSize GetStorageBufferAlignment() const { + VkDeviceSize GetStorageBufferAlignment() const { return properties.limits.minStorageBufferOffsetAlignment; } /// Returns the maximum range for storage buffers. - vk::DeviceSize GetMaxStorageBufferRange() const { + VkDeviceSize GetMaxStorageBufferRange() const { return properties.limits.maxStorageBufferRange; } /// Returns the maximum size for push constants. - vk::DeviceSize GetMaxPushConstantsSize() const { + VkDeviceSize GetMaxPushConstantsSize() const { return properties.limits.maxPushConstantsSize; } @@ -138,8 +139,8 @@ public: } /// Returns true if the device can be forced to use the guest warp size. - bool IsGuestWarpSizeSupported(vk::ShaderStageFlagBits stage) const { - return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; + bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { + return guest_warp_stages & stage; } /// Returns true if formatless image load is supported. @@ -188,50 +189,44 @@ public: } /// Checks if the physical device is suitable. - static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, - vk::SurfaceKHR surface); + static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); private: /// Loads extensions into a vector and stores available ones in this object. - std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi); + std::vector<const char*> LoadExtensions(); /// Sets up queue families. - void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); + void SetupFamilies(VkSurfaceKHR surface); /// Sets up device features. - void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); + void SetupFeatures(); /// Collects telemetry information from the device. void CollectTelemetryParameters(); /// Returns a list of queue initialization descriptors. - std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; + std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; /// Returns true if ASTC textures are natively supported. 
- bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, - const vk::DispatchLoaderDynamic& dldi) const; + bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; /// Returns true if a format is supported. - bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, + bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - /// Returns the device properties for Vulkan formats. - static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( - const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); - - const vk::PhysicalDevice physical; ///< Physical device. - vk::DispatchLoaderDynamic dld; ///< Device function pointers. - vk::PhysicalDeviceProperties properties; ///< Device properties. - UniqueDevice logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. - vk::DriverIdKHR driver_id{}; ///< Driver ID. - vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetics. - bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + VkDriverIdKHR driver_id{}; ///< Driver ID. + VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool is_float16_supported{}; ///< Support for float16 arithmetics. + bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. @@ -245,7 +240,7 @@ private: std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. /// Format properties dictionary. - std::unordered_map<vk::Format, vk::FormatProperties> format_properties; + std::unordered_map<VkFormat, VkFormatProperties> format_properties; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6a02403c1..b540b838d 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -2,11 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
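The format_properties dictionary above is filled once at device creation, so later IsFormatSupported lookups never call back into the driver. A sketch of that caching step with the raw entry point (the format list is an assumed input):

    #include <unordered_map>
    #include <vector>
    #include <vulkan/vulkan.h>

    std::unordered_map<VkFormat, VkFormatProperties> BuildFormatCache(
        VkPhysicalDevice physical, const std::vector<VkFormat>& formats) {
        std::unordered_map<VkFormat, VkFormatProperties> cache;
        for (const VkFormat format : formats) {
            VkFormatProperties properties;
            vkGetPhysicalDeviceFormatProperties(physical, format, &properties);
            cache.emplace(format, properties);
        }
        return cache;
    }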
+#include <array> +#include <cstring> #include <vector> + #include "common/assert.h" #include "common/common_types.h" #include "common/microprofile.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" @@ -16,6 +18,7 @@ #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -23,21 +26,26 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); namespace { -vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) { - return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail), - MaxwellToVK::StencilOp(face.action_depth_pass), - MaxwellToVK::StencilOp(face.action_depth_fail), - MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0); +VkStencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) { + VkStencilOpState state; + state.failOp = MaxwellToVK::StencilOp(face.action_stencil_fail); + state.passOp = MaxwellToVK::StencilOp(face.action_depth_pass); + state.depthFailOp = MaxwellToVK::StencilOp(face.action_depth_fail); + state.compareOp = MaxwellToVK::ComparisonOp(face.test_func); + state.compareMask = 0; + state.writeMask = 0; + state.reference = 0; + return state; } -bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) { +bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { static constexpr std::array unsupported_topologies = { - vk::PrimitiveTopology::ePointList, - vk::PrimitiveTopology::eLineList, - vk::PrimitiveTopology::eTriangleList, - vk::PrimitiveTopology::eLineListWithAdjacency, - vk::PrimitiveTopology::eTriangleListWithAdjacency, - vk::PrimitiveTopology::ePatchList}; + VK_PRIMITIVE_TOPOLOGY_POINT_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), topology) == std::end(unsupported_topologies); } @@ -49,7 +57,7 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche VKUpdateDescriptorQueue& update_descriptor_queue, VKRenderPassCache& renderpass_cache, const GraphicsPipelineCacheKey& key, - const std::vector<vk::DescriptorSetLayoutBinding>& bindings, + vk::Span<VkDescriptorSetLayoutBinding> bindings, const SPIRVProgram& program) : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, @@ -63,7 +71,7 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche VKGraphicsPipeline::~VKGraphicsPipeline() = default; -vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { +VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { if (!descriptor_template) { return {}; } @@ -72,27 +80,32 @@ vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { return set; } -UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( - const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const { - const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( - {}, static_cast<u32>(bindings.size()), bindings.data()); - - const auto dev = 
device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); +vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( + vk::Span<VkDescriptorSetLayoutBinding> bindings) const { + VkDescriptorSetLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.bindingCount = bindings.size(); + ci.pBindings = bindings.data(); + return device.GetLogical().CreateDescriptorSetLayout(ci); } -UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0, - nullptr); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); +vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { + VkPipelineLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.setLayoutCount = 1; + ci.pSetLayouts = descriptor_set_layout.address(); + ci.pushConstantRangeCount = 0; + ci.pPushConstantRanges = nullptr; + return device.GetLogical().CreatePipelineLayout(ci); } -UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate( +vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( const SPIRVProgram& program) const { - std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; + std::vector<VkDescriptorUpdateTemplateEntry> template_entries; u32 binding = 0; u32 offset = 0; for (const auto& stage : program) { @@ -102,38 +115,47 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat } if (template_entries.empty()) { // If the shader doesn't use descriptor sets, skip template creation. 
- return UniqueDescriptorUpdateTemplate{}; + return {}; } - const vk::DescriptorUpdateTemplateCreateInfo template_ci( - {}, static_cast<u32>(template_entries.size()), template_entries.data(), - vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, - vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); + VkDescriptorUpdateTemplateCreateInfoKHR ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; + ci.pNext = nullptr; + ci.flags = 0; + ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); + ci.pDescriptorUpdateEntries = template_entries.data(); + ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + ci.descriptorSetLayout = *descriptor_set_layout; + ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + ci.pipelineLayout = *layout; + ci.set = DESCRIPTOR_SET; + return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); } -std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules( +std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( const SPIRVProgram& program) const { - std::vector<UniqueShaderModule> modules; - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); + VkShaderModuleCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + + std::vector<vk::ShaderModule> modules; + modules.reserve(Maxwell::MaxShaderStage); for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { const auto& stage = program[i]; if (!stage) { continue; } - const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32), - stage->code.data()); - modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld)); + + ci.codeSize = stage->code.size() * sizeof(u32); + ci.pCode = stage->code.data(); + modules.push_back(device.GetLogical().CreateShaderModule(ci)); } return modules; } -UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, - const SPIRVProgram& program) const { +vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, + const SPIRVProgram& program) const { const auto& vi = fixed_state.vertex_input; const auto& ia = fixed_state.input_assembly; const auto& ds = fixed_state.depth_stencil; @@ -141,19 +163,26 @@ UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& render const auto& ts = fixed_state.tessellation; const auto& rs = fixed_state.rasterizer; - std::vector<vk::VertexInputBindingDescription> vertex_bindings; - std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; + std::vector<VkVertexInputBindingDescription> vertex_bindings; + std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; for (std::size_t i = 0; i < vi.num_bindings; ++i) { const auto& binding = vi.bindings[i]; const bool instanced = binding.divisor != 0; - const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex; - vertex_bindings.emplace_back(binding.index, binding.stride, rate); + const auto rate = instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + + auto& vertex_binding = vertex_bindings.emplace_back(); + vertex_binding.binding = binding.index; + vertex_binding.stride = binding.stride; + vertex_binding.inputRate = rate; + if (instanced) { - vertex_binding_divisors.emplace_back(binding.index, binding.divisor); + auto& binding_divisor = vertex_binding_divisors.emplace_back(); + binding_divisor.binding = binding.index; + binding_divisor.divisor = binding.divisor; } } - std::vector<vk::VertexInputAttributeDescription> vertex_attributes; + std::vector<VkVertexInputAttributeDescription> vertex_attributes; const auto& input_attributes = program[0]->entries.attributes; for (std::size_t i = 0; i < vi.num_attributes; ++i) { const auto& attribute = vi.attributes[i]; @@ -161,109 +190,194 @@ UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& render // Skip attributes not used by the vertex shaders. continue; } - vertex_attributes.emplace_back(attribute.index, attribute.buffer, - MaxwellToVK::VertexFormat(attribute.type, attribute.size), - attribute.offset); + auto& vertex_attribute = vertex_attributes.emplace_back(); + vertex_attribute.location = attribute.index; + vertex_attribute.binding = attribute.buffer; + vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.type, attribute.size); + vertex_attribute.offset = attribute.offset; } - vk::PipelineVertexInputStateCreateInfo vertex_input_ci( - {}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(), - static_cast<u32>(vertex_attributes.size()), vertex_attributes.data()); - - const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci( - static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data()); + VkPipelineVertexInputStateCreateInfo vertex_input_ci; + vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_input_ci.pNext = nullptr; + vertex_input_ci.flags = 0; + vertex_input_ci.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()); + vertex_input_ci.pVertexBindingDescriptions = vertex_bindings.data(); + vertex_input_ci.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()); + vertex_input_ci.pVertexAttributeDescriptions = vertex_attributes.data(); + + VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci; + input_divisor_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT; + input_divisor_ci.pNext = nullptr; + input_divisor_ci.vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()); + input_divisor_ci.pVertexBindingDivisors = vertex_binding_divisors.data(); if (!vertex_binding_divisors.empty()) { - vertex_input_ci.pNext = &vertex_input_divisor_ci; + vertex_input_ci.pNext = &input_divisor_ci; } - const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); - const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci( - {}, primitive_topology, - ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology)); - - const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points); - - const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr, - Maxwell::NumViewports, nullptr); - - // TODO(Rodrigo): Find out what's the default register value for front face - const vk::PipelineRasterizationStateCreateInfo rasterizer_ci( - {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill, - rs.cull_enable ? 
MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone, - MaxwellToVK::FrontFace(rs.front_face), rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f); - - const vk::PipelineMultisampleStateCreateInfo multisampling_ci( - {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false); - - const vk::CompareOp depth_test_compare = ds.depth_test_enable - ? MaxwellToVK::ComparisonOp(ds.depth_test_function) - : vk::CompareOp::eAlways; - - const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci( - {}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable, - ds.stencil_enable, GetStencilFaceState(ds.front_stencil), - GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f); - - std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; + VkPipelineInputAssemblyStateCreateInfo input_assembly_ci; + input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly_ci.pNext = nullptr; + input_assembly_ci.flags = 0; + input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); + input_assembly_ci.primitiveRestartEnable = + ia.primitive_restart_enable && SupportsPrimitiveRestart(input_assembly_ci.topology); + + VkPipelineTessellationStateCreateInfo tessellation_ci; + tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; + tessellation_ci.pNext = nullptr; + tessellation_ci.flags = 0; + tessellation_ci.patchControlPoints = ts.patch_control_points; + + VkPipelineViewportStateCreateInfo viewport_ci; + viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_ci.pNext = nullptr; + viewport_ci.flags = 0; + viewport_ci.viewportCount = Maxwell::NumViewports; + viewport_ci.pViewports = nullptr; + viewport_ci.scissorCount = Maxwell::NumViewports; + viewport_ci.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo rasterization_ci; + rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_ci.pNext = nullptr; + rasterization_ci.flags = 0; + rasterization_ci.depthClampEnable = rs.depth_clamp_enable; + rasterization_ci.rasterizerDiscardEnable = VK_FALSE; + rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; + rasterization_ci.cullMode = + rs.cull_enable ? 
MaxwellToVK::CullFace(rs.cull_face) : VK_CULL_MODE_NONE; + rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.front_face); + rasterization_ci.depthBiasEnable = rs.depth_bias_enable; + rasterization_ci.depthBiasConstantFactor = 0.0f; + rasterization_ci.depthBiasClamp = 0.0f; + rasterization_ci.depthBiasSlopeFactor = 0.0f; + rasterization_ci.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo multisample_ci; + multisample_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisample_ci.pNext = nullptr; + multisample_ci.flags = 0; + multisample_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + multisample_ci.sampleShadingEnable = VK_FALSE; + multisample_ci.minSampleShading = 0.0f; + multisample_ci.pSampleMask = nullptr; + multisample_ci.alphaToCoverageEnable = VK_FALSE; + multisample_ci.alphaToOneEnable = VK_FALSE; + + VkPipelineDepthStencilStateCreateInfo depth_stencil_ci; + depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + depth_stencil_ci.pNext = nullptr; + depth_stencil_ci.flags = 0; + depth_stencil_ci.depthTestEnable = ds.depth_test_enable; + depth_stencil_ci.depthWriteEnable = ds.depth_write_enable; + depth_stencil_ci.depthCompareOp = ds.depth_test_enable + ? MaxwellToVK::ComparisonOp(ds.depth_test_function) + : VK_COMPARE_OP_ALWAYS; + depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable; + depth_stencil_ci.stencilTestEnable = ds.stencil_enable; + depth_stencil_ci.front = GetStencilFaceState(ds.front_stencil); + depth_stencil_ci.back = GetStencilFaceState(ds.back_stencil); + depth_stencil_ci.minDepthBounds = 0.0f; + depth_stencil_ci.maxDepthBounds = 0.0f; + + std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; const std::size_t num_attachments = std::min(cd.attachments_count, renderpass_params.color_attachments.size()); for (std::size_t i = 0; i < num_attachments; ++i) { - constexpr std::array component_table{ - vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG, - vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA}; + static constexpr std::array component_table = { + VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, + VK_COLOR_COMPONENT_A_BIT}; const auto& blend = cd.attachments[i]; - vk::ColorComponentFlags color_components{}; + VkColorComponentFlags color_components = 0; for (std::size_t j = 0; j < component_table.size(); ++j) { - if (blend.components[j]) + if (blend.components[j]) { color_components |= component_table[j]; + } } - cb_attachments[i] = vk::PipelineColorBlendAttachmentState( - blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func), - MaxwellToVK::BlendFactor(blend.dst_rgb_func), - MaxwellToVK::BlendEquation(blend.rgb_equation), - MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func), - MaxwellToVK::BlendEquation(blend.a_equation), color_components); + VkPipelineColorBlendAttachmentState& attachment = cb_attachments[i]; + attachment.blendEnable = blend.enable; + attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.src_rgb_func); + attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.dst_rgb_func); + attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.rgb_equation); + attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.src_a_func); + attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.dst_a_func); + attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.a_equation); + attachment.colorWriteMask = 
color_components; } - const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy, - static_cast<u32>(num_attachments), - cb_attachments.data(), {}); - - constexpr std::array dynamic_states = { - vk::DynamicState::eViewport, vk::DynamicState::eScissor, - vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants, - vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask, - vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference}; - const vk::PipelineDynamicStateCreateInfo dynamic_state_ci( - {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data()); - - vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; + + VkPipelineColorBlendStateCreateInfo color_blend_ci; + color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend_ci.pNext = nullptr; + color_blend_ci.flags = 0; + color_blend_ci.logicOpEnable = VK_FALSE; + color_blend_ci.logicOp = VK_LOGIC_OP_COPY; + color_blend_ci.attachmentCount = static_cast<u32>(num_attachments); + color_blend_ci.pAttachments = cb_attachments.data(); + std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants)); + + static constexpr std::array dynamic_states = { + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE}; + + VkPipelineDynamicStateCreateInfo dynamic_state_ci; + dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state_ci.pNext = nullptr; + dynamic_state_ci.flags = 0; + dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size()); + dynamic_state_ci.pDynamicStates = dynamic_states.data(); + + VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; + subgroup_size_ci.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; + subgroup_size_ci.pNext = nullptr; subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; - std::vector<vk::PipelineShaderStageCreateInfo> shader_stages; + std::vector<VkPipelineShaderStageCreateInfo> shader_stages; std::size_t module_index = 0; for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { if (!program[stage]) { continue; } - const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage); - const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum); - auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage, - *modules[module_index++], "main", nullptr); - if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) { + VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); + stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage_ci.pNext = nullptr; + stage_ci.flags = 0; + stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); + stage_ci.module = *modules[module_index++]; + stage_ci.pName = "main"; + stage_ci.pSpecializationInfo = nullptr; + + if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; } } - const vk::GraphicsPipelineCreateInfo create_info( - {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci, - &input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci, - 
&depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld); + VkGraphicsPipelineCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.stageCount = static_cast<u32>(shader_stages.size()); + ci.pStages = shader_stages.data(); + ci.pVertexInputState = &vertex_input_ci; + ci.pInputAssemblyState = &input_assembly_ci; + ci.pTessellationState = &tessellation_ci; + ci.pViewportState = &viewport_ci; + ci.pRasterizationState = &rasterization_ci; + ci.pMultisampleState = &multisample_ci; + ci.pDepthStencilState = &depth_stencil_ci; + ci.pColorBlendState = &color_blend_ci; + ci.pDynamicState = &dynamic_state_ci; + ci.layout = *layout; + ci.renderPass = renderpass; + ci.subpass = 0; + ci.basePipelineHandle = nullptr; + ci.basePipelineIndex = 0; + return device.GetLogical().CreateGraphicsPipeline(ci); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4f5e4ea2d..7aba70960 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -11,12 +11,12 @@ #include <vector> #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -39,52 +39,52 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue, VKRenderPassCache& renderpass_cache, const GraphicsPipelineCacheKey& key, - const std::vector<vk::DescriptorSetLayoutBinding>& bindings, + vk::Span<VkDescriptorSetLayoutBinding> bindings, const SPIRVProgram& program); ~VKGraphicsPipeline(); - vk::DescriptorSet CommitDescriptorSet(); + VkDescriptorSet CommitDescriptorSet(); - vk::Pipeline GetHandle() const { + VkPipeline GetHandle() const { return *pipeline; } - vk::PipelineLayout GetLayout() const { + VkPipelineLayout GetLayout() const { return *layout; } - vk::RenderPass GetRenderPass() const { + VkRenderPass GetRenderPass() const { return renderpass; } private: - UniqueDescriptorSetLayout CreateDescriptorSetLayout( - const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const; + vk::DescriptorSetLayout CreateDescriptorSetLayout( + vk::Span<VkDescriptorSetLayoutBinding> bindings) const; - UniquePipelineLayout CreatePipelineLayout() const; + vk::PipelineLayout CreatePipelineLayout() const; - UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate( + vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( const SPIRVProgram& program) const; - std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const; + std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; - UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params, - const SPIRVProgram& program) const; + vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, + const SPIRVProgram& program) const; const VKDevice& device; VKScheduler& scheduler; const FixedPipelineState 
fixed_state; const u64 hash; - UniqueDescriptorSetLayout descriptor_set_layout; + vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; VKUpdateDescriptorQueue& update_descriptor_queue; - UniquePipelineLayout layout; - UniqueDescriptorUpdateTemplate descriptor_template; - std::vector<UniqueShaderModule> modules; + vk::PipelineLayout layout; + vk::DescriptorUpdateTemplateKHR descriptor_template; + std::vector<vk::ShaderModule> modules; - vk::RenderPass renderpass; - UniquePipeline pipeline; + VkRenderPass renderpass; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp index 4bcbef959..9bceb3861 100644 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ b/src/video_core/renderer_vulkan/vk_image.cpp @@ -6,22 +6,21 @@ #include <vector> #include "common/assert.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_image.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { -VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, - const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask) +VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, const VkImageCreateInfo& image_ci, + VkImageAspectFlags aspect_mask) : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask}, image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} { UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0, "Queue family tracking is not implemented"); - const auto dev = device.GetLogical(); - image = dev.createImageUnique(image_ci, nullptr, device.GetDispatchLoader()); + image = device.GetLogical().CreateImage(image_ci); const u32 num_ranges = image_num_layers * image_num_levels; barriers.resize(num_ranges); @@ -31,8 +30,8 @@ VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, VKImage::~VKImage() = default; void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, - vk::ImageLayout new_layout) { + VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, + VkImageLayout new_layout) { if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { return; } @@ -43,9 +42,21 @@ void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num const u32 layer = base_layer + layer_it; const u32 level = base_level + level_it; auto& state = GetSubrangeState(layer, level); - barriers[cursor] = vk::ImageMemoryBarrier( - state.access, new_access, state.layout, new_layout, VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, *image, {aspect_mask, level, 1, layer, 1}); + auto& barrier = barriers[cursor]; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = state.access; + barrier.dstAccessMask = new_access; + barrier.oldLayout = state.layout; + barrier.newLayout = new_layout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = *image; + barrier.subresourceRange.aspectMask = aspect_mask; + barrier.subresourceRange.baseMipLevel = level; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = layer; + 
barrier.subresourceRange.layerCount = 1; state.access = new_access; state.layout = new_layout; } @@ -53,16 +64,16 @@ void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([barriers = barriers, cursor](auto cmdbuf, auto& dld) { + scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) { // TODO(Rodrigo): Implement a way to use the latest stage across subresources. - constexpr auto stage_stub = vk::PipelineStageFlagBits::eAllCommands; - cmdbuf.pipelineBarrier(stage_stub, stage_stub, {}, 0, nullptr, 0, nullptr, - static_cast<u32>(cursor), barriers.data(), dld); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {}, + vk::Span(barriers.data(), cursor)); }); } bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept { + VkAccessFlags new_access, VkImageLayout new_layout) noexcept { const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && base_level == 0 && num_levels == image_num_levels; if (!is_full_range) { @@ -91,11 +102,21 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num void VKImage::CreatePresentView() { // Image type has to be 2D to be presented. - const vk::ImageViewCreateInfo image_view_ci({}, *image, vk::ImageViewType::e2D, format, {}, - {aspect_mask, 0, 1, 0, 1}); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - present_view = dev.createImageViewUnique(image_view_ci, nullptr, dld); + VkImageViewCreateInfo image_view_ci; + image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + image_view_ci.pNext = nullptr; + image_view_ci.flags = 0; + image_view_ci.image = *image; + image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + image_view_ci.format = format; + image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; + image_view_ci.subresourceRange.aspectMask = aspect_mask; + image_view_ci.subresourceRange.baseMipLevel = 0; + image_view_ci.subresourceRange.levelCount = 1; + image_view_ci.subresourceRange.baseArrayLayer = 0; + image_view_ci.subresourceRange.layerCount = 1; + present_view = device.GetLogical().CreateImageView(image_view_ci); } VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h index b78242512..b4d7229e5 100644 --- a/src/video_core/renderer_vulkan/vk_image.h +++ b/src/video_core/renderer_vulkan/vk_image.h @@ -8,7 +8,7 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -18,16 +18,16 @@ class VKScheduler; class VKImage { public: explicit VKImage(const VKDevice& device, VKScheduler& scheduler, - const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask); + const VkImageCreateInfo& image_ci, VkImageAspectFlags aspect_mask); ~VKImage(); /// Records in the passed command buffer an image transition and updates the state of the image. 
void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, - vk::ImageLayout new_layout); + VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, + VkImageLayout new_layout); /// Returns a view compatible with presentation, the image has to be 2D. - vk::ImageView GetPresentView() { + VkImageView GetPresentView() { if (!present_view) { CreatePresentView(); } @@ -35,28 +35,28 @@ public: } /// Returns the Vulkan image handler. - vk::Image GetHandle() const { - return *image; + const vk::Image& GetHandle() const { + return image; } /// Returns the Vulkan format for this image. - vk::Format GetFormat() const { + VkFormat GetFormat() const { return format; } /// Returns the Vulkan aspect mask. - vk::ImageAspectFlags GetAspectMask() const { + VkImageAspectFlags GetAspectMask() const { return aspect_mask; } private: struct SubrangeState final { - vk::AccessFlags access{}; ///< Current access bits. - vk::ImageLayout layout = vk::ImageLayout::eUndefined; ///< Current image layout. + VkAccessFlags access = 0; ///< Current access bits. + VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout. }; bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept; + VkAccessFlags new_access, VkImageLayout new_layout) noexcept; /// Creates a presentation view. void CreatePresentView(); @@ -67,16 +67,16 @@ private: const VKDevice& device; ///< Device handler. VKScheduler& scheduler; ///< Device scheduler. - const vk::Format format; ///< Vulkan format. - const vk::ImageAspectFlags aspect_mask; ///< Vulkan aspect mask. - const u32 image_num_layers; ///< Number of layers. - const u32 image_num_levels; ///< Number of mipmap levels. + const VkFormat format; ///< Vulkan format. + const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask. + const u32 image_num_layers; ///< Number of layers. + const u32 image_num_levels; ///< Number of mipmap levels. - UniqueImage image; ///< Image handle. - UniqueImageView present_view; ///< Image view compatible with presentation. + vk::Image image; ///< Image handle. + vk::ImageView present_view; ///< Image view compatible with presentation. - std::vector<vk::ImageMemoryBarrier> barriers; ///< Pool of barriers. - std::vector<SubrangeState> subrange_states; ///< Current subrange state. + std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers. + std::vector<SubrangeState> subrange_states; ///< Current subrange state. bool state_diverged = false; ///< True when subresources mismatch in layout. 
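The Transition logic above batches one VkImageMemoryBarrier per (layer, level) pair and submits them through the wrapper's PipelineBarrier, which resolves to vkCmdPipelineBarrier. For reference, a single whole-image transition written directly against the C API looks roughly like this (a minimal sketch; the handles, access masks, and layouts are illustrative placeholders rather than values taken from the code above):

    #include <vulkan/vulkan.h>

    // Minimal sketch: move one color image from UNDEFINED to SHADER_READ_ONLY_OPTIMAL.
    // 'cmdbuf' and 'image' are assumed to be valid; masks and layouts are illustrative.
    void TransitionToShaderRead(VkCommandBuffer cmdbuf, VkImage image) {
        VkImageMemoryBarrier barrier{};
        barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        barrier.srcAccessMask = 0;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = image;
        barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
        // Same conservative all-commands stages as the stage stub in VKImage::Transition.
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                             VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 0, nullptr, 1,
                             &barrier);
    }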
}; diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 9cc9979d0..6a9e658bf 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -11,9 +11,9 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -30,17 +30,11 @@ u64 GetAllocationChunkSize(u64 required_size) { class VKMemoryAllocation final { public: explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, - vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type) - : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size}, - shifted_type{ShiftType(type)} {} - - ~VKMemoryAllocation() { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - dev.free(memory, nullptr, dld); - } + VkMemoryPropertyFlags properties, u64 allocation_size, u32 type) + : device{device}, memory{std::move(memory)}, properties{properties}, + allocation_size{allocation_size}, shifted_type{ShiftType(type)} {} - VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { + VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) { auto found = TryFindFreeSection(free_iterator, allocation_size, static_cast<u64>(commit_size), static_cast<u64>(alignment)); if (!found) { @@ -73,9 +67,8 @@ public: } /// Returns whether this allocation is compatible with the arguments. - bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const { - return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) && - (type_mask & shifted_type) != 0; + bool IsCompatible(VkMemoryPropertyFlags wanted_properties, u32 type_mask) const { + return (wanted_properties & properties) && (type_mask & shifted_type) != 0; } private: @@ -111,11 +104,11 @@ private: return std::nullopt; } - const VKDevice& device; ///< Vulkan device. - const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. - const vk::MemoryPropertyFlags properties; ///< Vulkan properties. - const u64 allocation_size; ///< Size of this allocation. - const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. + const VKDevice& device; ///< Vulkan device. + const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. + const VkMemoryPropertyFlags properties; ///< Vulkan properties. + const u64 allocation_size; ///< Size of this allocation. + const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. /// Hints where the next free region is likely going to be. 
u64 free_iterator{}; @@ -125,22 +118,20 @@ private: }; VKMemoryManager::VKMemoryManager(const VKDevice& device) - : device{device}, properties{device.GetPhysical().getMemoryProperties( - device.GetDispatchLoader())}, + : device{device}, properties{device.GetPhysical().GetMemoryProperties()}, is_memory_unified{GetMemoryUnified(properties)} {} VKMemoryManager::~VKMemoryManager() = default; -VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements, +VKMemoryCommit VKMemoryManager::Commit(const VkMemoryRequirements& requirements, bool host_visible) { const u64 chunk_size = GetAllocationChunkSize(requirements.size); // When a host visible commit is asked, search for host visible and coherent, otherwise search // for a fast device local type. - const vk::MemoryPropertyFlags wanted_properties = - host_visible - ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent - : vk::MemoryPropertyFlagBits::eDeviceLocal; + const VkMemoryPropertyFlags wanted_properties = + host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; if (auto commit = TryAllocCommit(requirements, wanted_properties)) { return commit; @@ -161,23 +152,19 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirement return commit; } -VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible); - dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); +VKMemoryCommit VKMemoryManager::Commit(const vk::Buffer& buffer, bool host_visible) { + auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), host_visible); + buffer.BindMemory(commit->GetMemory(), commit->GetOffset()); return commit; } -VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible); - dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); +VKMemoryCommit VKMemoryManager::Commit(const vk::Image& image, bool host_visible) { + auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), host_visible); + image.BindMemory(commit->GetMemory(), commit->GetOffset()); return commit; } -bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, +bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size) { const u32 type = [&] { for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { @@ -191,24 +178,26 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 return 0U; }(); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - // Try to allocate found type. 
- const vk::MemoryAllocateInfo memory_ai(size, type); - vk::DeviceMemory memory; - if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); - res != vk::Result::eSuccess) { - LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); + VkMemoryAllocateInfo memory_ai; + memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memory_ai.pNext = nullptr; + memory_ai.allocationSize = size; + memory_ai.memoryTypeIndex = type; + + vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai); + if (!memory) { + LOG_CRITICAL(Render_Vulkan, "Device allocation failed!"); return false; } - allocations.push_back( - std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); + + allocations.push_back(std::make_unique<VKMemoryAllocation>(device, std::move(memory), + wanted_properties, size, type)); return true; } -VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements, - vk::MemoryPropertyFlags wanted_properties) { +VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags wanted_properties) { for (auto& allocation : allocations) { if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { continue; @@ -220,10 +209,9 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& req return {}; } -/*static*/ bool VKMemoryManager::GetMemoryUnified( - const vk::PhysicalDeviceMemoryProperties& properties) { +bool VKMemoryManager::GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties) { for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) { - if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { + if (!(properties.memoryHeaps[heap_index].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)) { // Memory is considered unified when heaps are device local only. 
return false; } @@ -232,23 +220,19 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& req } VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, - vk::DeviceMemory memory, u64 begin, u64 end) - : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {} + const vk::DeviceMemory& memory, u64 begin, u64 end) + : device{device}, memory{memory}, interval{begin, end}, allocation{allocation} {} VKMemoryCommitImpl::~VKMemoryCommitImpl() { allocation->Free(this); } MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { - const auto dev = device.GetLogical(); - const auto address = reinterpret_cast<u8*>( - dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader())); - return MemoryMap{this, address}; + return MemoryMap{this, memory.Map(interval.first + offset_, size)}; } void VKMemoryCommitImpl::Unmap() const { - const auto dev = device.GetLogical(); - dev.unmapMemory(memory, device.GetDispatchLoader()); + memory.Unmap(); } MemoryMap VKMemoryCommitImpl::Map() const { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index cd00bb91b..5b6858e9b 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -8,7 +8,7 @@ #include <utility> #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -32,13 +32,13 @@ public: * memory. When passing false, it will try to allocate device local memory. * @returns A memory commit. */ - VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); + VKMemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible); /// Commits memory required by the buffer and binds it. - VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible); + VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible); /// Commits memory required by the image and binds it. - VKMemoryCommit Commit(vk::Image image, bool host_visible); + VKMemoryCommit Commit(const vk::Image& image, bool host_visible); /// Returns true if the memory allocations are always done in host visible and coherent memory. bool IsMemoryUnified() const { @@ -47,18 +47,18 @@ public: private: /// Allocates a chunk of memory. - bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); + bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); /// Tries to allocate a memory commit. - VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements, - vk::MemoryPropertyFlags wanted_properties); + VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags wanted_properties); /// Returns true if the device uses a unified memory model. - static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties); + static bool GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties); - const VKDevice& device; ///< Device handler. - const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties. - const bool is_memory_unified; ///< True if memory model is unified. + const VKDevice& device; ///< Device handler. + const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. + const bool is_memory_unified; ///< True if memory model is unified.
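AllocMemory above picks a memory type index by walking VkPhysicalDeviceMemoryProperties; the loop body is elided by the hunk boundary, so the following shows the textbook form of that search rather than a copy of it (a minimal sketch; returning std::nullopt on failure is an assumption, since the code above falls back to type 0 and logs a critical error instead):

    #include <cstdint>
    #include <optional>
    #include <vulkan/vulkan.h>

    // Pick the first memory type whose index bit is set in 'type_mask' (from
    // vkGetBufferMemoryRequirements/vkGetImageMemoryRequirements) and whose property
    // flags contain every 'wanted' bit, e.g. HOST_VISIBLE | HOST_COHERENT.
    std::optional<std::uint32_t> FindMemoryType(const VkPhysicalDeviceMemoryProperties& props,
                                                std::uint32_t type_mask,
                                                VkMemoryPropertyFlags wanted) {
        for (std::uint32_t index = 0; index < props.memoryTypeCount; ++index) {
            const bool allowed = (type_mask & (1U << index)) != 0;
            const bool matches = (props.memoryTypes[index].propertyFlags & wanted) == wanted;
            if (allowed && matches) {
                return index;
            }
        }
        return std::nullopt; // No compatible type; the caller must fall back or fail.
    }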
std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. }; @@ -68,7 +68,7 @@ class VKMemoryCommitImpl final { public: explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, - vk::DeviceMemory memory, u64 begin, u64 end); + const vk::DeviceMemory& memory, u64 begin, u64 end); ~VKMemoryCommitImpl(); /// Maps a memory region and returns a pointer to it. @@ -80,13 +80,13 @@ public: MemoryMap Map() const; /// Returns the Vulkan memory handler. - vk::DeviceMemory GetMemory() const { - return memory; + VkDeviceMemory GetMemory() const { + return *memory; } /// Returns the start position of the commit relative to the allocation. - vk::DeviceSize GetOffset() const { - return static_cast<vk::DeviceSize>(interval.first); + VkDeviceSize GetOffset() const { + return static_cast<VkDeviceSize>(interval.first); } private: @@ -94,8 +94,8 @@ private: void Unmap() const; const VKDevice& device; ///< Vulkan device. + const vk::DeviceMemory& memory; ///< Vulkan device memory handler. std::pair<u64, u64> interval{}; ///< Interval where the commit exists. - vk::DeviceMemory memory; ///< Vulkan device memory handler. VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 557b9d662..90e3a8edd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -13,7 +13,6 @@ #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" @@ -26,6 +25,7 @@ #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/compiler_settings.h" namespace Vulkan { @@ -36,12 +36,11 @@ using Tegra::Engines::ShaderType; namespace { -// C++20's using enum -constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer; -constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer; -constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer; -constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler; -constexpr auto eStorageImage = vk::DescriptorType::eStorageImage; +constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; +constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; +constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; +constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; +constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ VideoCommon::Shader::CompileDepth::FullDecompile}; @@ -126,43 +125,48 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { } } -template <vk::DescriptorType descriptor_type, class Container> -void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding, - vk::ShaderStageFlags stage_flags, const Container& container) { +template <VkDescriptorType 
descriptor_type, class Container> +void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, + VkShaderStageFlags stage_flags, const Container& container) { const u32 num_entries = static_cast<u32>(std::size(container)); for (std::size_t i = 0; i < num_entries; ++i) { u32 count = 1; - if constexpr (descriptor_type == eCombinedImageSampler) { + if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { // Combined image samplers can be arrayed. count = container[i].Size(); } - bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr); + VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); + entry.binding = binding++; + entry.descriptorType = descriptor_type; + entry.descriptorCount = count; + entry.stageFlags = stage_flags; + entry.pImmutableSamplers = nullptr; } } u32 FillDescriptorLayout(const ShaderEntries& entries, - std::vector<vk::DescriptorSetLayoutBinding>& bindings, + std::vector<VkDescriptorSetLayoutBinding>& bindings, Maxwell::ShaderProgram program_type, u32 base_binding) { const ShaderType stage = GetStageFromProgram(program_type); - const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); + const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); u32 binding = base_binding; - AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers); - AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers); - AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers); - AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers); - AddBindings<eStorageImage>(bindings, binding, flags, entries.images); + AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); + AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); + AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers); + AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); + AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); return binding; } } // Anonymous namespace CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - ProgramCode program_code, u32 main_offset) - : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, - program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, - shader_ir{this->program_code, main_offset, compiler_settings, registry}, + GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, + u32 main_offset) + : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, + registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, + compiler_settings, registry}, entries{GenerateShaderEntries(shader_ir)} {} CachedShader::~CachedShader() = default; @@ -201,19 +205,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { auto& memory_manager{system.GPU().MemoryManager()}; const GPUVAddr program_addr{GetShaderAddress(system, program)}; - const auto host_ptr{memory_manager.GetPointer(program_addr)}; - auto shader = TryGet(host_ptr); + const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + ASSERT(cpu_addr); + auto shader = cpu_addr ? 
TryGet(*cpu_addr) : nullptr; if (!shader) { + const auto host_ptr{memory_manager.GetPointer(program_addr)}; + // No shader found - create a new one constexpr u32 stage_offset = 10; const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); - const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); - ASSERT(cpu_addr); - shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, - host_ptr, std::move(code), stage_offset); + std::move(code), stage_offset); Register(shader); } shaders[index] = std::move(shader); @@ -253,18 +257,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach auto& memory_manager = system.GPU().MemoryManager(); const auto program_addr = key.shader; - const auto host_ptr = memory_manager.GetPointer(program_addr); - auto shader = TryGet(host_ptr); + const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + ASSERT(cpu_addr); + + auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; if (!shader) { // No shader found - create a new one - const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); - ASSERT(cpu_addr); + const auto host_ptr = memory_manager.GetPointer(program_addr); auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); constexpr u32 kernel_main_offset = 0; shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, - program_addr, *cpu_addr, host_ptr, std::move(code), + program_addr, *cpu_addr, std::move(code), kernel_main_offset); Register(shader); } @@ -317,7 +322,7 @@ void VKPipelineCache::Unregister(const Shader& shader) { RasterizerCache::Unregister(shader); } -std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> +std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const auto& fixed_state = key.fixed_state; auto& memory_manager = system.GPU().MemoryManager(); @@ -334,7 +339,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; SPIRVProgram program; - std::vector<vk::DescriptorSetLayoutBinding> bindings; + std::vector<VkDescriptorSetLayoutBinding> bindings; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); @@ -345,8 +350,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { } const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - const auto shader = TryGet(host_ptr); + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + const auto shader = TryGet(*cpu_addr); ASSERT(shader); const std::size_t stage = index == 0 ? 
0 : index - 1; // Stage indices are 0 - 5 @@ -369,32 +375,49 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { return {std::move(program), std::move(bindings)}; } -template <vk::DescriptorType descriptor_type, class Container> -void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding, +template <VkDescriptorType descriptor_type, class Container> +void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, u32& offset, const Container& container) { static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); const u32 count = static_cast<u32>(std::size(container)); - if constexpr (descriptor_type == eCombinedImageSampler) { + if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { for (u32 i = 0; i < count; ++i) { const u32 num_samplers = container[i].Size(); - template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset, - entry_size); + VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); + entry.dstBinding = binding; + entry.dstArrayElement = 0; + entry.descriptorCount = num_samplers; + entry.descriptorType = descriptor_type; + entry.offset = offset; + entry.stride = entry_size; + ++binding; offset += num_samplers * entry_size; } return; } - if constexpr (descriptor_type == eUniformTexelBuffer) { + if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) { // Nvidia has a bug where updating multiple uniform texels at once causes the driver to // crash. for (u32 i = 0; i < count; ++i) { - template_entries.emplace_back(binding + i, 0, 1, descriptor_type, - offset + i * entry_size, entry_size); + VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); + entry.dstBinding = binding + i; + entry.dstArrayElement = 0; + entry.descriptorCount = 1; + entry.descriptorType = descriptor_type; + entry.offset = offset + i * entry_size; + entry.stride = entry_size; } } else if (count > 0) { - template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); + VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); + entry.dstBinding = binding; + entry.dstArrayElement = 0; + entry.descriptorCount = count; + entry.descriptorType = descriptor_type; + entry.offset = offset; + entry.stride = entry_size; } offset += count * entry_size; binding += count; @@ -402,12 +425,12 @@ void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, void FillDescriptorUpdateTemplateEntries( const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { - AddEntry<eUniformBuffer>(template_entries, offset, binding, entries.const_buffers); - AddEntry<eStorageBuffer>(template_entries, offset, binding, entries.global_buffers); - AddEntry<eUniformTexelBuffer>(template_entries, offset, binding, entries.texel_buffers); - AddEntry<eCombinedImageSampler>(template_entries, offset, binding, entries.samplers); - AddEntry<eStorageImage>(template_entries, offset, binding, entries.images); + std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { + AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); + AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); + AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers); + AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); + 
AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c4c112290..7ccdb7083 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,12 +19,12 @@ #include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/surface.h" @@ -113,17 +113,13 @@ namespace Vulkan { class CachedShader final : public RasterizerCacheObject { public: explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, - VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); + VAddr cpu_addr, ProgramCode program_code, u32 main_offset); ~CachedShader(); GPUVAddr GetGpuAddr() const { return gpu_addr; } - VAddr GetCpuAddr() const override { - return cpu_addr; - } - std::size_t GetSizeInBytes() const override { return program_code.size() * sizeof(u64); } @@ -149,7 +145,6 @@ private: Tegra::Engines::ShaderType stage); GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; ProgramCode program_code; VideoCommon::Shader::Registry registry; VideoCommon::Shader::ShaderIR shader_ir; @@ -177,7 +172,7 @@ protected: void FlushObjectInner(const Shader& object) override {} private: - std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders( + std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( const GraphicsPipelineCacheKey& key); Core::System& system; @@ -199,6 +194,6 @@ private: void FillDescriptorUpdateTemplateEntries( const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); + std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ffbf60dda..0966c7ff7 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -8,19 +8,19 @@ #include <utility> #include <vector> -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { namespace { -constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; +constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION}; -constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { +constexpr VkQueryType GetTarget(VideoCore::QueryType type) { return QUERY_TARGETS[static_cast<std::size_t>(type)]; } @@ -35,29 +35,34 @@ void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) type = type_; } 
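The QueryPool hunks that follow grow the pool array in fixed GROW_STEP chunks and read results back with a blocking wait. Stripped of the pooling, the raw C API sequence those wrapper calls reduce to is roughly this (a minimal sketch; the pool size is illustrative and 'device' is assumed to be a valid VkDevice):

    #include <cstdint>
    #include <vulkan/vulkan.h>

    uint64_t RunOneOcclusionQuery(VkDevice device) {
        VkQueryPoolCreateInfo ci{};
        ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
        ci.queryType = VK_QUERY_TYPE_OCCLUSION;
        ci.queryCount = 64; // One chunk of queries; the size here is illustrative.
        VkQueryPool pool = VK_NULL_HANDLE;
        vkCreateQueryPool(device, &ci, nullptr, &pool);

        // ... vkCmdBeginQuery/vkCmdEndQuery are recorded elsewhere and the work submitted ...

        // Blocking 64-bit readback, matching the flags used by BlockingQuery further down.
        uint64_t value = 0;
        vkGetQueryPoolResults(device, pool, 0, 1, sizeof(value), &value, sizeof(value),
                              VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
        vkDestroyQueryPool(device, pool, nullptr);
        return value;
    }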
-std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) { +std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { std::size_t index; do { index = CommitResource(fence); } while (usage[index]); usage[index] = true; - return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)}; + return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)}; } void QueryPool::Allocate(std::size_t begin, std::size_t end) { usage.resize(end); - const auto dev = device->GetLogical(); - const u32 size = static_cast<u32>(end - begin); - const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); - pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); + VkQueryPoolCreateInfo query_pool_ci; + query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + query_pool_ci.pNext = nullptr; + query_pool_ci.flags = 0; + query_pool_ci.queryType = GetTarget(type); + query_pool_ci.queryCount = static_cast<u32>(end - begin); + query_pool_ci.pipelineStatistics = 0; + pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci)); } -void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) { +void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { const auto it = - std::find_if(std::begin(pools), std::end(pools), - [query_pool = query.first](auto& pool) { return query_pool == *pool; }); + std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) { + return query_pool == *pool; + }); ASSERT(it != std::end(pools)); const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); @@ -76,12 +81,11 @@ VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& VKQueryCache::~VKQueryCache() = default; -std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { +std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); } -void VKQueryCache::Reserve(VideoCore::QueryType type, - std::pair<vk::QueryPool, std::uint32_t> query) { +void VKQueryCache::Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query) { query_pools[static_cast<std::size_t>(type)].Reserve(query); } @@ -89,10 +93,10 @@ HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> depen VideoCore::QueryType type) : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { - const auto dev = cache.Device().GetLogical(); - cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { - dev.resetQueryPoolEXT(query.first, query.second, 1, dld); - cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); + const vk::Device* logical = &cache.Device().GetLogical(); + cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { + logical->ResetQueryPoolEXT(query.first, query.second, 1); + cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); }); } @@ -101,22 +105,16 @@ HostCounter::~HostCounter() { } void HostCounter::EndQuery() { - cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { - cmdbuf.endQuery(query.first, query.second, dld); - }); + cache.Scheduler().Record( + [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); } u64 
HostCounter::BlockingQuery() const { if (ticks >= cache.Scheduler().Ticks()) { cache.Scheduler().Flush(); } - - const auto dev = cache.Device().GetLogical(); - const auto& dld = cache.Device().GetDispatchLoader(); - u64 value; - dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), - vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); - return value; + return cache.Device().GetLogical().GetQueryResult<u64>( + query.first, query.second, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index c3092ee96..b63784f4b 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -12,8 +12,8 @@ #include "common/common_types.h" #include "video_core/query_cache.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace VideoCore { class RasterizerInterface; @@ -36,9 +36,9 @@ public: void Initialize(const VKDevice& device, VideoCore::QueryType type); - std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence); + std::pair<VkQueryPool, u32> Commit(VKFence& fence); - void Reserve(std::pair<vk::QueryPool, std::uint32_t> query); + void Reserve(std::pair<VkQueryPool, u32> query); protected: void Allocate(std::size_t begin, std::size_t end) override; @@ -49,7 +49,7 @@ private: const VKDevice* device = nullptr; VideoCore::QueryType type = {}; - std::vector<UniqueQueryPool> pools; + std::vector<vk::QueryPool> pools; std::vector<bool> usage; }; @@ -61,9 +61,9 @@ public: const VKDevice& device, VKScheduler& scheduler); ~VKQueryCache(); - std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type); + std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); - void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query); + void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); const VKDevice& Device() const noexcept { return device; @@ -91,7 +91,7 @@ private: VKQueryCache& cache; const VideoCore::QueryType type; - const std::pair<vk::QueryPool, std::uint32_t> query; + const std::pair<VkQueryPool, u32> query; const u64 ticks; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 58c69b786..4ca0febb8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -19,7 +19,6 @@ #include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -39,6 +38,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -60,32 +60,42 @@ namespace { constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); -vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { - const auto& viewport = regs.viewport_transform[index]; - 
const float x = viewport.translate_x - viewport.scale_x; - const float y = viewport.translate_y - viewport.scale_y; - const float width = viewport.scale_x * 2.0f; - const float height = viewport.scale_y * 2.0f; +VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { + const auto& src = regs.viewport_transform[index]; + const float width = src.scale_x * 2.0f; + const float height = src.scale_y * 2.0f; - const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; - float near = viewport.translate_z - viewport.scale_z * reduce_z; - float far = viewport.translate_z + viewport.scale_z; + VkViewport viewport; + viewport.x = src.translate_x - src.scale_x; + viewport.y = src.translate_y - src.scale_y; + viewport.width = width != 0.0f ? width : 1.0f; + viewport.height = height != 0.0f ? height : 1.0f; + + const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; + viewport.minDepth = src.translate_z - src.scale_z * reduce_z; + viewport.maxDepth = src.translate_z + src.scale_z; if (!device.IsExtDepthRangeUnrestrictedSupported()) { - near = std::clamp(near, 0.0f, 1.0f); - far = std::clamp(far, 0.0f, 1.0f); + viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); + viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); } - - return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far); + return viewport; } -constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) { - const auto& scissor = regs.scissor_test[index]; - if (!scissor.enable) { - return {{0, 0}, {INT32_MAX, INT32_MAX}}; +VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { + const auto& src = regs.scissor_test[index]; + VkRect2D scissor; + if (src.enable) { + scissor.offset.x = static_cast<s32>(src.min_x); + scissor.offset.y = static_cast<s32>(src.min_y); + scissor.extent.width = src.max_x - src.min_x; + scissor.extent.height = src.max_y - src.min_y; + } else { + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent.width = std::numeric_limits<s32>::max(); + scissor.extent.height = std::numeric_limits<s32>::max(); } - const u32 width = scissor.max_x - scissor.min_x; - const u32 height = scissor.max_y - scissor.min_y; - return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}}; + return scissor; } std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( @@ -97,8 +107,8 @@ std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( return addresses; } -void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage, - vk::AccessFlags access) { +void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage, + VkAccessFlags access) { for (auto& [view, layout] : views) { view->Transition(*layout, pipeline_stage, access); } @@ -127,13 +137,13 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry class BufferBindings final { public: - void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) { - vertex.buffer_ptrs[vertex.num_buffers] = buffer; + void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) { + vertex.buffers[vertex.num_buffers] = buffer; vertex.offsets[vertex.num_buffers] = offset; ++vertex.num_buffers; } - void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) { + void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { index.buffer = 
buffer; index.offset = offset; index.type = type; @@ -217,19 +227,19 @@ private: // Some of these fields are intentionally left uninitialized to avoid initializing them twice. struct { std::size_t num_buffers = 0; - std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs; - std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets; + std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; + std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; } vertex; struct { - const vk::Buffer* buffer = nullptr; - vk::DeviceSize offset; - vk::IndexType type; + VkBuffer buffer = nullptr; + VkDeviceSize offset; + VkIndexType type; } index; template <std::size_t N> void BindStatic(VKScheduler& scheduler) const { - if (index.buffer != nullptr) { + if (index.buffer) { BindStatic<N, true>(scheduler); } else { BindStatic<N, false>(scheduler); @@ -243,38 +253,31 @@ private: return; } - std::array<vk::Buffer, N> buffers; - std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), - [](const auto ptr) { return *ptr; }); - - std::array<vk::DeviceSize, N> offsets; + std::array<VkBuffer, N> buffers; + std::array<VkDeviceSize, N> offsets; + std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin()); std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); if constexpr (is_indexed) { // Indexed draw - scheduler.Record([buffers, offsets, index_buffer = *index.buffer, - index_offset = index.offset, - index_type = index.type](auto cmdbuf, auto& dld) { - cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld); - cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), - dld); + scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); + cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); }); } else { // Array draw - scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) { - cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), - dld); + scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); }); } } }; -void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, - const vk::DispatchLoaderDynamic& dld) const { +void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { if (is_indexed) { - cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld); + cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance); } else { - cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld); + cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance); } } @@ -337,7 +340,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const auto renderpass = pipeline.GetRenderPass(); const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); - scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); + scheduler.RequestRenderpass(renderpass, framebuffer, render_area); UpdateDynamicStates(); @@ -345,19 +348,19 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { if (device.IsNvDeviceDiagnosticCheckpoints()) { scheduler.Record( - [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); + [&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(&pipeline); }); } 
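The BindStatic code above turns the runtime vertex-buffer count into a template parameter, so the lambda handed to the scheduler captures fixed-size arrays by value rather than heap-backed vectors. A stripped-down sketch of that runtime-to-compile-time dispatch with simplified stand-in types (Bind, Dispatch and MAX are illustrative names, not yuzu's API):

    #include <array>
    #include <cstddef>

    constexpr std::size_t MAX = 4;

    template <std::size_t N>
    void Bind(const std::array<int, MAX>& handles) {
        // N is a compile-time constant here, so this copy has a fixed, known size
        // and can be captured by value in a deferred-recording lambda.
        std::array<int, N> bound{};
        for (std::size_t i = 0; i < N; ++i) {
            bound[i] = handles[i];
        }
        // scheduler.Record([bound](vk::CommandBuffer cmdbuf) { ... });
    }

    void Dispatch(std::size_t count, const std::array<int, MAX>& handles) {
        switch (count) { // expand the runtime count into template instantiations
        case 0: return Bind<0>(handles);
        case 1: return Bind<1>(handles);
        case 2: return Bind<2>(handles);
        case 3: return Bind<3>(handles);
        case 4: return Bind<4>(handles);
        }
    }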
BeginTransformFeedback(); const auto pipeline_layout = pipeline.GetLayout(); const auto descriptor_set = pipeline.CommitDescriptorSet(); - scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { + scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { if (descriptor_set) { - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, - DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, + DESCRIPTOR_SET, descriptor_set, {}); } - draw_params.Draw(cmdbuf, dld); + draw_params.Draw(cmdbuf); }); EndTransformFeedback(); @@ -389,48 +392,54 @@ void RasterizerVulkan::Clear() { DEBUG_ASSERT(texceptions.none()); SetupImageTransitions(0, color_attachments, zeta_attachment); - const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); + const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); - scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); + scheduler.RequestRenderpass(renderpass, framebuffer, render_area); - const auto& scissor = regs.scissor_test[0]; - const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y); - vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y}; - scissor_extent.width = std::min(scissor_extent.width, render_area.width); - scissor_extent.height = std::min(scissor_extent.height, render_area.height); - - const u32 layer = regs.clear_buffers.layer; - const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1); + VkClearRect clear_rect; + clear_rect.baseArrayLayer = regs.clear_buffers.layer; + clear_rect.layerCount = 1; + clear_rect.rect = GetScissorState(regs, 0); + clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); + clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); if (use_color) { - const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], - regs.clear_color[2], regs.clear_color[3]}; - const vk::ClearValue clear_value{clear_color}; + VkClearValue clear_value; + std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); + const u32 color_attachment = regs.clear_buffers.RT; - scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) { - const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment, - clear_value); - cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); + scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { + VkClearAttachment attachment; + attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + attachment.colorAttachment = color_attachment; + attachment.clearValue = clear_value; + cmdbuf.ClearAttachments(attachment, clear_rect); }); } if (!use_depth && !use_stencil) { return; } - vk::ImageAspectFlags aspect_flags; + VkImageAspectFlags aspect_flags = 0; if (use_depth) { - aspect_flags |= vk::ImageAspectFlagBits::eDepth; + aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT; } if (use_stencil) { - aspect_flags |= vk::ImageAspectFlagBits::eStencil; + aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; } scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, - clear_rect, aspect_flags](auto cmdbuf, auto& dld) { - const vk::ClearDepthStencilValue 
clear_zeta(clear_depth, clear_stencil); - const vk::ClearValue clear_value{clear_zeta}; - const vk::ClearAttachment attachment(aspect_flags, 0, clear_value); - cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); + clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { + VkClearValue clear_value; + clear_value.depthStencil.depth = clear_depth; + clear_value.depthStencil.stencil = clear_stencil; + + VkClearAttachment attachment; + attachment.aspectMask = aspect_flags; + attachment.colorAttachment = 0; + attachment.clearValue.depthStencil.depth = clear_depth; + attachment.clearValue.depthStencil.stencil = clear_stencil; + cmdbuf.ClearAttachments(attachment, clear_rect); }); } @@ -463,24 +472,24 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); - TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader, - vk::AccessFlagBits::eShaderRead); - TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT); + TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); if (device.IsNvDeviceDiagnosticCheckpoints()) { scheduler.Record( - [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(nullptr, dld); }); + [&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(nullptr); }); } scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), layout = pipeline.GetLayout(), - descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) { - cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1, - &descriptor_set, 0, nullptr, dld); - cmdbuf.dispatch(grid_x, grid_y, grid_z, dld); + descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, + descriptor_set, {}); + cmdbuf.Dispatch(grid_x, grid_y, grid_z); }); } @@ -495,20 +504,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, void RasterizerVulkan::FlushAll() {} -void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { + if (addr == 0 || size == 0) { + return; + } texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size); } -void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { + if (addr == 0 || size == 0) { + return; + } texture_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); } -void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } @@ -540,8 +555,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return false; } - const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; - const auto 
surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; + const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; if (!surface) { return false; } @@ -594,7 +608,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { Texceptions texceptions; for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { if (update_rendertargets) { - color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); + color_attachments[rt] = texture_cache.GetColorBufferSurface(rt); } if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { texceptions[rt] = true; @@ -602,7 +616,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { } if (update_rendertargets) { - zeta_attachment = texture_cache.GetDepthBufferSurface(true); + zeta_attachment = texture_cache.GetDepthBufferSurface(); } if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { texceptions[ZETA_TEXCEPTION_INDEX] = true; @@ -620,13 +634,13 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen continue; } overlap = true; - *layout = vk::ImageLayout::eGeneral; + *layout = VK_IMAGE_LAYOUT_GENERAL; } return overlap; } -std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( - vk::RenderPass renderpass) { +std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( + VkRenderPass renderpass) { FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; @@ -653,15 +667,20 @@ std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffer const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); auto& framebuffer = fbentry->second; if (is_cache_miss) { - const vk::FramebufferCreateInfo framebuffer_ci( - {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width, - key.height, key.layers); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); - } - - return {*framebuffer, vk::Extent2D{key.width, key.height}}; + VkFramebufferCreateInfo framebuffer_ci; + framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebuffer_ci.pNext = nullptr; + framebuffer_ci.flags = 0; + framebuffer_ci.renderPass = key.renderpass; + framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size()); + framebuffer_ci.pAttachments = key.views.data(); + framebuffer_ci.width = key.width; + framebuffer_ci.height = key.height; + framebuffer_ci.layers = key.layers; + framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci); + } + + return {*framebuffer, VkExtent2D{key.width, key.height}}; } RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, @@ -709,10 +728,9 @@ void RasterizerVulkan::SetupShaderDescriptors( void RasterizerVulkan::SetupImageTransitions( Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, const View& zeta_attachment) { - TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics, - vk::AccessFlagBits::eShaderRead); - TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); + TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + 
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { const auto color_attachment = color_attachments[rt]; @@ -720,19 +738,19 @@ void RasterizerVulkan::SetupImageTransitions( continue; } const auto image_layout = - texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal; - color_attachment->Transition( - image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite); + texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); } if (zeta_attachment != nullptr) { const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] - ? vk::ImageLayout::eGeneral - : vk::ImageLayout::eDepthStencilAttachmentOptimal; - zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests, - vk::AccessFlagBits::eDepthStencilAttachmentRead | - vk::AccessFlagBits::eDepthStencilAttachmentWrite); + ? VK_IMAGE_LAYOUT_GENERAL + : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + zeta_attachment->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); } } @@ -768,9 +786,9 @@ void RasterizerVulkan::BeginTransformFeedback() { const std::size_t size = binding.buffer_size; const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { - cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); - cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); + scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { + cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); + cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } @@ -781,7 +799,7 @@ void RasterizerVulkan::EndTransformFeedback() { } scheduler.Record( - [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); + [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, @@ -832,7 +850,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar } else { const auto [buffer, offset] = quad_array_pass.Assemble(params.num_vertices, params.base_vertex); - buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32); + buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); params.base_vertex = 0; params.num_vertices = params.num_vertices * 6 / 4; params.is_indexed = true; @@ -848,7 +866,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar auto format = regs.index_array.format; const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { - std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset); + std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset); format = Maxwell::IndexFormat::UnsignedShort; } @@ -985,8 +1003,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd const auto size = 
memory_manager.Read<u32>(address + 8); if (size == 0) { - // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because - // Vulkan doesn't like empty buffers. + // Sometimes global memory pointers don't have a proper size. Upload a dummy entry + // because Vulkan doesn't like empty buffers. constexpr std::size_t dummy_size = 4; const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); @@ -1017,7 +1035,7 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu update_descriptor_queue.AddSampledImage(sampler, image_view); const auto image_layout = update_descriptor_queue.GetLastImageLayout(); - *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal; + *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; sampled_views.push_back(ImageView{std::move(view), image_layout}); } @@ -1034,7 +1052,7 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima update_descriptor_queue.AddImage(image_view); const auto image_layout = update_descriptor_queue.GetLastImageLayout(); - *image_layout = vk::ImageLayout::eGeneral; + *image_layout = VK_IMAGE_LAYOUT_GENERAL; image_views.push_back(ImageView{std::move(view), image_layout}); } @@ -1051,9 +1069,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; - scheduler.Record([viewports](auto cmdbuf, auto& dld) { - cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld); - }); + scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); } void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1067,9 +1083,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), GetScissorState(regs, 15)}; - scheduler.Record([scissors](auto cmdbuf, auto& dld) { - cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld); - }); + scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); } void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1077,8 +1091,8 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { return; } scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, - factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { - cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); + factor = regs.polygon_offset_factor](vk::CommandBuffer cmdbuf) { + cmdbuf.SetDepthBias(constant, clamp, factor / 2.0f); }); } @@ -1088,9 +1102,8 @@ void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& reg } const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, regs.blend_color.a}; - scheduler.Record([blend_color](auto cmdbuf, auto& dld) { - cmdbuf.setBlendConstants(blend_color.data(), dld); - }); + scheduler.Record( + [blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); }); } void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1098,7 +1111,7 @@ 
void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) return; } scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( - auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); + vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); }); } void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1111,24 +1124,24 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask, front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref, back_write_mask = regs.stencil_back_mask, - back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { + back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) { // Front face - cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld); - cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld); - cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld); + cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref); + cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask); + cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask); // Back face - cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld); - cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld); - cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld); + cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref); + cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask); + cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask); }); } else { // Front face defines both faces scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask, - test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { - cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld); - cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld); - cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld); + test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) { + cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref); + cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask); + cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask); }); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3185868e9..46037860a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -17,7 +17,6 @@ #include "video_core/memory_manager.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -32,6 +31,7 @@ #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Core { class System; @@ -49,11 +49,10 @@ namespace Vulkan { struct VKScreenInfo; -using ImageViewsPack 
= - boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>; +using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>; struct FramebufferCacheKey { - vk::RenderPass renderpass{}; + VkRenderPass renderpass{}; u32 width = 0; u32 height = 0; u32 layers = 0; @@ -101,7 +100,7 @@ class BufferBindings; struct ImageView { View view; - vk::ImageLayout* layout = nullptr; + VkImageLayout* layout = nullptr; }; class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { @@ -118,9 +117,9 @@ public: void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushCommands() override; void TickFrame() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, @@ -137,7 +136,7 @@ public: private: struct DrawParameters { - void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const; + void Draw(vk::CommandBuffer cmdbuf) const; u32 base_instance = 0; u32 num_instances = 0; @@ -154,7 +153,7 @@ private: Texceptions UpdateAttachments(); - std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass); + std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); /// Sets up geometry buffers and state. DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, @@ -272,7 +271,7 @@ private: u32 draw_counter = 0; // TODO(Rodrigo): Invalidate on image destruction - std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache; + std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 93f5d7ba0..4e5286a69 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -6,10 +6,10 @@ #include <vector> #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -17,7 +17,7 @@ VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {} VKRenderPassCache::~VKRenderPassCache() = default; -vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { +VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { const auto [pair, is_cache_miss] = cache.try_emplace(params); auto& entry = pair->second; if (is_cache_miss) { @@ -26,9 +26,9 @@ vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) return *entry; } -UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { - std::vector<vk::AttachmentDescription> descriptors; - std::vector<vk::AttachmentReference> color_references; +vk::RenderPass
VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { + std::vector<VkAttachmentDescription> descriptors; + std::vector<VkAttachmentReference> color_references; for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) { const auto attachment = params.color_attachments[rt]; @@ -39,16 +39,25 @@ UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& par // TODO(Rodrigo): Add eMayAlias when it's needed. const auto color_layout = attachment.is_texception - ? vk::ImageLayout::eGeneral - : vk::ImageLayout::eColorAttachmentOptimal; - descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format, - vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, - vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, - vk::AttachmentStoreOp::eDontCare, color_layout, color_layout); - color_references.emplace_back(static_cast<u32>(rt), color_layout); + ? VK_IMAGE_LAYOUT_GENERAL + : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkAttachmentDescription& descriptor = descriptors.emplace_back(); + descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; + descriptor.format = format.format; + descriptor.samples = VK_SAMPLE_COUNT_1_BIT; + descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + descriptor.initialLayout = color_layout; + descriptor.finalLayout = color_layout; + + VkAttachmentReference& reference = color_references.emplace_back(); + reference.attachment = static_cast<u32>(rt); + reference.layout = color_layout; } - vk::AttachmentReference zeta_attachment_ref; + VkAttachmentReference zeta_attachment_ref; if (params.has_zeta) { const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format); @@ -56,45 +65,68 @@ UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& par static_cast<u32>(params.zeta_pixel_format)); const auto zeta_layout = params.zeta_texception - ? vk::ImageLayout::eGeneral - : vk::ImageLayout::eDepthStencilAttachmentOptimal; - descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format, - vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, - vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad, - vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout); - zeta_attachment_ref = - vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout); + ? VK_IMAGE_LAYOUT_GENERAL + : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + VkAttachmentDescription& descriptor = descriptors.emplace_back(); + descriptor.flags = 0; + descriptor.format = format.format; + descriptor.samples = VK_SAMPLE_COUNT_1_BIT; + descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + descriptor.initialLayout = zeta_layout; + descriptor.finalLayout = zeta_layout; + + zeta_attachment_ref.attachment = static_cast<u32>(params.color_attachments.size()); + zeta_attachment_ref.layout = zeta_layout; } - const vk::SubpassDescription subpass_description( - {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()), - color_references.data(), nullptr, params.has_zeta ? 
&zeta_attachment_ref : nullptr, 0, - nullptr); - - vk::AccessFlags access; - vk::PipelineStageFlags stage; + VkSubpassDescription subpass_description; + subpass_description.flags = 0; + subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass_description.inputAttachmentCount = 0; + subpass_description.pInputAttachments = nullptr; + subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size()); + subpass_description.pColorAttachments = color_references.data(); + subpass_description.pResolveAttachments = nullptr; + subpass_description.pDepthStencilAttachment = params.has_zeta ? &zeta_attachment_ref : nullptr; + subpass_description.preserveAttachmentCount = 0; + subpass_description.pPreserveAttachments = nullptr; + + VkAccessFlags access = 0; + VkPipelineStageFlags stage = 0; if (!color_references.empty()) { - access |= - vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite; - stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput; + access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; } if (params.has_zeta) { - access |= vk::AccessFlagBits::eDepthStencilAttachmentRead | - vk::AccessFlagBits::eDepthStencilAttachmentWrite; - stage |= vk::PipelineStageFlagBits::eLateFragmentTests; + access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; } - const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access, - {}); - - const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()), - descriptors.data(), 1, &subpass_description, 1, - &subpass_dependency); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createRenderPassUnique(create_info, nullptr, dld); + VkSubpassDependency subpass_dependency; + subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL; + subpass_dependency.dstSubpass = 0; + subpass_dependency.srcStageMask = stage; + subpass_dependency.dstStageMask = stage; + subpass_dependency.srcAccessMask = 0; + subpass_dependency.dstAccessMask = access; + subpass_dependency.dependencyFlags = 0; + + VkRenderPassCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.attachmentCount = static_cast<u32>(descriptors.size()); + ci.pAttachments = descriptors.data(); + ci.subpassCount = 1; + ci.pSubpasses = &subpass_description; + ci.dependencyCount = 1; + ci.pDependencies = &subpass_dependency; + return device.GetLogical().CreateRenderPass(ci); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h index b49b2db48..921b6efb5 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -12,7 +12,7 @@ #include <boost/functional/hash.hpp> #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" namespace Vulkan { @@ -85,13 +85,13 @@ public: explicit VKRenderPassCache(const VKDevice& device); ~VKRenderPassCache(); - vk::RenderPass GetRenderPass(const RenderPassParams& params); + VkRenderPass GetRenderPass(const RenderPassParams& params); private: - UniqueRenderPass CreateRenderPass(const 
RenderPassParams& params) const; + vk::RenderPass CreateRenderPass(const RenderPassParams& params) const; const VKDevice& device; - std::unordered_map<RenderPassParams, UniqueRenderPass> cache; + std::unordered_map<RenderPassParams, vk::RenderPass> cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index 525b4bb46..dc06f545a 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp @@ -6,83 +6,83 @@ #include <optional> #include "common/assert.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { +namespace { + // TODO(Rodrigo): Fine tune these numbers. constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; constexpr std::size_t FENCES_GROW_STEP = 0x40; +VkFenceCreateInfo BuildFenceCreateInfo() { + VkFenceCreateInfo fence_ci; + fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_ci.pNext = nullptr; + fence_ci.flags = 0; + return fence_ci; +} + +} // Anonymous namespace + class CommandBufferPool final : public VKFencedPool { public: CommandBufferPool(const VKDevice& device) : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} void Allocate(std::size_t begin, std::size_t end) override { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - const u32 graphics_family = device.GetGraphicsFamily(); - - auto pool = std::make_unique<Pool>(); - // Command buffers are going to be committed, recorded, executed every single usage cycle. // They are also going to be reset when committed.
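A cost of dropping Vulkan-Hpp, visible in BuildFenceCreateInfo above and in every create-info struct in this change: plain C structs have no constructors, so sType, pNext and flags must be set by hand on each one. One defensive pattern for that (purely illustrative; this change fills each field explicitly instead) is a helper that value-initializes the struct and stamps only the sType:

    #include <vulkan/vulkan.h>

    // Illustrative helper, not part of the wrapper: value-initialize a Vulkan
    // struct so every unset field is zero, then assign the matching sType.
    template <typename T>
    T MakeStruct(VkStructureType stype) {
        T info{};
        info.sType = stype;
        return info;
    }

    // Usage, mirroring BuildFenceCreateInfo:
    // const auto fence_ci = MakeStruct<VkFenceCreateInfo>(VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);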
- const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient | - vk::CommandPoolCreateFlagBits::eResetCommandBuffer; - const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family); - pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld); - - const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle, - vk::CommandBufferLevel::ePrimary, - static_cast<u32>(COMMAND_BUFFER_POOL_SIZE)); - pool->cmdbufs = - dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld); - - pools.push_back(std::move(pool)); + VkCommandPoolCreateInfo command_pool_ci; + command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_ci.pNext = nullptr; + command_pool_ci.flags = + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily(); + + Pool& pool = pools.emplace_back(); + pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci); + pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); } - vk::CommandBuffer Commit(VKFence& fence) { + VkCommandBuffer Commit(VKFence& fence) { const std::size_t index = CommitResource(fence); const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; - return *pools[pool_index]->cmdbufs[sub_index]; + return pools[pool_index].cmdbufs[sub_index]; } private: struct Pool { - UniqueCommandPool handle; - std::vector<UniqueCommandBuffer> cmdbufs; + vk::CommandPool handle; + vk::CommandBuffers cmdbufs; }; const VKDevice& device; - - std::vector<std::unique_ptr<Pool>> pools; + std::vector<Pool> pools; }; VKResource::VKResource() = default; VKResource::~VKResource() = default; -VKFence::VKFence(const VKDevice& device, UniqueFence handle) - : device{device}, handle{std::move(handle)} {} +VKFence::VKFence(const VKDevice& device) + : device{device}, handle{device.GetLogical().CreateFence(BuildFenceCreateInfo())} {} VKFence::~VKFence() = default; void VKFence::Wait() { - static constexpr u64 timeout = std::numeric_limits<u64>::max(); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - switch (const auto result = dev.waitForFences(1, &*handle, true, timeout, dld)) { - case vk::Result::eSuccess: + switch (const VkResult result = handle.Wait()) { + case VK_SUCCESS: return; - case vk::Result::eErrorDeviceLost: + case VK_ERROR_DEVICE_LOST: device.ReportLoss(); [[fallthrough]]; default: - vk::throwResultException(result, "vk::waitForFences"); + throw vk::Exception(result); } } @@ -107,13 +107,11 @@ bool VKFence::Tick(bool gpu_wait, bool owner_wait) { return false; } - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); if (gpu_wait) { // Wait for the fence if it has been requested. - dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld); + (void)handle.Wait(); } else { - if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) { + if (handle.GetStatus() != VK_SUCCESS) { // Vulkan fence is not ready; there is not much we can do here return false; } } @@ -126,7 +124,7 @@ protected_resources.clear(); // Prepare fence for reuse.
- dev.resetFences({*handle}, dld); + handle.Reset(); is_used = false; return true; } @@ -299,21 +297,16 @@ VKFence& VKResourceManager::CommitFence() { return *found_fence; } -vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) { +VkCommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) { return command_buffer_pool->Commit(fence); } void VKResourceManager::GrowFences(std::size_t new_fences_count) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - const vk::FenceCreateInfo fence_ci; - const std::size_t previous_size = fences.size(); fences.resize(previous_size + new_fences_count); - std::generate(fences.begin() + previous_size, fences.end(), [&]() { - return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld)); - }); + std::generate(fences.begin() + previous_size, fences.end(), + [this] { return std::make_unique<VKFence>(device); }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h index d4cbc95a5..f683d2276 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.h +++ b/src/video_core/renderer_vulkan/vk_resource_manager.h @@ -7,7 +7,7 @@ #include <cstddef> #include <memory> #include <vector> -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -42,7 +42,7 @@ class VKFence { friend class VKResourceManager; public: - explicit VKFence(const VKDevice& device, UniqueFence handle); + explicit VKFence(const VKDevice& device); ~VKFence(); /** @@ -69,7 +69,7 @@ public: void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept; /// Retrieves the fence. - operator vk::Fence() const { + operator VkFence() const { return *handle; } @@ -87,7 +87,7 @@ private: bool Tick(bool gpu_wait, bool owner_wait); const VKDevice& device; ///< Device handler - UniqueFence handle; ///< Vulkan fence + vk::Fence handle; ///< Vulkan fence std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence bool is_owned = false; ///< The fence has been committed but not released yet. bool is_used = false; ///< The fence has been committed but it has not been checked to be free. @@ -181,7 +181,7 @@ public: VKFence& CommitFence(); /// Commits an unused command buffer and protects it with a fence. - vk::CommandBuffer CommitCommandBuffer(VKFence& fence); + VkCommandBuffer CommitCommandBuffer(VKFence& fence); private: /// Allocates new fences.
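VKFence compresses the stock fence round trip into Wait, GetStatus and Reset. For reference, the same lifecycle spelled out with the core Vulkan 1.0 C entry points the wrapper presumably sits on; the FenceRoundTrip function and its device parameter are assumptions for the sketch:

    #include <cstdint>
    #include <vulkan/vulkan.h>

    void FenceRoundTrip(VkDevice device) {
        VkFenceCreateInfo fence_ci{}; // zeroed flags: the fence starts unsignaled
        fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
        VkFence fence = VK_NULL_HANDLE;
        vkCreateFence(device, &fence_ci, nullptr, &fence);

        // ... submit work that signals 'fence' ...

        // Blocking path, as in VKFence::Wait:
        vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);

        // Polling path, as in the Tick branch above:
        if (vkGetFenceStatus(device, fence) == VK_SUCCESS) {
            vkResetFences(device, 1, &fence); // ready for the next commit
        }
        vkDestroyFence(device, fence, nullptr);
    }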
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 204b7c39c..07bbcf520 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -7,64 +7,64 @@ #include <unordered_map> #include "common/assert.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/textures/texture.h" namespace Vulkan { -static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) { +namespace { + +VkBorderColor ConvertBorderColor(std::array<float, 4> color) { // TODO(Rodrigo): Manage integer border colors if (color == std::array<float, 4>{0, 0, 0, 0}) { - return vk::BorderColor::eFloatTransparentBlack; + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; } else if (color == std::array<float, 4>{0, 0, 0, 1}) { - return vk::BorderColor::eFloatOpaqueBlack; + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; } else if (color == std::array<float, 4>{1, 1, 1, 1}) { - return vk::BorderColor::eFloatOpaqueWhite; + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } + if (color[0] + color[1] + color[2] > 1.35f) { + // If color elements are brighter than roughly 0.5 average, use white border + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } else if (color[3] > 0.5f) { + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; } else { - if (color[0] + color[1] + color[2] > 1.35f) { - // If color elements are brighter than roughly 0.5 average, use white border - return vk::BorderColor::eFloatOpaqueWhite; - } - if (color[3] > 0.5f) { - return vk::BorderColor::eFloatOpaqueBlack; - } - return vk::BorderColor::eFloatTransparentBlack; + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; } } +} // Anonymous namespace + VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {} VKSamplerCache::~VKSamplerCache() = default; -UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { - const float max_anisotropy{tsc.GetMaxAnisotropy()}; - const bool has_anisotropy{max_anisotropy > 1.0f}; - - const auto border_color{tsc.GetBorderColor()}; - const auto vk_border_color{TryConvertBorderColor(border_color)}; - - constexpr bool unnormalized_coords{false}; - - const vk::SamplerCreateInfo sampler_ci( - {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), - MaxwellToVK::Sampler::Filter(tsc.min_filter), - MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), - MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), - MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), - has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, - MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), - tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), - unnormalized_coords); - - const auto& dld{device.GetDispatchLoader()}; - const auto dev{device.GetLogical()}; - return dev.createSamplerUnique(sampler_ci, nullptr, dld); +vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { + VkSamplerCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter); + ci.minFilter = 
MaxwellToVK::Sampler::Filter(tsc.min_filter); + ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter); + ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter); + ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter); + ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter); + ci.mipLodBias = tsc.GetLodBias(); + ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE; + ci.maxAnisotropy = tsc.GetMaxAnisotropy(); + ci.compareEnable = tsc.depth_compare_enabled; + ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); + ci.minLod = tsc.GetMinLod(); + ci.maxLod = tsc.GetMaxLod(); + ci.borderColor = ConvertBorderColor(tsc.GetBorderColor()); + ci.unnormalizedCoordinates = VK_FALSE; + return device.GetLogical().CreateSampler(ci); } -vk::Sampler VKSamplerCache::ToSamplerType(const UniqueSampler& sampler) const { +VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { return *sampler; } diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h index 1f73b716b..a33d1c0ee 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h @@ -4,7 +4,7 @@ #pragma once -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/sampler_cache.h" #include "video_core/textures/texture.h" @@ -12,15 +12,15 @@ namespace Vulkan { class VKDevice; -class VKSamplerCache final : public VideoCommon::SamplerCache<vk::Sampler, UniqueSampler> { +class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> { public: explicit VKSamplerCache(const VKDevice& device); ~VKSamplerCache(); protected: - UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; + vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override; + VkSampler ToSamplerType(const vk::Sampler& sampler) const override; private: const VKDevice& device; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index b61d4fe63..900f551b3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -10,23 +10,22 @@ #include "common/assert.h" #include "common/microprofile.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_WaitForWorker); -void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf, - const vk::DispatchLoaderDynamic& dld) { +void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { auto command = first; while (command != nullptr) { auto next = command->GetNext(); - command->Execute(cmdbuf, dld); + command->Execute(cmdbuf); command->~Command(); command = next; } @@ -51,7 +50,7 @@ VKScheduler::~VKScheduler() { worker_thread.join(); } -void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { +void VKScheduler::Flush(bool release_fence, VkSemaphore semaphore) { 
SubmitExecution(semaphore); if (release_fence) { current_fence->Release(); @@ -59,7 +58,7 @@ void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { AllocateNewContext(); } -void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) { +void VKScheduler::Finish(bool release_fence, VkSemaphore semaphore) { SubmitExecution(semaphore); current_fence->Wait(); if (release_fence) { @@ -89,17 +88,34 @@ void VKScheduler::DispatchWork() { AcquireNewChunk(); } -void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) { - if (state.renderpass && renderpass_bi == *state.renderpass) { +void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, + VkExtent2D render_area) { + if (renderpass == state.renderpass && framebuffer == state.framebuffer && + render_area.width == state.render_area.width && + render_area.height == state.render_area.height) { return; } - const bool end_renderpass = state.renderpass.has_value(); - state.renderpass = renderpass_bi; - Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) { + const bool end_renderpass = state.renderpass != nullptr; + state.renderpass = renderpass; + state.framebuffer = framebuffer; + state.render_area = render_area; + + VkRenderPassBeginInfo renderpass_bi; + renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + renderpass_bi.pNext = nullptr; + renderpass_bi.renderPass = renderpass; + renderpass_bi.framebuffer = framebuffer; + renderpass_bi.renderArea.offset.x = 0; + renderpass_bi.renderArea.offset.y = 0; + renderpass_bi.renderArea.extent = render_area; + renderpass_bi.clearValueCount = 0; + renderpass_bi.pClearValues = nullptr; + + Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { if (end_renderpass) { - cmdbuf.endRenderPass(dld); + cmdbuf.EndRenderPass(); } - cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld); + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); }); } @@ -107,13 +123,13 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { EndRenderPass(); } -void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) { +void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { if (state.graphics_pipeline == pipeline) { return; } state.graphics_pipeline = pipeline; - Record([pipeline](auto cmdbuf, auto& dld) { - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld); + Record([pipeline](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); }); } @@ -126,37 +142,50 @@ void VKScheduler::WorkerThread() { } auto extracted_chunk = std::move(chunk_queue.Front()); chunk_queue.Pop(); - extracted_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader()); + extracted_chunk->ExecuteAll(current_cmdbuf); chunk_reserve.Push(std::move(extracted_chunk)); } while (!quit); } -void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { +void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); WaitWorker(); std::unique_lock lock{mutex}; - const auto queue = device.GetGraphicsQueue(); - const auto& dld = device.GetDispatchLoader(); - current_cmdbuf.end(dld); + current_cmdbuf.End(); - const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 
1U : 0U, - &semaphore); - queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld); + VkSubmitInfo submit_info; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = nullptr; + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = nullptr; + submit_info.pWaitDstStageMask = nullptr; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = current_cmdbuf.address(); + submit_info.signalSemaphoreCount = semaphore ? 1 : 0; + submit_info.pSignalSemaphores = &semaphore; + device.GetGraphicsQueue().Submit(submit_info, *current_fence); } void VKScheduler::AllocateNewContext() { ++ticks; + VkCommandBufferBeginInfo cmdbuf_bi; + cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cmdbuf_bi.pNext = nullptr; + cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + cmdbuf_bi.pInheritanceInfo = nullptr; + std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); - current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); - current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, - device.GetDispatchLoader()); + current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence), + device.GetDispatchLoader()); + current_cmdbuf.Begin(cmdbuf_bi); + // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { query_cache->UpdateCounters(); @@ -177,8 +206,8 @@ void VKScheduler::EndRenderPass() { if (!state.renderpass) { return; } - state.renderpass = std::nullopt; - Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); }); + state.renderpass = nullptr; + Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); } void VKScheduler::AcquireNewChunk() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index c7cc291c3..82a8adc69 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -13,7 +13,7 @@ #include <utility> #include "common/common_types.h" #include "common/threadsafe_queue.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -49,10 +49,10 @@ public: ~VKScheduler(); /// Sends the current execution context to the GPU. - void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr); + void Flush(bool release_fence = true, VkSemaphore semaphore = nullptr); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr); + void Finish(bool release_fence = true, VkSemaphore semaphore = nullptr); /// Waits for the worker thread to finish executing everything. After this function returns it's /// safe to touch worker resources. @@ -62,14 +62,15 @@ public: void DispatchWork(); /// Requests to begin a renderpass. - void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi); + void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, + VkExtent2D render_area); /// Requests the current execution context to be able to execute operations only allowed outside /// of a renderpass. void RequestOutsideRenderPassOperationContext(); /// Binds a pipeline to the current execution context. - void BindGraphicsPipeline(vk::Pipeline pipeline); + void BindGraphicsPipeline(VkPipeline pipeline); /// Assigns the query cache. 
void SetQueryCache(VKQueryCache& query_cache_) { @@ -101,8 +102,7 @@ private: public: virtual ~Command() = default; - virtual void Execute(vk::CommandBuffer cmdbuf, - const vk::DispatchLoaderDynamic& dld) const = 0; + virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; Command* GetNext() const { return next; @@ -125,9 +125,8 @@ private: TypedCommand(TypedCommand&&) = delete; TypedCommand& operator=(TypedCommand&&) = delete; - void Execute(vk::CommandBuffer cmdbuf, - const vk::DispatchLoaderDynamic& dld) const override { - command(cmdbuf, dld); + void Execute(vk::CommandBuffer cmdbuf) const override { + command(cmdbuf); } private: @@ -136,7 +135,7 @@ private: class CommandChunk final { public: - void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld); + void ExecuteAll(vk::CommandBuffer cmdbuf); template <typename T> bool Record(T& command) { @@ -175,7 +174,7 @@ private: void WorkerThread(); - void SubmitExecution(vk::Semaphore semaphore); + void SubmitExecution(VkSemaphore semaphore); void AllocateNewContext(); @@ -198,8 +197,10 @@ private: VKFence* next_fence = nullptr; struct State { - std::optional<vk::RenderPassBeginInfo> renderpass; - vk::Pipeline graphics_pipeline; + VkRenderPass renderpass = nullptr; + VkFramebuffer framebuffer = nullptr; + VkExtent2D render_area = {0, 0}; + VkPipeline graphics_pipeline = nullptr; } state; std::unique_ptr<CommandChunk> chunk; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b9f9e2714..aaa138f52 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -801,7 +801,7 @@ private: if (IsOutputAttributeArray()) { const u32 num = GetNumOutputVertices(); type = TypeArray(type, Constant(t_uint, num)); - if (device.GetDriverID() != vk::DriverIdKHR::eIntelProprietaryWindows) { + if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { // Intel's proprietary driver fails to setup defaults for arrayed output // attributes. 
varying_default = ConstantComposite(type, std::vector(num, varying_default)); @@ -1938,11 +1938,8 @@ private: return {}; } - template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, - Type value_type = result_type> + template <Id (Module::*func)(Id, Id, Id, Id, Id)> Expression Atomic(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - Id pointer; if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { pointer = GetSharedMemoryPointer(*smem); @@ -1950,15 +1947,19 @@ private: pointer = GetGlobalMemoryPointer(*gmem); } else { UNREACHABLE(); - return {Constant(type_def, 0), result_type}; + return {v_float_zero, Type::Float}; } - - const Id value = As(Visit(operation[1]), value_type); const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); - const Id semantics = Constant(type_def, 0); + const Id semantics = Constant(t_uint, 0); + const Id value = AsUint(Visit(operation[1])); + + return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; + } - return {(this->*func)(type_def, pointer, scope, semantics, value), result_type}; + template <Id (Module::*func)(Id, Id, Id, Id, Id)> + Expression Reduce(Operation operation) { + Atomic<func>(operation); + return {}; } Expression Branch(Operation operation) { @@ -2547,21 +2548,35 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, + + &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, + + &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, + + &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, + 
&SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index b97c4cb3d..784839327 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -8,27 +8,25 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { -UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { +vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { // Avoid undefined behavior by copying to a staging allocation ASSERT(code_size % sizeof(u32) == 0); const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); std::memcpy(data.get(), code_data, code_size); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - const vk::ShaderModuleCreateInfo shader_ci({}, code_size, data.get()); - vk::ShaderModule shader_module; - if (dev.createShaderModule(&shader_ci, nullptr, &shader_module, dld) != vk::Result::eSuccess) { - UNREACHABLE_MSG("Shader module failed to build!"); - } - - return UniqueShaderModule(shader_module, vk::ObjectDestroy(dev, nullptr, dld)); + VkShaderModuleCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.codeSize = code_size; + ci.pCode = data.get(); + return device.GetLogical().CreateShaderModule(ci); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index c06d65970..be38d6697 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -6,12 +6,12 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { class VKDevice; -UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); +vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 374959f82..94d954d7a 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -13,6 +13,7 @@ #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -71,17 +72,23 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_ } VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { - const auto usage = - vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | - vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer | - vk::BufferUsageFlagBits::eIndexBuffer; const u32 log2 = Common::Log2Ceil64(size); - const vk::BufferCreateInfo 
buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0, - nullptr); - const auto dev = device.GetLogical(); + + VkBufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.size = 1ULL << log2; + ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + auto buffer = std::make_unique<VKBuffer>(); - buffer->handle = dev.createBufferUnique(buffer_ci, nullptr, device.GetDispatchLoader()); - buffer->commit = memory_manager.Commit(*buffer->handle, host_visible); + buffer->handle = device.GetLogical().CreateBuffer(ci); + buffer->commit = memory_manager.Commit(buffer->handle, host_visible); auto& entries = GetCache(host_visible)[log2].entries; return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 4d9488f49..a0840ff8c 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -11,9 +11,9 @@ #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -22,7 +22,7 @@ class VKFenceWatch; class VKScheduler; struct VKBuffer final { - UniqueBuffer handle; + vk::Buffer handle; VKMemoryCommit commit; }; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index d48d3b44c..38a93a01a 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -9,11 +9,11 @@ #include "common/alignment.h" #include "common/assert.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -25,8 +25,8 @@ constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, - vk::MemoryPropertyFlags wanted) { - const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader()); + VkMemoryPropertyFlags wanted) { + const auto properties = device.GetPhysical().GetMemoryProperties(); for (u32 i = 0; i < properties.memoryTypeCount; i++) { if (!(filter & (1 << i))) { continue; @@ -35,13 +35,13 @@ std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, return i; } } - return {}; + return std::nullopt; } } // Anonymous namespace VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, - vk::BufferUsageFlags usage) + VkBufferUsageFlags usage) : device{device}, scheduler{scheduler} { CreateBuffers(usage); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); @@ -78,17 +78,13 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { invalidated = true; } - const auto dev = 
device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld)); - return {pointer, offset, invalidated}; + return {memory.Map(offset, size), offset, invalidated}; } void VKStreamBuffer::Unmap(u64 size) { ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); - const auto dev = device.GetLogical(); - dev.unmapMemory(*memory, device.GetDispatchLoader()); + memory.Unmap(); offset += size; @@ -101,30 +97,42 @@ void VKStreamBuffer::Unmap(u64 size) { watch.fence.Watch(scheduler.GetFence()); } -void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) { - const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive, - 0, nullptr); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); +void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { + VkBufferCreateInfo buffer_ci; + buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_ci.pNext = nullptr; + buffer_ci.flags = 0; + buffer_ci.size = STREAM_BUFFER_SIZE; + buffer_ci.usage = usage; + buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_ci.queueFamilyIndexCount = 0; + buffer_ci.pQueueFamilyIndices = nullptr; + + const auto& dev = device.GetLogical(); + buffer = dev.CreateBuffer(buffer_ci); - const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld); + const auto& dld = device.GetDispatchLoader(); + const auto requirements = dev.GetBufferMemoryRequirements(*buffer); // Prefer device local host visible allocations (this should hit AMD's pinned memory). - auto type = FindMemoryType(device, requirements.memoryTypeBits, - vk::MemoryPropertyFlagBits::eHostVisible | - vk::MemoryPropertyFlagBits::eHostCoherent | - vk::MemoryPropertyFlagBits::eDeviceLocal); + auto type = + FindMemoryType(device, requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); if (!type) { // Otherwise search for a host visible allocation. 
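    // (A memory type that is host visible but not device local is typically plain
    // system RAM that the GPU reads across the bus: slower, but a safe fallback.)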
type = FindMemoryType(device, requirements.memoryTypeBits, - vk::MemoryPropertyFlagBits::eHostVisible | - vk::MemoryPropertyFlagBits::eHostCoherent); + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); ASSERT_MSG(type, "No host visible and coherent memory type found"); } - const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type); - memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld); - - dev.bindBufferMemory(*buffer, *memory, 0, dld); + VkMemoryAllocateInfo memory_ai; + memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memory_ai.pNext = nullptr; + memory_ai.allocationSize = requirements.size; + memory_ai.memoryTypeIndex = *type; + + memory = dev.AllocateMemory(memory_ai); + buffer.BindMemory(*memory, 0); } void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 187c0c612..58ce8b973 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -9,7 +9,7 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -21,7 +21,7 @@ class VKScheduler; class VKStreamBuffer final { public: explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, - vk::BufferUsageFlags usage); + VkBufferUsageFlags usage); ~VKStreamBuffer(); /** @@ -35,7 +35,7 @@ public: /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Unmap(u64 size); - vk::Buffer GetHandle() const { + VkBuffer GetHandle() const { return *buffer; } @@ -46,20 +46,18 @@ private: }; /// Creates Vulkan buffer handles committing the required memory. - void CreateBuffers(vk::BufferUsageFlags usage); + void CreateBuffers(VkBufferUsageFlags usage); /// Increases the amount of watches available. void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); void WaitPendingOperations(u64 requested_upper_bound); - const VKDevice& device; ///< Vulkan device manager. - VKScheduler& scheduler; ///< Command scheduler. - const vk::AccessFlags access; ///< Access usage of this stream buffer. - const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. + const VKDevice& device; ///< Vulkan device manager. + VKScheduler& scheduler; ///< Command scheduler. - UniqueBuffer buffer; ///< Mapped buffer. - UniqueDeviceMemory memory; ///< Memory allocation. + vk::Buffer buffer; ///< Mapped buffer. + vk::DeviceMemory memory; ///< Memory allocation. u64 offset{}; ///< Buffer iterator. u64 mapped_size{}; ///< Size reserved for the current copy. 
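For context, a minimal sketch of how a caller is expected to drive the Map/Unmap interface above; the function name and the vertex-data framing are illustrative, not part of this change:

#include <cstring>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"

void UploadVertices(Vulkan::VKStreamBuffer& stream, const std::vector<u8>& data) {
    // Map reserves space in the ring buffer and returns the write pointer, the
    // offset the data will occupy, and whether previously returned regions were
    // invalidated by wrapping around.
    const auto [pointer, offset, invalidated] = stream.Map(data.size(), 4);
    std::memcpy(pointer, data.data(), data.size());
    // Unmap publishes exactly the bytes written so the fence watches stay correct.
    stream.Unmap(data.size());
    // The caller then binds stream.GetHandle() at `offset` for the draw.
}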
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 9e73fa9cd..bffd8f32a 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -11,69 +11,64 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/frontend/framebuffer_layout.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { namespace { -vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats, - bool srgb) { - if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) { - vk::SurfaceFormatKHR format; - format.format = vk::Format::eB8G8R8A8Unorm; - format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear; +VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) { + if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) { + VkSurfaceFormatKHR format; + format.format = VK_FORMAT_B8G8R8A8_UNORM; + format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; return format; } const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) { - const auto request_format = srgb ? vk::Format::eB8G8R8A8Srgb : vk::Format::eB8G8R8A8Unorm; + const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; return format.format == request_format && - format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear; + format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; }); return found != formats.end() ? *found : formats[0]; } -vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) { +VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) { // Mailbox doesn't lock the application like fifo (vsync), prefer it - const auto& found = std::find_if(modes.begin(), modes.end(), [](const auto& mode) { - return mode == vk::PresentModeKHR::eMailbox; - }); - return found != modes.end() ? *found : vk::PresentModeKHR::eFifo; + const auto found = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR); + return found != modes.end() ? 
*found : VK_PRESENT_MODE_FIFO_KHR; } -vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, - u32 height) { +VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) { constexpr auto undefined_size{std::numeric_limits<u32>::max()}; if (capabilities.currentExtent.width != undefined_size) { return capabilities.currentExtent; } - vk::Extent2D extent = {width, height}; + VkExtent2D extent; extent.width = std::max(capabilities.minImageExtent.width, - std::min(capabilities.maxImageExtent.width, extent.width)); + std::min(capabilities.maxImageExtent.width, width)); extent.height = std::max(capabilities.minImageExtent.height, - std::min(capabilities.maxImageExtent.height, extent.height)); + std::min(capabilities.maxImageExtent.height, height)); return extent; } } // Anonymous namespace -VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device) +VKSwapchain::VKSwapchain(VkSurfaceKHR surface, const VKDevice& device) : surface{surface}, device{device} {} VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { - const auto& dld = device.GetDispatchLoader(); const auto physical_device = device.GetPhysical(); - const auto capabilities{physical_device.getSurfaceCapabilitiesKHR(surface, dld)}; + const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { return; } - device.GetLogical().waitIdle(dld); + device.GetLogical().WaitIdle(); Destroy(); CreateSwapchain(capabilities, width, height, srgb); @@ -84,10 +79,8 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { } void VKSwapchain::AcquireNextImage() { - const auto dev{device.GetLogical()}; - const auto& dld{device.GetDispatchLoader()}; - dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), - *present_semaphores[frame_index], {}, &image_index, dld); + device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), + *present_semaphores[frame_index], {}, &image_index); if (auto& fence = fences[image_index]; fence) { fence->Wait(); @@ -96,29 +89,37 @@ void VKSwapchain::AcquireNextImage() { } } -bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) { - const vk::Semaphore present_semaphore{*present_semaphores[frame_index]}; - const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore}; - const u32 wait_semaphore_count{render_semaphore ? 2U : 1U}; - const auto& dld{device.GetDispatchLoader()}; +bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { + const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; + const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; bool recreated = false; - const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1, - &swapchain.get(), &image_index, {}); - switch (const auto result = present_queue.presentKHR(&present_info, dld); result) { - case vk::Result::eSuccess: + VkPresentInfoKHR present_info; + present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + present_info.pNext = nullptr; + present_info.waitSemaphoreCount = render_semaphore ? 
2U : 1U; + present_info.pWaitSemaphores = semaphores.data(); + present_info.swapchainCount = 1; + present_info.pSwapchains = swapchain.address(); + present_info.pImageIndices = &image_index; + present_info.pResults = nullptr; + + switch (const VkResult result = present_queue.Present(present_info)) { + case VK_SUCCESS: + break; + case VK_SUBOPTIMAL_KHR: + LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; - case vk::Result::eErrorOutOfDateKHR: + case VK_ERROR_OUT_OF_DATE_KHR: if (current_width > 0 && current_height > 0) { Create(current_width, current_height, current_srgb); recreated = true; } break; default: - LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!", - vk::to_string(result)); - UNREACHABLE(); + LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); + break; } ASSERT(fences[image_index] == nullptr); @@ -132,74 +133,92 @@ bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebu return framebuffer.width != current_width || framebuffer.height != current_height; } -void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, +void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb) { - const auto& dld{device.GetDispatchLoader()}; const auto physical_device{device.GetPhysical()}; - const auto formats{physical_device.getSurfaceFormatsKHR(surface, dld)}; - const auto present_modes{physical_device.getSurfacePresentModesKHR(surface, dld)}; + const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; + const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; - const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; - const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; + const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; + const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; u32 requested_image_count{capabilities.minImageCount + 1}; if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { requested_image_count = capabilities.maxImageCount; } - vk::SwapchainCreateInfoKHR swapchain_ci( - {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, {}, 1, - vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, capabilities.currentTransform, - vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, {}); + VkSwapchainCreateInfoKHR swapchain_ci; + swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; + swapchain_ci.pNext = nullptr; + swapchain_ci.flags = 0; + swapchain_ci.surface = surface; + swapchain_ci.minImageCount = requested_image_count; + swapchain_ci.imageFormat = surface_format.format; + swapchain_ci.imageColorSpace = surface_format.colorSpace; + swapchain_ci.imageArrayLayers = 1; + swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + swapchain_ci.queueFamilyIndexCount = 0; + swapchain_ci.pQueueFamilyIndices = nullptr; + swapchain_ci.preTransform = capabilities.currentTransform; + swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + swapchain_ci.presentMode = present_mode; + swapchain_ci.clipped = VK_FALSE; + swapchain_ci.oldSwapchain = nullptr; const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; const std::array<u32, 2> queue_indices{graphics_family, present_family}; 
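    // When the graphics and present queue families differ, the swapchain images
    // must be created in concurrent sharing mode so both queues may access them;
    // with a single family, exclusive mode avoids that overhead.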
if (graphics_family != present_family) { - swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent; + swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT; swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } else { - swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive; + swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; } // Request the size again to reduce the possibility of a TOCTOU race condition. - const auto updated_capabilities = physical_device.getSurfaceCapabilitiesKHR(surface, dld); + const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); // Don't add code between this query and the swapchain creation. - const auto dev{device.GetLogical()}; - swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld); + swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); extent = swapchain_ci.imageExtent; current_width = extent.width; current_height = extent.height; current_srgb = srgb; - images = dev.getSwapchainImagesKHR(*swapchain, dld); + images = swapchain.GetImages(); image_count = static_cast<u32>(images.size()); image_format = surface_format.format; } void VKSwapchain::CreateSemaphores() { - const auto dev{device.GetLogical()}; - const auto& dld{device.GetDispatchLoader()}; - present_semaphores.resize(image_count); - for (std::size_t i = 0; i < image_count; i++) { - present_semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld); - } + present_semaphores.resize(image_count); + std::generate(present_semaphores.begin(), present_semaphores.end(), + [this] { return device.GetLogical().CreateSemaphore(); }); } void VKSwapchain::CreateImageViews() { - const auto dev{device.GetLogical()}; - const auto& dld{device.GetDispatchLoader()}; + VkImageViewCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + // ci.image is filled in below, once per swapchain image + ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + ci.format = image_format; + ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; + ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + ci.subresourceRange.baseMipLevel = 0; + ci.subresourceRange.levelCount = 1; + ci.subresourceRange.baseArrayLayer = 0; + ci.subresourceRange.layerCount = 1; image_views.resize(image_count); for (std::size_t i = 0; i < image_count; i++) { - const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D, - image_format, {}, - {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}); - image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld); + ci.image = images[i]; + image_views[i] = device.GetLogical().CreateImageView(ci); } } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 2f3b2ccd5..a35d61345 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -7,7 +7,7 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Layout { struct FramebufferLayout; @@ -20,7 +20,7 @@ class VKFence; class VKSwapchain { public: - explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device); + explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device); ~VKSwapchain(); /// Creates (or recreates) the swapchain with a 
given size. @@ -31,12 +31,12 @@ public: /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be /// recreated. Takes responsibility for the ownership of fence. - bool Present(vk::Semaphore render_semaphore, VKFence& fence); + bool Present(VkSemaphore render_semaphore, VKFence& fence); /// Returns true when the framebuffer layout has changed. bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; - const vk::Extent2D& GetSize() const { + VkExtent2D GetSize() const { return extent; } @@ -48,15 +48,15 @@ public: return image_index; } - vk::Image GetImageIndex(std::size_t index) const { + VkImage GetImageIndex(std::size_t index) const { return images[index]; } - vk::ImageView GetImageViewIndex(std::size_t index) const { + VkImageView GetImageViewIndex(std::size_t index) const { return *image_views[index]; } - vk::Format GetImageFormat() const { + VkFormat GetImageFormat() const { return image_format; } @@ -65,30 +65,30 @@ public: } private: - void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, + void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb); void CreateSemaphores(); void CreateImageViews(); void Destroy(); - const vk::SurfaceKHR surface; + const VkSurfaceKHR surface; const VKDevice& device; - UniqueSwapchainKHR swapchain; + vk::SwapchainKHR swapchain; std::size_t image_count{}; - std::vector<vk::Image> images; - std::vector<UniqueImageView> image_views; - std::vector<UniqueFramebuffer> framebuffers; + std::vector<VkImage> images; + std::vector<vk::ImageView> image_views; + std::vector<vk::Framebuffer> framebuffers; std::vector<VKFence*> fences; - std::vector<UniqueSemaphore> present_semaphores; + std::vector<vk::Semaphore> present_semaphores; u32 image_index{}; u32 frame_index{}; - vk::Format image_format{}; - vk::Extent2D extent{}; + VkFormat image_format{}; + VkExtent2D extent{}; u32 current_width{}; u32 current_height{}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 5b9b39670..de4c23120 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -17,7 +17,6 @@ #include "core/memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/morton.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" @@ -25,6 +24,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" #include "video_core/textures/convert.h" @@ -39,18 +39,18 @@ using VideoCore::Surface::SurfaceTarget; namespace { -vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { +VkImageType SurfaceTargetToImage(SurfaceTarget target) { switch (target) { case SurfaceTarget::Texture1D: case SurfaceTarget::Texture1DArray: - return vk::ImageType::e1D; + return VK_IMAGE_TYPE_1D; case SurfaceTarget::Texture2D: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - return vk::ImageType::e2D; + return VK_IMAGE_TYPE_2D; case SurfaceTarget::Texture3D: - return vk::ImageType::e3D; + return VK_IMAGE_TYPE_3D; case 
SurfaceTarget::TextureBuffer: UNREACHABLE(); return {}; @@ -59,35 +59,35 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { return {}; } -vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { +VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { if (pixel_format < PixelFormat::MaxColorFormat) { - return vk::ImageAspectFlagBits::eColor; + return VK_IMAGE_ASPECT_COLOR_BIT; } else if (pixel_format < PixelFormat::MaxDepthFormat) { - return vk::ImageAspectFlagBits::eDepth; + return VK_IMAGE_ASPECT_DEPTH_BIT; } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { - return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; } else { - UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format)); - return vk::ImageAspectFlagBits::eColor; + UNREACHABLE_MSG("Invalid pixel format={}", static_cast<int>(pixel_format)); + return VK_IMAGE_ASPECT_COLOR_BIT; } } -vk::ImageViewType GetImageViewType(SurfaceTarget target) { +VkImageViewType GetImageViewType(SurfaceTarget target) { switch (target) { case SurfaceTarget::Texture1D: - return vk::ImageViewType::e1D; + return VK_IMAGE_VIEW_TYPE_1D; case SurfaceTarget::Texture2D: - return vk::ImageViewType::e2D; + return VK_IMAGE_VIEW_TYPE_2D; case SurfaceTarget::Texture3D: - return vk::ImageViewType::e3D; + return VK_IMAGE_VIEW_TYPE_3D; case SurfaceTarget::Texture1DArray: - return vk::ImageViewType::e1DArray; + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; case SurfaceTarget::Texture2DArray: - return vk::ImageViewType::e2DArray; + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; case SurfaceTarget::TextureCubemap: - return vk::ImageViewType::eCube; + return VK_IMAGE_VIEW_TYPE_CUBE; case SurfaceTarget::TextureCubeArray: - return vk::ImageViewType::eCubeArray; + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; case SurfaceTarget::TextureBuffer: break; } @@ -95,73 +95,88 @@ vk::ImageViewType GetImageViewType(SurfaceTarget target) { return {}; } -UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, - std::size_t host_memory_size) { +vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, + std::size_t host_memory_size) { // TODO(Rodrigo): Move texture buffer creation to the buffer cache - const vk::BufferCreateInfo buffer_ci({}, host_memory_size, - vk::BufferUsageFlagBits::eUniformTexelBuffer | - vk::BufferUsageFlagBits::eTransferSrc | - vk::BufferUsageFlagBits::eTransferDst, - vk::SharingMode::eExclusive, 0, nullptr); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createBufferUnique(buffer_ci, nullptr, dld); + VkBufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.size = static_cast<VkDeviceSize>(host_memory_size); + ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + return device.GetLogical().CreateBuffer(ci); } -vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, - const SurfaceParams& params, - vk::Buffer buffer, - std::size_t host_memory_size) { +VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, + const SurfaceParams& params, VkBuffer buffer, + std::size_t host_memory_size) { ASSERT(params.IsBuffer()); - const auto format = - 
MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; - return vk::BufferViewCreateInfo({}, buffer, format, 0, host_memory_size); + VkBufferViewCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.buffer = buffer; + ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; + ci.offset = 0; + ci.range = static_cast<VkDeviceSize>(host_memory_size); + return ci; } -vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { - constexpr auto sample_count = vk::SampleCountFlagBits::e1; - constexpr auto tiling = vk::ImageTiling::eOptimal; - +VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { ASSERT(!params.IsBuffer()); const auto [format, attachable, storage] = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); - auto image_usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | - vk::ImageUsageFlagBits::eTransferSrc; + VkImageCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.imageType = SurfaceTargetToImage(params.target); + ci.format = format; + ci.mipLevels = params.num_levels; + ci.arrayLayers = static_cast<u32>(params.GetNumLayers()); + ci.samples = VK_SAMPLE_COUNT_1_BIT; + ci.tiling = VK_IMAGE_TILING_OPTIMAL; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (attachable) { - image_usage |= params.IsPixelFormatZeta() ? vk::ImageUsageFlagBits::eDepthStencilAttachment - : vk::ImageUsageFlagBits::eColorAttachment; + ci.usage |= params.IsPixelFormatZeta() ? 
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT + : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; } if (storage) { - image_usage |= vk::ImageUsageFlagBits::eStorage; + ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT; } - vk::ImageCreateFlags flags; - vk::Extent3D extent; switch (params.target) { case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - flags |= vk::ImageCreateFlagBits::eCubeCompatible; + ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; [[fallthrough]]; case SurfaceTarget::Texture1D: case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2D: case SurfaceTarget::Texture2DArray: - extent = vk::Extent3D(params.width, params.height, 1); + ci.extent = {params.width, params.height, 1}; break; case SurfaceTarget::Texture3D: - extent = vk::Extent3D(params.width, params.height, params.depth); + ci.extent = {params.width, params.height, params.depth}; break; case SurfaceTarget::TextureBuffer: UNREACHABLE(); } - return vk::ImageCreateInfo(flags, SurfaceTargetToImage(params.target), format, extent, - params.num_levels, static_cast<u32>(params.GetNumLayers()), - sample_count, tiling, image_usage, vk::SharingMode::eExclusive, 0, - nullptr, vk::ImageLayout::eUndefined); + return ci; } } // Anonymous namespace @@ -175,15 +190,13 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { if (params.IsBuffer()) { buffer = CreateBuffer(device, params, host_memory_size); - commit = memory_manager.Commit(*buffer, false); + commit = memory_manager.Commit(buffer, false); const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); format = buffer_view_ci.format; - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - buffer_view = dev.createBufferViewUnique(buffer_view_ci, nullptr, dld); + buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci); } else { const auto image_ci = GenerateImageCreateInfo(device, params); format = image_ci.format; @@ -221,16 +234,15 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { // We can't copy images to buffers inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); - FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead, - vk::ImageLayout::eTransferSrcOptimal); + FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); // TODO(Rodrigo): Do this in a single copy for (u32 level = 0; level < params.num_levels; ++level) { - scheduler.Record([image = image->GetHandle(), buffer = *buffer.handle, - copy = GetBufferImageCopy(level)](auto cmdbuf, auto& dld) { - cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, {copy}, - dld); + scheduler.Record([image = *image->GetHandle(), buffer = *buffer.handle, + copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); }); } scheduler.Finish(); @@ -257,15 +269,27 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, - size = host_memory_size](auto cmdbuf, auto& dld) { - const vk::BufferCopy copy(0, 0, size); - 
cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld); - - cmdbuf.pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eVertexShader, {}, {}, - {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, - vk::AccessFlagBits::eShaderRead, 0, 0, dst_buffer, 0, size)}, - {}, dld); + size = host_memory_size](vk::CommandBuffer cmdbuf) { + VkBufferCopy copy; + copy.srcOffset = 0; + copy.dstOffset = 0; + copy.size = size; + cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = dst_buffer; + barrier.offset = 0; + barrier.size = size; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, {}, barrier, {}); }); } @@ -273,43 +297,49 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); - FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite, - vk::ImageLayout::eTransferDstOptimal); + FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); for (u32 level = 0; level < params.num_levels; ++level) { - vk::BufferImageCopy copy = GetBufferImageCopy(level); - if (image->GetAspectMask() == - (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { - vk::BufferImageCopy depth = copy; - vk::BufferImageCopy stencil = copy; - depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; - stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; - scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth, - stencil](auto cmdbuf, auto& dld) { - cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, - {depth, stencil}, dld); + const VkBufferImageCopy copy = GetBufferImageCopy(level); + if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), + copy](vk::CommandBuffer cmdbuf) { + std::array<VkBufferImageCopy, 2> copies = {copy, copy}; + copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; + cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + copies); }); } else { - scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), - copy](auto cmdbuf, auto& dld) { - cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, - {copy}, dld); + scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), + copy](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); }); } } } -vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { - const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? 
params.GetMipDepth(level) : 1; - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); - - return vk::BufferImageCopy( - mip_offset, 0, 0, - {image->GetAspectMask(), level, 0, static_cast<u32>(params.GetNumLayers())}, {0, 0, 0}, - {params.GetMipWidth(level), params.GetMipHeight(level), vk_depth}); +VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { + VkBufferImageCopy copy; + copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted); + copy.bufferRowLength = 0; + copy.bufferImageHeight = 0; + copy.imageSubresource.aspectMask = image->GetAspectMask(); + copy.imageSubresource.mipLevel = level; + copy.imageSubresource.baseArrayLayer = 0; + copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers()); + copy.imageOffset.x = 0; + copy.imageOffset.y = 0; + copy.imageOffset.z = 0; + copy.imageExtent.width = params.GetMipWidth(level); + copy.imageExtent.height = params.GetMipHeight(level); + copy.imageExtent.depth = + params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; + return copy; } -vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { +VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { return {image->GetAspectMask(), 0, params.num_levels, 0, static_cast<u32>(params.GetNumLayers())}; } @@ -321,12 +351,12 @@ CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surf aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) - : vk::ImageViewType{}} {} + : VK_IMAGE_VIEW_TYPE_1D} {} CachedSurfaceView::~CachedSurfaceView() = default; -vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { +VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, + SwizzleSource z_source, SwizzleSource w_source) { const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); if (last_image_view && last_swizzle == swizzle) { return last_image_view; @@ -351,37 +381,45 @@ vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource // Games can sample depth or stencil values on textures. This is decided by the swizzle value on // hardware. To emulate this on Vulkan we specify it in the aspect. - vk::ImageAspectFlags aspect = aspect_mask; - if (aspect == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { + VkImageAspectFlags aspect = aspect_mask; + if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); const bool is_first = x_source == SwizzleSource::R; switch (params.pixel_format) { case VideoCore::Surface::PixelFormat::Z24S8: case VideoCore::Surface::PixelFormat::Z32FS8: - aspect = is_first ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eStencil; + aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; break; case VideoCore::Surface::PixelFormat::S8Z24: - aspect = is_first ? vk::ImageAspectFlagBits::eStencil : vk::ImageAspectFlagBits::eDepth; + aspect = is_first ? 
VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; break; default: - aspect = vk::ImageAspectFlagBits::eDepth; + aspect = VK_IMAGE_ASPECT_DEPTH_BIT; UNIMPLEMENTED(); } // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity - swizzle_x = vk::ComponentSwizzle::eR; - swizzle_y = vk::ComponentSwizzle::eG; - swizzle_z = vk::ComponentSwizzle::eB; - swizzle_w = vk::ComponentSwizzle::eA; + swizzle_x = VK_COMPONENT_SWIZZLE_R; + swizzle_y = VK_COMPONENT_SWIZZLE_G; + swizzle_z = VK_COMPONENT_SWIZZLE_B; + swizzle_w = VK_COMPONENT_SWIZZLE_A; } - const vk::ImageViewCreateInfo image_view_ci( - {}, surface.GetImageHandle(), image_view_type, surface.GetImage().GetFormat(), - {swizzle_x, swizzle_y, swizzle_z, swizzle_w}, - {aspect, base_level, num_levels, base_layer, num_layers}); + VkImageViewCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.image = surface.GetImageHandle(); + ci.viewType = image_view_type; + ci.format = surface.GetImage().GetFormat(); + ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; + ci.subresourceRange.aspectMask = aspect; + ci.subresourceRange.baseMipLevel = base_level; + ci.subresourceRange.levelCount = num_levels; + ci.subresourceRange.baseArrayLayer = base_layer; + ci.subresourceRange.layerCount = num_layers; + image_view = device.GetLogical().CreateImageView(ci); - const auto dev = device.GetLogical(); - image_view = dev.createImageViewUnique(image_view_ci, nullptr, device.GetDispatchLoader()); return last_image_view = *image_view; } @@ -418,25 +456,36 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, scheduler.RequestOutsideRenderPassOperationContext(); src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, - vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead, - vk::ImageLayout::eTransferSrcOptimal); - dst_surface->Transition( - dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); - - const vk::ImageSubresourceLayers src_subresource( - src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); - const vk::ImageSubresourceLayers dst_subresource( - dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers); - const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0); - const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z); - const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z); - const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent); - const vk::Image src_image = src_surface->GetImageHandle(); - const vk::Image dst_image = dst_surface->GetImageHandle(); - scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) { - cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, - vk::ImageLayout::eTransferDstOptimal, {copy}, dld); + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + VkImageCopy copy; + copy.srcSubresource.aspectMask = src_surface->GetAspectMask(); + copy.srcSubresource.mipLevel = copy_params.source_level; + 
copy.srcSubresource.baseArrayLayer = copy_params.source_z; + copy.srcSubresource.layerCount = num_layers; + copy.srcOffset.x = copy_params.source_x; + copy.srcOffset.y = copy_params.source_y; + copy.srcOffset.z = 0; + copy.dstSubresource.aspectMask = dst_surface->GetAspectMask(); + copy.dstSubresource.mipLevel = copy_params.dest_level; + copy.dstSubresource.baseArrayLayer = dst_base_layer; + copy.dstSubresource.layerCount = num_layers; + copy.dstOffset.x = copy_params.dest_x; + copy.dstOffset.y = copy_params.dest_y; + copy.dstOffset.z = dst_offset_z; + copy.extent.width = copy_params.width; + copy.extent.height = copy_params.height; + copy.extent.depth = extent_z; + + const VkImage src_image = src_surface->GetImageHandle(); + const VkImage dst_image = dst_surface->GetImageHandle(); + scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); }); } @@ -445,25 +494,34 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view, // We can't blit inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); - src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferRead); - dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferWrite); - - const auto& cfg = copy_config; - const auto src_top_left = vk::Offset3D(cfg.src_rect.left, cfg.src_rect.top, 0); - const auto src_bot_right = vk::Offset3D(cfg.src_rect.right, cfg.src_rect.bottom, 1); - const auto dst_top_left = vk::Offset3D(cfg.dst_rect.left, cfg.dst_rect.top, 0); - const auto dst_bot_right = vk::Offset3D(cfg.dst_rect.right, cfg.dst_rect.bottom, 1); - const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right}, - dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); + src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_READ_BIT); + dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT); + + VkImageBlit blit; + blit.srcSubresource = src_view->GetImageSubresourceLayers(); + blit.srcOffsets[0].x = copy_config.src_rect.left; + blit.srcOffsets[0].y = copy_config.src_rect.top; + blit.srcOffsets[0].z = 0; + blit.srcOffsets[1].x = copy_config.src_rect.right; + blit.srcOffsets[1].y = copy_config.src_rect.bottom; + blit.srcOffsets[1].z = 1; + blit.dstSubresource = dst_view->GetImageSubresourceLayers(); + blit.dstOffsets[0].x = copy_config.dst_rect.left; + blit.dstOffsets[0].y = copy_config.dst_rect.top; + blit.dstOffsets[0].z = 0; + blit.dstOffsets[1].x = copy_config.dst_rect.right; + blit.dstOffsets[1].y = copy_config.dst_rect.bottom; + blit.dstOffsets[1].z = 1; + const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, - is_linear](auto cmdbuf, auto& dld) { - cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, - vk::ImageLayout::eTransferDstOptimal, {blit}, - is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld); + is_linear](vk::CommandBuffer cmdbuf) { + cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit, + is_linear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST); }); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 22e3d34de..115595f28 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -13,10 +13,10 @@ #include "common/math_util.h" #include "video_core/gpu.h" #include "video_core/rasterizer_cache.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_image.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/textures/decoders.h" @@ -60,15 +60,15 @@ public: void UploadTexture(const std::vector<u8>& staging_buffer) override; void DownloadTexture(std::vector<u8>& staging_buffer) override; - void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, - vk::ImageLayout new_layout) { + void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, + VkImageLayout new_layout) { image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, new_stage_mask, new_access, new_layout); } void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, - vk::ImageLayout new_layout) { + VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, + VkImageLayout new_layout) { image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, new_access, new_layout); } @@ -81,15 +81,15 @@ public: return *image; } - vk::Image GetImageHandle() const { - return image->GetHandle(); + VkImage GetImageHandle() const { + return *image->GetHandle(); } - vk::ImageAspectFlags GetAspectMask() const { + VkImageAspectFlags GetAspectMask() const { return image->GetAspectMask(); } - vk::BufferView GetBufferViewHandle() const { + VkBufferView GetBufferViewHandle() const { return *buffer_view; } @@ -104,9 +104,9 @@ private: void UploadImage(const std::vector<u8>& staging_buffer); - vk::BufferImageCopy GetBufferImageCopy(u32 level) const; + VkBufferImageCopy GetBufferImageCopy(u32 level) const; - vk::ImageSubresourceRange GetImageSubresourceRange() const; + VkImageSubresourceRange GetImageSubresourceRange() const; Core::System& system; const VKDevice& device; @@ -116,11 +116,11 @@ private: VKStagingBufferPool& staging_pool; std::optional<VKImage> image; - UniqueBuffer buffer; - UniqueBufferView buffer_view; + vk::Buffer buffer; + vk::BufferView buffer_view; VKMemoryCommit commit; - vk::Format format; + VkFormat format = VK_FORMAT_UNDEFINED; }; class CachedSurfaceView final : public VideoCommon::ViewBase { @@ -129,16 +129,16 @@ public: const ViewParams& params, bool is_proxy); ~CachedSurfaceView(); - vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); + VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); bool IsSameSurface(const CachedSurfaceView& rhs) const { return &surface == &rhs.surface; } - vk::ImageView GetHandle() { + VkImageView GetHandle() { return 
GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A); } @@ -159,24 +159,24 @@ public: return buffer_view; } - vk::Image GetImage() const { + VkImage GetImage() const { return image; } - vk::BufferView GetBufferView() const { + VkBufferView GetBufferView() const { return buffer_view; } - vk::ImageSubresourceRange GetImageSubresourceRange() const { + VkImageSubresourceRange GetImageSubresourceRange() const { return {aspect_mask, base_level, num_levels, base_layer, num_layers}; } - vk::ImageSubresourceLayers GetImageSubresourceLayers() const { + VkImageSubresourceLayers GetImageSubresourceLayers() const { return {surface.GetAspectMask(), base_level, base_layer, num_layers}; } - void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask, - vk::AccessFlags new_access) const { + void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask, + VkAccessFlags new_access) const { surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, new_access, new_layout); } @@ -196,9 +196,9 @@ private: // Store a copy of these values to avoid double dereference when reading them const SurfaceParams params; - const vk::Image image; - const vk::BufferView buffer_view; - const vk::ImageAspectFlags aspect_mask; + const VkImage image; + const VkBufferView buffer_view; + const VkImageAspectFlags aspect_mask; const VKDevice& device; CachedSurface& surface; @@ -206,12 +206,12 @@ private: const u32 num_layers; const u32 base_level; const u32 num_levels; - const vk::ImageViewType image_view_type; + const VkImageViewType image_view_type; - vk::ImageView last_image_view; - u32 last_swizzle{}; + VkImageView last_image_view = nullptr; + u32 last_swizzle = 0; - std::unordered_map<u32, UniqueImageView> view_cache; + std::unordered_map<u32, vk::ImageView> view_cache; }; class VKTextureCache final : public TextureCacheBase { diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 0e577b9ff..681ecde98 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -7,10 +7,10 @@ #include "common/assert.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -27,31 +27,32 @@ void VKUpdateDescriptorQueue::Acquire() { entries.clear(); } -void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template, - vk::DescriptorSet set) { +void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, + VkDescriptorSet set) { if (payload.size() + entries.size() >= payload.max_size()) { LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); scheduler.WaitWorker(); payload.clear(); } + // TODO(Rodrigo): Rework to write the payload directly const auto payload_start = payload.data() + payload.size(); for (const auto& entry : entries) { - if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) { + if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { payload.push_back(*image); - } else if (const auto buffer = std::get_if<Buffer>(&entry)) { - payload.emplace_back(*buffer->buffer, buffer->offset, 
buffer->size); - } else if (const auto texel = std::get_if<vk::BufferView>(&entry)) { + } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) { + payload.push_back(*buffer); + } else if (const auto texel = std::get_if<VkBufferView>(&entry)) { payload.push_back(*texel); } else { UNREACHABLE(); } } - scheduler.Record([dev = device.GetLogical(), payload_start, set, - update_template]([[maybe_unused]] auto cmdbuf, auto& dld) { - dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld); - }); + scheduler.Record( + [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) { + logical->UpdateDescriptorSet(set, update_template, payload_start); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 8c825aa29..6ba2c9997 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -9,7 +9,7 @@ #include <boost/container/static_vector.hpp> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -18,20 +18,19 @@ class VKScheduler; class DescriptorUpdateEntry { public: - explicit DescriptorUpdateEntry() : image{} {} + explicit DescriptorUpdateEntry() {} - DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {} + DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {} - DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size) - : buffer{buffer, offset, size} {} + DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {} - DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {} + DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} private: union { - vk::DescriptorImageInfo image; - vk::DescriptorBufferInfo buffer; - vk::BufferView texel_buffer; + VkDescriptorImageInfo image; + VkDescriptorBufferInfo buffer; + VkBufferView texel_buffer; }; }; @@ -44,37 +43,30 @@ public: void Acquire(); - void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set); + void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); - void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) { - entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}}); + void AddSampledImage(VkSampler sampler, VkImageView image_view) { + entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); } - void AddImage(vk::ImageView image_view) { - entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}}); + void AddImage(VkImageView image_view) { + entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); } - void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) { - entries.push_back(Buffer{buffer, offset, size}); + void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { + entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); } - void AddTexelBuffer(vk::BufferView texel_buffer) { + void AddTexelBuffer(VkBufferView texel_buffer) { entries.emplace_back(texel_buffer); } - vk::ImageLayout* GetLastImageLayout() { - return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout; + VkImageLayout* GetLastImageLayout() { + return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout; } private: - struct Buffer { - const vk::Buffer* buffer{}; - u64 offset{}; 
- std::size_t size{}; - }; - using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>; - // Old gcc versions don't consider this trivially copyable. - // static_assert(std::is_trivially_copyable_v<Variant>); + using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>; const VKDevice& device; VKScheduler& scheduler; diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 2e2711350..6d313963a 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -484,17 +484,17 @@ bool TryInspectAddress(CFGRebuildState& state) { } case BlockCollision::Inside: { // This case is the tricky one: - // We need to Split the block in 2 sepparate blocks + // We need to split the block into 2 separate blocks const u32 end = state.block_info[block_index].end; BlockInfo& new_block = CreateBlockInfo(state, address, end); BlockInfo& current_block = state.block_info[block_index]; current_block.end = address - 1; - new_block.branch = current_block.branch; + new_block.branch = std::move(current_block.branch); BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); const auto branch = std::get_if<SingleBranch>(forward_branch.get()); branch->address = address; branch->ignore = true; - current_block.branch = forward_branch; + current_block.branch = std::move(forward_branch); return true; } default: diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 478394682..4db329fa5 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } - case OpCode::Id::FCMP_R: { + case OpCode::Id::FCMP_RR: + case OpCode::Id::FCMP_RC: { UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); Node op_c = GetRegister(instr.gpr39); Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index c72690b2b..b9989c88c 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -2,6 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <limits> +#include <optional> +#include <utility> + #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -15,9 +19,49 @@ using Tegra::Shader::OpCode; using Tegra::Shader::Register; namespace { + constexpr OperationCode GetFloatSelector(u64 selector) { return selector == 0 ? 
OperationCode::FCastHalf0 : OperationCode::FCastHalf1; } + +constexpr u32 SizeInBits(Register::Size size) { + switch (size) { + case Register::Size::Byte: + return 8; + case Register::Size::Short: + return 16; + case Register::Size::Word: + return 32; + case Register::Size::Long: + return 64; + } + return 0; +} + +constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size, + Register::Size dst_size, + bool src_signed, + bool dst_signed) { + const u32 dst_bits = SizeInBits(dst_size); + if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { + if (src_signed == dst_signed) { + return std::nullopt; + } + return std::make_pair(0, std::numeric_limits<s32>::max()); + } + if (dst_signed) { + // Signed destination, clamp to [-128, 127] for instance + return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); + } else { + // Unsigned destination + if (dst_bits == 32) { + // Avoid shifting by 32, that is undefined behavior + return std::make_pair(0, s32(std::numeric_limits<u32>::max())); + } + return std::make_pair(0, (1 << dst_bits) - 1); + } +} + } // Anonymous namespace u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { @@ -28,14 +72,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { case OpCode::Id::I2I_R: case OpCode::Id::I2I_C: case OpCode::Id::I2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); - UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); - UNIMPLEMENTED_IF(instr.alu.saturate_d); + const bool src_signed = instr.conversion.is_input_signed; + const bool dst_signed = instr.conversion.is_output_signed; + const Register::Size src_size = instr.conversion.src_size; + const Register::Size dst_size = instr.conversion.dst_size; + const u32 selector = static_cast<u32>(instr.conversion.int_src.selector); - const bool input_signed = instr.conversion.is_input_signed; - const bool output_signed = instr.conversion.is_output_signed; - - Node value = [&]() { + Node value = [this, instr, opcode] { switch (opcode->get().GetId()) { case OpCode::Id::I2I_R: return GetRegister(instr.gpr20); @@ -48,16 +91,60 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); - value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); - value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a, - input_signed); - if (input_signed != output_signed) { - value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value); + // Ensure the source selector is valid + switch (instr.conversion.src_size) { + case Register::Size::Byte: + break; + case Register::Size::Short: + ASSERT(selector == 0 || selector == 2); + break; + default: + ASSERT(selector == 0); + break; + } + + if (src_size != Register::Size::Word || selector != 0) { + value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), + Immediate(selector * 8), Immediate(SizeInBits(src_size))); + } + + value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, + instr.conversion.negate_a, src_signed); + + if (instr.alu.saturate_d) { + if (src_signed && !dst_signed) { + Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, + Immediate(1 << (SizeInBits(src_size) - 1))); + value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), + std::move(value)); + + // Simplify generated expressions, this can be removed without semantic impact + SetTemporary(bb, 0, std::move(value)); + value 
= GetTemporary(0); + + if (dst_size != Register::Size::Word) { + const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); + Node is_large = + Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); + value = Operation(OperationCode::Select, std::move(is_large), limit, + std::move(value)); + } + } else if (const std::optional bounds = + IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { + value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), + Immediate(bounds->first)); + value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), + Immediate(bounds->second)); + } + } else if (dst_size != Register::Size::Word) { + // No saturation, we only have to mask the result + Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); + value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); } SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::I2F_R: diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 0dd7a1196..85ee9aa5e 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -352,8 +352,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); } else { const Node image_register = GetRegister(instr.gpr39); - const auto [base_image, buffer, offset] = TrackCbuf( - image_register, global_code, static_cast<s64>(global_code.size())); + const auto result = TrackCbuf(image_register, global_code, + static_cast<s64>(global_code.size())); + const auto buffer = std::get<1>(result); + const auto offset = std::get<2>(result); descriptor = registry.ObtainBindlessSampler(buffer, offset); } if (!descriptor) { @@ -497,9 +499,12 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { const Node image_register = GetRegister(reg); - const auto [base_image, buffer, offset] = + const auto result = TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); + const auto buffer = std::get<1>(result); + const auto offset = std::get<2>(result); + const auto it = std::find_if(std::begin(used_images), std::end(used_images), [buffer = buffer, offset = offset](const Image& entry) { diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index b8f63922f..8112ead3e 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -3,7 +3,9 @@ // Refer to the license.txt file included. 
#include <algorithm> +#include <utility> #include <vector> + #include <fmt/format.h> #include "common/alignment.h" @@ -16,6 +18,7 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; @@ -27,29 +30,26 @@ using Tegra::Shader::StoreType; namespace { -Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { - const OperationCode operation_code = [op] { - switch (op) { - case AtomicOp::Add: - return OperationCode::AtomicIAdd; - case AtomicOp::Min: - return OperationCode::AtomicIMin; - case AtomicOp::Max: - return OperationCode::AtomicIMax; - case AtomicOp::And: - return OperationCode::AtomicIAnd; - case AtomicOp::Or: - return OperationCode::AtomicIOr; - case AtomicOp::Xor: - return OperationCode::AtomicIXor; - case AtomicOp::Exch: - return OperationCode::AtomicIExchange; - default: - UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); - return OperationCode::AtomicIAdd; - } - }(); - return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data)); +OperationCode GetAtomOperation(AtomicOp op) { + switch (op) { + case AtomicOp::Add: + return OperationCode::AtomicIAdd; + case AtomicOp::Min: + return OperationCode::AtomicIMin; + case AtomicOp::Max: + return OperationCode::AtomicIMax; + case AtomicOp::And: + return OperationCode::AtomicIAnd; + case AtomicOp::Or: + return OperationCode::AtomicIOr; + case AtomicOp::Xor: + return OperationCode::AtomicIXor; + case AtomicOp::Exch: + return OperationCode::AtomicIExchange; + default: + UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); + return OperationCode::AtomicIAdd; + } } bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { @@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), - Immediate(size)); + offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); } Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), - std::move(offset), Immediate(size)); + Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), + Immediate(size)); } Node Sign16Extend(Node value) { Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); - Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); + Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); - return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); + return Operation(OperationCode::UBitwiseOr, move(value), 
move(extend)); } } // Anonymous namespace @@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { if (IsUnaligned(type)) { const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); + value = InsertUnaligned(gmem, move(value), real_address, mask, size); } bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } + case OpCode::Id::RED: { + UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); + UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add); + const auto [real_address, base_address, descriptor] = + TrackGlobalMemory(bb, instr, true, true); + if (!real_address || !base_address) { + // Tracking failed, skip atomic. + break; + } + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + Node value = GetRegister(instr.gpr0); + bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value))); + break; + } case OpCode::Id::ATOM: { UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || instr.atom.operation == AtomicOp::Dec || instr.atom.operation == AtomicOp::SafeAdd, "operation={}", static_cast<int>(instr.atom.operation.Value())); UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || - instr.atom.type == GlobalAtomicType::U64, + instr.atom.type == GlobalAtomicType::U64 || + instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || + instr.atom.type == GlobalAtomicType::F32_FTZ_RN, "type={}", static_cast<int>(instr.atom.type.Value())); const auto [real_address, base_address, descriptor] = @@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } const bool is_signed = - instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; + instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem, - GetRegister(instr.gpr20)); - SetRegister(bb, instr.gpr0, std::move(value)); + SetRegister(bb, instr.gpr0, + SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, + GetRegister(instr.gpr20))); break; } case OpCode::Id::ATOMS: { @@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; const s32 offset = instr.atoms.GetImmediateOffset(); Node address = GetRegister(instr.gpr8); - address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); - Node value = - GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed, - GetSharedMemory(std::move(address)), GetRegister(instr.gpr20)); - SetRegister(bb, instr.gpr0, std::move(value)); + address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); + SetRegister(bb, instr.gpr0, + SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, + GetSharedMemory(move(address)), GetRegister(instr.gpr20))); break; } case OpCode::Id::AL2P: { diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 3b391d3e6..d4ffa8014 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -23,7 +23,6 @@ Node IsFull(Node shift) { } Node Shift(OperationCode opcode, Node value, Node shift) { - Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32)); Node shifted = Operation(opcode, move(value), shift); return Operation(OperationCode::Select, 
IsFull(move(shift)), Immediate(0), move(shifted)); } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 48350e042..6c4a1358b 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -780,20 +780,6 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - // Fill empty entries from the guest sampler - const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); - if (type_coord_count != entry_coord_count) { - LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); - - // When the size is higher we insert zeroes - for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { - coords.push_back(GetRegister(Register::ZeroIndex)); - } - - // Then we ensure the size matches the number of entries (dropping unused values) - coords.resize(entry_coord_count); - } - Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index b047cf870..64ba60ea2 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -10,16 +10,24 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; using Tegra::Shader::VideoType; using Tegra::Shader::VmadShr; +using Tegra::Shader::VmnmxOperation; +using Tegra::Shader::VmnmxType; u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::VMNMX) { + DecodeVMNMX(bb, instr); + return pc; + } + const Node op_a = GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, instr.video.type_a, instr.video.byte_height_a); @@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, } } +void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { + UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); + UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); + UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); + UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); + UNIMPLEMENTED_IF(instr.vmnmx.sat); + UNIMPLEMENTED_IF(instr.generates_cc); + + Node op_a = GetRegister(instr.gpr8); + Node op_b = GetRegister(instr.gpr20); + Node op_c = GetRegister(instr.gpr39); + + const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed + const bool is_oper2_signed = instr.vmnmx.is_dest_signed; + + const auto operation_a = instr.vmnmx.mx ? 
OperationCode::IMax : OperationCode::IMin; + Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); + + switch (instr.vmnmx.operation) { + case VmnmxOperation::Mrg_16H: + value = BitfieldInsert(move(op_c), move(value), 16, 16); + break; + case VmnmxOperation::Mrg_16L: + value = BitfieldInsert(move(op_c), move(value), 0, 16); + break; + case VmnmxOperation::Mrg_8B0: + value = BitfieldInsert(move(op_c), move(value), 0, 8); + break; + case VmnmxOperation::Mrg_8B2: + value = BitfieldInsert(move(op_c), move(value), 16, 8); + break; + case VmnmxOperation::Acc: + value = Operation(OperationCode::IAdd, move(value), move(op_c)); + break; + case VmnmxOperation::Min: + value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); + break; + case VmnmxOperation::Max: + value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); + break; + case VmnmxOperation::Nop: + break; + default: + UNREACHABLE(); + break; + } + + SetRegister(bb, instr.gpr0, move(value)); +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 5fcc9da60..3eee961f5 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -178,6 +178,20 @@ enum class OperationCode { AtomicIOr, /// (memory, int) -> int AtomicIXor, /// (memory, int) -> int + ReduceUAdd, /// (memory, uint) -> void + ReduceUMin, /// (memory, uint) -> void + ReduceUMax, /// (memory, uint) -> void + ReduceUAnd, /// (memory, uint) -> void + ReduceUOr, /// (memory, uint) -> void + ReduceUXor, /// (memory, uint) -> void + + ReduceIAdd, /// (memory, int) -> void + ReduceIMin, /// (memory, int) -> void + ReduceIMax, /// (memory, int) -> void + ReduceIAnd, /// (memory, int) -> void + ReduceIOr, /// (memory, int) -> void + ReduceIXor, /// (memory, int) -> void + Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 8852c8a1b..822674926 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -56,8 +56,7 @@ Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { const auto index = static_cast<u32>(index_); const auto offset = static_cast<u32>(offset_); - const auto [entry, is_new] = used_cbufs.try_emplace(index); - entry->second.MarkAsUsed(offset); + used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); return MakeNode<CbufNode>(index, Immediate(offset)); } @@ -66,8 +65,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { const auto index = static_cast<u32>(index_); const auto offset = static_cast<u32>(offset_); - const auto [entry, is_new] = used_cbufs.try_emplace(index); - entry->second.MarkAsUsedIndirect(); + used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); Node final_offset = [&] { // Attempt to inline constant buffer without a variable offset. 
This is done to allow @@ -166,6 +164,7 @@ Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signe std::move(value), Immediate(16)); value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, std::move(value), Immediate(16)); + return value; case Register::Size::Word: // Default - do nothing return value; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ca6c976c9..c6e7bdf50 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -354,6 +354,9 @@ private: /// Marks the usage of a input or output attribute. void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); + /// Decodes VMNMX instruction and inserts its code into the passed basic block. + void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); + void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 10739b37d..224943ad9 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -27,8 +27,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { const auto& conditional_code = conditional->GetCode(); - auto [found, internal_cursor] = FindOperation( + auto result = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); + auto& found = result.first; if (found) { return {std::move(found), cursor}; } @@ -186,8 +187,8 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register // that it uses as operand - const auto [found, found_cursor] = - TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + const auto& found = result.first; if (!found) { return {}; } diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 6fe815135..715f39d0d 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -190,22 +190,11 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, MICROPROFILE_SCOPE(GPU_Load_Texture); auto& staging_buffer = staging_cache.GetBuffer(0); u8* host_ptr; - is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); - - // Handle continuouty - if (is_continuous) { - // Use physical memory directly - host_ptr = memory_manager.GetPointer(gpu_addr); - if (!host_ptr) { - return; - } - } else { - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - } + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); + memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", @@ -257,18 +246,15 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, 
auto& staging_buffer = staging_cache.GetBuffer(0); u8* host_ptr; - // Handle continuouty - if (is_continuous) { - // Use physical memory directly - host_ptr = memory_manager.GetPointer(gpu_addr); - if (!host_ptr) { - return; - } - } else { - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + // Special case for 3D Texture Segments + const bool must_read_current_data = + params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D; + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); + if (must_read_current_data) { + memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); } if (params.is_tiled) { @@ -300,9 +286,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, } } } - if (!is_continuous) { - memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - } + memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index d7882a031..c5ab21f56 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -68,13 +68,13 @@ public: return gpu_addr; } - bool Overlaps(const CacheAddr start, const CacheAddr end) const { - return (cache_addr < end) && (cache_addr_end > start); + bool Overlaps(const VAddr start, const VAddr end) const { + return (cpu_addr < end) && (cpu_addr_end > start); } - bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { + bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const { const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; - return (gpu_addr <= other_start && other_end <= gpu_addr_end); + return gpu_addr <= other_start && other_end <= gpu_addr_end; } // Use only when recycling a surface @@ -86,21 +86,13 @@ public: return cpu_addr; } - void SetCpuAddr(const VAddr new_addr) { - cpu_addr = new_addr; - } - - CacheAddr GetCacheAddr() const { - return cache_addr; - } - - CacheAddr GetCacheAddrEnd() const { - return cache_addr_end; + VAddr GetCpuAddrEnd() const { + return cpu_addr_end; } - void SetCacheAddr(const CacheAddr new_addr) { - cache_addr = new_addr; - cache_addr_end = new_addr + guest_memory_size; + void SetCpuAddr(const VAddr new_addr) { + cpu_addr = new_addr; + cpu_addr_end = new_addr + guest_memory_size; } const SurfaceParams& GetSurfaceParams() const { @@ -119,14 +111,6 @@ public: return mipmap_sizes[level]; } - void MarkAsContinuous(const bool is_continuous) { - this->is_continuous = is_continuous; - } - - bool IsContinuous() const { - return is_continuous; - } - bool IsLinear() const { return !params.is_tiled; } @@ -175,10 +159,8 @@ protected: std::size_t guest_memory_size; std::size_t host_memory_size; GPUVAddr gpu_addr{}; - CacheAddr cache_addr{}; - CacheAddr cache_addr_end{}; VAddr cpu_addr{}; - bool is_continuous{}; + VAddr cpu_addr_end{}; bool is_converted{}; std::vector<std::size_t> mipmap_sizes; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 6f3ef45be..0de499946 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -167,7 +167,6 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& 
lookup_tabl SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { const auto& regs = system.GPU().Maxwell3D().regs; - regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type; SurfaceParams params; params.is_tiled = regs.zeta.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp index 57a1f5803..6b5f5984b 100644 --- a/src/video_core/texture_cache/surface_view.cpp +++ b/src/video_core/texture_cache/surface_view.cpp @@ -20,4 +20,8 @@ bool ViewParams::operator==(const ViewParams& rhs) const { std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); } +bool ViewParams::operator!=(const ViewParams& rhs) const { + return !operator==(rhs); +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index b17fd11a9..90a8bb0ae 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -21,6 +21,7 @@ struct ViewParams { std::size_t Hash() const; bool operator==(const ViewParams& rhs) const; + bool operator!=(const ViewParams& rhs) const; bool IsLayered() const { switch (target) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c8f8d659d..69ca08fd1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; template <typename TSurface, typename TView> class TextureCache { - using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>; - using IntervalType = typename IntervalMap::interval_type; public: - void InvalidateRegion(CacheAddr addr, std::size_t size) { + void InvalidateRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; for (const auto& surface : GetSurfacesInRegion(addr, size)) { @@ -76,7 +74,7 @@ public: guard_samplers = new_guard; } - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; auto surfaces = GetSurfacesInRegion(addr, size); @@ -99,9 +97,9 @@ public: return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } @@ -110,7 +108,7 @@ public: } const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); + const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false); if (guard_samplers) { sampled_textures.push_back(surface); } @@ -124,13 +122,13 @@ public: if (!gpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } const auto 
params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); + const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false); if (guard_samplers) { sampled_textures.push_back(surface); } @@ -145,7 +143,7 @@ public: return any_rt; } - TView GetDepthBufferSurface(bool preserve_contents) { + TView GetDepthBufferSurface() { std::lock_guard lock{mutex}; auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { @@ -159,14 +157,14 @@ public: SetEmptyDepthBuffer(); return {}; } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { SetEmptyDepthBuffer(); return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; - auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); + auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false, NO_RT); depth_buffer.target = surface_view.first; @@ -176,7 +174,7 @@ public: return surface_view.second; } - TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { + TView GetColorBufferSurface(std::size_t index) { std::lock_guard lock{mutex}; ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); auto& maxwell3d = system.GPU().Maxwell3D(); @@ -199,16 +197,15 @@ public: return {}; } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { SetEmptyColorBuffer(index); return {}; } - auto surface_view = - GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents, true); + auto surface_view = GetSurface(gpu_addr, *cpu_addr, + SurfaceParams::CreateForFramebuffer(system, index), true); if (render_targets[index].target) render_targets[index].target->MarkAsRenderTarget(false, NO_RT); render_targets[index].target = surface_view.first; @@ -257,27 +254,26 @@ public: const GPUVAddr src_gpu_addr = src_config.Address(); const GPUVAddr dst_gpu_addr = dst_config.Address(); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; - const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; - const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; - const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; + const std::optional<VAddr> dst_cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); + const std::optional<VAddr> src_cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); std::pair<TSurface, TView> dst_surface = - GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); + GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false); std::pair<TSurface, TView> src_surface = - GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); + GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false); ImageBlit(src_surface.second, dst_surface.second, copy_config); dst_surface.first->MarkAsModified(true, Tick()); } - TSurface TryFindFramebufferSurface(const 
u8* host_ptr) { - const CacheAddr cache_addr = ToCacheAddr(host_ptr); - if (!cache_addr) { + TSurface TryFindFramebufferSurface(VAddr addr) { + if (!addr) { return nullptr; } - const CacheAddr page = cache_addr >> registry_page_bits; + const VAddr page = addr >> registry_page_bits; std::vector<TSurface>& list = registry[page]; for (auto& surface : list) { - if (surface->GetCacheAddr() == cache_addr) { + if (surface->GetCpuAddr() == addr) { return surface; } } @@ -338,18 +334,14 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); const std::optional<VAddr> cpu_addr = system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); - if (!cache_ptr || !cpu_addr) { + if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } - const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); - surface->MarkAsContinuous(continuous); - surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); surface->MarkAsRegistered(true); @@ -458,22 +450,18 @@ private: * @param overlaps The overlapping surfaces registered in the cache. * @param params The parameters for the new surface. * @param gpu_addr The starting address of the new surface. - * @param preserve_contents Indicates that the new surface should be loaded from memory or left - * blank. * @param untopological Indicates to the recycler that the texture has no way to match the * overlaps due to topological reasons. **/ std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, - const bool preserve_contents, const MatchTopologyResult untopological) { - const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation; for (auto& surface : overlaps) { Unregister(surface); } switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { case RecycleStrategy::Ignore: { - return InitializeSurface(gpu_addr, params, do_load); + return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation); } case RecycleStrategy::Flush: { std::sort(overlaps.begin(), overlaps.end(), @@ -483,7 +471,7 @@ private: for (auto& surface : overlaps) { FlushSurface(surface); } - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params); } case RecycleStrategy::BufferCopy: { auto new_surface = GetUncachedSurface(gpu_addr, params); @@ -492,7 +480,7 @@ private: } default: { UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); - return InitializeSurface(gpu_addr, params, do_load); + return InitializeSurface(gpu_addr, params); } } } @@ -521,7 +509,9 @@ private: } const auto& final_params = new_surface->GetSurfaceParams(); if (cr_params.type != final_params.type) { - BufferCopy(current_surface, new_surface); + if (Settings::values.use_accurate_gpu_emulation) { + BufferCopy(current_surface, new_surface); + } } else { std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); for (auto& brick : bricks) { @@ -624,18 +614,15 @@ private: * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of * the HLE methods. * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. 
- * @param gpu_addr The starting address of the new surface. - * @param cache_addr The starting address of the new surface on physical memory. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. + * @param gpu_addr The starting address of the new surface. + * @param cpu_addr The starting address of the new surface on physical memory. */ std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, - const CacheAddr cache_addr, - bool preserve_contents) { + const VAddr cpu_addr) { if (params.target == SurfaceTarget::Texture3D) { bool failed = false; if (params.num_levels > 1) { @@ -659,8 +646,9 @@ private: failed = true; break; } - const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); - const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); + const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); + const auto offsets = params.GetBlockOffsetXYZ(offset); + const auto z = std::get<2>(offsets); modified |= surface->IsModified(); const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, 1); @@ -679,23 +667,23 @@ private: } else { for (const auto& surface : overlaps) { if (!surface->MatchTarget(params.target)) { - if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { + if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { if (Settings::values.use_accurate_gpu_emulation) { return std::nullopt; } Unregister(surface); - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params); } return std::nullopt; } - if (surface->GetCacheAddr() != cache_addr) { + if (surface->GetCpuAddr() != cpu_addr) { continue; } if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { return {{surface, surface->GetMainView()}}; } } - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params); } } @@ -718,23 +706,19 @@ private: * * @param gpu_addr The starting address of the candidate surface. * @param params The parameters on the candidate surface. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. * @param is_render Whether or not the surface is a render target. **/ - std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, - const SurfaceParams& params, bool preserve_contents, - bool is_render) { + std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, + const SurfaceParams& params, bool is_render) { // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. 
- if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { + if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector<TSurface> overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - topological_result); + return RecycleSurface(overlaps, params, gpu_addr, topological_result); } const auto struct_result = current_surface->MatchesStructure(params); @@ -755,11 +739,11 @@ private: // Step 2 // Obtain all possible overlaps in the memory region const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; // If none are found, we are done. we just load the surface and create it. if (overlaps.empty()) { - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params); } // Step 3 @@ -769,15 +753,13 @@ private: for (const auto& surface : overlaps) { const auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - topological_result); + return RecycleSurface(overlaps, params, gpu_addr, topological_result); } } // Check if it's a 3D texture if (params.block_depth > 0) { - auto surface = - Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); + auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr); if (surface) { return *surface; } @@ -797,8 +779,7 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch); } // Now we check if the candidate is a mipmap/layer of the overlap std::optional<TView> view = @@ -822,7 +803,7 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch); } return {current_surface, *view}; @@ -838,8 +819,7 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch); } /** @@ -852,16 +832,16 @@ private: * @param params The parameters on the candidate surface. 
**/ Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); - if (!cache_addr) { + if (!cpu_addr) { Deduction result{}; result.type = DeductionType::DeductionFailed; return result; } - if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { + if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { @@ -880,7 +860,7 @@ private: } const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; if (overlaps.empty()) { Deduction result{}; @@ -997,10 +977,10 @@ private: } std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, - bool preserve_contents) { + bool do_load = true) { auto new_surface{GetUncachedSurface(gpu_addr, params)}; Register(new_surface); - if (preserve_contents) { + if (do_load) { LoadSurface(new_surface); } return {new_surface, new_surface->GetMainView()}; @@ -1024,10 +1004,10 @@ private: } void RegisterInnerCache(TSurface& surface) { - const CacheAddr cache_addr = surface->GetCacheAddr(); - CacheAddr start = cache_addr >> registry_page_bits; - const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; - l1_cache[cache_addr] = surface; + const VAddr cpu_addr = surface->GetCpuAddr(); + VAddr start = cpu_addr >> registry_page_bits; + const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; + l1_cache[cpu_addr] = surface; while (start <= end) { registry[start].push_back(surface); start++; @@ -1035,10 +1015,10 @@ private: } void UnregisterInnerCache(TSurface& surface) { - const CacheAddr cache_addr = surface->GetCacheAddr(); - CacheAddr start = cache_addr >> registry_page_bits; - const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; - l1_cache.erase(cache_addr); + const VAddr cpu_addr = surface->GetCpuAddr(); + VAddr start = cpu_addr >> registry_page_bits; + const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; + l1_cache.erase(cpu_addr); while (start <= end) { auto& reg{registry[start]}; reg.erase(std::find(reg.begin(), reg.end(), surface)); @@ -1046,18 +1026,18 @@ private: } } - std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { + std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { if (size == 0) { return {}; } - const CacheAddr cache_addr_end = cache_addr + size; - CacheAddr start = cache_addr >> registry_page_bits; - const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; + const VAddr cpu_addr_end = cpu_addr + size; + VAddr start = cpu_addr >> registry_page_bits; + const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; std::vector<TSurface> surfaces; while (start <= end) { std::vector<TSurface>& list = registry[start]; for (auto& surface : list) { - if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { + if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { surface->MarkAsPicked(true); surfaces.push_back(surface); } @@ -1146,14 +1126,14 @@ private: 
// large in size. static constexpr u64 registry_page_bits{20}; static constexpr u64 registry_page_size{1 << registry_page_bits}; - std::unordered_map<CacheAddr, std::vector<TSurface>> registry; + std::unordered_map<VAddr, std::vector<TSurface>> registry; static constexpr u32 DEPTH_RT = 8; static constexpr u32 NO_RT = 0xFFFFFFFF; // The L1 Cache is used for fast texture lookup before checking the overlaps // This avoids calculating size and other stuff. - std::unordered_map<CacheAddr, TSurface> l1_cache; + std::unordered_map<VAddr, TSurface> l1_cache; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 062b4f252..365bde2f1 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -20,6 +20,8 @@ #include <cstring> #include <vector> +#include <boost/container/static_vector.hpp> + #include "common/common_types.h" #include "video_core/textures/astc.h" @@ -39,25 +41,25 @@ constexpr u32 Popcnt(u32 n) { class InputBitStream { public: - explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) - : m_CurByte(ptr), m_NextBit(start_offset % 8) {} + constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) + : cur_byte{ptr}, next_bit{start_offset % 8} {} - std::size_t GetBitsRead() const { - return m_BitsRead; + constexpr std::size_t GetBitsRead() const { + return bits_read; } - u32 ReadBit() { - u32 bit = *m_CurByte >> m_NextBit++; - while (m_NextBit >= 8) { - m_NextBit -= 8; - m_CurByte++; + constexpr bool ReadBit() { + const bool bit = (*cur_byte >> next_bit++) & 1; + while (next_bit >= 8) { + next_bit -= 8; + cur_byte++; } - m_BitsRead++; - return bit & 1; + bits_read++; + return bit; } - u32 ReadBits(std::size_t nBits) { + constexpr u32 ReadBits(std::size_t nBits) { u32 ret = 0; for (std::size_t i = 0; i < nBits; ++i) { ret |= (ReadBit() & 1) << i; @@ -66,7 +68,7 @@ public: } template <std::size_t nBits> - u32 ReadBits() { + constexpr u32 ReadBits() { u32 ret = 0; for (std::size_t i = 0; i < nBits; ++i) { ret |= (ReadBit() & 1) << i; @@ -75,64 +77,58 @@ public: } private: - const u8* m_CurByte; - std::size_t m_NextBit = 0; - std::size_t m_BitsRead = 0; + const u8* cur_byte; + std::size_t next_bit = 0; + std::size_t bits_read = 0; }; class OutputBitStream { public: - explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) - : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} - - ~OutputBitStream() = default; + constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0) + : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {} - s32 GetBitsWritten() const { - return m_BitsWritten; + constexpr std::size_t GetBitsWritten() const { + return bits_written; } - void WriteBitsR(u32 val, u32 nBits) { + constexpr void WriteBitsR(u32 val, u32 nBits) { for (u32 i = 0; i < nBits; i++) { WriteBit((val >> (nBits - i - 1)) & 1); } } - void WriteBits(u32 val, u32 nBits) { + constexpr void WriteBits(u32 val, u32 nBits) { for (u32 i = 0; i < nBits; i++) { WriteBit((val >> i) & 1); } } private: - void WriteBit(s32 b) { - - if (done) + constexpr void WriteBit(bool b) { + if (bits_written >= num_bits) { return; + } - const u32 mask = 1 << m_NextBit++; + const u32 mask = 1 << next_bit++; // clear the bit - *m_CurByte &= static_cast<u8>(~mask); + *cur_byte &= 
static_cast<u8>(~mask); // Write the bit, if necessary if (b) - *m_CurByte |= static_cast<u8>(mask); + *cur_byte |= static_cast<u8>(mask); // Next byte? - if (m_NextBit >= 8) { - m_CurByte += 1; - m_NextBit = 0; + if (next_bit >= 8) { + cur_byte += 1; + next_bit = 0; } - - done = done || ++m_BitsWritten >= m_NumBits; } - s32 m_BitsWritten = 0; - const s32 m_NumBits; - u8* m_CurByte; - s32 m_NextBit = 0; - - bool done = false; + u8* cur_byte; + std::size_t num_bits; + std::size_t bits_written = 0; + std::size_t next_bit = 0; }; template <typename IntType> @@ -195,9 +191,13 @@ struct IntegerEncodedValue { u32 trit_value; }; }; +using IntegerEncodedVector = boost::container::static_vector< + IntegerEncodedValue, 64, + boost::container::static_vector_options< + boost::container::inplace_alignment<alignof(IntegerEncodedValue)>, + boost::container::throw_on_overflow<false>>::type>; -static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, - u32 nBitsPerValue) { +static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { // Implement the algorithm in section C.2.12 u32 m[5]; u32 t[5]; @@ -255,7 +255,7 @@ static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValu } } -static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, +static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { // Implement the algorithm in section C.2.12 u32 m[3]; @@ -343,8 +343,8 @@ static constexpr std::array EncodingsValues = MakeEncodedValues(); // Fills result with the values that are encoded in the given // bitstream. We must know beforehand what the maximum possible // value is, and how many values we're decoding. -static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, - u32 maxRange, u32 nValues) { +static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, + u32 nValues) { // Determine encoding parameters IntegerEncodedValue val = EncodingsValues[maxRange]; @@ -634,12 +634,14 @@ static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] // is the same as [(numBits - 1):0] and repeats all the way down. 
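To make the replication rule just described concrete before the diff continues: the low numBits of the value are tiled across the destination width starting from the most significant end, with the pattern truncated from the bottom if it does not divide evenly. A standalone sketch of the same recurrence with a few worked values; ReplicateSketch is an illustrative mirror of the Replicate routine below, whose constexpr rewrite and lookup tables precompute exactly these results:

#include <cstdint>

// Tile the low num_bits of val across to_bit output bits, starting from the
// most significant end of the result.
constexpr std::uint32_t ReplicateSketch(std::uint32_t val, std::uint32_t num_bits,
                                        std::uint32_t to_bit) {
    if (num_bits == 0 || to_bit == 0) {
        return 0;
    }
    const std::uint32_t v = val & ((1u << num_bits) - 1);
    std::uint32_t res = v;
    std::uint32_t len = num_bits;
    while (len < to_bit) {
        // On the final iteration only part of the value may fit; drop its
        // low bits so the pattern is cut off from the bottom.
        const std::uint32_t comp = num_bits > to_bit - len ? num_bits - (to_bit - len) : 0;
        res = (res << (num_bits - comp)) | (v >> comp);
        len += num_bits - comp;
    }
    return res;
}

// Worked values: 0b101 widened to 8 bits tiles as 101|101|10; a single set
// bit widened to 9 bits saturates to all ones; a full byte widened to
// 16 bits duplicates itself (the ReplicateByteTo16 table below).
static_assert(ReplicateSketch(0b101, 3, 8) == 0b10110110);
static_assert(ReplicateSketch(1, 1, 9) == 0x1FF);
static_assert(ReplicateSketch(0xFF, 8, 16) == 0xFFFF);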
template <typename IntType> -static IntType Replicate(IntType val, u32 numBits, u32 toBit) { - if (numBits == 0) +static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) { + if (numBits == 0) { return 0; - if (toBit == 0) + } + if (toBit == 0) { return 0; - IntType v = val & static_cast<IntType>((1 << numBits) - 1); + } + const IntType v = val & static_cast<IntType>((1 << numBits) - 1); IntType res = v; u32 reslen = numBits; while (reslen < toBit) { @@ -656,6 +658,89 @@ static IntType Replicate(IntType val, u32 numBits, u32 toBit) { return res; } +static constexpr std::size_t NumReplicateEntries(u32 num_bits) { + return std::size_t(1) << num_bits; +} + +template <typename IntType, u32 num_bits, u32 to_bit> +static constexpr auto MakeReplicateTable() { + std::array<IntType, NumReplicateEntries(num_bits)> table{}; + for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) { + table[value] = Replicate(value, num_bits, to_bit); + } + return table; +} + +static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); +static constexpr u32 ReplicateByteTo16(std::size_t value) { + return REPLICATE_BYTE_TO_16_TABLE[value]; +} + +static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>(); +static constexpr u32 ReplicateBitTo7(std::size_t value) { + return REPLICATE_BIT_TO_7_TABLE[value]; +} + +static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>(); +static constexpr u32 ReplicateBitTo9(std::size_t value) { + return REPLICATE_BIT_TO_9_TABLE[value]; +} + +static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); +static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); +static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); +static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); +static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); +static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); +static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); +static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); +/// Use a precompiled table for the most common usages; if the value is not in the expected range, +/// fall back to the runtime implementation +static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { + switch (num_bits) { + case 1: + return REPLICATE_1_BIT_TO_8_TABLE[value]; + case 2: + return REPLICATE_2_BIT_TO_8_TABLE[value]; + case 3: + return REPLICATE_3_BIT_TO_8_TABLE[value]; + case 4: + return REPLICATE_4_BIT_TO_8_TABLE[value]; + case 5: + return REPLICATE_5_BIT_TO_8_TABLE[value]; + case 6: + return REPLICATE_6_BIT_TO_8_TABLE[value]; + case 7: + return REPLICATE_7_BIT_TO_8_TABLE[value]; + case 8: + return REPLICATE_8_BIT_TO_8_TABLE[value]; + default: + return Replicate(value, num_bits, 8); + } +} + +static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); +static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); +static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); +static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); +static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); +static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { + switch (num_bits) { + case 1: + return REPLICATE_1_BIT_TO_6_TABLE[value]; + case 2: + 
return REPLICATE_2_BIT_TO_6_TABLE[value]; + case 3: + return REPLICATE_3_BIT_TO_6_TABLE[value]; + case 4: + return REPLICATE_4_BIT_TO_6_TABLE[value]; + case 5: + return REPLICATE_5_BIT_TO_6_TABLE[value]; + default: + return Replicate(value, num_bits, 6); + } +} + class Pixel { protected: using ChannelType = s16; @@ -674,10 +759,10 @@ public: // significant bits when going from larger to smaller bit depth // or by repeating the most significant bits when going from // smaller to larger bit depths. - void ChangeBitDepth(const u8 (&depth)[4]) { + void ChangeBitDepth() { for (u32 i = 0; i < 4; i++) { - Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); - m_BitDepth[i] = depth[i]; + Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]); + m_BitDepth[i] = 8; } } @@ -689,28 +774,23 @@ public: // Changes the bit depth of a single component. See the comment // above for how we do this. - static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { - assert(newDepth <= 8); + static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) { assert(oldDepth <= 8); - if (oldDepth == newDepth) { + if (oldDepth == 8) { // Do nothing return val; - } else if (oldDepth == 0 && newDepth != 0) { - return static_cast<ChannelType>((1 << newDepth) - 1); - } else if (newDepth > oldDepth) { - return Replicate(val, oldDepth, newDepth); + } else if (oldDepth == 0) { + return static_cast<ChannelType>((1 << 8) - 1); + } else if (8 > oldDepth) { + return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth)); } else { // oldDepth > newDepth - if (newDepth == 0) { - return 0xFF; - } else { - u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); - u16 v = static_cast<u16>(val); - v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); - v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1)); - return static_cast<u8>(v); - } + const u8 bitsWasted = static_cast<u8>(oldDepth - 8); + u16 v = static_cast<u16>(val); + v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); + v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1)); + return static_cast<u8>(v); } assert(false && "We shouldn't get here."); @@ -760,8 +840,7 @@ public: // up in the most-significant byte. u32 Pack() const { Pixel eightBit(*this); - const u8 eightBitDepth[4] = {8, 8, 8, 8}; - eightBit.ChangeBitDepth(eightBitDepth); + eightBit.ChangeBitDepth(); u32 r = 0; r |= eightBit.A(); @@ -816,8 +895,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP } // We now have enough to decode our integer sequence. - std::vector<IntegerEncodedValue> decodedColorValues; - decodedColorValues.reserve(32); + IntegerEncodedVector decodedColorValues; InputBitStream colorStream(data); DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); @@ -839,12 +917,12 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP u32 A = 0, B = 0, C = 0, D = 0; // A is just the lsb replicated 9 times. 
- A = Replicate(bitval & 1, 1, 9); + A = ReplicateBitTo9(bitval & 1); switch (val.encoding) { // Replicate bits case IntegerEncoding::JustBits: - out[outIdx++] = Replicate(bitval, bitlen, 8); + out[outIdx++] = FastReplicateTo8(bitval, bitlen); break; // Use algorithm in C.2.13 @@ -962,13 +1040,13 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { u32 bitval = val.bit_value; u32 bitlen = val.num_bits; - u32 A = Replicate(bitval & 1, 1, 7); + u32 A = ReplicateBitTo7(bitval & 1); u32 B = 0, C = 0, D = 0; u32 result = 0; switch (val.encoding) { case IntegerEncoding::JustBits: - result = Replicate(bitval, bitlen, 6); + result = FastReplicateTo6(bitval, bitlen); break; case IntegerEncoding::Trit: { @@ -1047,7 +1125,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { return result; } -static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, +static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights, const TexelWeightParams& params, const u32 blockWidth, const u32 blockHeight) { u32 weightIdx = 0; @@ -1545,8 +1623,7 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); - std::vector<IntegerEncodedValue> texelWeightValues; - texelWeightValues.reserve(64); + IntegerEncodedVector texelWeightValues; InputBitStream weightStream(texelWeightData); @@ -1568,9 +1645,9 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 Pixel p; for (u32 c = 0; c < 4; c++) { u32 C0 = endpos32s[partition][0].Component(c); - C0 = Replicate(C0, 8, 16); + C0 = ReplicateByteTo16(C0); u32 C1 = endpos32s[partition][1].Component(c); - C1 = Replicate(C1, 8, 16); + C1 = ReplicateByteTo16(C1); u32 plane = 0; if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp new file mode 100644 index 000000000..d1939d744 --- /dev/null +++ b/src/video_core/textures/texture.cpp @@ -0,0 +1,80 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
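The new texture.cpp that follows bakes in a 256-entry table mapping sRGB-encoded border-color bytes to linear floats. As an illustration of how a table of that shape can be produced, here is a sketch using the standard IEC 61966-2-1 sRGB-to-linear transfer function; MakeSrgbToLinearTable is a hypothetical name, and the committed table is not guaranteed to match these values bit-for-bit (note its early saturation to 1.0 at the top of the range):

#include <array>
#include <cmath>
#include <cstddef>

// Build a 256-entry sRGB-byte -> linear-float conversion table using the
// standard transfer function. Illustrative only; the values embedded in
// texture.cpp below may be derived differently.
std::array<float, 256> MakeSrgbToLinearTable() {
    std::array<float, 256> table{};
    for (std::size_t i = 0; i < table.size(); ++i) {
        const float c = static_cast<float>(i) / 255.0f;
        table[i] = c <= 0.04045f ? c / 12.92f : std::pow((c + 0.055f) / 1.055f, 2.4f);
    }
    return table;
}

Precomputing the table keeps GetBorderColor to four array lookups instead of four pow() calls per sampler.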
+ +#include <algorithm> +#include <array> + +#include "core/settings.h" +#include "video_core/textures/texture.h" + +namespace Tegra::Texture { + +namespace { + +constexpr std::array<float, 256> SRGB_CONVERSION_LUT = { + 0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f, + 0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f, + 0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f, + 0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f, + 0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f, + 0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f, + 0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f, + 0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f, + 0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f, + 0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f, + 0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f, + 0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f, + 0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f, + 0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f, + 0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f, + 0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f, + 0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f, + 0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f, + 0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f, + 0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f, + 0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f, + 0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f, + 0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f, + 0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f, + 0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f, + 0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f, + 0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f, + 0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f, + 0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f, + 0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f, + 0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f, + 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f, +}; + +unsigned SettingsMinimumAnisotropy() noexcept { + switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { + default: + case Anisotropy::Default: + return 1U; + case Anisotropy::Filter2x: + return 2U; + case Anisotropy::Filter4x: + return 4U; + case Anisotropy::Filter8x: + return 8U; + case Anisotropy::Filter16x: + return 16U; + } +} + +} // Anonymous namespace + +std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { + if 
(!srgb_conversion) { + return border_color; + } + return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g], + SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; +} + +float TSCEntry::GetMaxAnisotropy() const noexcept { + return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); +} + +} // namespace Tegra::Texture diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 7edc4abe1..eba05aced 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -8,7 +8,6 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/common_types.h" -#include "core/settings.h" namespace Tegra::Texture { @@ -132,6 +131,20 @@ enum class SwizzleSource : u32 { OneFloat = 7, }; +enum class MsaaMode : u32 { + Msaa1x1 = 0, + Msaa2x1 = 1, + Msaa2x2 = 2, + Msaa4x2 = 3, + Msaa4x2_D3D = 4, + Msaa2x1_D3D = 5, + Msaa4x4 = 6, + Msaa2x2_VC4 = 8, + Msaa2x2_VC12 = 9, + Msaa4x2_VC8 = 10, + Msaa4x2_VC24 = 11, +}; + union TextureHandle { TextureHandle(u32 raw) : raw{raw} {} @@ -198,6 +211,7 @@ struct TICEntry { union { BitField<0, 4, u32> res_min_mip_level; BitField<4, 4, u32> res_max_mip_level; + BitField<8, 4, MsaaMode> msaa_mode; BitField<12, 12, u32> min_lod_clamp; }; @@ -336,24 +350,9 @@ struct TSCEntry { std::array<u8, 0x20> raw; }; - float GetMaxAnisotropy() const { - const u32 min_value = [] { - switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { - default: - case Anisotropy::Default: - return 1U; - case Anisotropy::Filter2x: - return 2U; - case Anisotropy::Filter4x: - return 4U; - case Anisotropy::Filter8x: - return 8U; - case Anisotropy::Filter16x: - return 16U; - } - }(); - return static_cast<float>(std::max(1U << max_anisotropy, min_value)); - } + std::array<float, 4> GetBorderColor() const noexcept; + + float GetMaxAnisotropy() const noexcept; float GetMinLod() const { return static_cast<float>(min_lod_clamp) / 256.0f; @@ -368,15 +367,6 @@ struct TSCEntry { constexpr u32 mask = 1U << (13 - 1); return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; } - - std::array<float, 4> GetBorderColor() const { - if (srgb_conversion) { - return {static_cast<float>(srgb_border_color_r) / 255.0f, - static_cast<float>(srgb_border_color_g) / 255.0f, - static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]}; - } - return border_color; - } }; static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); diff --git a/src/web_service/CMakeLists.txt b/src/web_service/CMakeLists.txt index 01f2d129d..0c9bb0d55 100644 --- a/src/web_service/CMakeLists.txt +++ b/src/web_service/CMakeLists.txt @@ -8,9 +8,4 @@ add_library(web_service STATIC ) create_target_directory_groups(web_service) - -get_directory_property(OPENSSL_LIBS - DIRECTORY ${PROJECT_SOURCE_DIR}/externals/libressl - DEFINITION OPENSSL_LIBS) -target_compile_definitions(web_service PRIVATE -DCPPHTTPLIB_OPENSSL_SUPPORT) -target_link_libraries(web_service PRIVATE common json-headers ${OPENSSL_LIBS} httplib lurlparser) +target_link_libraries(web_service PRIVATE common json-headers httplib lurlparser) diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index 737ffe409..09d1651ac 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp @@ -43,7 +43,7 @@ struct Client::Impl { if (jwt.empty() && !allow_anonymous) { LOG_ERROR(WebService, "Credentials must be provided for authenticated requests"); return 
Common::WebResult{Common::WebResult::Code::CredentialsMissing, - "Credentials needed"}; + "Credentials needed", ""}; } auto result = GenericRequest(method, path, data, accept, jwt); @@ -81,12 +81,12 @@ struct Client::Impl { cli = std::make_unique<httplib::SSLClient>(parsedUrl.m_Host.c_str(), port); } else { LOG_ERROR(WebService, "Bad URL scheme {}", parsedUrl.m_Scheme); - return Common::WebResult{Common::WebResult::Code::InvalidURL, "Bad URL scheme"}; + return Common::WebResult{Common::WebResult::Code::InvalidURL, "Bad URL scheme", ""}; } } if (cli == nullptr) { LOG_ERROR(WebService, "Invalid URL {}", host + path); - return Common::WebResult{Common::WebResult::Code::InvalidURL, "Invalid URL"}; + return Common::WebResult{Common::WebResult::Code::InvalidURL, "Invalid URL", ""}; } cli->set_timeout_sec(TIMEOUT_SECONDS); @@ -118,27 +118,27 @@ struct Client::Impl { if (!cli->send(request, response)) { LOG_ERROR(WebService, "{} to {} returned null", method, host + path); - return Common::WebResult{Common::WebResult::Code::LibError, "Null response"}; + return Common::WebResult{Common::WebResult::Code::LibError, "Null response", ""}; } if (response.status >= 400) { LOG_ERROR(WebService, "{} to {} returned error status code: {}", method, host + path, response.status); return Common::WebResult{Common::WebResult::Code::HttpError, - std::to_string(response.status)}; + std::to_string(response.status), ""}; } auto content_type = response.headers.find("content-type"); if (content_type == response.headers.end()) { LOG_ERROR(WebService, "{} to {} returned no content", method, host + path); - return Common::WebResult{Common::WebResult::Code::WrongContent, ""}; + return Common::WebResult{Common::WebResult::Code::WrongContent, "", ""}; } if (content_type->second.find(accept) == std::string::npos) { LOG_ERROR(WebService, "{} to {} returned wrong content: {}", method, host + path, content_type->second); - return Common::WebResult{Common::WebResult::Code::WrongContent, "Wrong content"}; + return Common::WebResult{Common::WebResult::Code::WrongContent, "Wrong content", ""}; } return Common::WebResult{Common::WebResult::Code::Success, "", response.body}; } diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index d34b47b3f..8b9404718 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -150,6 +150,10 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core) target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets) target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) +if (ENABLE_VULKAN AND NOT WIN32) + target_include_directories(yuzu PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS}) +endif() + target_compile_definitions(yuzu PRIVATE # Use QStringBuilder for string concatenation to reduce # the overall number of temporary strings created. 
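The frontend changes that follow replace per-frontend Vulkan instance and surface creation with a WindowSystemInfo struct carrying a platform tag plus native display/surface handles. The renderer code that consumes it is outside this section; the sketch below shows one plausible way a Vulkan backend could turn such a struct into a VkSurfaceKHR. The field names mirror the diff, while CreateSurfaceSketch itself and the error handling are illustrative assumptions (the VK_USE_PLATFORM_* macros must be defined before including vulkan.h):

#include <vulkan/vulkan.h>

// Illustrative stand-in mirroring the fields this diff introduces on
// Core::Frontend::EmuWindow::WindowSystemInfo.
enum class WindowSystemType { Windows, X11, Wayland };
struct WindowSystemInfo {
    WindowSystemType type{};
    void* display_connection{};
    void* render_surface{};
};

VkSurfaceKHR CreateSurfaceSketch(VkInstance instance, const WindowSystemInfo& wsi) {
    VkSurfaceKHR surface = VK_NULL_HANDLE;
    switch (wsi.type) {
#ifdef VK_USE_PLATFORM_WIN32_KHR
    case WindowSystemType::Windows: {
        VkWin32SurfaceCreateInfoKHR ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR};
        ci.hinstance = GetModuleHandleW(nullptr);
        ci.hwnd = static_cast<HWND>(wsi.render_surface); // winId() on Qt, HWND on SDL
        vkCreateWin32SurfaceKHR(instance, &ci, nullptr, &surface);
        break;
    }
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
    case WindowSystemType::X11: {
        VkXlibSurfaceCreateInfoKHR ci{VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR};
        ci.dpy = static_cast<Display*>(wsi.display_connection);
        ci.window = reinterpret_cast<Window>(wsi.render_surface);
        vkCreateXlibSurfaceKHR(instance, &ci, nullptr, &surface);
        break;
    }
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
    case WindowSystemType::Wayland: {
        VkWaylandSurfaceCreateInfoKHR ci{VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR};
        ci.display = static_cast<wl_display*>(wsi.display_connection);
        ci.surface = static_cast<wl_surface*>(wsi.render_surface);
        vkCreateWaylandSurfaceKHR(instance, &ci, nullptr, &surface);
        break;
    }
#endif
    default:
        break; // VK_NULL_HANDLE signals an unsupported platform to the caller
    }
    return surface;
}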
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index eaded2640..1cac2f942 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -14,8 +14,9 @@ #include <QScreen> #include <QStringList> #include <QWindow> -#ifdef HAS_VULKAN -#include <QVulkanWindow> + +#if !defined(WIN32) && HAS_VULKAN +#include <qpa/qplatformnativeinterface.h> #endif #include <fmt/format.h> @@ -224,7 +225,6 @@ public: } context->MakeCurrent(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); if (Core::System::GetInstance().Renderer().TryPresent(100)) { context->SwapBuffers(); glFinish(); @@ -238,16 +238,50 @@ private: #ifdef HAS_VULKAN class VulkanRenderWidget : public RenderWidget { public: - explicit VulkanRenderWidget(GRenderWindow* parent, QVulkanInstance* instance) - : RenderWidget(parent) { + explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) { windowHandle()->setSurfaceType(QWindow::VulkanSurface); - windowHandle()->setVulkanInstance(instance); } }; #endif -GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread) - : QWidget(parent_), emu_thread(emu_thread) { +static Core::Frontend::WindowSystemType GetWindowSystemType() { + // Determine WSI type based on Qt platform. + QString platform_name = QGuiApplication::platformName(); + if (platform_name == QStringLiteral("windows")) + return Core::Frontend::WindowSystemType::Windows; + else if (platform_name == QStringLiteral("xcb")) + return Core::Frontend::WindowSystemType::X11; + else if (platform_name == QStringLiteral("wayland")) + return Core::Frontend::WindowSystemType::Wayland; + + LOG_CRITICAL(Frontend, "Unknown Qt platform!"); + return Core::Frontend::WindowSystemType::Windows; +} + +static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) { + Core::Frontend::EmuWindow::WindowSystemInfo wsi; + wsi.type = GetWindowSystemType(); + +#ifdef HAS_VULKAN + // Our Win32 Qt external doesn't have the private API. +#if defined(WIN32) || defined(__APPLE__) + wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; +#else + QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface(); + wsi.display_connection = pni->nativeResourceForWindow("display", window); + if (wsi.type == Core::Frontend::WindowSystemType::Wayland) + wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr; + else + wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; +#endif + wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f; +#endif + + return wsi; +} + +GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread_) + : QWidget(parent_), emu_thread(emu_thread_) { setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") .arg(QString::fromUtf8(Common::g_build_name), QString::fromUtf8(Common::g_scm_branch), @@ -460,6 +494,9 @@ bool GRenderWindow::InitRenderTarget() { break; } + // Update the Window System information with the new render target + window_info = GetWindowSystemInfo(child_widget->windowHandle()); + child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); layout()->addWidget(child_widget); // Reset minimum required size to avoid resizing issues on the main window after restarting. 
@@ -531,30 +568,7 @@ bool GRenderWindow::InitializeOpenGL() { bool GRenderWindow::InitializeVulkan() { #ifdef HAS_VULKAN - vk_instance = std::make_unique<QVulkanInstance>(); - vk_instance->setApiVersion(QVersionNumber(1, 1, 0)); - vk_instance->setFlags(QVulkanInstance::Flag::NoDebugOutputRedirect); - if (Settings::values.renderer_debug) { - const auto supported_layers{vk_instance->supportedLayers()}; - const bool found = - std::find_if(supported_layers.begin(), supported_layers.end(), [](const auto& layer) { - constexpr const char searched_layer[] = "VK_LAYER_LUNARG_standard_validation"; - return layer.name == searched_layer; - }); - if (found) { - vk_instance->setLayers(QByteArrayList() << "VK_LAYER_LUNARG_standard_validation"); - vk_instance->setExtensions(QByteArrayList() << VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - } - } - if (!vk_instance->create()) { - QMessageBox::critical( - this, tr("Error while initializing Vulkan 1.1!"), - tr("Your OS doesn't seem to support Vulkan 1.1 instances, or you do not have the " - "latest graphics drivers.")); - return false; - } - - auto child = new VulkanRenderWidget(this, vk_instance.get()); + auto child = new VulkanRenderWidget(this); child_widget = child; child_widget->windowHandle()->create(); main_context = std::make_unique<DummyContext>(); @@ -567,21 +581,6 @@ bool GRenderWindow::InitializeVulkan() { #endif } -void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const { -#ifdef HAS_VULKAN - const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr"); - const VkInstance instance_copy = vk_instance->vkInstance(); - const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child_widget->windowHandle()); - - std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); - std::memcpy(instance, &instance_copy, sizeof(instance_copy)); - std::memcpy(surface, &surface_copy, sizeof(surface_copy)); -#else - UNREACHABLE_MSG("Executing Vulkan code without compiling Vulkan"); -#endif -} - bool GRenderWindow::LoadOpenGL() { auto context = CreateSharedContext(); auto scope = context->Acquire(); diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index d69078df1..3626604ca 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -22,9 +22,6 @@ class GMainWindow; class QKeyEvent; class QTouchEvent; class QStringList; -#ifdef HAS_VULKAN -class QVulkanInstance; -#endif namespace VideoCore { enum class LoadCallbackStage; @@ -122,8 +119,6 @@ public: // EmuWindow implementation. 
void PollEvents() override; bool IsShown() const override; - void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const override; std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; void BackupGeometry(); @@ -186,10 +181,6 @@ private: // should instead be shared from std::shared_ptr<Core::Frontend::GraphicsContext> main_context; -#ifdef HAS_VULKAN - std::unique_ptr<QVulkanInstance> vk_instance; -#endif - /// Temporary storage of the screenshot taken QImage screenshot_image; diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index a821c7b3c..ea667caef 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -15,6 +15,10 @@ #include "ui_configure_graphics.h" #include "yuzu/configuration/configure_graphics.h" +#ifdef HAS_VULKAN +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#endif + namespace { enum class Resolution : int { Auto, @@ -165,41 +169,9 @@ void ConfigureGraphics::UpdateDeviceComboBox() { void ConfigureGraphics::RetrieveVulkanDevices() { #ifdef HAS_VULKAN - QVulkanInstance instance; - instance.setApiVersion(QVersionNumber(1, 1, 0)); - if (!instance.create()) { - LOG_INFO(Frontend, "Vulkan 1.1 not available"); - return; - } - const auto vkEnumeratePhysicalDevices{reinterpret_cast<PFN_vkEnumeratePhysicalDevices>( - instance.getInstanceProcAddr("vkEnumeratePhysicalDevices"))}; - if (vkEnumeratePhysicalDevices == nullptr) { - LOG_INFO(Frontend, "Failed to get pointer to vkEnumeratePhysicalDevices"); - return; - } - u32 physical_device_count; - if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, nullptr) != - VK_SUCCESS) { - LOG_INFO(Frontend, "Failed to get physical devices count"); - return; - } - std::vector<VkPhysicalDevice> physical_devices(physical_device_count); - if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, - physical_devices.data()) != VK_SUCCESS) { - LOG_INFO(Frontend, "Failed to get physical devices"); - return; - } - - const auto vkGetPhysicalDeviceProperties{reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>( - instance.getInstanceProcAddr("vkGetPhysicalDeviceProperties"))}; - if (vkGetPhysicalDeviceProperties == nullptr) { - LOG_INFO(Frontend, "Failed to get pointer to vkGetPhysicalDeviceProperties"); - return; - } - for (const auto physical_device : physical_devices) { - VkPhysicalDeviceProperties properties; - vkGetPhysicalDeviceProperties(physical_device, &properties); - vulkan_devices.push_back(QString::fromUtf8(properties.deviceName)); + vulkan_devices.clear(); + for (auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { + vulkan_devices.push_back(QString::fromStdString(name)); } #endif } diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 96dec50e2..15ac30f12 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -541,18 +541,19 @@ void ConfigureInputPlayer::HandleClick( button->setText(tr("[press key]")); button->setFocus(); - const auto iter = std::find(button_map.begin(), button_map.end(), button); - ASSERT(iter != button_map.end()); - const auto index = std::distance(button_map.begin(), iter); - ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); + // Keyboard keys can only be used as button devices + want_keyboard_keys = type == 
InputCommon::Polling::DeviceType::Button; + if (want_keyboard_keys) { + const auto iter = std::find(button_map.begin(), button_map.end(), button); + ASSERT(iter != button_map.end()); + const auto index = std::distance(button_map.begin(), iter); + ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); + } input_setter = new_input_setter; device_pollers = InputCommon::Polling::GetPollers(type); - // Keyboard keys can only be used as button devices - want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; - for (auto& poller : device_pollers) { poller->Start(); } diff --git a/src/yuzu/configuration/configure_input_simple.cpp b/src/yuzu/configuration/configure_input_simple.cpp index ab3a11d30..0e0e8f113 100644 --- a/src/yuzu/configuration/configure_input_simple.cpp +++ b/src/yuzu/configuration/configure_input_simple.cpp @@ -35,6 +35,7 @@ void CallConfigureDialog(ConfigureInputSimple* caller, Args&&... args) { // - Open any dialogs // - Block in any way +constexpr std::size_t PLAYER_0_INDEX = 0; constexpr std::size_t HANDHELD_INDEX = 8; void HandheldOnProfileSelect() { @@ -53,8 +54,8 @@ void HandheldOnProfileSelect() { } void DualJoyconsDockedOnProfileSelect() { - Settings::values.players[0].connected = true; - Settings::values.players[0].type = Settings::ControllerType::DualJoycon; + Settings::values.players[PLAYER_0_INDEX].connected = true; + Settings::values.players[PLAYER_0_INDEX].type = Settings::ControllerType::DualJoycon; for (std::size_t player = 1; player <= HANDHELD_INDEX; ++player) { Settings::values.players[player].connected = false; @@ -64,7 +65,7 @@ void DualJoyconsDockedOnProfileSelect() { Settings::values.keyboard_enabled = false; Settings::values.mouse_enabled = false; Settings::values.debug_pad_enabled = false; - Settings::values.touchscreen.enabled = false; + Settings::values.touchscreen.enabled = true; } // Name, OnProfileSelect (called when selected in drop down), OnConfigure (called when configure @@ -78,7 +79,7 @@ constexpr std::array<InputProfile, 3> INPUT_PROFILES{{ }}, {QT_TR_NOOP("Single Player - Dual Joycons - Docked"), DualJoyconsDockedOnProfileSelect, [](ConfigureInputSimple* caller) { - CallConfigureDialog<ConfigureInputPlayer>(caller, 1, false); + CallConfigureDialog<ConfigureInputPlayer>(caller, PLAYER_0_INDEX, false); }}, {QT_TR_NOOP("Custom"), [] {}, CallConfigureDialog<ConfigureInput>}, }}; diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp index 0a4abe34f..e0647ea5b 100644 --- a/src/yuzu/configuration/configure_mouse_advanced.cpp +++ b/src/yuzu/configuration/configure_mouse_advanced.cpp @@ -184,18 +184,19 @@ void ConfigureMouseAdvanced::HandleClick( button->setText(tr("[press key]")); button->setFocus(); - const auto iter = std::find(button_map.begin(), button_map.end(), button); - ASSERT(iter != button_map.end()); - const auto index = std::distance(button_map.begin(), iter); - ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); + // Keyboard keys can only be used as button devices + want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; + if (want_keyboard_keys) { + const auto iter = std::find(button_map.begin(), button_map.end(), button); + ASSERT(iter != button_map.end()); + const auto index = std::distance(button_map.begin(), iter); + ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); + } input_setter = new_input_setter; device_pollers = InputCommon::Polling::GetPollers(type); - // Keyboard keys can only be used as 
button devices - want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; - for (auto& poller : device_pollers) { poller->Start(); } diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp index f594ef076..53049ffd6 100644 --- a/src/yuzu/debugger/profiler.cpp +++ b/src/yuzu/debugger/profiler.cpp @@ -51,7 +51,8 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent) : QWidget(parent, Qt::Di setWindowTitle(tr("MicroProfile")); resize(1000, 600); // Remove the "?" button from the titlebar and enable the maximize button - setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint); + setWindowFlags((windowFlags() & ~Qt::WindowContextHelpButtonHint) | + Qt::WindowMaximizeButtonHint); #if MICROPROFILE_ENABLED diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index a2b88c787..dccbabcbf 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -315,7 +315,7 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type")); item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size")); } - item_model->setSortRole(GameListItemPath::TitleRole); + item_model->setSortRole(GameListItemPath::SortRole); connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons); connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); @@ -441,6 +441,8 @@ void GameList::DonePopulating(QStringList watch_list) { if (children_total > 0) { search_field->setFocus(); } + item_model->sort(tree_view->header()->sortIndicatorSection(), + tree_view->header()->sortIndicatorOrder()); } void GameList::PopupContextMenu(const QPoint& menu_location) { @@ -666,8 +668,6 @@ void GameList::LoadInterfaceLayout() { // so make it as large as possible as default. header->resizeSection(COLUMN_NAME, header->width()); } - - item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); } const QStringList GameList::supported_file_extensions = { diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h index 7cde72d1b..3e6d5a7cd 100644 --- a/src/yuzu/game_list_p.h +++ b/src/yuzu/game_list_p.h @@ -65,10 +65,10 @@ public: */ class GameListItemPath : public GameListItem { public: - static const int TitleRole = SortRole; - static const int FullPathRole = SortRole + 1; - static const int ProgramIdRole = SortRole + 2; - static const int FileTypeRole = SortRole + 3; + static const int TitleRole = SortRole + 1; + static const int FullPathRole = SortRole + 2; + static const int ProgramIdRole = SortRole + 3; + static const int FileTypeRole = SortRole + 4; GameListItemPath() = default; GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data, @@ -95,7 +95,7 @@ public: } QVariant data(int role) const override { - if (role == Qt::DisplayRole) { + if (role == Qt::DisplayRole || role == SortRole) { std::string filename; Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, nullptr); @@ -110,6 +110,9 @@ public: const auto& row1 = row_data.at(UISettings::values.row_1_text_id); const int row2_id = UISettings::values.row_2_text_id; + if (role == SortRole) + return row1.toLower(); + if (row2_id == 4) // None return row1; @@ -123,6 +126,13 @@ public: return GameListItem::data(role); } + + /** + * Override to prevent automatic sorting. 
+ */ + bool operator<(const QStandardItem& other) const override { + return false; + } }; class GameListItemCompat : public GameListItem { @@ -289,6 +299,10 @@ public: int type() const override { return static_cast<int>(GameListItemType::AddDir); } + + bool operator<(const QStandardItem& other) const override { + return false; + } }; class GameList; diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp index da2c27aa2..2018150db 100644 --- a/src/yuzu/game_list_worker.cpp +++ b/src/yuzu/game_list_worker.cpp @@ -91,7 +91,8 @@ std::pair<std::vector<u8>, std::string> GetGameListCachedObject( return generator(); } - if (file1.write(reinterpret_cast<const char*>(icon.data()), icon.size()) != icon.size()) { + if (file1.write(reinterpret_cast<const char*>(icon.data()), icon.size()) != + s64(icon.size())) { LOG_ERROR(Frontend, "Failed to write data to cache file."); return generator(); } diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 1717e06f9..2c8eb481d 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1019,9 +1019,9 @@ void GMainWindow::BootGame(const QString& filename) { std::string title_name; const auto res = Core::System::GetInstance().GetGameName(title_name); if (res != Loader::ResultStatus::Success) { - const auto [nacp, icon_file] = FileSys::PatchManager(title_id).GetControlMetadata(); - if (nacp != nullptr) - title_name = nacp->GetApplicationName(); + const auto metadata = FileSys::PatchManager(title_id).GetControlMetadata(); + if (metadata.first != nullptr) + title_name = metadata.first->GetApplicationName(); if (title_name.empty()) title_name = FileUtil::GetFilename(filename.toStdString()); @@ -1628,7 +1628,7 @@ void GMainWindow::OnMenuInstallToNAND() { } FileSys::InstallResult res; - if (index >= static_cast<size_t>(FileSys::TitleType::Application)) { + if (index >= static_cast<s32>(FileSys::TitleType::Application)) { res = Core::System::GetInstance() .GetFileSystemController() .GetUserNANDContents() diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 3522dcf6d..411e7e647 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -156,12 +156,6 @@ EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() { SDL_GL_DeleteContext(window_context); } -void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const { - // Should not have been called from OpenGL - UNREACHABLE(); -} - std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { return std::make_unique<SDLGLContext>(); } diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h index e092021d7..48bb41683 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h @@ -15,10 +15,6 @@ public: void Present() override; - /// Ignored in OpenGL - void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const override; - std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; private: diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp index 46d053f04..f2990910e 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp @@ -2,102 +2,62 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
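Returning to the game list changes above: sorting now flows through a dedicated SortRole that returns a lowercased title from data(), with operator< overridden on path and add-directory items (per the diff's own comments, to prevent automatic sorting) and an explicit item_model->sort() issued once population finishes. As an isolated illustration of the custom-sort-role mechanism itself, a minimal sketch; GameEntryItem and its role value are hypothetical:

#include <QStandardItem>
#include <QStandardItemModel>
#include <QString>

class GameEntryItem : public QStandardItem {
public:
    // Hypothetical role value; the diff reserves a dedicated SortRole the
    // same way.
    static constexpr int SortRole = Qt::UserRole + 1;

    explicit GameEntryItem(const QString& title) : QStandardItem(title) {}

    QVariant data(int role) const override {
        if (role == SortRole) {
            // The sort key differs from the display text: lowercase gives a
            // case-insensitive ordering, as row1.toLower() does above.
            return text().toLower();
        }
        return QStandardItem::data(role);
    }
};

// Usage sketch: with the model sorting on SortRole, "animal Crossing"
// orders before "Zelda" despite its lowercase initial.
//   QStandardItemModel model;
//   model.setSortRole(GameEntryItem::SortRole);
//   model.appendRow(new GameEntryItem(QStringLiteral("Zelda")));
//   model.appendRow(new GameEntryItem(QStringLiteral("animal Crossing")));
//   model.sort(0, Qt::AscendingOrder);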
-#include <algorithm> +#include <cstdlib> +#include <memory> #include <string> -#include <vector> -#include <SDL.h> -#include <SDL_vulkan.h> + #include <fmt/format.h> -#include <vulkan/vulkan.h> + #include "common/assert.h" #include "common/logging/log.h" #include "common/scm_rev.h" #include "core/settings.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" +// Include these late to avoid polluting everything with Xlib macros +#include <SDL.h> +#include <SDL_syswm.h> + EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) : EmuWindow_SDL2{system, fullscreen} { - if (SDL_Vulkan_LoadLibrary(nullptr) != 0) { - LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError()); - exit(EXIT_FAILURE); - } - - vkGetInstanceProcAddr = - reinterpret_cast<PFN_vkGetInstanceProcAddr>(SDL_Vulkan_GetVkGetInstanceProcAddr()); - if (vkGetInstanceProcAddr == nullptr) { - LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); - exit(EXIT_FAILURE); - } - const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc); render_window = - SDL_CreateWindow(window_title.c_str(), - SDL_WINDOWPOS_UNDEFINED, // x position - SDL_WINDOWPOS_UNDEFINED, // y position + SDL_CreateWindow(window_title.c_str(), SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height, - SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_VULKAN); - - const bool use_standard_layers = UseStandardLayers(vkGetInstanceProcAddr); - - u32 extra_ext_count{}; - if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, NULL)) { - LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions count from SDL! {}", - SDL_GetError()); - exit(1); - } - - auto extra_ext_names = std::make_unique<const char* []>(extra_ext_count); - if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, extra_ext_names.get())) { - LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions from SDL! 
{}", SDL_GetError()); - exit(1); - } - std::vector<const char*> enabled_extensions; - enabled_extensions.insert(enabled_extensions.begin(), extra_ext_names.get(), - extra_ext_names.get() + extra_ext_count); - - std::vector<const char*> enabled_layers; - if (use_standard_layers) { - enabled_extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - enabled_layers.push_back("VK_LAYER_LUNARG_standard_validation"); - } - - VkApplicationInfo app_info{}; - app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - app_info.apiVersion = VK_API_VERSION_1_1; - app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); - app_info.pApplicationName = "yuzu-emu"; - app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); - app_info.pEngineName = "yuzu-emu"; + SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI); - VkInstanceCreateInfo instance_ci{}; - instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - instance_ci.pApplicationInfo = &app_info; - instance_ci.enabledExtensionCount = static_cast<u32>(enabled_extensions.size()); - instance_ci.ppEnabledExtensionNames = enabled_extensions.data(); - if (Settings::values.renderer_debug) { - instance_ci.enabledLayerCount = static_cast<u32>(enabled_layers.size()); - instance_ci.ppEnabledLayerNames = enabled_layers.data(); + SDL_SysWMinfo wm; + if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) { + LOG_CRITICAL(Frontend, "Failed to get information from the window manager"); + std::exit(EXIT_FAILURE); } - const auto vkCreateInstance = - reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddr(nullptr, "vkCreateInstance")); - if (vkCreateInstance == nullptr || - vkCreateInstance(&instance_ci, nullptr, &vk_instance) != VK_SUCCESS) { - LOG_CRITICAL(Frontend, "Failed to create Vulkan instance!"); - exit(EXIT_FAILURE); - } - - vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>( - vkGetInstanceProcAddr(vk_instance, "vkDestroyInstance")); - if (vkDestroyInstance == nullptr) { - LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); - exit(EXIT_FAILURE); - } - - if (!SDL_Vulkan_CreateSurface(render_window, vk_instance, &vk_surface)) { - LOG_CRITICAL(Frontend, "Failed to create Vulkan surface! 
{}", SDL_GetError()); - exit(EXIT_FAILURE); + switch (wm.subsystem) { +#ifdef SDL_VIDEO_DRIVER_WINDOWS + case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS: + window_info.type = Core::Frontend::WindowSystemType::Windows; + window_info.render_surface = reinterpret_cast<void*>(wm.info.win.window); + break; +#endif +#ifdef SDL_VIDEO_DRIVER_X11 + case SDL_SYSWM_TYPE::SDL_SYSWM_X11: + window_info.type = Core::Frontend::WindowSystemType::X11; + window_info.display_connection = wm.info.x11.display; + window_info.render_surface = reinterpret_cast<void*>(wm.info.x11.window); + break; +#endif +#ifdef SDL_VIDEO_DRIVER_WAYLAND + case SDL_SYSWM_TYPE::SDL_SYSWM_WAYLAND: + window_info.type = Core::Frontend::WindowSystemType::Wayland; + window_info.display_connection = wm.info.wl.display; + window_info.render_surface = wm.info.wl.surface; + break; +#endif + default: + LOG_CRITICAL(Frontend, "Window manager subsystem not implemented"); + std::exit(EXIT_FAILURE); } OnResize(); @@ -107,51 +67,12 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) Common::g_scm_branch, Common::g_scm_desc); } -EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() { - vkDestroyInstance(vk_instance, nullptr); -} - -void EmuWindow_SDL2_VK::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const { - const auto instance_proc_addr = vkGetInstanceProcAddr; - std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); - std::memcpy(instance, &vk_instance, sizeof(vk_instance)); - std::memcpy(surface, &vk_surface, sizeof(vk_surface)); -} +EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default; std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const { return nullptr; } -bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const { - if (!Settings::values.renderer_debug) { - return false; - } - - const auto vkEnumerateInstanceLayerProperties = - reinterpret_cast<PFN_vkEnumerateInstanceLayerProperties>( - vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceLayerProperties")); - if (vkEnumerateInstanceLayerProperties == nullptr) { - LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); - return false; - } - - u32 available_layers_count{}; - if (vkEnumerateInstanceLayerProperties(&available_layers_count, nullptr) != VK_SUCCESS) { - LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!"); - return false; - } - std::vector<VkLayerProperties> layers(available_layers_count); - if (vkEnumerateInstanceLayerProperties(&available_layers_count, layers.data()) != VK_SUCCESS) { - LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!"); - return false; - } - - return std::find_if(layers.begin(), layers.end(), [&](const auto& layer) { - return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation"); - }) != layers.end(); -} - void EmuWindow_SDL2_VK::Present() { // TODO (bunnei): ImplementMe } diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h index 3dd1f3f61..b8021ebea 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h @@ -4,27 +4,21 @@ #pragma once -#include <vulkan/vulkan.h> +#include <memory> + #include "core/frontend/emu_window.h" #include "yuzu_cmd/emu_window/emu_window_sdl2.h" +namespace Core { +class System; +} + class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { public: explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen); 
~EmuWindow_SDL2_VK(); void Present() override; - void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const override; std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; - -private: - bool UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const; - - VkInstance vk_instance{}; - VkSurfaceKHR vk_surface{}; - - PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; - PFN_vkDestroyInstance vkDestroyInstance{}; }; diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp index a837430cc..8584f6671 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp @@ -116,10 +116,6 @@ bool EmuWindow_SDL2_Hide::IsShown() const { return false; } -void EmuWindow_SDL2_Hide::RetrieveVulkanHandlers(void*, void*, void*) const { - UNREACHABLE(); -} - class SDLGLContext : public Core::Frontend::GraphicsContext { public: explicit SDLGLContext() { diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h index 9f5d04fca..c13a82df2 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h @@ -19,10 +19,6 @@ public: /// Whether the screen is being shown or not. bool IsShown() const override; - /// Retrieves Vulkan specific handlers from the window - void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const override; - std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; private:
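Finally, the configure_graphics change earlier in this diff delegates GPU discovery to Vulkan::RendererVulkan::EnumerateDevices(), whose implementation is not part of this section. Judging from the raw Vulkan calls it replaces, a helper of that shape could look like the following sketch; the function name and error handling are assumptions:

#include <cstdint>
#include <string>
#include <vector>

#include <vulkan/vulkan.h>

// Sketch of a device-name enumeration helper in the spirit of
// RendererVulkan::EnumerateDevices(); assumes a VkInstance already exists.
std::vector<std::string> EnumerateDeviceNames(VkInstance instance) {
    std::vector<std::string> names;
    std::uint32_t count = 0;
    if (vkEnumeratePhysicalDevices(instance, &count, nullptr) != VK_SUCCESS) {
        return names; // empty list doubles as the error signal here
    }
    std::vector<VkPhysicalDevice> devices(count);
    if (vkEnumeratePhysicalDevices(instance, &count, devices.data()) != VK_SUCCESS) {
        return names;
    }
    for (const VkPhysicalDevice device : devices) {
        VkPhysicalDeviceProperties properties;
        vkGetPhysicalDeviceProperties(device, &properties);
        names.emplace_back(properties.deviceName);
    }
    return names;
}

Centralizing this in the renderer lets both the Qt configuration dialog and any future frontend share one code path instead of each reimplementing the loader dance.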