diff options
26 files changed, 320 insertions, 48 deletions
diff --git a/.ci/scripts/clang/docker.sh b/.ci/scripts/clang/docker.sh new file mode 100755 index 000000000..885d74e97 --- /dev/null +++ b/.ci/scripts/clang/docker.sh @@ -0,0 +1,18 @@ +#!/bin/bash -ex + +# Exit on error, rather than continuing with the rest of the script. +set -e + +cd /yuzu + +ccache -s + +mkdir build || true && cd build +cmake .. -DDISPLAY_VERSION=$1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/clang -DCMAKE_CXX_COMPILER=/usr/lib/ccache/clang++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -DENABLE_QT_TRANSLATION=ON -DCMAKE_INSTALL_PREFIX="/usr" + +make -j$(nproc) + +ccache -s + +ctest -VV -C Release + diff --git a/.ci/scripts/clang/exec.sh b/.ci/scripts/clang/exec.sh new file mode 100644 index 000000000..e56cd4325 --- /dev/null +++ b/.ci/scripts/clang/exec.sh @@ -0,0 +1,8 @@ +#!/bin/bash -ex + +mkdir -p "ccache" || true +chmod a+x ./.ci/scripts/clang/docker.sh +# the UID for the container yuzu user is 1027 +sudo chown -R 1027 ./ +docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.ci/scripts/clang/docker.sh $1 +sudo chown -R $UID ./ diff --git a/.ci/scripts/clang/upload.sh b/.ci/scripts/clang/upload.sh new file mode 100644 index 000000000..fe4e6b2ac --- /dev/null +++ b/.ci/scripts/clang/upload.sh @@ -0,0 +1,20 @@ +#!/bin/bash -ex + +. .ci/scripts/common/pre-upload.sh + +REV_NAME="yuzu-linux-${GITDATE}-${GITREV}" +ARCHIVE_NAME="${REV_NAME}.tar.xz" +COMPRESSION_FLAGS="-cJvf" + +if [ "${RELEASE_NAME}" = "mainline" ]; then + DIR_NAME="${REV_NAME}" +else + DIR_NAME="${REV_NAME}_${RELEASE_NAME}" +fi + +mkdir "$DIR_NAME" + +cp build/bin/yuzu-cmd "$DIR_NAME" +cp build/bin/yuzu "$DIR_NAME" + +. .ci/scripts/common/post-upload.sh diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml index 7422c8346..57d36f813 100644 --- a/.ci/templates/build-standard.yml +++ b/.ci/templates/build-standard.yml @@ -12,6 +12,9 @@ jobs: windows: BuildSuffix: 'windows-mingw' ScriptFolder: 'windows' + clang: + BuildSuffix: 'clang' + ScriptFolder: 'clang' linux: BuildSuffix: 'linux' ScriptFolder: 'linux' @@ -24,4 +27,4 @@ jobs: parameters: artifactSource: 'false' cache: $(parameters.cache) - version: $(parameters.version)
\ No newline at end of file + version: $(parameters.version) diff --git a/externals/glad/include/glad/glad.h b/externals/glad/include/glad/glad.h index 6e16358ea..191bb9fcb 100644 --- a/externals/glad/include/glad/glad.h +++ b/externals/glad/include/glad/glad.h @@ -5156,6 +5156,9 @@ GLAPI PFNGLDEPTHRANGEARRAYVPROC glad_glDepthRangeArrayv; typedef void (APIENTRYP PFNGLDEPTHRANGEINDEXEDPROC)(GLuint index, GLdouble n, GLdouble f); GLAPI PFNGLDEPTHRANGEINDEXEDPROC glad_glDepthRangeIndexed; #define glDepthRangeIndexed glad_glDepthRangeIndexed +typedef void (APIENTRYP PFNGLDEPTHRANGEINDEXEDDNVPROC)(GLuint index, GLdouble n, GLdouble f); +GLAPI PFNGLDEPTHRANGEINDEXEDDNVPROC glad_glDepthRangeIndexeddNV; +#define glDepthRangeIndexeddNV glad_glDepthRangeIndexeddNV typedef void (APIENTRYP PFNGLGETFLOATI_VPROC)(GLenum target, GLuint index, GLfloat *data); GLAPI PFNGLGETFLOATI_VPROC glad_glGetFloati_v; #define glGetFloati_v glad_glGetFloati_v diff --git a/externals/glad/src/glad.c b/externals/glad/src/glad.c index d3e13163f..7b24cd68d 100644 --- a/externals/glad/src/glad.c +++ b/externals/glad/src/glad.c @@ -1044,6 +1044,7 @@ PFNGLDEPTHMASKPROC glad_glDepthMask = NULL; PFNGLDEPTHRANGEPROC glad_glDepthRange = NULL; PFNGLDEPTHRANGEARRAYVPROC glad_glDepthRangeArrayv = NULL; PFNGLDEPTHRANGEINDEXEDPROC glad_glDepthRangeIndexed = NULL; +PFNGLDEPTHRANGEINDEXEDDNVPROC glad_glDepthRangeIndexeddNV = NULL; PFNGLDEPTHRANGEFPROC glad_glDepthRangef = NULL; PFNGLDETACHSHADERPROC glad_glDetachShader = NULL; PFNGLDISABLEPROC glad_glDisable = NULL; @@ -7971,6 +7972,7 @@ static void load_GL_NV_depth_buffer_float(GLADloadproc load) { glad_glDepthRangedNV = (PFNGLDEPTHRANGEDNVPROC)load("glDepthRangedNV"); glad_glClearDepthdNV = (PFNGLCLEARDEPTHDNVPROC)load("glClearDepthdNV"); glad_glDepthBoundsdNV = (PFNGLDEPTHBOUNDSDNVPROC)load("glDepthBoundsdNV"); + glad_glDepthRangeIndexeddNV = (PFNGLDEPTHRANGEINDEXEDDNVPROC)load("glDepthRangeIndexeddNV"); } static void load_GL_NV_draw_texture(GLADloadproc load) { if(!GLAD_GL_NV_draw_texture) return; diff --git a/src/common/uint128.h b/src/common/uint128.h index 83560a9ce..4780b2f9d 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -98,4 +98,24 @@ namespace Common { #endif } +// This function divides a u128 by a u32 value and produces two u64 values: +// the result of division and the remainder +[[nodiscard]] static inline std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) { + u64 remainder = dividend[0] % divisor; + u64 accum = dividend[0] / divisor; + if (dividend[1] == 0) + return {accum, remainder}; + // We ignore dividend[1] / divisor as that overflows + const u64 first_segment = (dividend[1] % divisor) << 32; + accum += (first_segment / divisor) << 32; + const u64 second_segment = (first_segment % divisor) << 32; + accum += (second_segment / divisor); + remainder += second_segment % divisor; + if (remainder >= divisor) { + accum++; + remainder -= divisor; + } + return {accum, remainder}; +} + } // namespace Common diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 1545993bd..49830b8ab 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -20,9 +20,7 @@ using base_time_point = std::chrono::time_point<base_timer>; class StandardWallClock final : public WallClock { public: explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) - : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false), - emulated_clock_factor{GetFixedPoint64Factor(emulated_clock_frequency, 1000000000)}, - emulated_cpu_factor{GetFixedPoint64Factor(emulated_cpu_frequency, 1000000000)} { + : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) { start_time = base_timer::now(); } @@ -45,11 +43,16 @@ public: } u64 GetClockCycles() override { - return MultiplyHigh(GetTimeNS().count(), emulated_clock_factor); + std::chrono::nanoseconds time_now = GetTimeNS(); + const u128 temporary = + Common::Multiply64Into128(time_now.count(), emulated_clock_frequency); + return Common::Divide128On32(temporary, 1000000000).first; } u64 GetCPUCycles() override { - return MultiplyHigh(GetTimeNS().count(), emulated_cpu_factor); + std::chrono::nanoseconds time_now = GetTimeNS(); + const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency); + return Common::Divide128On32(temporary, 1000000000).first; } void Pause([[maybe_unused]] bool is_paused) override { @@ -58,8 +61,6 @@ public: private: base_time_point start_time; - const u64 emulated_clock_factor; - const u64 emulated_cpu_factor; }; #ifdef ARCHITECTURE_x86_64 diff --git a/src/core/core.cpp b/src/core/core.cpp index 30f5e1128..de6305e2a 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -308,6 +308,9 @@ struct System::Impl { // Close all CPU/threading state cpu_manager.Shutdown(); + // Release the Time Manager's resources + time_manager.Shutdown(); + // Shutdown kernel and core timing core_timing.Shutdown(); kernel.Shutdown(); diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 453695545..331cf3a60 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -101,8 +101,6 @@ struct KernelCore::Impl { current_process = nullptr; - system_resource_limit = nullptr; - global_handle_table.Clear(); preemption_event = nullptr; @@ -111,6 +109,13 @@ struct KernelCore::Impl { exclusive_monitor.reset(); + hid_shared_mem = nullptr; + font_shared_mem = nullptr; + irs_shared_mem = nullptr; + time_shared_mem = nullptr; + + system_resource_limit = nullptr; + // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others next_host_thread_id = Core::Hardware::NUM_CPU_CORES; } diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 3ec0e1eca..615e20a54 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -508,7 +508,7 @@ public: {1, &IManagerForApplication::GetAccountId, "GetAccountId"}, {2, nullptr, "EnsureIdTokenCacheAsync"}, {3, nullptr, "LoadIdTokenCache"}, - {130, nullptr, "GetNintendoAccountUserResourceCacheForApplication"}, + {130, &IManagerForApplication::GetNintendoAccountUserResourceCacheForApplication, "GetNintendoAccountUserResourceCacheForApplication"}, {150, nullptr, "CreateAuthorizationRequest"}, {160, &IManagerForApplication::StoreOpenContext, "StoreOpenContext"}, {170, nullptr, "LoadNetworkServiceLicenseKindAsync"}, @@ -534,6 +534,22 @@ private: rb.PushRaw<u64>(user_id.GetNintendoID()); } + void GetNintendoAccountUserResourceCacheForApplication(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_ACC, "(STUBBED) called"); + + std::vector<u8> nas_user_base_for_application(0x68); + ctx.WriteBuffer(nas_user_base_for_application, 0); + + if (ctx.CanWriteBuffer(1)) { + std::vector<u8> unknown_out_buffer(ctx.GetWriteBufferSize(1)); + ctx.WriteBuffer(unknown_out_buffer, 1); + } + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(RESULT_SUCCESS); + rb.PushRaw<u64>(user_id.GetNintendoID()); + } + void StoreOpenContext(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_ACC, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 2}; diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index ffc3dfdc3..ba27bbb05 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -273,8 +273,8 @@ Hid::Hid(Core::System& system_) : ServiceFramework{system_, "hid"} { {204, &Hid::PermitVibration, "PermitVibration"}, {205, &Hid::IsVibrationPermitted, "IsVibrationPermitted"}, {206, &Hid::SendVibrationValues, "SendVibrationValues"}, - {207, nullptr, "SendVibrationGcErmCommand"}, - {208, nullptr, "GetActualVibrationGcErmCommand"}, + {207, &Hid::SendVibrationGcErmCommand, "SendVibrationGcErmCommand"}, + {208, &Hid::GetActualVibrationGcErmCommand, "GetActualVibrationGcErmCommand"}, {209, &Hid::BeginPermitVibrationSession, "BeginPermitVibrationSession"}, {210, &Hid::EndPermitVibrationSession, "EndPermitVibrationSession"}, {211, &Hid::IsVibrationDeviceMounted, "IsVibrationDeviceMounted"}, @@ -1093,7 +1093,22 @@ void Hid::GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx) { VibrationDeviceInfo vibration_device_info; - vibration_device_info.type = VibrationDeviceType::LinearResonantActuator; + switch (vibration_device_handle.npad_type) { + case Controller_NPad::NpadType::ProController: + case Controller_NPad::NpadType::Handheld: + case Controller_NPad::NpadType::JoyconDual: + case Controller_NPad::NpadType::JoyconLeft: + case Controller_NPad::NpadType::JoyconRight: + default: + vibration_device_info.type = VibrationDeviceType::LinearResonantActuator; + break; + case Controller_NPad::NpadType::GameCube: + vibration_device_info.type = VibrationDeviceType::GcErm; + break; + case Controller_NPad::NpadType::Pokeball: + vibration_device_info.type = VibrationDeviceType::Unknown; + break; + } switch (vibration_device_handle.device_index) { case Controller_NPad::DeviceIndex::Left: @@ -1215,6 +1230,108 @@ void Hid::SendVibrationValues(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } +void Hid::SendVibrationGcErmCommand(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + struct Parameters { + Controller_NPad::DeviceHandle vibration_device_handle; + u64 applet_resource_user_id; + VibrationGcErmCommand gc_erm_command; + }; + static_assert(sizeof(Parameters) == 0x18, "Parameters has incorrect size."); + + const auto parameters{rp.PopRaw<Parameters>()}; + + /** + * Note: This uses yuzu-specific behavior such that the StopHard command produces + * vibrations where freq_low == 0.0f and freq_high == 0.0f, as defined below, + * in order to differentiate between Stop and StopHard commands. + * This is done to reuse the controller vibration functions made for regular controllers. + */ + const auto vibration_value = [parameters] { + switch (parameters.gc_erm_command) { + case VibrationGcErmCommand::Stop: + return Controller_NPad::VibrationValue{ + .amp_low = 0.0f, + .freq_low = 160.0f, + .amp_high = 0.0f, + .freq_high = 320.0f, + }; + case VibrationGcErmCommand::Start: + return Controller_NPad::VibrationValue{ + .amp_low = 1.0f, + .freq_low = 160.0f, + .amp_high = 1.0f, + .freq_high = 320.0f, + }; + case VibrationGcErmCommand::StopHard: + return Controller_NPad::VibrationValue{ + .amp_low = 0.0f, + .freq_low = 0.0f, + .amp_high = 0.0f, + .freq_high = 0.0f, + }; + default: + return Controller_NPad::DEFAULT_VIBRATION_VALUE; + } + }(); + + applet_resource->GetController<Controller_NPad>(HidController::NPad) + .VibrateController(parameters.vibration_device_handle, vibration_value); + + LOG_DEBUG(Service_HID, + "called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}, " + "gc_erm_command={}", + parameters.vibration_device_handle.npad_type, + parameters.vibration_device_handle.npad_id, + parameters.vibration_device_handle.device_index, parameters.applet_resource_user_id, + parameters.gc_erm_command); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + +void Hid::GetActualVibrationGcErmCommand(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + struct Parameters { + Controller_NPad::DeviceHandle vibration_device_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; + }; + + const auto parameters{rp.PopRaw<Parameters>()}; + + const auto last_vibration = applet_resource->GetController<Controller_NPad>(HidController::NPad) + .GetLastVibration(parameters.vibration_device_handle); + + const auto gc_erm_command = [last_vibration] { + if (last_vibration.amp_low != 0.0f || last_vibration.amp_high != 0.0f) { + return VibrationGcErmCommand::Start; + } + + /** + * Note: This uses yuzu-specific behavior such that the StopHard command produces + * vibrations where freq_low == 0.0f and freq_high == 0.0f, as defined in the HID function + * SendVibrationGcErmCommand, in order to differentiate between Stop and StopHard commands. + * This is done to reuse the controller vibration functions made for regular controllers. + */ + if (last_vibration.freq_low == 0.0f && last_vibration.freq_high == 0.0f) { + return VibrationGcErmCommand::StopHard; + } + + return VibrationGcErmCommand::Stop; + }(); + + LOG_DEBUG(Service_HID, + "called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}", + parameters.vibration_device_handle.npad_type, + parameters.vibration_device_handle.npad_id, + parameters.vibration_device_handle.device_index, parameters.applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(RESULT_SUCCESS); + rb.PushEnum(gc_erm_command); +} + void Hid::BeginPermitVibrationSession(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto applet_resource_user_id{rp.Pop<u64>()}; diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index 06ddcf3e4..36ed228c8 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -136,6 +136,8 @@ private: void PermitVibration(Kernel::HLERequestContext& ctx); void IsVibrationPermitted(Kernel::HLERequestContext& ctx); void SendVibrationValues(Kernel::HLERequestContext& ctx); + void SendVibrationGcErmCommand(Kernel::HLERequestContext& ctx); + void GetActualVibrationGcErmCommand(Kernel::HLERequestContext& ctx); void BeginPermitVibrationSession(Kernel::HLERequestContext& ctx); void EndPermitVibrationSession(Kernel::HLERequestContext& ctx); void IsVibrationDeviceMounted(Kernel::HLERequestContext& ctx); @@ -154,7 +156,9 @@ private: void GetNpadCommunicationMode(Kernel::HLERequestContext& ctx); enum class VibrationDeviceType : u32 { + Unknown = 0, LinearResonantActuator = 1, + GcErm = 2, }; enum class VibrationDevicePosition : u32 { @@ -163,6 +167,12 @@ private: Right = 2, }; + enum class VibrationGcErmCommand : u64 { + Stop = 0, + Start = 1, + StopHard = 2, + }; + struct VibrationDeviceInfo { VibrationDeviceType type{}; VibrationDevicePosition position{}; diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp index 858623e2b..1f7309f6b 100644 --- a/src/core/hle/service/time/time_manager.cpp +++ b/src/core/hle/service/time/time_manager.cpp @@ -279,6 +279,10 @@ const SharedMemory& TimeManager::GetSharedMemory() const { return impl->shared_memory; } +void TimeManager::Shutdown() { + impl.reset(); +} + void TimeManager::UpdateLocalSystemClockTime(s64 posix_time) { impl->UpdateLocalSystemClockTime(system, posix_time); } diff --git a/src/core/hle/service/time/time_manager.h b/src/core/hle/service/time/time_manager.h index 993c7c288..4db8cc0e1 100644 --- a/src/core/hle/service/time/time_manager.h +++ b/src/core/hle/service/time/time_manager.h @@ -61,6 +61,8 @@ public: const SharedMemory& GetSharedMemory() const; + void Shutdown(); + void SetupTimeZoneManager(std::string location_name, Clock::SteadyClockTimePoint time_zone_updated_time_point, std::size_t total_location_name_count, u128 time_zone_rule_version, diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 48d5c4a5e..1ae5f1d62 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -239,6 +239,7 @@ Device::Device() { has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); + has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" @@ -275,6 +276,7 @@ Device::Device(std::nullptr_t) { has_image_load_formatted = true; has_texture_shadow_lod = true; has_variable_aoffi = true; + has_depth_buffer_float = true; } bool Device::TestVariableAoffi() { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ee053776d..f24bd0c7b 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -122,6 +122,10 @@ public: return use_driver_cache; } + bool HasDepthBufferFloat() const { + return has_depth_buffer_float; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -150,6 +154,7 @@ private: bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; bool use_driver_cache{}; + bool has_depth_buffer_float{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 418644108..4610fd160 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -889,7 +889,11 @@ void RasterizerOpenGL::SyncViewport() { const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; const GLdouble far_depth = src.translate_z + src.scale_z; - glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); + if (device.HasDepthBufferFloat()) { + glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth); + } else { + glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); + } if (!GLAD_GL_NV_viewport_swizzle) { continue; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 529570ff0..5cf7cd151 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -335,6 +335,10 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop const VideoCore::DiskResourceLoadCallback& callback) { disk_cache.BindTitleID(title_id); const std::optional transferable = disk_cache.LoadTransferable(); + + LOG_INFO(Render_OpenGL, "Total Shader Count: {}", + transferable.has_value() ? transferable->size() : 0); + if (!transferable) { return; } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 848eedd66..668633e7b 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -201,10 +201,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, }); } -void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) { - update_descriptor_queue.AddBuffer(buffer, offset, size); -} - void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) { if (num_indices <= current_num_indices) { return; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 041e6515c..982e92191 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -8,6 +8,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -16,7 +17,6 @@ namespace Vulkan { class Device; class VKDescriptorPool; class VKScheduler; -class VKUpdateDescriptorQueue; class BufferCacheRuntime; @@ -86,7 +86,9 @@ public: } private: - void BindBuffer(VkBuffer buffer, u32 offset, u32 size); + void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { + update_descriptor_queue.AddBuffer(buffer, offset, size); + } void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 40e2e0d38..c6846d886 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1845,13 +1845,21 @@ private: Expression TextureGather(Operation operation) { const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); const Id coords = GetCoordinates(operation, Type::Float); + + spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; + std::vector<Id> operands; Id texture{}; + + if (!meta.aoffi.empty()) { + mask = mask | spv::ImageOperandsMask::Offset; + operands.push_back(GetOffsetCoordinates(operation)); + } + if (meta.sampler.is_shadow) { texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, - AsFloat(Visit(meta.depth_compare))); + AsFloat(Visit(meta.depth_compare)), mask, operands); } else { u32 component_value = 0; if (meta.component) { @@ -1860,7 +1868,7 @@ private: component_value = component->GetValue(); } texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, - Constant(t_uint, component_value)); + Constant(t_uint, component_value), mask, operands); } return GetTextureElement(operation, texture, Type::Float); } @@ -1928,13 +1936,22 @@ private: const Id image = GetTextureImage(operation); const Id coords = GetCoordinates(operation, Type::Int); + + spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; + std::vector<Id> operands; Id fetch; + if (meta.lod && !meta.sampler.is_buffer) { - fetch = OpImageFetch(t_float4, image, coords, spv::ImageOperandsMask::Lod, - AsInt(Visit(meta.lod))); - } else { - fetch = OpImageFetch(t_float4, image, coords); + mask = mask | spv::ImageOperandsMask::Lod; + operands.push_back(AsInt(Visit(meta.lod))); + } + + if (!meta.aoffi.empty()) { + mask = mask | spv::ImageOperandsMask::Offset; + operands.push_back(GetOffsetCoordinates(operation)); } + + fetch = OpImageFetch(t_float4, image, coords, mask, operands); return GetTextureElement(operation, fetch, Type::Float); } diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index f99273c6a..dc45fdcb1 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -20,20 +20,20 @@ VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKSchedu VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; void VKUpdateDescriptorQueue::TickFrame() { - payload.clear(); + payload_cursor = payload.data(); } void VKUpdateDescriptorQueue::Acquire() { // Minimum number of entries required. // This is the maximum number of entries a single draw call migth use. - static constexpr std::size_t MIN_ENTRIES = 0x400; + static constexpr size_t MIN_ENTRIES = 0x400; - if (payload.size() + MIN_ENTRIES >= payload.max_size()) { + if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) { LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); scheduler.WaitWorker(); - payload.clear(); + payload_cursor = payload.data(); } - upload_start = &*payload.end(); + upload_start = payload_cursor; } void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index e214f7195..d35e77c44 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -4,8 +4,7 @@ #pragma once -#include <variant> -#include <boost/container/static_vector.hpp> +#include <array> #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -16,13 +15,15 @@ class Device; class VKScheduler; struct DescriptorUpdateEntry { - DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {} + struct Empty {}; + DescriptorUpdateEntry() = default; + DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {} DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {} - DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {} union { + Empty empty{}; VkDescriptorImageInfo image; VkDescriptorBufferInfo buffer; VkBufferView texel_buffer; @@ -41,39 +42,40 @@ public: void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); void AddSampledImage(VkImageView image_view, VkSampler sampler) { - payload.emplace_back(VkDescriptorImageInfo{ + *(payload_cursor++) = VkDescriptorImageInfo{ .sampler = sampler, .imageView = image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }); + }; } void AddImage(VkImageView image_view) { - payload.emplace_back(VkDescriptorImageInfo{ + *(payload_cursor++) = VkDescriptorImageInfo{ .sampler = VK_NULL_HANDLE, .imageView = image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }); + }; } - void AddBuffer(VkBuffer buffer, u64 offset, size_t size) { - payload.emplace_back(VkDescriptorBufferInfo{ + void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) { + *(payload_cursor++) = VkDescriptorBufferInfo{ .buffer = buffer, .offset = offset, .range = size, - }); + }; } void AddTexelBuffer(VkBufferView texel_buffer) { - payload.emplace_back(texel_buffer); + *(payload_cursor++) = texel_buffer; } private: const Device& device; VKScheduler& scheduler; + DescriptorUpdateEntry* payload_cursor = nullptr; const DescriptorUpdateEntry* upload_start = nullptr; - boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; + std::array<DescriptorUpdateEntry, 0x10000> payload; }; } // namespace Vulkan diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 50f4e7d35..7728f600e 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -330,6 +330,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { case StoreType::Bits32: (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; + case StoreType::Unsigned16: case StoreType::Signed16: { Node address = GetAddress(0); Node memory = (this->*get_memory)(address); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 833fa2a39..c69681e8d 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -806,6 +806,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; + const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); // If enabled arrays index is always stored in the gpr8 field const u64 array_register = instr.gpr8.Value(); @@ -820,17 +821,23 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is std::vector<Node> coords; for (std::size_t i = 0; i < type_coord_count; ++i) { const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); + coords.push_back( + GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); } const Node array = is_array ? GetRegister(array_register) : nullptr; // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); + std::vector<Node> aoffi; + if (aoffi_enabled) { + aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); + } + Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{*sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; + MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; |