summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitmodules3
-rw-r--r--CMakeLists.txt4
-rw-r--r--externals/CMakeLists.txt8
m---------externals/sirit0
m---------externals/xbyak0
-rw-r--r--src/audio_core/audio_renderer.cpp99
-rw-r--r--src/audio_core/audio_renderer.h10
-rw-r--r--src/audio_core/common.h1
-rw-r--r--src/common/CMakeLists.txt8
-rw-r--r--src/common/memory_detect.cpp60
-rw-r--r--src/common/memory_detect.h22
-rw-r--r--src/common/time_zone.cpp49
-rw-r--r--src/common/time_zone.h18
-rw-r--r--src/common/x64/xbyak_abi.h266
-rw-r--r--src/common/x64/xbyak_util.h47
-rw-r--r--src/core/file_sys/control_metadata.cpp4
-rw-r--r--src/core/file_sys/control_metadata.h1
-rw-r--r--src/core/file_sys/patch_manager.cpp34
-rw-r--r--src/core/file_sys/patch_manager.h5
-rw-r--r--src/core/file_sys/savedata_factory.cpp9
-rw-r--r--src/core/file_sys/system_archive/system_version.cpp14
-rw-r--r--src/core/frontend/emu_window.cpp2
-rw-r--r--src/core/frontend/framebuffer_layout.h5
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.cpp38
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.h1
-rw-r--r--src/core/hle/service/hid/controllers/keyboard.cpp7
-rw-r--r--src/core/hle/service/nifm/nifm.cpp3
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp4
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h10
-rw-r--r--src/core/hle/service/time/time_manager.cpp11
-rw-r--r--src/core/hle/service/time/time_zone_content_manager.cpp24
-rw-r--r--src/core/settings.cpp17
-rw-r--r--src/core/settings.h5
-rw-r--r--src/core/telemetry_session.cpp1
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h227
-rw-r--r--src/video_core/buffer_cache/map_interval.cpp33
-rw-r--r--src/video_core/buffer_cache/map_interval.h133
-rw-r--r--src/video_core/dma_pusher.cpp9
-rw-r--r--src/video_core/dma_pusher.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp13
-rw-r--r--src/video_core/engines/shader_bytecode.h28
-rw-r--r--src/video_core/rasterizer_cache.h58
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp11
-rw-r--r--src/video_core/renderer_opengl/gl_device.h10
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp125
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h16
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp101
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp153
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp110
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h56
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp71
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h26
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp19
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp81
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp29
-rw-r--r--src/video_core/shader/decode.cpp2
-rw-r--r--src/video_core/shader/decode/memory.cpp3
-rw-r--r--src/video_core/shader/decode/other.cpp40
-rw-r--r--src/video_core/shader/decode/xmad.cpp12
-rw-r--r--src/video_core/shader/node.h30
-rw-r--r--src/video_core/shader/shader_ir.cpp109
-rw-r--r--src/video_core/texture_cache/texture_cache.h57
-rw-r--r--src/yuzu/bootmanager.cpp19
-rw-r--r--src/yuzu/configuration/config.cpp7
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp6
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui10
-rw-r--r--src/yuzu/configuration/configure_system.cpp2
-rw-r--r--src/yuzu/configuration/configure_system.h1
-rw-r--r--src/yuzu/configuration/configure_system.ui257
-rw-r--r--src/yuzu/discord_impl.cpp2
-rw-r--r--src/yuzu/game_list.cpp8
-rw-r--r--src/yuzu/game_list.h2
-rw-r--r--src/yuzu/loading_screen.cpp3
-rw-r--r--src/yuzu/main.cpp90
-rw-r--r--src/yuzu/main.h3
-rw-r--r--src/yuzu/main.ui12
-rw-r--r--src/yuzu_cmd/config.cpp7
-rw-r--r--src/yuzu_cmd/default_ini.h8
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp3
92 files changed, 2192 insertions, 669 deletions
diff --git a/.gitmodules b/.gitmodules
index bf3b80d59..2ec9dda62 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -28,3 +28,6 @@
[submodule "libzip"]
path = externals/libzip/libzip
url = https://github.com/nih-at/libzip.git
+[submodule "xbyak"]
+ path = externals/xbyak
+ url = https://github.com/herumi/xbyak.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 61321bf0a..a9f669d56 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.11)
+cmake_minimum_required(VERSION 3.15)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
@@ -13,7 +13,7 @@ project(yuzu)
option(ENABLE_SDL2 "Enable the SDL2 frontend" ON)
option(ENABLE_QT "Enable the Qt frontend" ON)
-CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" OFF "ENABLE_QT;MSVC" OFF)
+CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "ENABLE_QT;MSVC" OFF)
option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 0b40cd1b0..df7a5e0a9 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -75,3 +75,11 @@ if (ENABLE_WEB_SERVICE)
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto)
endif()
+
+if (NOT TARGET xbyak)
+ if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
+ add_library(xbyak INTERFACE)
+ target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
+ target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
+ endif()
+endif()
diff --git a/externals/sirit b/externals/sirit
-Subproject 414fc4dbd28d8fe48f735a0c389db8a234f733c
+Subproject a62c5bbc100a5e5a31ea0ccc4a78d8fa6a4167c
diff --git a/externals/xbyak b/externals/xbyak
new file mode 160000
+Subproject 82b70e665918efc2ee348091742fd0237b3b68c
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index d18ef6940..50846a854 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -17,7 +17,7 @@ namespace AudioCore {
constexpr u32 STREAM_SAMPLE_RATE{48000};
constexpr u32 STREAM_NUM_CHANNELS{2};
-
+using VoiceChannelHolder = std::array<VoiceResourceInformation*, 6>;
class AudioRenderer::VoiceState {
public:
bool IsPlaying() const {
@@ -37,9 +37,10 @@ public:
}
void SetWaveIndex(std::size_t index);
- std::vector<s16> DequeueSamples(std::size_t sample_count, Core::Memory::Memory& memory);
+ std::vector<s16> DequeueSamples(std::size_t sample_count, Core::Memory::Memory& memory,
+ const VoiceChannelHolder& voice_resources);
void UpdateState();
- void RefreshBuffer(Core::Memory::Memory& memory);
+ void RefreshBuffer(Core::Memory::Memory& memory, const VoiceChannelHolder& voice_resources);
private:
bool is_in_use{};
@@ -79,7 +80,7 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
std::shared_ptr<Kernel::WritableEvent> buffer_event,
std::size_t instance_number)
: worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
- effects(params.effect_count), memory{memory_} {
+ voice_resources(params.voice_count), effects(params.effect_count), memory{memory_} {
behavior_info.SetUserRevision(params.revision);
audio_out = std::make_unique<AudioCore::AudioOut>();
stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
@@ -127,6 +128,12 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
input_params.data() + sizeof(UpdateDataHeader) + config.behavior_size,
memory_pool_count * sizeof(MemoryPoolInfo));
+ // Copy voice resources
+ const std::size_t voice_resource_offset{sizeof(UpdateDataHeader) + config.behavior_size +
+ config.memory_pools_size};
+ std::memcpy(voice_resources.data(), input_params.data() + voice_resource_offset,
+ sizeof(VoiceResourceInformation) * voice_resources.size());
+
// Copy VoiceInfo structs
std::size_t voice_offset{sizeof(UpdateDataHeader) + config.behavior_size +
config.memory_pools_size + config.voice_resource_size};
@@ -220,14 +227,15 @@ void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) {
is_refresh_pending = true;
}
-std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_count,
- Core::Memory::Memory& memory) {
+std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(
+ std::size_t sample_count, Core::Memory::Memory& memory,
+ const VoiceChannelHolder& voice_resources) {
if (!IsPlaying()) {
return {};
}
if (is_refresh_pending) {
- RefreshBuffer(memory);
+ RefreshBuffer(memory, voice_resources);
}
const std::size_t max_size{samples.size() - offset};
@@ -271,7 +279,8 @@ void AudioRenderer::VoiceState::UpdateState() {
is_in_use = info.is_in_use;
}
-void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory) {
+void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory,
+ const VoiceChannelHolder& voice_resources) {
const auto wave_buffer_address = info.wave_buffer[wave_index].buffer_addr;
const auto wave_buffer_size = info.wave_buffer[wave_index].buffer_sz;
std::vector<s16> new_samples(wave_buffer_size / sizeof(s16));
@@ -296,17 +305,77 @@ void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory) {
}
switch (info.channel_count) {
- case 1:
+ case 1: {
// 1 channel is upsampled to 2 channel
samples.resize(new_samples.size() * 2);
+
for (std::size_t index = 0; index < new_samples.size(); ++index) {
- samples[index * 2] = new_samples[index];
- samples[index * 2 + 1] = new_samples[index];
+ auto sample = static_cast<float>(new_samples[index]);
+ if (voice_resources[0]->in_use) {
+ sample *= voice_resources[0]->mix_volumes[0];
+ }
+
+ samples[index * 2] = static_cast<s16>(sample * info.volume);
+ samples[index * 2 + 1] = static_cast<s16>(sample * info.volume);
}
break;
+ }
case 2: {
// 2 channel is played as is
samples = std::move(new_samples);
+ const std::size_t sample_count = (samples.size() / 2);
+ for (std::size_t index = 0; index < sample_count; ++index) {
+ const std::size_t index_l = index * 2;
+ const std::size_t index_r = index * 2 + 1;
+
+ auto sample_l = static_cast<float>(samples[index_l]);
+ auto sample_r = static_cast<float>(samples[index_r]);
+
+ if (voice_resources[0]->in_use) {
+ sample_l *= voice_resources[0]->mix_volumes[0];
+ }
+
+ if (voice_resources[1]->in_use) {
+ sample_r *= voice_resources[1]->mix_volumes[1];
+ }
+
+ samples[index_l] = static_cast<s16>(sample_l * info.volume);
+ samples[index_r] = static_cast<s16>(sample_r * info.volume);
+ }
+ break;
+ }
+ case 6: {
+ samples.resize((new_samples.size() / 6) * 2);
+ const std::size_t sample_count = samples.size() / 2;
+
+ for (std::size_t index = 0; index < sample_count; ++index) {
+ auto FL = static_cast<float>(new_samples[index * 6]);
+ auto FR = static_cast<float>(new_samples[index * 6 + 1]);
+ auto FC = static_cast<float>(new_samples[index * 6 + 2]);
+ auto BL = static_cast<float>(new_samples[index * 6 + 4]);
+ auto BR = static_cast<float>(new_samples[index * 6 + 5]);
+
+ if (voice_resources[0]->in_use) {
+ FL *= voice_resources[0]->mix_volumes[0];
+ }
+ if (voice_resources[1]->in_use) {
+ FR *= voice_resources[1]->mix_volumes[1];
+ }
+ if (voice_resources[2]->in_use) {
+ FC *= voice_resources[2]->mix_volumes[2];
+ }
+ if (voice_resources[4]->in_use) {
+ BL *= voice_resources[4]->mix_volumes[4];
+ }
+ if (voice_resources[5]->in_use) {
+ BR *= voice_resources[5]->mix_volumes[5];
+ }
+
+ samples[index * 2] =
+ static_cast<s16>((0.3694f * FL + 0.2612f * FC + 0.3694f * BL) * info.volume);
+ samples[index * 2 + 1] =
+ static_cast<s16>((0.3694f * FR + 0.2612f * FC + 0.3694f * BR) * info.volume);
+ }
break;
}
default:
@@ -352,11 +421,17 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
if (!voice.IsPlaying()) {
continue;
}
+ VoiceChannelHolder resources{};
+ for (u32 channel = 0; channel < voice.GetInfo().channel_count; channel++) {
+ const auto channel_resource_id = voice.GetInfo().voice_channel_resource_ids[channel];
+ resources[channel] = &voice_resources[channel_resource_id];
+ }
std::size_t offset{};
s64 samples_remaining{BUFFER_SIZE};
while (samples_remaining > 0) {
- const std::vector<s16> samples{voice.DequeueSamples(samples_remaining, memory)};
+ const std::vector<s16> samples{
+ voice.DequeueSamples(samples_remaining, memory, resources)};
if (samples.empty()) {
break;
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index b42770fae..1f9114c07 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -9,6 +9,7 @@
#include <vector>
#include "audio_core/behavior_info.h"
+#include "audio_core/common.h"
#include "audio_core/stream.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -116,6 +117,14 @@ struct WaveBuffer {
};
static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer has wrong size");
+struct VoiceResourceInformation {
+ s32_le id{};
+ std::array<float_le, MAX_MIX_BUFFERS> mix_volumes{};
+ bool in_use{};
+ INSERT_PADDING_BYTES(11);
+};
+static_assert(sizeof(VoiceResourceInformation) == 0x70, "VoiceResourceInformation has wrong size");
+
struct VoiceInfo {
u32_le id;
u32_le node_id;
@@ -244,6 +253,7 @@ private:
AudioRendererParameter worker_params;
std::shared_ptr<Kernel::WritableEvent> buffer_event;
std::vector<VoiceState> voices;
+ std::vector<VoiceResourceInformation> voice_resources;
std::vector<EffectState> effects;
std::unique_ptr<AudioOut> audio_out;
StreamPtr stream;
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index 98478b66b..7bb145c53 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41};
}
constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8');
+constexpr std::size_t MAX_MIX_BUFFERS = 24;
static constexpr u32 VersionFromRevision(u32_le rev) {
// "REV7" -> 7
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index d1ec8ff08..24b7a083c 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -123,6 +123,8 @@ add_library(common STATIC
lz4_compression.cpp
lz4_compression.h
math_util.h
+ memory_detect.cpp
+ memory_detect.h
memory_hook.cpp
memory_hook.h
microprofile.cpp
@@ -148,6 +150,8 @@ add_library(common STATIC
thread.h
thread_queue_list.h
threadsafe_queue.h
+ time_zone.cpp
+ time_zone.h
timer.cpp
timer.h
uint128.cpp
@@ -167,10 +171,12 @@ if(ARCHITECTURE_x86_64)
PRIVATE
x64/cpu_detect.cpp
x64/cpu_detect.h
+ x64/xbyak_abi.h
+ x64/xbyak_util.h
)
endif()
create_target_directory_groups(common)
target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile)
-target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd)
+target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)
diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp
new file mode 100644
index 000000000..3fdc309a2
--- /dev/null
+++ b/src/common/memory_detect.cpp
@@ -0,0 +1,60 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#ifdef _WIN32
+// clang-format off
+#include <windows.h>
+#include <sysinfoapi.h>
+// clang-format on
+#else
+#include <sys/types.h>
+#ifdef __APPLE__
+#include <sys/sysctl.h>
+#else
+#include <sys/sysinfo.h>
+#endif
+#endif
+
+#include "common/memory_detect.h"
+
+namespace Common {
+
+// Detects the RAM and Swapfile sizes
+static MemoryInfo Detect() {
+ MemoryInfo mem_info{};
+
+#ifdef _WIN32
+ MEMORYSTATUSEX memorystatus;
+ memorystatus.dwLength = sizeof(memorystatus);
+ GlobalMemoryStatusEx(&memorystatus);
+ mem_info.TotalPhysicalMemory = memorystatus.ullTotalPhys;
+ mem_info.TotalSwapMemory = memorystatus.ullTotalPageFile - mem_info.TotalPhysicalMemory;
+#elif defined(__APPLE__)
+ u64 ramsize;
+ struct xsw_usage vmusage;
+ std::size_t sizeof_ramsize = sizeof(ramsize);
+ std::size_t sizeof_vmusage = sizeof(vmusage);
+ // hw and vm are defined in sysctl.h
+ // https://github.com/apple/darwin-xnu/blob/master/bsd/sys/sysctl.h#L471
+ // sysctlbyname(const char *, void *, size_t *, void *, size_t);
+ sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, NULL, 0);
+ sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, NULL, 0);
+ mem_info.TotalPhysicalMemory = ramsize;
+ mem_info.TotalSwapMemory = vmusage.xsu_total;
+#else
+ struct sysinfo meminfo;
+ sysinfo(&meminfo);
+ mem_info.TotalPhysicalMemory = meminfo.totalram;
+ mem_info.TotalSwapMemory = meminfo.totalswap;
+#endif
+
+ return mem_info;
+}
+
+const MemoryInfo& GetMemInfo() {
+ static MemoryInfo mem_info = Detect();
+ return mem_info;
+}
+
+} // namespace Common \ No newline at end of file
diff --git a/src/common/memory_detect.h b/src/common/memory_detect.h
new file mode 100644
index 000000000..a73c0f3f4
--- /dev/null
+++ b/src/common/memory_detect.h
@@ -0,0 +1,22 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Common {
+
+struct MemoryInfo {
+ u64 TotalPhysicalMemory{};
+ u64 TotalSwapMemory{};
+};
+
+/**
+ * Gets the memory info of the host system
+ * @return Reference to a MemoryInfo struct with the physical and swap memory sizes in bytes
+ */
+const MemoryInfo& GetMemInfo();
+
+} // namespace Common \ No newline at end of file
diff --git a/src/common/time_zone.cpp b/src/common/time_zone.cpp
new file mode 100644
index 000000000..ce239eb63
--- /dev/null
+++ b/src/common/time_zone.cpp
@@ -0,0 +1,49 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <chrono>
+#include <iomanip>
+#include <sstream>
+
+#include "common/logging/log.h"
+#include "common/time_zone.h"
+
+namespace Common::TimeZone {
+
+std::string GetDefaultTimeZone() {
+ return "GMT";
+}
+
+static std::string GetOsTimeZoneOffset() {
+ const std::time_t t{std::time(nullptr)};
+ const std::tm tm{*std::localtime(&t)};
+
+ std::stringstream ss;
+ ss << std::put_time(&tm, "%z"); // Get the current timezone offset, e.g. "-400", as a string
+
+ return ss.str();
+}
+
+static int ConvertOsTimeZoneOffsetToInt(const std::string& timezone) {
+ try {
+ return std::stoi(timezone);
+ } catch (const std::invalid_argument&) {
+ LOG_CRITICAL(Common, "invalid_argument with {}!", timezone);
+ return 0;
+ } catch (const std::out_of_range&) {
+ LOG_CRITICAL(Common, "out_of_range with {}!", timezone);
+ return 0;
+ }
+}
+
+std::chrono::seconds GetCurrentOffsetSeconds() {
+ const int offset{ConvertOsTimeZoneOffsetToInt(GetOsTimeZoneOffset())};
+
+ int seconds{(offset / 100) * 60 * 60}; // Convert hour component to seconds
+ seconds += (offset % 100) * 60; // Convert minute component to seconds
+
+ return std::chrono::seconds{seconds};
+}
+
+} // namespace Common::TimeZone
diff --git a/src/common/time_zone.h b/src/common/time_zone.h
new file mode 100644
index 000000000..945daa09c
--- /dev/null
+++ b/src/common/time_zone.h
@@ -0,0 +1,18 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+#include <string>
+
+namespace Common::TimeZone {
+
+/// Gets the default timezone, i.e. "GMT"
+std::string GetDefaultTimeZone();
+
+/// Gets the offset of the current timezone (from the default), in seconds
+std::chrono::seconds GetCurrentOffsetSeconds();
+
+} // namespace Common::TimeZone
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
new file mode 100644
index 000000000..794da8a52
--- /dev/null
+++ b/src/common/x64/xbyak_abi.h
@@ -0,0 +1,266 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+#include <initializer_list>
+#include <xbyak.h>
+#include "common/assert.h"
+
+namespace Common::X64 {
+
+inline int RegToIndex(const Xbyak::Reg& reg) {
+ using Kind = Xbyak::Reg::Kind;
+ ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
+ "RegSet only support GPRs and XMM registers.");
+ ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
+ return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
+}
+
+inline Xbyak::Reg64 IndexToReg64(int reg_index) {
+ ASSERT(reg_index < 16);
+ return Xbyak::Reg64(reg_index);
+}
+
+inline Xbyak::Xmm IndexToXmm(int reg_index) {
+ ASSERT(reg_index >= 16 && reg_index < 32);
+ return Xbyak::Xmm(reg_index - 16);
+}
+
+inline Xbyak::Reg IndexToReg(int reg_index) {
+ if (reg_index < 16) {
+ return IndexToReg64(reg_index);
+ } else {
+ return IndexToXmm(reg_index);
+ }
+}
+
+inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
+ std::bitset<32> bits;
+ for (const Xbyak::Reg& reg : regs) {
+ bits[RegToIndex(reg)] = true;
+ }
+ return bits;
+}
+
+const std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
+const std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
+
+#ifdef _WIN32
+
+// Microsoft x64 ABI
+const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
+const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
+const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
+const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
+const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
+
+const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rcx,
+ Xbyak::util::rdx,
+ Xbyak::util::r8,
+ Xbyak::util::r9,
+ Xbyak::util::r10,
+ Xbyak::util::r11,
+ // XMMs
+ Xbyak::util::xmm0,
+ Xbyak::util::xmm1,
+ Xbyak::util::xmm2,
+ Xbyak::util::xmm3,
+ Xbyak::util::xmm4,
+ Xbyak::util::xmm5,
+});
+
+const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rbx,
+ Xbyak::util::rsi,
+ Xbyak::util::rdi,
+ Xbyak::util::rbp,
+ Xbyak::util::r12,
+ Xbyak::util::r13,
+ Xbyak::util::r14,
+ Xbyak::util::r15,
+ // XMMs
+ Xbyak::util::xmm6,
+ Xbyak::util::xmm7,
+ Xbyak::util::xmm8,
+ Xbyak::util::xmm9,
+ Xbyak::util::xmm10,
+ Xbyak::util::xmm11,
+ Xbyak::util::xmm12,
+ Xbyak::util::xmm13,
+ Xbyak::util::xmm14,
+ Xbyak::util::xmm15,
+});
+
+constexpr size_t ABI_SHADOW_SPACE = 0x20;
+
+#else
+
+// System V x86-64 ABI
+const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
+const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
+const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
+const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
+const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
+
+const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rcx,
+ Xbyak::util::rdx,
+ Xbyak::util::rdi,
+ Xbyak::util::rsi,
+ Xbyak::util::r8,
+ Xbyak::util::r9,
+ Xbyak::util::r10,
+ Xbyak::util::r11,
+ // XMMs
+ Xbyak::util::xmm0,
+ Xbyak::util::xmm1,
+ Xbyak::util::xmm2,
+ Xbyak::util::xmm3,
+ Xbyak::util::xmm4,
+ Xbyak::util::xmm5,
+ Xbyak::util::xmm6,
+ Xbyak::util::xmm7,
+ Xbyak::util::xmm8,
+ Xbyak::util::xmm9,
+ Xbyak::util::xmm10,
+ Xbyak::util::xmm11,
+ Xbyak::util::xmm12,
+ Xbyak::util::xmm13,
+ Xbyak::util::xmm14,
+ Xbyak::util::xmm15,
+});
+
+const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rbx,
+ Xbyak::util::rbp,
+ Xbyak::util::r12,
+ Xbyak::util::r13,
+ Xbyak::util::r14,
+ Xbyak::util::r15,
+});
+
+constexpr size_t ABI_SHADOW_SPACE = 0;
+
+#endif
+
+inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
+ size_t needed_frame_size, s32* out_subtraction,
+ s32* out_xmm_offset) {
+ const auto count = (regs & ABI_ALL_GPRS).count();
+ rsp_alignment -= count * 8;
+ size_t subtraction = 0;
+ const auto xmm_count = (regs & ABI_ALL_XMMS).count();
+ if (xmm_count) {
+ // If we have any XMMs to save, we must align the stack here.
+ subtraction = rsp_alignment & 0xF;
+ }
+ subtraction += 0x10 * xmm_count;
+ size_t xmm_base_subtraction = subtraction;
+ subtraction += needed_frame_size;
+ subtraction += ABI_SHADOW_SPACE;
+ // Final alignment.
+ rsp_alignment -= subtraction;
+ subtraction += rsp_alignment & 0xF;
+
+ *out_subtraction = (s32)subtraction;
+ *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
+}
+
+inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+ size_t rsp_alignment, size_t needed_frame_size = 0) {
+ s32 subtraction, xmm_offset;
+ ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+ for (std::size_t i = 0; i < regs.size(); ++i) {
+ if (regs[i] && ABI_ALL_GPRS[i]) {
+ code.push(IndexToReg64(static_cast<int>(i)));
+ }
+ }
+ if (subtraction != 0) {
+ code.sub(code.rsp, subtraction);
+ }
+
+ for (int i = 0; i < regs.count(); i++) {
+ if (regs.test(i) & ABI_ALL_GPRS.test(i)) {
+ code.push(IndexToReg64(i));
+ }
+ }
+
+ for (std::size_t i = 0; i < regs.size(); ++i) {
+ if (regs[i] && ABI_ALL_XMMS[i]) {
+ code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
+ xmm_offset += 0x10;
+ }
+ }
+
+ return ABI_SHADOW_SPACE;
+}
+
+inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+ size_t rsp_alignment, size_t needed_frame_size = 0) {
+ s32 subtraction, xmm_offset;
+ ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+
+ for (std::size_t i = 0; i < regs.size(); ++i) {
+ if (regs[i] && ABI_ALL_XMMS[i]) {
+ code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
+ xmm_offset += 0x10;
+ }
+ }
+
+ if (subtraction != 0) {
+ code.add(code.rsp, subtraction);
+ }
+
+ // GPRs need to be popped in reverse order
+ for (int i = 15; i >= 0; i--) {
+ if (regs[i]) {
+ code.pop(IndexToReg64(i));
+ }
+ }
+}
+
+inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+ size_t rsp_alignment,
+ size_t needed_frame_size = 0) {
+ s32 subtraction, xmm_offset;
+ ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+
+ for (std::size_t i = 0; i < regs.size(); ++i) {
+ if (regs[i] && ABI_ALL_GPRS[i]) {
+ code.push(IndexToReg64(static_cast<int>(i)));
+ }
+ }
+
+ if (subtraction != 0) {
+ code.sub(code.rsp, subtraction);
+ }
+
+ return ABI_SHADOW_SPACE;
+}
+
+inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+ size_t rsp_alignment, size_t needed_frame_size = 0) {
+ s32 subtraction, xmm_offset;
+ ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+
+ if (subtraction != 0) {
+ code.add(code.rsp, subtraction);
+ }
+
+ // GPRs need to be popped in reverse order
+ for (int i = 15; i >= 0; i--) {
+ if (regs[i]) {
+ code.pop(IndexToReg64(i));
+ }
+ }
+}
+
+} // namespace Common::X64
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
new file mode 100644
index 000000000..df17f8cbe
--- /dev/null
+++ b/src/common/x64/xbyak_util.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include <xbyak.h>
+#include "common/x64/xbyak_abi.h"
+
+namespace Common::X64 {
+
+// Constants for use with cmpps/cmpss
+enum {
+ CMP_EQ = 0,
+ CMP_LT = 1,
+ CMP_LE = 2,
+ CMP_UNORD = 3,
+ CMP_NEQ = 4,
+ CMP_NLT = 5,
+ CMP_NLE = 6,
+ CMP_ORD = 7,
+};
+
+constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
+ const u64 distance = target - (ref + 5);
+ return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
+}
+
+inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
+ return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
+}
+
+template <typename T>
+inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
+ static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
+ size_t addr = reinterpret_cast<size_t>(f);
+ if (IsWithin2G(code, addr)) {
+ code.call(f);
+ } else {
+ // ABI_RETURN is a safe temp register to use before a call
+ code.mov(ABI_RETURN, addr);
+ code.call(ABI_RETURN);
+ }
+}
+
+} // namespace Common::X64
diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp
index f155a1341..63cd2eead 100644
--- a/src/core/file_sys/control_metadata.cpp
+++ b/src/core/file_sys/control_metadata.cpp
@@ -95,6 +95,10 @@ u32 NACP::GetSupportedLanguages() const {
return raw.supported_languages;
}
+u64 NACP::GetDeviceSaveDataSize() const {
+ return raw.device_save_data_size;
+}
+
std::vector<u8> NACP::GetRawBytes() const {
std::vector<u8> out(sizeof(RawNACP));
std::memcpy(out.data(), &raw, sizeof(RawNACP));
diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h
index 2d8c251ac..e37b2fadf 100644
--- a/src/core/file_sys/control_metadata.h
+++ b/src/core/file_sys/control_metadata.h
@@ -113,6 +113,7 @@ public:
u32 GetSupportedLanguages() const;
std::vector<u8> GetRawBytes() const;
bool GetUserAccountSwitchLock() const;
+ u64 GetDeviceSaveDataSize() const;
private:
RawNACP raw{};
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index b93aa6935..c47ff863e 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -10,6 +10,7 @@
#include "common/file_util.h"
#include "common/hex_util.h"
#include "common/logging/log.h"
+#include "common/string_util.h"
#include "core/core.h"
#include "core/file_sys/content_archive.h"
#include "core/file_sys/control_metadata.h"
@@ -48,6 +49,23 @@ std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]);
}
+std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
+ std::string_view name) {
+#ifdef _WIN32
+ return dir->GetSubdirectory(name);
+#else
+ const auto subdirs = dir->GetSubdirectories();
+ for (const auto& subdir : subdirs) {
+ std::string dir_name = Common::ToLower(subdir->GetName());
+ if (dir_name == name) {
+ return subdir;
+ }
+ }
+
+ return nullptr;
+#endif
+}
+
PatchManager::PatchManager(u64 title_id) : title_id(title_id) {}
PatchManager::~PatchManager() = default;
@@ -104,7 +122,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end())
continue;
- auto exefs_dir = subdir->GetSubdirectory("exefs");
+ auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
if (exefs_dir != nullptr)
layers.push_back(std::move(exefs_dir));
}
@@ -130,7 +148,7 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend())
continue;
- auto exefs_dir = subdir->GetSubdirectory("exefs");
+ auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
if (exefs_dir != nullptr) {
for (const auto& file : exefs_dir->GetFiles()) {
if (file->GetExtension() == "ips") {
@@ -295,7 +313,7 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList(
continue;
}
- auto cheats_dir = subdir->GetSubdirectory("cheats");
+ auto cheats_dir = FindSubdirectoryCaseless(subdir, "cheats");
if (cheats_dir != nullptr) {
auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
if (res.has_value()) {
@@ -340,11 +358,11 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
continue;
}
- auto romfs_dir = subdir->GetSubdirectory("romfs");
+ auto romfs_dir = FindSubdirectoryCaseless(subdir, "romfs");
if (romfs_dir != nullptr)
layers.push_back(std::move(romfs_dir));
- auto ext_dir = subdir->GetSubdirectory("romfs_ext");
+ auto ext_dir = FindSubdirectoryCaseless(subdir, "romfs_ext");
if (ext_dir != nullptr)
layers_ext.push_back(std::move(ext_dir));
}
@@ -470,7 +488,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
for (const auto& mod : mod_dir->GetSubdirectories()) {
std::string types;
- const auto exefs_dir = mod->GetSubdirectory("exefs");
+ const auto exefs_dir = FindSubdirectoryCaseless(mod, "exefs");
if (IsDirValidAndNonEmpty(exefs_dir)) {
bool ips = false;
bool ipswitch = false;
@@ -494,9 +512,9 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
if (layeredfs)
AppendCommaIfNotEmpty(types, "LayeredExeFS");
}
- if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs")))
+ if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "romfs")))
AppendCommaIfNotEmpty(types, "LayeredFS");
- if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats")))
+ if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "cheats")))
AppendCommaIfNotEmpty(types, "Cheats");
if (types.empty())
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index ec6db524d..f4cb918dd 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -29,6 +29,11 @@ enum class TitleVersionFormat : u8 {
std::string FormatTitleVersion(u32 version,
TitleVersionFormat format = TitleVersionFormat::ThreeElements);
+// Returns a directory with name matching name case-insensitive. Returns nullptr if directory
+// doesn't have a directory with name.
+std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
+ std::string_view name);
+
// A centralized class to manage patches to games.
class PatchManager {
public:
diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp
index f3def93ab..adfd2c1a4 100644
--- a/src/core/file_sys/savedata_factory.cpp
+++ b/src/core/file_sys/savedata_factory.cpp
@@ -57,7 +57,8 @@ void PrintSaveDataDescriptorWarnings(SaveDataDescriptor meta) {
bool ShouldSaveDataBeAutomaticallyCreated(SaveDataSpaceId space, const SaveDataDescriptor& desc) {
return desc.type == SaveDataType::CacheStorage || desc.type == SaveDataType::TemporaryStorage ||
(space == SaveDataSpaceId::NandUser && ///< Normal Save Data -- Current Title & User
- desc.type == SaveDataType::SaveData && desc.title_id == 0 && desc.save_id == 0);
+ (desc.type == SaveDataType::SaveData || desc.type == SaveDataType::DeviceSaveData) &&
+ desc.title_id == 0 && desc.save_id == 0);
}
} // Anonymous namespace
@@ -139,8 +140,10 @@ std::string SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ
u128 user_id, u64 save_id) {
// According to switchbrew, if a save is of type SaveData and the title id field is 0, it should
// be interpreted as the title id of the current process.
- if (type == SaveDataType::SaveData && title_id == 0) {
- title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
+ if (type == SaveDataType::SaveData || type == SaveDataType::DeviceSaveData) {
+ if (title_id == 0) {
+ title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
+ }
}
std::string out = GetSaveDataSpaceIdPath(space);
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
index 6e22f97b0..aa313de66 100644
--- a/src/core/file_sys/system_archive/system_version.cpp
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -12,17 +12,17 @@ namespace SystemVersionData {
// This section should reflect the best system version to describe yuzu's HLE api.
// TODO(DarkLordZach): Update when HLE gets better.
-constexpr u8 VERSION_MAJOR = 5;
-constexpr u8 VERSION_MINOR = 1;
-constexpr u8 VERSION_MICRO = 0;
+constexpr u8 VERSION_MAJOR = 10;
+constexpr u8 VERSION_MINOR = 0;
+constexpr u8 VERSION_MICRO = 2;
-constexpr u8 REVISION_MAJOR = 3;
+constexpr u8 REVISION_MAJOR = 1;
constexpr u8 REVISION_MINOR = 0;
constexpr char PLATFORM_STRING[] = "NX";
-constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd";
-constexpr char DISPLAY_VERSION[] = "5.1.0";
-constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0";
+constexpr char VERSION_HASH[] = "f90143fa8bbc061d4f68c35f95f04f8080c0ecdc";
+constexpr char DISPLAY_VERSION[] = "10.0.2";
+constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 10.0.2-1.0";
} // namespace SystemVersionData
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index eda466a5d..9a081fbd4 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -46,7 +46,7 @@ private:
EmuWindow::EmuWindow() {
// TODO: Find a better place to set this.
config.min_client_area_size =
- std::make_pair(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
+ std::make_pair(Layout::MinimumSize::Width, Layout::MinimumSize::Height);
active_config = config;
touch_state = std::make_shared<TouchState>();
Input::RegisterFactory<Input::TouchDevice>("emu_window", touch_state);
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index 15ecfb13d..91ecc30ab 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -8,6 +8,11 @@
namespace Layout {
+namespace MinimumSize {
+constexpr u32 Width = 640;
+constexpr u32 Height = 360;
+} // namespace MinimumSize
+
namespace ScreenUndocked {
constexpr u32 Width = 1280;
constexpr u32 Height = 720;
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index f6503fe2f..20c331b77 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -767,7 +767,7 @@ FSP_SRV::FSP_SRV(FileSystemController& fsc, const Core::Reporter& reporter)
{1014, nullptr, "OutputMultiProgramTagAccessLog"},
{1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
{1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
- {1200, nullptr, "OpenMultiCommitManager"},
+ {1200, &FSP_SRV::OpenMultiCommitManager, "OpenMultiCommitManager"},
{1300, nullptr, "OpenBisWiper"},
};
// clang-format on
@@ -988,4 +988,40 @@ void FSP_SRV::GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx) {
rb.Push(access_log_program_index);
}
+class IMultiCommitManager final : public ServiceFramework<IMultiCommitManager> {
+public:
+ explicit IMultiCommitManager() : ServiceFramework("IMultiCommitManager") {
+ static const FunctionInfo functions[] = {
+ {1, &IMultiCommitManager::Add, "Add"},
+ {2, &IMultiCommitManager::Commit, "Commit"},
+ };
+ RegisterHandlers(functions);
+ }
+
+private:
+ FileSys::VirtualFile backend;
+
+ void Add(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_FS, "(STUBBED) called");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+ }
+
+ void Commit(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_FS, "(STUBBED) called");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+ }
+};
+
+void FSP_SRV::OpenMultiCommitManager(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_FS, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushIpcInterface<IMultiCommitManager>(std::make_shared<IMultiCommitManager>());
+}
+
} // namespace Service::FileSystem
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index d52b55999..dfb3e395b 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -50,6 +50,7 @@ private:
void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx);
void GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx);
+ void OpenMultiCommitManager(Kernel::HLERequestContext& ctx);
FileSystemController& fsc;
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index 358cb9329..9a8d354ba 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -38,10 +38,11 @@ void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing,
cur_entry.sampling_number = last_entry.sampling_number + 1;
cur_entry.sampling_number2 = cur_entry.sampling_number;
+ cur_entry.key.fill(0);
+ cur_entry.modifier = 0;
+
for (std::size_t i = 0; i < keyboard_keys.size(); ++i) {
- for (std::size_t k = 0; k < KEYS_PER_BYTE; ++k) {
- cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << k);
- }
+ cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << (i % KEYS_PER_BYTE));
}
for (std::size_t i = 0; i < keyboard_mods.size(); ++i) {
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index 767158444..01ddcdbd6 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -177,7 +177,8 @@ private:
void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_NIFM, "called");
- ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c, "NetworkProfileData is not the correct size");
+ ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c,
+ "SfNetworkProfileData is not the correct size");
u128 uuid{};
auto buffer = ctx.ReadBuffer();
std::memcpy(&uuid, buffer.data() + 8, sizeof(u128));
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index f1e3d832a..caca80dde 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -138,9 +138,7 @@ u32 BufferQueue::Query(QueryType type) {
switch (type) {
case QueryType::NativeWindowFormat:
- // TODO(Subv): Use an enum for this
- static constexpr u32 FormatABGR8 = 1;
- return FormatABGR8;
+ return static_cast<u32>(PixelFormat::RGBA8888);
}
UNIMPLEMENTED();
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index d5f31e567..8a837e5aa 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -66,6 +66,16 @@ public:
Rotate270 = 0x07,
};
+ enum class PixelFormat : u32 {
+ RGBA8888 = 1,
+ RGBX8888 = 2,
+ RGB888 = 3,
+ RGB565 = 4,
+ BGRA8888 = 5,
+ RGBA5551 = 6,
+ RRGBA4444 = 7,
+ };
+
struct Buffer {
enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3 };
diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp
index 9d6c55865..b4dfe45e5 100644
--- a/src/core/hle/service/time/time_manager.cpp
+++ b/src/core/hle/service/time/time_manager.cpp
@@ -5,6 +5,7 @@
#include <chrono>
#include <ctime>
+#include "common/time_zone.h"
#include "core/hle/service/time/ephemeral_network_system_clock_context_writer.h"
#include "core/hle/service/time/local_system_clock_context_writer.h"
#include "core/hle/service/time/network_system_clock_context_writer.h"
@@ -21,8 +22,16 @@ static std::chrono::seconds GetSecondsSinceEpoch() {
Settings::values.custom_rtc_differential;
}
+static s64 GetExternalTimeZoneOffset() {
+ // With "auto" timezone setting, we use the external system's timezone offset
+ if (Settings::GetTimeZoneString() == "auto") {
+ return Common::TimeZone::GetCurrentOffsetSeconds().count();
+ }
+ return 0;
+}
+
static s64 GetExternalRtcValue() {
- return GetSecondsSinceEpoch().count();
+ return GetSecondsSinceEpoch().count() + GetExternalTimeZoneOffset();
}
TimeManager::TimeManager(Core::System& system)
diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp
index 78d4acd95..c070d6e97 100644
--- a/src/core/hle/service/time/time_zone_content_manager.cpp
+++ b/src/core/hle/service/time/time_zone_content_manager.cpp
@@ -5,6 +5,7 @@
#include <sstream>
#include "common/logging/log.h"
+#include "common/time_zone.h"
#include "core/core.h"
#include "core/file_sys/content_archive.h"
#include "core/file_sys/nca_metadata.h"
@@ -14,6 +15,7 @@
#include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/time/time_manager.h"
#include "core/hle/service/time/time_zone_content_manager.h"
+#include "core/settings.h"
namespace Service::Time::TimeZone {
@@ -68,10 +70,22 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) {
TimeZoneContentManager::TimeZoneContentManager(TimeManager& time_manager, Core::System& system)
: system{system}, location_name_cache{BuildLocationNameCache(system)} {
- if (FileSys::VirtualFile vfs_file; GetTimeZoneInfoFile("GMT", vfs_file) == RESULT_SUCCESS) {
+
+ std::string location_name;
+ const auto timezone_setting = Settings::GetTimeZoneString();
+ if (timezone_setting == "auto") {
+ location_name = Common::TimeZone::GetDefaultTimeZone();
+ } else if (timezone_setting == "default") {
+ location_name = location_name;
+ } else {
+ location_name = timezone_setting;
+ }
+
+ if (FileSys::VirtualFile vfs_file;
+ GetTimeZoneInfoFile(location_name, vfs_file) == RESULT_SUCCESS) {
const auto time_point{
time_manager.GetStandardSteadyClockCore().GetCurrentTimePoint(system)};
- time_manager.SetupTimeZoneManager("GMT", time_point, location_name_cache.size(), {},
+ time_manager.SetupTimeZoneManager(location_name, time_point, location_name_cache.size(), {},
vfs_file);
} else {
time_zone_manager.MarkAsInitialized();
@@ -114,6 +128,12 @@ ResultCode TimeZoneContentManager::GetTimeZoneInfoFile(const std::string& locati
vfs_file = zoneinfo_dir->GetFile(location_name);
if (!vfs_file) {
+ LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"! Using default timezone.",
+ time_zone_binary_titleid, location_name);
+ vfs_file = zoneinfo_dir->GetFile(Common::TimeZone::GetDefaultTimeZone());
+ }
+
+ if (!vfs_file) {
LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"!", time_zone_binary_titleid,
location_name);
return ERROR_TIME_NOT_FOUND;
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 2b0bdc4d3..4edff9cd8 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -63,6 +63,21 @@ const std::array<const char*, NumMouseButtons> mapping = {{
Values values = {};
+std::string GetTimeZoneString() {
+ static constexpr std::array<const char*, 46> timezones{{
+ "auto", "default", "CET", "CST6CDT", "Cuba", "EET", "Egypt", "Eire",
+ "EST", "EST5EDT", "GB", "GB-Eire", "GMT", "GMT+0", "GMT-0", "GMT0",
+ "Greenwich", "Hongkong", "HST", "Iceland", "Iran", "Israel", "Jamaica", "Japan",
+ "Kwajalein", "Libya", "MET", "MST", "MST7MDT", "Navajo", "NZ", "NZ-CHAT",
+ "Poland", "Portugal", "PRC", "PST8PDT", "ROC", "ROK", "Singapore", "Turkey",
+ "UCT", "Universal", "UTC", "W-SU", "WET", "Zulu",
+ }};
+
+ ASSERT(Settings::values.time_zone_index < timezones.size());
+
+ return timezones[Settings::values.time_zone_index];
+}
+
void Apply() {
GDBStub::SetServerPort(values.gdbstub_port);
GDBStub::ToggleServer(values.use_gdbstub);
@@ -87,6 +102,7 @@ void LogSettings() {
LogSetting("System_CurrentUser", Settings::values.current_user);
LogSetting("System_LanguageIndex", Settings::values.language_index);
LogSetting("System_RegionIndex", Settings::values.region_index);
+ LogSetting("System_TimeZoneIndex", Settings::values.time_zone_index);
LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
@@ -96,6 +112,7 @@ void LogSettings() {
LogSetting("Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation);
LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
+ LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy);
LogSetting("Audio_OutputEngine", Settings::values.sink_id);
LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
diff --git a/src/core/settings.h b/src/core/settings.h
index 163900f0b..78eb33737 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -394,6 +394,7 @@ struct Values {
s32 current_user;
s32 language_index;
s32 region_index;
+ s32 time_zone_index;
s32 sound_index;
// Controls
@@ -445,6 +446,7 @@ struct Values {
GPUAccuracy gpu_accuracy;
bool use_asynchronous_gpu_emulation;
bool use_vsync;
+ bool use_assembly_shaders;
bool force_30fps_mode;
bool use_fast_gpu_time;
@@ -490,6 +492,9 @@ struct Values {
bool IsGPULevelExtreme();
bool IsGPULevelHigh();
+std::string GetTimeZoneString();
+
void Apply();
void LogSettings();
+
} // namespace Settings
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 1c3b03a1c..c781b3cfc 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -201,6 +201,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation);
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
+ AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d23c53843..d6ee82836 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,6 +1,7 @@
add_library(video_core STATIC
buffer_cache/buffer_block.h
buffer_cache/buffer_cache.h
+ buffer_cache/map_interval.cpp
buffer_cache/map_interval.h
dirty_flags.cpp
dirty_flags.h
@@ -228,7 +229,7 @@ endif()
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad)
+target_link_libraries(video_core PRIVATE glad xbyak)
if (ENABLE_VULKAN)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 56e570994..d9a4a1b4d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -12,11 +12,12 @@
#include <utility>
#include <vector>
-#include <boost/icl/interval_map.hpp>
+#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_set.hpp>
-#include <boost/range/iterator_range.hpp>
+#include <boost/intrusive/set.hpp>
#include "common/alignment.h"
+#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
@@ -29,10 +30,12 @@
namespace VideoCommon {
-using MapInterval = std::shared_ptr<MapIntervalBase>;
-
template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
class BufferCache {
+ using IntervalSet = boost::icl::interval_set<VAddr>;
+ using IntervalType = typename IntervalSet::interval_type;
+ using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
+
public:
using BufferInfo = std::pair<BufferType, u64>;
@@ -40,14 +43,12 @@ public:
bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
- const std::optional<VAddr> cpu_addr_opt =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
-
+ const auto& memory_manager = system.GPU().MemoryManager();
+ const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
if (!cpu_addr_opt) {
return {GetEmptyBuffer(size), 0};
}
-
- VAddr cpu_addr = *cpu_addr_opt;
+ const VAddr cpu_addr = *cpu_addr_opt;
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
@@ -77,16 +78,19 @@ public:
}
}
- auto block = GetBlock(cpu_addr, size);
- auto map = MapAddress(block, gpu_addr, cpu_addr, size);
+ OwnerBuffer block = GetBlock(cpu_addr, size);
+ MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
+ if (!map) {
+ return {GetEmptyBuffer(size), 0};
+ }
if (is_written) {
map->MarkAsModified(true, GetModifiedTicks());
if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
MarkForAsyncFlush(map);
}
- if (!map->IsWritten()) {
- map->MarkAsWritten(true);
- MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+ if (!map->is_written) {
+ map->is_written = true;
+ MarkRegionAsWritten(map->start, map->end - 1);
}
}
@@ -132,12 +136,11 @@ public:
void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
- std::vector<MapInterval> objects = GetMapsInRange(addr, size);
- std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
- return a->GetModificationTick() < b->GetModificationTick();
- });
- for (auto& object : objects) {
- if (object->IsModified() && object->IsRegistered()) {
+ VectorMapInterval objects = GetMapsInRange(addr, size);
+ std::sort(objects.begin(), objects.end(),
+ [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
+ for (MapInterval* object : objects) {
+ if (object->is_modified && object->is_registered) {
mutex.unlock();
FlushMap(object);
mutex.lock();
@@ -148,9 +151,9 @@ public:
bool MustFlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
- const std::vector<MapInterval> objects = GetMapsInRange(addr, size);
- return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) {
- return map->IsModified() && map->IsRegistered();
+ const VectorMapInterval objects = GetMapsInRange(addr, size);
+ return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
+ return map->is_modified && map->is_registered;
});
}
@@ -158,9 +161,8 @@ public:
void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex};
- std::vector<MapInterval> objects = GetMapsInRange(addr, size);
- for (auto& object : objects) {
- if (object->IsRegistered()) {
+ for (auto& object : GetMapsInRange(addr, size)) {
+ if (object->is_registered) {
Unregister(object);
}
}
@@ -169,10 +171,10 @@ public:
void OnCPUWrite(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
- for (const auto& object : GetMapsInRange(addr, size)) {
- if (object->IsMemoryMarked() && object->IsRegistered()) {
+ for (MapInterval* object : GetMapsInRange(addr, size)) {
+ if (object->is_memory_marked && object->is_registered) {
UnmarkMemory(object);
- object->SetSyncPending(true);
+ object->is_sync_pending = true;
marked_for_unregister.emplace_back(object);
}
}
@@ -181,9 +183,9 @@ public:
void SyncGuestHost() {
std::lock_guard lock{mutex};
- for (const auto& object : marked_for_unregister) {
- if (object->IsRegistered()) {
- object->SetSyncPending(false);
+ for (auto& object : marked_for_unregister) {
+ if (object->is_registered) {
+ object->is_sync_pending = false;
Unregister(object);
}
}
@@ -192,9 +194,9 @@ public:
void CommitAsyncFlushes() {
if (uncommitted_flushes) {
- auto commit_list = std::make_shared<std::list<MapInterval>>();
- for (auto& map : *uncommitted_flushes) {
- if (map->IsRegistered() && map->IsModified()) {
+ auto commit_list = std::make_shared<std::list<MapInterval*>>();
+ for (MapInterval* map : *uncommitted_flushes) {
+ if (map->is_registered && map->is_modified) {
// TODO(Blinkhawk): Implement backend asynchronous flushing
// AsyncFlushMap(map)
commit_list->push_back(map);
@@ -228,8 +230,8 @@ public:
committed_flushes.pop_front();
return;
}
- for (MapInterval& map : *flush_list) {
- if (map->IsRegistered()) {
+ for (MapInterval* map : *flush_list) {
+ if (map->is_registered) {
// TODO(Blinkhawk): Replace this for reading the asynchronous flush
FlushMap(map);
}
@@ -265,61 +267,60 @@ protected:
}
/// Register an object into the cache
- void Register(const MapInterval& new_map, bool inherit_written = false) {
- const VAddr cpu_addr = new_map->GetStart();
+ MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
+ const VAddr cpu_addr = new_map.start;
if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
- new_map->GetGpuAddress());
- return;
+ new_map.gpu_addr);
+ return nullptr;
}
- const std::size_t size = new_map->GetEnd() - new_map->GetStart();
- new_map->MarkAsRegistered(true);
- const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
- mapped_addresses.insert({interval, new_map});
+ const std::size_t size = new_map.end - new_map.start;
+ new_map.is_registered = true;
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
- new_map->SetMemoryMarked(true);
+ new_map.is_memory_marked = true;
if (inherit_written) {
- MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
- new_map->MarkAsWritten(true);
+ MarkRegionAsWritten(new_map.start, new_map.end - 1);
+ new_map.is_written = true;
}
+ MapInterval* const storage = mapped_addresses_allocator.Allocate();
+ *storage = new_map;
+ mapped_addresses.insert(*storage);
+ return storage;
}
- void UnmarkMemory(const MapInterval& map) {
- if (!map->IsMemoryMarked()) {
+ void UnmarkMemory(MapInterval* map) {
+ if (!map->is_memory_marked) {
return;
}
- const std::size_t size = map->GetEnd() - map->GetStart();
- rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
- map->SetMemoryMarked(false);
+ const std::size_t size = map->end - map->start;
+ rasterizer.UpdatePagesCachedCount(map->start, size, -1);
+ map->is_memory_marked = false;
}
/// Unregisters an object from the cache
- void Unregister(const MapInterval& map) {
+ void Unregister(MapInterval* map) {
UnmarkMemory(map);
- map->MarkAsRegistered(false);
- if (map->IsSyncPending()) {
+ map->is_registered = false;
+ if (map->is_sync_pending) {
+ map->is_sync_pending = false;
marked_for_unregister.remove(map);
- map->SetSyncPending(false);
}
- if (map->IsWritten()) {
- UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+ if (map->is_written) {
+ UnmarkRegionAsWritten(map->start, map->end - 1);
}
- const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
- mapped_addresses.erase(delete_interval);
+ const auto it = mapped_addresses.find(*map);
+ ASSERT(it != mapped_addresses.end());
+ mapped_addresses.erase(it);
+ mapped_addresses_allocator.Release(map);
}
private:
- MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
- return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
- }
-
- MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
- const std::size_t size) {
- std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
+ MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr,
+ std::size_t size) {
+ const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) {
auto& memory_manager = system.GPU().MemoryManager();
const VAddr cpu_addr_end = cpu_addr + size;
- MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
if (memory_manager.IsGranularRange(gpu_addr, size)) {
u8* host_ptr = memory_manager.GetPointer(gpu_addr);
UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
@@ -328,13 +329,12 @@ private:
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
}
- Register(new_map);
- return new_map;
+ return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
}
const VAddr cpu_addr_end = cpu_addr + size;
if (overlaps.size() == 1) {
- MapInterval& current_map = overlaps[0];
+ MapInterval* const current_map = overlaps[0];
if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
return current_map;
}
@@ -344,35 +344,39 @@ private:
bool write_inheritance = false;
bool modified_inheritance = false;
// Calculate new buffer parameters
- for (auto& overlap : overlaps) {
- new_start = std::min(overlap->GetStart(), new_start);
- new_end = std::max(overlap->GetEnd(), new_end);
- write_inheritance |= overlap->IsWritten();
- modified_inheritance |= overlap->IsModified();
+ for (MapInterval* overlap : overlaps) {
+ new_start = std::min(overlap->start, new_start);
+ new_end = std::max(overlap->end, new_end);
+ write_inheritance |= overlap->is_written;
+ modified_inheritance |= overlap->is_modified;
}
GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
for (auto& overlap : overlaps) {
Unregister(overlap);
}
UpdateBlock(block, new_start, new_end, overlaps);
- MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
+
+ const MapInterval new_map{new_start, new_end, new_gpu_addr};
+ MapInterval* const map = Register(new_map, write_inheritance);
+ if (!map) {
+ return nullptr;
+ }
if (modified_inheritance) {
- new_map->MarkAsModified(true, GetModifiedTicks());
+ map->MarkAsModified(true, GetModifiedTicks());
if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
- MarkForAsyncFlush(new_map);
+ MarkForAsyncFlush(map);
}
}
- Register(new_map, write_inheritance);
- return new_map;
+ return map;
}
void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
- std::vector<MapInterval>& overlaps) {
+ const VectorMapInterval& overlaps) {
const IntervalType base_interval{start, end};
IntervalSet interval_set{};
interval_set.add(base_interval);
for (auto& overlap : overlaps) {
- const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
+ const IntervalType subtract{overlap->start, overlap->end};
interval_set.subtract(subtract);
}
for (auto& interval : interval_set) {
@@ -386,18 +390,24 @@ private:
}
}
- std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
+ VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
+ VectorMapInterval result;
if (size == 0) {
- return {};
+ return result;
}
- std::vector<MapInterval> objects{};
- const IntervalType interval{addr, addr + size};
- for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
- objects.push_back(pair.second);
+ const VAddr addr_end = addr + size;
+ auto it = mapped_addresses.lower_bound(addr);
+ if (it != mapped_addresses.begin()) {
+ --it;
}
-
- return objects;
+ while (it != mapped_addresses.end() && it->start < addr_end) {
+ if (it->Overlaps(addr, addr_end)) {
+ result.push_back(&*it);
+ }
+ ++it;
+ }
+ return result;
}
/// Returns a ticks counter used for tracking when cached objects were last modified
@@ -405,12 +415,12 @@ private:
return ++modified_ticks;
}
- void FlushMap(MapInterval map) {
- std::size_t size = map->GetEnd() - map->GetStart();
- OwnerBuffer block = blocks[map->GetStart() >> block_page_bits];
+ void FlushMap(MapInterval* map) {
+ const std::size_t size = map->end - map->start;
+ OwnerBuffer block = blocks[map->start >> block_page_bits];
staging_buffer.resize(size);
- DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
- system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
+ DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data());
+ system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
map->MarkAsModified(false, 0);
}
@@ -515,7 +525,7 @@ private:
} else {
written_pages[page_start] = 1;
}
- page_start++;
+ ++page_start;
}
}
@@ -531,7 +541,7 @@ private:
written_pages.erase(it);
}
}
- page_start++;
+ ++page_start;
}
}
@@ -542,14 +552,14 @@ private:
if (written_pages.count(page_start) > 0) {
return true;
}
- page_start++;
+ ++page_start;
}
return false;
}
- void MarkForAsyncFlush(MapInterval& map) {
+ void MarkForAsyncFlush(MapInterval* map) {
if (!uncommitted_flushes) {
- uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
+ uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
}
uncommitted_flushes->insert(map);
}
@@ -566,10 +576,9 @@ private:
u64 buffer_offset = 0;
u64 buffer_offset_base = 0;
- using IntervalSet = boost::icl::interval_set<VAddr>;
- using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
- using IntervalType = typename IntervalCache::interval_type;
- IntervalCache mapped_addresses;
+ MapIntervalAllocator mapped_addresses_allocator;
+ boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
+ mapped_addresses;
static constexpr u64 write_page_bit = 11;
std::unordered_map<u64, u32> written_pages;
@@ -583,10 +592,10 @@ private:
u64 modified_ticks = 0;
std::vector<u8> staging_buffer;
- std::list<MapInterval> marked_for_unregister;
+ std::list<MapInterval*> marked_for_unregister;
- std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
- std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;
+ std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
+ std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
std::recursive_mutex mutex;
};
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
new file mode 100644
index 000000000..62587e18a
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.cpp
@@ -0,0 +1,33 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <memory>
+
+#include "video_core/buffer_cache/map_interval.h"
+
+namespace VideoCommon {
+
+MapIntervalAllocator::MapIntervalAllocator() {
+ FillFreeList(first_chunk);
+}
+
+MapIntervalAllocator::~MapIntervalAllocator() = default;
+
+void MapIntervalAllocator::AllocateNewChunk() {
+ *new_chunk = std::make_unique<Chunk>();
+ FillFreeList(**new_chunk);
+ new_chunk = &(*new_chunk)->next;
+}
+
+void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
+ const std::size_t old_size = free_list.size();
+ free_list.resize(old_size + chunk.data.size());
+ std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
+ [](MapInterval& interval) { return &interval; });
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 29d8b26f3..fe0bcd1d8 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -4,104 +4,89 @@
#pragma once
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include <boost/intrusive/set_hook.hpp>
+
#include "common/common_types.h"
#include "video_core/gpu.h"
namespace VideoCommon {
-class MapIntervalBase {
-public:
- MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
- : start{start}, end{end}, gpu_addr{gpu_addr} {}
-
- void SetCpuAddress(VAddr new_cpu_addr) {
- cpu_addr = new_cpu_addr;
- }
-
- VAddr GetCpuAddress() const {
- return cpu_addr;
- }
-
- GPUVAddr GetGpuAddress() const {
- return gpu_addr;
- }
-
- bool IsInside(const VAddr other_start, const VAddr other_end) const {
- return (start <= other_start && other_end <= end);
- }
-
- bool operator==(const MapIntervalBase& rhs) const {
- return std::tie(start, end) == std::tie(rhs.start, rhs.end);
- }
-
- bool operator!=(const MapIntervalBase& rhs) const {
- return !operator==(rhs);
- }
+struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
+ MapInterval() = default;
- void MarkAsRegistered(const bool registered) {
- is_registered = registered;
- }
+ /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
- bool IsRegistered() const {
- return is_registered;
- }
+ explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
+ : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
- void SetMemoryMarked(bool is_memory_marked_) {
- is_memory_marked = is_memory_marked_;
+ bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
+ return start <= other_start && other_end <= end;
}
- bool IsMemoryMarked() const {
- return is_memory_marked;
+ bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
+ return start < other_end && other_start < end;
}
- void SetSyncPending(bool is_sync_pending_) {
- is_sync_pending = is_sync_pending_;
- }
+ void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
+ is_modified = is_modified_;
+ ticks = ticks_;
+ }
+
+ boost::intrusive::set_member_hook<> member_hook_;
+ VAddr start = 0;
+ VAddr end = 0;
+ GPUVAddr gpu_addr = 0;
+ u64 ticks = 0;
+ bool is_written = false;
+ bool is_modified = false;
+ bool is_registered = false;
+ bool is_memory_marked = false;
+ bool is_sync_pending = false;
+};
- bool IsSyncPending() const {
- return is_sync_pending;
+struct MapIntervalCompare {
+ constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
+ return lhs.start < rhs.start;
}
+};
- VAddr GetStart() const {
- return start;
- }
+class MapIntervalAllocator {
+public:
+ MapIntervalAllocator();
+ ~MapIntervalAllocator();
- VAddr GetEnd() const {
- return end;
+ MapInterval* Allocate() {
+ if (free_list.empty()) {
+ AllocateNewChunk();
+ }
+ MapInterval* const interval = free_list.back();
+ free_list.pop_back();
+ return interval;
}
- void MarkAsModified(const bool is_modified_, const u64 tick) {
- is_modified = is_modified_;
- ticks = tick;
+ void Release(MapInterval* interval) {
+ free_list.push_back(interval);
}
- bool IsModified() const {
- return is_modified;
- }
+private:
+ struct Chunk {
+ std::unique_ptr<Chunk> next;
+ std::array<MapInterval, 0x8000> data;
+ };
- u64 GetModificationTick() const {
- return ticks;
- }
+ void AllocateNewChunk();
- void MarkAsWritten(const bool is_written_) {
- is_written = is_written_;
- }
+ void FillFreeList(Chunk& chunk);
- bool IsWritten() const {
- return is_written;
- }
+ std::vector<MapInterval*> free_list;
+ std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
-private:
- VAddr start;
- VAddr end;
- GPUVAddr gpu_addr;
- VAddr cpu_addr{};
- bool is_written{};
- bool is_modified{};
- bool is_registered{};
- bool is_memory_marked{};
- bool is_sync_pending{};
- u64 ticks{};
+ Chunk first_chunk;
};
} // namespace VideoCommon
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index bdc023d54..f2f96ac33 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -54,9 +54,7 @@ bool DmaPusher::Step() {
return true;
});
const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
- GPUVAddr dma_get = command_list_header.addr;
- GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
- bool non_main = command_list_header.is_non_main;
+ const GPUVAddr dma_get = command_list_header.addr;
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
@@ -133,11 +131,6 @@ bool DmaPusher::Step() {
index++;
}
- if (!non_main) {
- // TODO (degasus): This is dead code, as dma_mget is never read.
- dma_mget = dma_put;
- }
-
return true;
}
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index e8b714e94..efa90d170 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -102,7 +102,6 @@ private:
DmaState dma_state{};
bool dma_increment_once{};
- GPUVAddr dma_mget{}; ///< main pushbuffer last read address
bool ib_enable{true}; ///< IB mode enabled
std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 024c9e43b..004f6b261 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -457,8 +457,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
- ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
- "Units other than CROP are unimplemented");
+ if (regs.query.query_get.unit != Regs::QueryUnit::Crop) {
+ LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented");
+ }
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release:
@@ -534,8 +535,8 @@ void Maxwell3D::ProcessCounterReset() {
rasterizer.ResetCounter(QueryType::SamplesPassed);
break;
default:
- LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
- static_cast<int>(regs.counter_reset));
+ LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
+ static_cast<int>(regs.counter_reset));
break;
}
}
@@ -592,8 +593,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
system.GPU().GetTicks());
return {};
default:
- UNIMPLEMENTED_MSG("Unimplemented query select type {}",
- static_cast<u32>(regs.query.query_get.select.Value()));
+ LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
+ static_cast<u32>(regs.query.query_get.select.Value()));
return 1;
}
}
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 8dae754d4..e7cb87589 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -168,18 +168,22 @@ enum class Pred : u64 {
};
enum class PredCondition : u64 {
- LessThan = 1,
- Equal = 2,
- LessEqual = 3,
- GreaterThan = 4,
- NotEqual = 5,
- GreaterEqual = 6,
- LessThanWithNan = 9,
- LessEqualWithNan = 11,
- GreaterThanWithNan = 12,
- NotEqualWithNan = 13,
- GreaterEqualWithNan = 14,
- // TODO(Subv): Other condition types
+ F = 0, // Always false
+ LT = 1, // Ordered less than
+ EQ = 2, // Ordered equal
+ LE = 3, // Ordered less than or equal
+ GT = 4, // Ordered greater than
+ NE = 5, // Ordered not equal
+ GE = 6, // Ordered greater than or equal
+ NUM = 7, // Ordered
+ NAN_ = 8, // Unordered
+ LTU = 9, // Unordered less than
+ EQU = 10, // Unordered equal
+ LEU = 11, // Unordered less than or equal
+ GTU = 12, // Unordered greater than
+ NEU = 13, // Unordered not equal
+ GEU = 14, // Unordered greater than or equal
+ T = 15, // Always true
};
enum class PredOperation : u64 {
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 22987751e..096ee337c 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -56,9 +56,27 @@ public:
last_modified_ticks = cache.GetModifiedTicks();
}
+ void SetMemoryMarked(bool is_memory_marked_) {
+ is_memory_marked = is_memory_marked_;
+ }
+
+ bool IsMemoryMarked() const {
+ return is_memory_marked;
+ }
+
+ void SetSyncPending(bool is_sync_pending_) {
+ is_sync_pending = is_sync_pending_;
+ }
+
+ bool IsSyncPending() const {
+ return is_sync_pending;
+ }
+
private:
bool is_registered{}; ///< Whether the object is currently registered with the cache
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
+ bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
+ bool is_sync_pending{}; ///< Whether the object is pending deletion.
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
};
@@ -94,6 +112,30 @@ public:
}
}
+ void OnCPUWrite(VAddr addr, std::size_t size) {
+ std::lock_guard lock{mutex};
+
+ for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
+ if (object->IsRegistered()) {
+ UnmarkMemory(object);
+ object->SetSyncPending(true);
+ marked_for_unregister.emplace_back(object);
+ }
+ }
+ }
+
+ void SyncGuestHost() {
+ std::lock_guard lock{mutex};
+
+ for (const auto& object : marked_for_unregister) {
+ if (object->IsRegistered()) {
+ object->SetSyncPending(false);
+ Unregister(object);
+ }
+ }
+ marked_for_unregister.clear();
+ }
+
/// Invalidates everything in the cache
void InvalidateAll() {
std::lock_guard lock{mutex};
@@ -120,19 +162,32 @@ protected:
interval_cache.add({GetInterval(object), ObjectSet{object}});
map_cache.insert({object->GetCpuAddr(), object});
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
+ object->SetMemoryMarked(true);
}
/// Unregisters an object from the cache
virtual void Unregister(const T& object) {
std::lock_guard lock{mutex};
+ UnmarkMemory(object);
object->SetIsRegistered(false);
- rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
+ if (object->IsSyncPending()) {
+ marked_for_unregister.remove(object);
+ object->SetSyncPending(false);
+ }
const VAddr addr = object->GetCpuAddr();
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
map_cache.erase(addr);
}
+ void UnmarkMemory(const T& object) {
+ if (!object->IsMemoryMarked()) {
+ return;
+ }
+ rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
+ object->SetMemoryMarked(false);
+ }
+
/// Returns a ticks counter used for tracking when cached objects were last modified
u64 GetModifiedTicks() {
std::lock_guard lock{mutex};
@@ -194,4 +249,5 @@ private:
IntervalCache interval_cache; ///< Cache of objects
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
VideoCore::RasterizerInterface& rasterizer;
+ std::list<T> marked_for_unregister;
};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index d2cab50bd..9964ea894 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,6 +8,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index d83dca25a..e1b245288 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -13,6 +13,7 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
+#include "core/settings.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -165,8 +166,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
- const bool is_intel = vendor == "Intel";
- const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -181,12 +180,17 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
- has_broken_compute = is_intel_proprietary;
has_fast_buffer_sub_data = is_nvidia;
+ use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
+ GLAD_GL_NV_compute_program5;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
+
+ if (Settings::values.use_assembly_shaders && !use_assembly_shaders) {
+ LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
+ }
}
Device::Device(std::nullptr_t) {
@@ -199,7 +203,6 @@ Device::Device(std::nullptr_t) {
has_image_load_formatted = true;
has_variable_aoffi = true;
has_component_indexing_bug = false;
- has_broken_compute = false;
has_precise_bug = false;
}
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index a55050cb5..683ed9002 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -80,14 +80,14 @@ public:
return has_precise_bug;
}
- bool HasBrokenCompute() const {
- return has_broken_compute;
- }
-
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
+ bool UseAssemblyShaders() const {
+ return use_assembly_shaders;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -105,8 +105,8 @@ private:
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
- bool has_broken_compute{};
bool has_fast_buffer_sub_data{};
+ bool use_assembly_shaders{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 99ddcb3f8..ec5421afa 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
namespace OpenGL {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 69dcf952f..61cf99b9d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -94,17 +94,30 @@ void oglEnable(GLenum cap, bool state) {
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- ScreenInfo& info, GLShader::ProgramManager& program_manager,
- StateTracker& state_tracker)
- : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
+ const Device& device, ScreenInfo& info,
+ ProgramManager& program_manager, StateTracker& state_tracker)
+ : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
+ state_tracker},
shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
CheckExtensions();
+
+ if (device.UseAssemblyShaders()) {
+ glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
+ for (const GLuint cbuf : staging_cbufs) {
+ glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
+ nullptr, 0);
+ }
+ }
}
-RasterizerOpenGL::~RasterizerOpenGL() {}
+RasterizerOpenGL::~RasterizerOpenGL() {
+ if (device.UseAssemblyShaders()) {
+ glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
+ }
+}
void RasterizerOpenGL::CheckExtensions() {
if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
@@ -230,6 +243,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = system.GPU().Maxwell3D();
+ std::size_t num_ssbos = 0;
u32 clip_distances = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -261,6 +275,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
Shader shader{shader_cache.GetStageProgram(program)};
+ if (device.UseAssemblyShaders()) {
+ // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
+ // all stages share the same bindings.
+ const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
+ ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
+ num_ssbos += num_stage_ssbos;
+ }
+
// Stage indices are 0 - 5
const std::size_t stage = index == 0 ? 0 : index - 1;
SetupDrawConstBuffers(stage, shader);
@@ -526,6 +548,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncFramebufferSRGB();
buffer_cache.Acquire();
+ current_cbuf = 0;
std::size_t buffer_size = CalculateVertexArraysSize();
@@ -535,9 +558,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
// Uniform space for the 5 shader stages
- buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
- (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) *
- Maxwell::MaxShaderStage;
+ buffer_size =
+ Common::AlignUp<std::size_t>(buffer_size, 4) +
+ (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers *
@@ -558,12 +581,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
// Setup emulation uniform buffer.
- GLShader::MaxwellUniformData ubo;
- ubo.SetFromRegs(gpu);
- const auto [buffer, offset] =
- buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
- glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
- static_cast<GLsizeiptr>(sizeof(ubo)));
+ if (!device.UseAssemblyShaders()) {
+ MaxwellUniformData ubo;
+ ubo.SetFromRegs(gpu);
+ const auto [buffer, offset] =
+ buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
+ glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
+ static_cast<GLsizeiptr>(sizeof(ubo)));
+ }
// Setup shaders and their used resources.
texture_cache.GuardSamplers(true);
@@ -630,16 +655,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- if (device.HasBrokenCompute()) {
- return;
- }
-
buffer_cache.Acquire();
+ current_cbuf = 0;
auto kernel = shader_cache.GetComputeKernel(code_addr);
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
- program_manager.BindComputeShader(kernel->GetHandle());
const std::size_t buffer_size =
Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -652,6 +673,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Unmap();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ program_manager.BindCompute(kernel->GetHandle());
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@@ -701,15 +723,15 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
return;
}
texture_cache.OnCPUWrite(addr, size);
- shader_cache.InvalidateRegion(addr, size);
+ shader_cache.OnCPUWrite(addr, size);
buffer_cache.OnCPUWrite(addr, size);
- query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
+ shader_cache.SyncGuestHost();
}
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
@@ -812,14 +834,20 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
+ static constexpr std::array PARAMETER_LUT = {
+ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
+ GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
+ GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
+
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
const auto& shader_stage = stages[stage_index];
- u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
+ u32 binding =
+ device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer;
for (const auto& entry : shader->GetEntries().const_buffers) {
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
- SetupConstBuffer(binding++, buffer, entry);
+ SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry);
}
}
@@ -835,16 +863,21 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
buffer.address = config.Address();
buffer.size = config.size;
buffer.enabled = mask[entry.GetIndex()];
- SetupConstBuffer(binding++, buffer, entry);
+ SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry);
}
}
-void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
+void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
+ const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
- sizeof(float));
+ if (device.UseAssemblyShaders()) {
+ glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
+ } else {
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding,
+ buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
+ }
return;
}
@@ -853,9 +886,19 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
const auto alignment = device.GetUniformBufferAlignment();
- const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
- device.HasFastBufferSubData());
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
+ auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
+ device.HasFastBufferSubData());
+ if (!device.UseAssemblyShaders()) {
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
+ return;
+ }
+ if (offset != 0) {
+ const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
+ glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
+ cbuf = staging_cbuf;
+ offset = 0;
+ }
+ glBindBufferRangeNV(stage, binding, cbuf, offset, size);
}
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@@ -863,7 +906,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
- u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
+ u32 binding =
+ device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : shader->GetEntries().global_memory_entries) {
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
@@ -929,16 +973,12 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
glBindTextureUnit(binding, 0);
return;
}
- glBindTextureUnit(binding, view->GetTexture());
-
- if (view->GetSurfaceParams().IsBuffer()) {
- return;
+ const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
+ texture.tic.z_source, texture.tic.w_source);
+ glBindTextureUnit(binding, handle);
+ if (!view->GetSurfaceParams().IsBuffer()) {
+ glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
}
- // Apply swizzle to textures that are not buffers.
- view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
- texture.tic.w_source);
-
- glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
}
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
@@ -967,14 +1007,11 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
return;
}
- if (!tic.IsBuffer()) {
- view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
- }
if (entry.is_written) {
view->MarkAsModified(texture_cache.Tick());
}
- glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE,
- view->GetFormat());
+ const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+ glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
}
void RasterizerOpenGL::SyncViewport() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index b94c65907..87f7fe159 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -56,8 +56,8 @@ struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- ScreenInfo& info, GLShader::ProgramManager& program_manager,
- StateTracker& state_tracker);
+ const Device& device, ScreenInfo& info,
+ ProgramManager& program_manager, StateTracker& state_tracker);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -106,7 +106,7 @@ private:
void SetupComputeConstBuffers(const Shader& kernel);
/// Configures a constant buffer.
- void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
+ void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command.
@@ -224,7 +224,7 @@ private:
void SetupShaders(GLenum primitive_mode);
- const Device device;
+ const Device& device;
TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache;
@@ -236,7 +236,7 @@ private:
Core::System& system;
ScreenInfo& screen_info;
- GLShader::ProgramManager& program_manager;
+ ProgramManager& program_manager;
StateTracker& state_tracker;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
@@ -248,6 +248,12 @@ private:
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
enabled_transform_feedback_buffers;
+ static constexpr std::size_t NUM_CONSTANT_BUFFERS =
+ Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
+ Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
+ std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
+ std::size_t current_cbuf = 0;
+
/// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 97803d480..a787e27d2 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -125,6 +125,15 @@ void OGLProgram::Release() {
handle = 0;
}
+void OGLAssemblyProgram::Release() {
+ if (handle == 0) {
+ return;
+ }
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteProgramsARB(1, &handle);
+ handle = 0;
+}
+
void OGLPipeline::Create() {
if (handle != 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index de93f4212..f8b322227 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -167,6 +167,22 @@ public:
GLuint handle = 0;
};
+class OGLAssemblyProgram : private NonCopyable {
+public:
+ OGLAssemblyProgram() = default;
+
+ OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
+ ~OGLAssemblyProgram() {
+ Release();
+ }
+
+ /// Deletes the internal OpenGL resource
+ void Release();
+
+ GLuint handle = 0;
+};
+
class OGLPipeline : private NonCopyable {
public:
OGLPipeline() = default;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 9759a7078..4cd0f36cf 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -97,6 +97,24 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
return {};
}
+constexpr GLenum AssemblyEnum(ShaderType shader_type) {
+ switch (shader_type) {
+ case ShaderType::Vertex:
+ return GL_VERTEX_PROGRAM_NV;
+ case ShaderType::TesselationControl:
+ return GL_TESS_CONTROL_PROGRAM_NV;
+ case ShaderType::TesselationEval:
+ return GL_TESS_EVALUATION_PROGRAM_NV;
+ case ShaderType::Geometry:
+ return GL_GEOMETRY_PROGRAM_NV;
+ case ShaderType::Fragment:
+ return GL_FRAGMENT_PROGRAM_NV;
+ case ShaderType::Compute:
+ return GL_COMPUTE_PROGRAM_NV;
+ }
+ return {};
+}
+
std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
}
@@ -120,18 +138,43 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
return registry;
}
-std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
- u64 unique_identifier, const ShaderIR& ir,
- const Registry& registry, bool hint_retrievable = false) {
+ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
+ const ShaderIR& ir, const Registry& registry,
+ bool hint_retrievable = false) {
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
LOG_INFO(Render_OpenGL, "{}", shader_id);
- const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
- OGLShader shader;
- shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
+ auto program = std::make_shared<ProgramHandle>();
+
+ if (device.UseAssemblyShaders()) {
+ const std::string arb = "Not implemented";
+
+ GLuint& arb_prog = program->assembly_program.handle;
+
+// Commented out functions signal OpenGL errors but are compatible with apitrace.
+// Use them only to capture and replay on apitrace.
+#if 0
+ glGenProgramsNV(1, &arb_prog);
+ glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
+ reinterpret_cast<const GLubyte*>(arb.data()));
+#else
+ glGenProgramsARB(1, &arb_prog);
+ glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
+ static_cast<GLsizei>(arb.size()), arb.data());
+#endif
+ const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
+ if (err && *err) {
+ LOG_CRITICAL(Render_OpenGL, "{}", err);
+ LOG_INFO(Render_OpenGL, "\n{}", arb);
+ }
+ } else {
+ const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
+ OGLShader shader;
+ shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
+
+ program->source_program.Create(true, hint_retrievable, shader.handle);
+ }
- auto program = std::make_shared<OGLProgram>();
- program->Create(true, hint_retrievable, shader.handle);
return program;
}
@@ -153,15 +196,22 @@ std::unordered_set<GLenum> GetSupportedFormats() {
CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
- ShaderEntries entries, std::shared_ptr<OGLProgram> program)
+ ShaderEntries entries, ProgramSharedPtr program_)
: RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
- size_in_bytes{size_in_bytes}, program{std::move(program)} {}
+ size_in_bytes{size_in_bytes}, program{std::move(program_)} {
+ // Assign either the assembly program or source program. We can't have both.
+ handle = program->assembly_program.handle;
+ if (handle == 0) {
+ handle = program->source_program.handle;
+ }
+ ASSERT(handle != 0);
+}
CachedShader::~CachedShader() = default;
GLuint CachedShader::GetHandle() const {
DEBUG_ASSERT(registry->IsConsistent());
- return program->handle;
+ return handle;
}
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
@@ -239,7 +289,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
- const std::vector gl_cache = disk_cache.LoadPrecompiled();
+ std::vector<ShaderDiskCachePrecompiled> gl_cache;
+ if (!device.UseAssemblyShaders()) {
+ // Only load precompiled cache when we are not using assembly shaders
+ gl_cache = disk_cache.LoadPrecompiled();
+ }
const auto supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to
@@ -278,7 +332,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
auto registry = MakeRegistry(entry);
const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
- std::shared_ptr<OGLProgram> program;
+ ProgramSharedPtr program;
if (precompiled_entry) {
// If the shader is precompiled, attempt to load it with
program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
@@ -332,6 +386,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
+ if (device.UseAssemblyShaders()) {
+ // Don't store precompiled binaries for assembly shaders.
+ return;
+ }
+
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
// before precompiling them
@@ -339,7 +398,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const u64 id = (*transferable)[i].unique_identifier;
const auto it = find_precompiled(id);
if (it == gl_cache.end()) {
- const GLuint program = runtime_cache.at(id).program->handle;
+ const GLuint program = runtime_cache.at(id).program->source_program.handle;
disk_cache.SavePrecompiled(id, program);
precompiled_cache_altered = true;
}
@@ -350,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
-std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
+ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats) {
if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
@@ -358,15 +417,15 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
return {};
}
- auto program = std::make_shared<OGLProgram>();
- program->handle = glCreateProgram();
- glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
- glProgramBinary(program->handle, precompiled_entry.binary_format,
- precompiled_entry.binary.data(),
+ auto program = std::make_shared<ProgramHandle>();
+ GLuint& handle = program->source_program.handle;
+ handle = glCreateProgram();
+ glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+ glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
static_cast<GLsizei>(precompiled_entry.binary.size()));
GLint link_status;
- glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
+ glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
if (link_status == GL_FALSE) {
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
return {};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 91690b470..b2ae8d7f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -43,8 +43,14 @@ struct UnspecializedShader;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+struct ProgramHandle {
+ OGLProgram source_program;
+ OGLAssemblyProgram assembly_program;
+};
+using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
+
struct PrecompiledShader {
- std::shared_ptr<OGLProgram> program;
+ ProgramSharedPtr program;
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
};
@@ -87,12 +93,13 @@ public:
private:
explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
- ShaderEntries entries, std::shared_ptr<OGLProgram> program);
+ ShaderEntries entries, ProgramSharedPtr program);
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
std::size_t size_in_bytes = 0;
- std::shared_ptr<OGLProgram> program;
+ ProgramSharedPtr program;
+ GLuint handle = 0;
};
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -115,7 +122,7 @@ protected:
void FlushObjectInner(const Shader& object) override {}
private:
- std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
+ ProgramSharedPtr GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 99fd4ae2c..9cb115959 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1538,7 +1538,9 @@ private:
Expression target;
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no op
+ // Writing to Register::ZeroIndex is a no op but we still have to visit the source
+ // as it might have side effects.
+ code.AddLine("{};", Visit(src).GetCode());
return {};
}
target = {GetRegister(gpr->GetIndex()), Type::Float};
@@ -1840,34 +1842,40 @@ private:
Type::HalfFloat};
}
- template <Type type>
- Expression LogicalLessThan(Operation operation) {
- return GenerateBinaryInfix(operation, "<", Type::Bool, type, type);
- }
+ template <const std::string_view& op, Type type, bool unordered = false>
+ Expression Comparison(Operation operation) {
+ static_assert(!unordered || type == Type::Float);
- template <Type type>
- Expression LogicalEqual(Operation operation) {
- return GenerateBinaryInfix(operation, "==", Type::Bool, type, type);
- }
+ const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
- template <Type type>
- Expression LogicalLessEqual(Operation operation) {
- return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type);
- }
-
- template <Type type>
- Expression LogicalGreaterThan(Operation operation) {
- return GenerateBinaryInfix(operation, ">", Type::Bool, type, type);
+ if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
+ // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's
+ // and Nvidia's proprietary stacks. Manually force an ordered comparison.
+ return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
+ VisitOperand(operation, 0).AsFloat(),
+ VisitOperand(operation, 1).AsFloat()),
+ Type::Bool};
+ }
+ if constexpr (!unordered) {
+ return expr;
+ }
+ // Unordered comparisons are always true for NaN operands.
+ return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
+ VisitOperand(operation, 0).AsFloat(),
+ VisitOperand(operation, 1).AsFloat()),
+ Type::Bool};
}
- template <Type type>
- Expression LogicalNotEqual(Operation operation) {
- return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type);
+ Expression FOrdered(Operation operation) {
+ return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
+ VisitOperand(operation, 1).AsFloat()),
+ Type::Bool};
}
- template <Type type>
- Expression LogicalGreaterEqual(Operation operation) {
- return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type);
+ Expression FUnordered(Operation operation) {
+ return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
+ VisitOperand(operation, 1).AsFloat()),
+ Type::Bool};
}
Expression LogicalAddCarry(Operation operation) {
@@ -2303,6 +2311,18 @@ private:
return {"gl_SubGroupInvocationARB", Type::Uint};
}
+ template <const std::string_view& comparison>
+ Expression ThreadMask(Operation) {
+ if (device.HasWarpIntrinsics()) {
+ return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
+ }
+ if (device.HasShaderBallot()) {
+ return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
+ }
+ LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
+ return {"0U", Type::Uint};
+ }
+
Expression ShuffleIndexed(Operation operation) {
std::string value = VisitOperand(operation, 0).AsFloat();
@@ -2315,7 +2335,21 @@ private:
return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
}
- Expression MemoryBarrierGL(Operation) {
+ Expression Barrier(Operation) {
+ if (!ir.IsDecompiled()) {
+ LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
+ return {};
+ }
+ code.AddLine("barrier();");
+ return {};
+ }
+
+ Expression MemoryBarrierGroup(Operation) {
+ code.AddLine("groupMemoryBarrier();");
+ return {};
+ }
+
+ Expression MemoryBarrierGlobal(Operation) {
code.AddLine("memoryBarrier();");
return {};
}
@@ -2324,6 +2358,19 @@ private:
Func() = delete;
~Func() = delete;
+ static constexpr std::string_view LessThan = "<";
+ static constexpr std::string_view Equal = "==";
+ static constexpr std::string_view LessEqual = "<=";
+ static constexpr std::string_view GreaterThan = ">";
+ static constexpr std::string_view NotEqual = "!=";
+ static constexpr std::string_view GreaterEqual = ">=";
+
+ static constexpr std::string_view Eq = "Eq";
+ static constexpr std::string_view Ge = "Ge";
+ static constexpr std::string_view Gt = "Gt";
+ static constexpr std::string_view Le = "Le";
+ static constexpr std::string_view Lt = "Lt";
+
static constexpr std::string_view Add = "Add";
static constexpr std::string_view Min = "Min";
static constexpr std::string_view Max = "Max";
@@ -2425,27 +2472,34 @@ private:
&GLSLDecompiler::LogicalPick2,
&GLSLDecompiler::LogicalAnd2,
- &GLSLDecompiler::LogicalLessThan<Type::Float>,
- &GLSLDecompiler::LogicalEqual<Type::Float>,
- &GLSLDecompiler::LogicalLessEqual<Type::Float>,
- &GLSLDecompiler::LogicalGreaterThan<Type::Float>,
- &GLSLDecompiler::LogicalNotEqual<Type::Float>,
- &GLSLDecompiler::LogicalGreaterEqual<Type::Float>,
- &GLSLDecompiler::LogicalFIsNan,
-
- &GLSLDecompiler::LogicalLessThan<Type::Int>,
- &GLSLDecompiler::LogicalEqual<Type::Int>,
- &GLSLDecompiler::LogicalLessEqual<Type::Int>,
- &GLSLDecompiler::LogicalGreaterThan<Type::Int>,
- &GLSLDecompiler::LogicalNotEqual<Type::Int>,
- &GLSLDecompiler::LogicalGreaterEqual<Type::Int>,
-
- &GLSLDecompiler::LogicalLessThan<Type::Uint>,
- &GLSLDecompiler::LogicalEqual<Type::Uint>,
- &GLSLDecompiler::LogicalLessEqual<Type::Uint>,
- &GLSLDecompiler::LogicalGreaterThan<Type::Uint>,
- &GLSLDecompiler::LogicalNotEqual<Type::Uint>,
- &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
+ &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
+ &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
+ &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
+ &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
+ &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
+ &GLSLDecompiler::FOrdered,
+ &GLSLDecompiler::FUnordered,
+ &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
+ &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
+ &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
+ &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
+ &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
+ &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,
+
+ &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
+ &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
+ &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
+ &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
+ &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
+ &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,
+
+ &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,
&GLSLDecompiler::LogicalAddCarry,
@@ -2534,9 +2588,16 @@ private:
&GLSLDecompiler::VoteEqual,
&GLSLDecompiler::ThreadId,
+ &GLSLDecompiler::ThreadMask<Func::Eq>,
+ &GLSLDecompiler::ThreadMask<Func::Ge>,
+ &GLSLDecompiler::ThreadMask<Func::Gt>,
+ &GLSLDecompiler::ThreadMask<Func::Le>,
+ &GLSLDecompiler::ThreadMask<Func::Lt>,
&GLSLDecompiler::ShuffleIndexed,
- &GLSLDecompiler::MemoryBarrierGL,
+ &GLSLDecompiler::Barrier,
+ &GLSLDecompiler::MemoryBarrierGroup,
+ &GLSLDecompiler::MemoryBarrierGlobal,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 9c7b0adbd..8e754fa90 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -6,45 +6,109 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
-namespace OpenGL::GLShader {
+namespace OpenGL {
-ProgramManager::ProgramManager() = default;
+ProgramManager::ProgramManager(const Device& device) {
+ use_assembly_programs = device.UseAssemblyShaders();
+ if (use_assembly_programs) {
+ glEnable(GL_COMPUTE_PROGRAM_NV);
+ } else {
+ graphics_pipeline.Create();
+ glBindProgramPipeline(graphics_pipeline.handle);
+ }
+}
ProgramManager::~ProgramManager() = default;
-void ProgramManager::Create() {
- graphics_pipeline.Create();
- glBindProgramPipeline(graphics_pipeline.handle);
+void ProgramManager::BindCompute(GLuint program) {
+ if (use_assembly_programs) {
+ glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
+ } else {
+ is_graphics_bound = false;
+ glUseProgram(program);
+ }
}
void ProgramManager::BindGraphicsPipeline() {
- if (!is_graphics_bound) {
- is_graphics_bound = true;
- glUseProgram(0);
+ if (use_assembly_programs) {
+ UpdateAssemblyPrograms();
+ } else {
+ UpdateSourcePrograms();
}
+}
- // Avoid updating the pipeline when values have no changed
- if (old_state == current_state) {
- return;
+void ProgramManager::BindHostPipeline(GLuint pipeline) {
+ if (use_assembly_programs) {
+ if (geometry_enabled) {
+ geometry_enabled = false;
+ old_state.geometry = 0;
+ glDisable(GL_GEOMETRY_PROGRAM_NV);
+ }
+ } else {
+ if (!is_graphics_bound) {
+ glUseProgram(0);
+ }
}
+ glBindProgramPipeline(pipeline);
+}
- // Workaround for AMD bug
- static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
- GL_FRAGMENT_SHADER_BIT};
- const GLuint handle = graphics_pipeline.handle;
- glUseProgramStages(handle, all_used_stages, 0);
- glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
- glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
- glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
+void ProgramManager::RestoreGuestPipeline() {
+ if (use_assembly_programs) {
+ glBindProgramPipeline(0);
+ } else {
+ glBindProgramPipeline(graphics_pipeline.handle);
+ }
+}
+
+void ProgramManager::UpdateAssemblyPrograms() {
+ const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
+ if (current == old) {
+ return;
+ }
+ if (current == 0) {
+ if (enabled) {
+ enabled = false;
+ glDisable(stage);
+ }
+ return;
+ }
+ if (!enabled) {
+ enabled = true;
+ glEnable(stage);
+ }
+ glBindProgramARB(stage, current);
+ };
+
+ update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
+ update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
+ old_state.geometry);
+ update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
+ old_state.fragment);
old_state = current_state;
}
-void ProgramManager::BindComputeShader(GLuint program) {
- is_graphics_bound = false;
- glUseProgram(program);
+void ProgramManager::UpdateSourcePrograms() {
+ if (!is_graphics_bound) {
+ is_graphics_bound = true;
+ glUseProgram(0);
+ }
+
+ const GLuint handle = graphics_pipeline.handle;
+ const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
+ if (current == old) {
+ return;
+ }
+ glUseProgramStages(handle, stage, current);
+ };
+ update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
+ update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
+ update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
+
+ old_state = current_state;
}
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
@@ -54,4 +118,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
}
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index d2e47f2a9..0f03b4f12 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -11,7 +11,9 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
-namespace OpenGL::GLShader {
+namespace OpenGL {
+
+class Device;
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
@@ -28,50 +30,58 @@ static_assert(sizeof(MaxwellUniformData) < 16384,
class ProgramManager {
public:
- explicit ProgramManager();
+ explicit ProgramManager(const Device& device);
~ProgramManager();
- void Create();
+ /// Binds a compute program
+ void BindCompute(GLuint program);
- /// Updates the graphics pipeline and binds it.
+ /// Updates bound programs.
void BindGraphicsPipeline();
- /// Binds a compute shader.
- void BindComputeShader(GLuint program);
+ /// Binds an OpenGL pipeline object unsynchronized with the guest state.
+ void BindHostPipeline(GLuint pipeline);
+
+ /// Rewinds BindHostPipeline state changes.
+ void RestoreGuestPipeline();
void UseVertexShader(GLuint program) {
- current_state.vertex_shader = program;
+ current_state.vertex = program;
}
void UseGeometryShader(GLuint program) {
- current_state.geometry_shader = program;
+ current_state.geometry = program;
}
void UseFragmentShader(GLuint program) {
- current_state.fragment_shader = program;
+ current_state.fragment = program;
}
private:
struct PipelineState {
- bool operator==(const PipelineState& rhs) const noexcept {
- return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
- geometry_shader == rhs.geometry_shader;
- }
-
- bool operator!=(const PipelineState& rhs) const noexcept {
- return !operator==(rhs);
- }
-
- GLuint vertex_shader = 0;
- GLuint fragment_shader = 0;
- GLuint geometry_shader = 0;
+ GLuint vertex = 0;
+ GLuint geometry = 0;
+ GLuint fragment = 0;
};
+ /// Update NV_gpu_program5 programs.
+ void UpdateAssemblyPrograms();
+
+ /// Update GLSL programs.
+ void UpdateSourcePrograms();
+
OGLPipeline graphics_pipeline;
- OGLPipeline compute_pipeline;
+
PipelineState current_state;
PipelineState old_state;
+
+ bool use_assembly_programs = false;
+
bool is_graphics_bound = true;
+
+ bool vertex_enabled = false;
+ bool geometry_enabled = false;
+ bool fragment_enabled = false;
};
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 94fbd2a22..4faa8b90c 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -35,7 +35,7 @@ MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
namespace {
struct FormatTuple {
- GLint internal_format;
+ GLenum internal_format;
GLenum format = GL_NONE;
GLenum type = GL_NONE;
};
@@ -238,6 +238,12 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
return texture;
}
+constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source,
+ SwizzleSource w_source) {
+ return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
+ (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+}
+
} // Anonymous namespace
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
@@ -381,7 +387,7 @@ void CachedSurface::DecorateSurfaceName() {
}
void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
- LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix);
+ LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
}
View CachedSurface::CreateView(const ViewParams& view_key) {
@@ -397,14 +403,13 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr
}
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
- const bool is_proxy)
- : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
- target = GetTextureTarget(params.target);
- format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format;
+ bool is_proxy)
+ : VideoCommon::ViewBase(params), surface{surface},
+ format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format},
+ target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
if (!is_proxy) {
- texture_view = CreateTextureView();
+ main_view = CreateTextureView();
}
- swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
}
CachedSurfaceView::~CachedSurfaceView() = default;
@@ -447,27 +452,49 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
}
}
-void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source,
+GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source,
SwizzleSource z_source, SwizzleSource w_source) {
- u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
- if (new_swizzle == swizzle)
- return;
- swizzle = new_swizzle;
- const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
- GetSwizzleSource(z_source), GetSwizzleSource(w_source)};
- const GLuint handle = GetTexture();
- const PixelFormat format = surface.GetSurfaceParams().pixel_format;
- switch (format) {
+ if (GetSurfaceParams().IsBuffer()) {
+ return GetTexture();
+ }
+ const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
+ if (current_swizzle == new_swizzle) {
+ return current_view;
+ }
+ current_swizzle = new_swizzle;
+
+ const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
+ OGLTextureView& view = entry->second;
+ if (!is_cache_miss) {
+ current_view = view.handle;
+ return view.handle;
+ }
+ view = CreateTextureView();
+ current_view = view.handle;
+
+ std::array swizzle{x_source, y_source, z_source, w_source};
+
+ switch (const PixelFormat format = GetSurfaceParams().pixel_format) {
case PixelFormat::Z24S8:
case PixelFormat::Z32FS8:
case PixelFormat::S8Z24:
- glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
+ UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
+ glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
GetComponent(format, x_source == SwizzleSource::R));
+
+ // Make sure we sample the first component
+ std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) {
+ return value == SwizzleSource::G ? SwizzleSource::R : value;
+ });
+ [[fallthrough]];
+ default: {
+ const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
+ GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
+ glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
break;
- default:
- glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
- break;
}
+ }
+ return view.handle;
}
OGLTextureView CachedSurfaceView::CreateTextureView() const {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 02d9981a1..8a2ac8603 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -83,7 +83,7 @@ public:
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
void Attach(GLenum attachment, GLenum target) const;
- void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
+ GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source);
@@ -98,7 +98,7 @@ public:
if (is_proxy) {
return surface.GetTexture();
}
- return texture_view.handle;
+ return main_view.handle;
}
GLenum GetFormat() const {
@@ -110,23 +110,19 @@ public:
}
private:
- u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source) const {
- return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
- (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
- }
-
OGLTextureView CreateTextureView() const;
CachedSurface& surface;
- GLenum target{};
- GLenum format{};
+ const GLenum format;
+ const GLenum target;
+ const bool is_proxy;
+
+ std::unordered_map<u32, OGLTextureView> view_cache;
+ OGLTextureView main_view;
- OGLTextureView texture_view;
- u32 swizzle{};
- bool is_proxy{};
+ // Use an invalid default so it always fails the comparison test
+ u32 current_swizzle = 0xffffffff;
+ GLuint current_view = 0;
};
class TextureCacheOpenGL final : public TextureCacheBase {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b2a179746..e7952924a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -316,7 +316,7 @@ public:
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
Core::Frontend::GraphicsContext& context)
: RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
- has_debug_tool{HasDebugTool()} {}
+ program_manager{device}, has_debug_tool{HasDebugTool()} {}
RendererOpenGL::~RendererOpenGL() = default;
@@ -468,8 +468,9 @@ void RendererOpenGL::InitOpenGLObjects() {
vertex_program.Create(true, false, vertex_shader.handle);
fragment_program.Create(true, false, fragment_shader.handle);
- // Create program pipeline
- program_manager.Create();
+ pipeline.Create();
+ glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
+ glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -508,7 +509,7 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
- rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info,
+ rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
program_manager, state_tracker);
}
@@ -620,10 +621,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
- program_manager.UseVertexShader(vertex_program.handle);
- program_manager.UseGeometryShader(0);
- program_manager.UseFragmentShader(fragment_program.handle);
- program_manager.BindGraphicsPipeline();
+ program_manager.BindHostPipeline(pipeline.handle);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
@@ -665,6 +663,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+ program_manager.RestoreGuestPipeline();
}
bool RendererOpenGL::TryPresent(int timeout_ms) {
@@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() {
bool RendererOpenGL::Init() {
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
+ if (Settings::values.renderer_debug) {
+ glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
+ }
glDebugMessageCallback(DebugHandler, nullptr);
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 50b647661..61bf507f4 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -95,6 +96,7 @@ private:
Core::Frontend::EmuWindow& emu_window;
Core::System& system;
Core::Frontend::GraphicsContext& context;
+ const Device device;
StateTracker state_tracker{system};
@@ -102,13 +104,14 @@ private:
OGLBuffer vertex_buffer;
OGLProgram vertex_program;
OGLProgram fragment_program;
+ OGLPipeline pipeline;
OGLFramebuffer screenshot_framebuffer;
/// Display information for Switch screen
ScreenInfo screen_info;
/// Global dummy shader pipeline
- GLShader::ProgramManager program_manager;
+ ProgramManager program_manager;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 12be691a5..2871035f5 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -142,7 +142,7 @@ struct FormatTuple {
{VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16
{VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16
{VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4
- {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8
+ {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8
{VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F
{VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F
{VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F
@@ -168,7 +168,7 @@ struct FormatTuple {
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8
{VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5
{VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4
- {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB
+ {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB
{VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB
{VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB
{VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5b494da8c..5f33d9e40 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -7,6 +7,7 @@
#include <memory>
#include "core/core.h"
+#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index f0c491d00..750e5a0ca 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -104,6 +104,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_R16_SFLOAT,
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_B8G8R8A8_UNORM,
+ VK_FORMAT_B8G8R8A8_SRGB,
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
VK_FORMAT_D32_SFLOAT,
VK_FORMAT_D16_UNORM,
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 04d07fe6a..043fe7947 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -7,6 +7,7 @@
#include <memory>
#include "video_core/fence_manager.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 890175d2d..65a1c6245 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -331,8 +331,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
- const auto shader = TryGet(*cpu_addr);
+ const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
ASSERT(shader);
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index cf15e6d1c..a3d992ed3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -532,14 +532,14 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
return;
}
texture_cache.OnCPUWrite(addr, size);
- pipeline_cache.InvalidateRegion(addr, size);
+ pipeline_cache.OnCPUWrite(addr, size);
buffer_cache.OnCPUWrite(addr, size);
- query_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::SyncGuestHost() {
texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
+ pipeline_cache.SyncGuestHost();
}
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
@@ -569,7 +569,9 @@ void RasterizerVulkan::ReleaseFences() {
}
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
- FlushRegion(addr, size);
+ if (Settings::IsGPULevelExtreme()) {
+ FlushRegion(addr, size);
+ }
InvalidateRegion(addr, size);
}
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 6ce6bfcb5..a13e8baa7 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -515,6 +515,16 @@ private:
void DeclareCommon() {
thread_id =
DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
+ thread_masks[0] =
+ DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
+ thread_masks[1] =
+ DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
+ thread_masks[2] =
+ DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
+ thread_masks[3] =
+ DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
+ thread_masks[4] =
+ DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
}
void DeclareVertex() {
@@ -1077,8 +1087,7 @@ private:
void VisitBasicBlock(const NodeBlock& bb) {
for (const auto& node : bb) {
- [[maybe_unused]] const Type type = Visit(node).type;
- ASSERT(type == Type::Void);
+ Visit(node);
}
}
@@ -1372,7 +1381,9 @@ private:
Expression target{};
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no op
+ // Writing to Register::ZeroIndex is a no op but we still have to visit its source
+ // because it might have side effects.
+ Visit(src);
return {};
}
target = {registers.at(gpr->GetIndex()), Type::Float};
@@ -1628,6 +1639,24 @@ private:
return {};
}
+ Expression LogicalFOrdered(Operation operation) {
+ // Emulate SPIR-V's OpOrdered
+ const Id op_a = AsFloat(Visit(operation[0]));
+ const Id op_b = AsFloat(Visit(operation[1]));
+ const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a);
+ const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b);
+ return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool};
+ }
+
+ Expression LogicalFUnordered(Operation operation) {
+ // Emulate SPIR-V's OpUnordered
+ const Id op_a = AsFloat(Visit(operation[0]));
+ const Id op_b = AsFloat(Visit(operation[1]));
+ const Id is_nan_a = OpIsNan(t_bool, op_a);
+ const Id is_nan_b = OpIsNan(t_bool, op_b);
+ return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool};
+ }
+
Id GetTextureSampler(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
ASSERT(!meta.sampler.is_buffer);
@@ -2167,14 +2196,37 @@ private:
return {OpLoad(t_uint, thread_id), Type::Uint};
}
+ template <std::size_t index>
+ Expression ThreadMask(Operation) {
+ // TODO(Rodrigo): Handle devices with different warp sizes
+ const Id mask = thread_masks[index];
+ return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
+ }
+
Expression ShuffleIndexed(Operation operation) {
const Id value = AsFloat(Visit(operation[0]));
const Id index = AsUint(Visit(operation[1]));
return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
}
- Expression MemoryBarrierGL(Operation) {
- const auto scope = spv::Scope::Device;
+ Expression Barrier(Operation) {
+ if (!ir.IsDecompiled()) {
+ LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
+ return {};
+ }
+
+ const auto scope = spv::Scope::Workgroup;
+ const auto memory = spv::Scope::Workgroup;
+ const auto semantics =
+ spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease;
+ OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)),
+ Constant(t_uint, static_cast<u32>(memory)),
+ Constant(t_uint, static_cast<u32>(semantics)));
+ return {};
+ }
+
+ template <spv::Scope scope>
+ Expression MemoryBarrier(Operation) {
const auto semantics =
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
spv::MemorySemanticsMask::WorkgroupMemory |
@@ -2521,7 +2573,14 @@ private:
&SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpIsNan, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::LogicalFOrdered,
+ &SPIRVDecompiler::LogicalFUnordered,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
&SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
@@ -2624,9 +2683,16 @@ private:
&SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
&SPIRVDecompiler::ThreadId,
+ &SPIRVDecompiler::ThreadMask<0>, // Eq
+ &SPIRVDecompiler::ThreadMask<1>, // Ge
+ &SPIRVDecompiler::ThreadMask<2>, // Gt
+ &SPIRVDecompiler::ThreadMask<3>, // Le
+ &SPIRVDecompiler::ThreadMask<4>, // Lt
&SPIRVDecompiler::ShuffleIndexed,
- &SPIRVDecompiler::MemoryBarrierGL,
+ &SPIRVDecompiler::Barrier,
+ &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
+ &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2748,6 +2814,7 @@ private:
Id workgroup_id{};
Id local_invocation_id{};
Id thread_id{};
+ std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
VertexIndices in_indices;
VertexIndices out_indices;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 55f43e61b..2f1d5021d 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -354,26 +354,23 @@ CachedSurfaceView::~CachedSurfaceView() = default;
VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
SwizzleSource z_source, SwizzleSource w_source) {
- const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
- if (last_image_view && last_swizzle == swizzle) {
+ const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
+ if (last_image_view && last_swizzle == new_swizzle) {
return last_image_view;
}
- last_swizzle = swizzle;
+ last_swizzle = new_swizzle;
- const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle);
+ const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
auto& image_view = entry->second;
if (!is_cache_miss) {
return last_image_view = *image_view;
}
- auto swizzle_x = MaxwellToVK::SwizzleSource(x_source);
- auto swizzle_y = MaxwellToVK::SwizzleSource(y_source);
- auto swizzle_z = MaxwellToVK::SwizzleSource(z_source);
- auto swizzle_w = MaxwellToVK::SwizzleSource(w_source);
-
+ std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source),
+ MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
// A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
- std::swap(swizzle_x, swizzle_z);
+ std::swap(swizzle[0], swizzle[2]);
}
// Games can sample depth or stencil values on textures. This is decided by the swizzle value on
@@ -395,11 +392,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
UNIMPLEMENTED();
}
- // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity
- swizzle_x = VK_COMPONENT_SWIZZLE_R;
- swizzle_y = VK_COMPONENT_SWIZZLE_G;
- swizzle_z = VK_COMPONENT_SWIZZLE_B;
- swizzle_w = VK_COMPONENT_SWIZZLE_A;
+ // Make sure we sample the first component
+ std::transform(
+ swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) {
+ return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component;
+ });
}
VkImageViewCreateInfo ci;
@@ -409,7 +406,7 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
ci.image = surface.GetImageHandle();
ci.viewType = image_view_type;
ci.format = surface.GetImage().GetFormat();
- ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
+ ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]};
ci.subresourceRange.aspectMask = aspect;
ci.subresourceRange.baseMipLevel = base_level;
ci.subresourceRange.levelCount = num_levels;
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index a75a5cc63..eeac328a6 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -255,7 +255,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
Node op_b = Immediate(branch_case.cmp_value);
Node condition =
- GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
+ GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b);
auto result = Conditional(condition, {n});
bb.push_back(result);
global_code.push_back(result);
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 9392f065b..63adbc4a3 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -387,7 +387,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
case OpCode::Id::RED: {
UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
- UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
const auto [real_address, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true, true);
if (!real_address || !base_address) {
@@ -396,7 +395,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
Node value = GetRegister(instr.gpr0);
- bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
+ bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
break;
}
case OpCode::Id::ATOM: {
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d4f95b18c..d00e10913 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::WorkGroupIdY);
case SystemVariable::CtaIdZ:
return Operation(OperationCode::WorkGroupIdZ);
+ case SystemVariable::EqMask:
+ case SystemVariable::LtMask:
+ case SystemVariable::LeMask:
+ case SystemVariable::GtMask:
+ case SystemVariable::GeMask:
+ uses_warps = true;
+ switch (instr.sys20) {
+ case SystemVariable::EqMask:
+ return Operation(OperationCode::ThreadEqMask);
+ case SystemVariable::LtMask:
+ return Operation(OperationCode::ThreadLtMask);
+ case SystemVariable::LeMask:
+ return Operation(OperationCode::ThreadLeMask);
+ case SystemVariable::GtMask:
+ return Operation(OperationCode::ThreadGtMask);
+ case SystemVariable::GeMask:
+ return Operation(OperationCode::ThreadGeMask);
+ default:
+ UNREACHABLE();
+ return Immediate(0u);
+ }
default:
UNIMPLEMENTED_MSG("Unhandled system move: {}",
static_cast<u32>(instr.sys20.Value()));
@@ -272,10 +293,25 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
break;
}
+ case OpCode::Id::BAR: {
+ UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
+ bb.push_back(Operation(OperationCode::Barrier));
+ break;
+ }
case OpCode::Id::MEMBAR: {
- UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
- bb.push_back(Operation(OperationCode::MemoryBarrierGL));
+ const OperationCode type = [instr] {
+ switch (instr.membar.type) {
+ case Tegra::Shader::MembarType::CTA:
+ return OperationCode::MemoryBarrierGroup;
+ case Tegra::Shader::MembarType::GL:
+ return OperationCode::MemoryBarrierGlobal;
+ default:
+ UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
+ return OperationCode::MemoryBarrierGlobal;
+ }
+ }();
+ bb.push_back(Operation(type));
break;
}
case OpCode::Id::DEPBAR: {
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 6191ffba1..c83dc6615 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -97,19 +97,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
}
case Tegra::Shader::XmadMode::CSfu: {
- const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a,
- op_a, Immediate(0));
- const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b,
- op_b, Immediate(0));
+ const Node comp_a =
+ GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
+ const Node comp_b =
+ GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
const Node comp_minus_a = GetPredicateComparisonInteger(
- PredCondition::NotEqual, is_signed_a,
+ PredCondition::NE, is_signed_a,
SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
Immediate(0x80000000)),
Immediate(0));
const Node comp_minus_b = GetPredicateComparisonInteger(
- PredCondition::NotEqual, is_signed_b,
+ PredCondition::NE, is_signed_b,
SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
Immediate(0x80000000)),
Immediate(0));
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 601c822d2..c5e5165ff 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -110,13 +110,20 @@ enum class OperationCode {
LogicalPick2, /// (bool2 pair, uint index) -> bool
LogicalAnd2, /// (bool2 a) -> bool
- LogicalFLessThan, /// (float a, float b) -> bool
- LogicalFEqual, /// (float a, float b) -> bool
- LogicalFLessEqual, /// (float a, float b) -> bool
- LogicalFGreaterThan, /// (float a, float b) -> bool
- LogicalFNotEqual, /// (float a, float b) -> bool
- LogicalFGreaterEqual, /// (float a, float b) -> bool
- LogicalFIsNan, /// (float a) -> bool
+ LogicalFOrdLessThan, /// (float a, float b) -> bool
+ LogicalFOrdEqual, /// (float a, float b) -> bool
+ LogicalFOrdLessEqual, /// (float a, float b) -> bool
+ LogicalFOrdGreaterThan, /// (float a, float b) -> bool
+ LogicalFOrdNotEqual, /// (float a, float b) -> bool
+ LogicalFOrdGreaterEqual, /// (float a, float b) -> bool
+ LogicalFOrdered, /// (float a, float b) -> bool
+ LogicalFUnordered, /// (float a, float b) -> bool
+ LogicalFUnordLessThan, /// (float a, float b) -> bool
+ LogicalFUnordEqual, /// (float a, float b) -> bool
+ LogicalFUnordLessEqual, /// (float a, float b) -> bool
+ LogicalFUnordGreaterThan, /// (float a, float b) -> bool
+ LogicalFUnordNotEqual, /// (float a, float b) -> bool
+ LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
LogicalILessThan, /// (int a, int b) -> bool
LogicalIEqual, /// (int a, int b) -> bool
@@ -219,9 +226,16 @@ enum class OperationCode {
VoteEqual, /// (bool) -> bool
ThreadId, /// () -> uint
+ ThreadEqMask, /// () -> uint
+ ThreadGeMask, /// () -> uint
+ ThreadGtMask, /// () -> uint
+ ThreadLeMask, /// () -> uint
+ ThreadLtMask, /// () -> uint
ShuffleIndexed, /// (uint value, uint index) -> uint
- MemoryBarrierGL, /// () -> void
+ Barrier, /// () -> void
+ MemoryBarrierGroup, /// () -> void
+ MemoryBarrierGlobal, /// () -> void
Amount,
};
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 822674926..e322c3402 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
@@ -243,56 +244,44 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
}
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
+ if (condition == PredCondition::T) {
+ return GetPredicate(true);
+ } else if (condition == PredCondition::F) {
+ return GetPredicate(false);
+ }
+
static constexpr std::array comparison_table{
- std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan},
- std::pair{PredCondition::Equal, OperationCode::LogicalFEqual},
- std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
- std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
- std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
- std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
- std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
- std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
- std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
- std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
- std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual},
+ OperationCode(0),
+ OperationCode::LogicalFOrdLessThan, // LT
+ OperationCode::LogicalFOrdEqual, // EQ
+ OperationCode::LogicalFOrdLessEqual, // LE
+ OperationCode::LogicalFOrdGreaterThan, // GT
+ OperationCode::LogicalFOrdNotEqual, // NE
+ OperationCode::LogicalFOrdGreaterEqual, // GE
+ OperationCode::LogicalFOrdered, // NUM
+ OperationCode::LogicalFUnordered, // NAN
+ OperationCode::LogicalFUnordLessThan, // LTU
+ OperationCode::LogicalFUnordEqual, // EQU
+ OperationCode::LogicalFUnordLessEqual, // LEU
+ OperationCode::LogicalFUnordGreaterThan, // GTU
+ OperationCode::LogicalFUnordNotEqual, // NEU
+ OperationCode::LogicalFUnordGreaterEqual, // GEU
};
+ const std::size_t index = static_cast<std::size_t>(condition);
+ ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
- const auto comparison =
- std::find_if(comparison_table.cbegin(), comparison_table.cend(),
- [condition](const auto entry) { return condition == entry.first; });
- UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
- "Unknown predicate comparison operation");
-
- Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
-
- if (condition == PredCondition::LessThanWithNan ||
- condition == PredCondition::NotEqualWithNan ||
- condition == PredCondition::LessEqualWithNan ||
- condition == PredCondition::GreaterThanWithNan ||
- condition == PredCondition::GreaterEqualWithNan) {
- predicate = Operation(OperationCode::LogicalOr, predicate,
- Operation(OperationCode::LogicalFIsNan, op_a));
- predicate = Operation(OperationCode::LogicalOr, predicate,
- Operation(OperationCode::LogicalFIsNan, op_b));
- }
-
- return predicate;
+ return Operation(comparison_table[index], op_a, op_b);
}
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
Node op_b) {
static constexpr std::array comparison_table{
- std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan},
- std::pair{PredCondition::Equal, OperationCode::LogicalIEqual},
- std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual},
- std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
- std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual},
- std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
- std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
- std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
- std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
- std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
- std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual},
+ std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
+ std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
+ std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
+ std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
+ std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
+ std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
};
const auto comparison =
@@ -301,32 +290,24 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
- Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
- std::move(op_b));
-
- UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
- condition == PredCondition::NotEqualWithNan ||
- condition == PredCondition::LessEqualWithNan ||
- condition == PredCondition::GreaterThanWithNan ||
- condition == PredCondition::GreaterEqualWithNan,
- "NaN comparisons for integers are not implemented");
- return predicate;
+ return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
+ std::move(op_b));
}
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
Node op_b) {
static constexpr std::array comparison_table{
- std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan},
- std::pair{PredCondition::Equal, OperationCode::Logical2HEqual},
- std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
- std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
- std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
- std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
- std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
- std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
- std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
- std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
- std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan},
+ std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
+ std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
+ std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
+ std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
+ std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
+ std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
+ std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
+ std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
+ std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
+ std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
+ std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
};
const auto comparison =
@@ -397,7 +378,7 @@ void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc
if (!sets_cc) {
return;
}
- Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
+ Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
}
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d6efc34b2..45e3ddd2c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -14,6 +14,7 @@
#include <unordered_map>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
template <typename TSurface, typename TView>
class TextureCache {
+ using VectorSurface = boost::container::small_vector<TSurface, 1>;
public:
void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -308,18 +310,20 @@ public:
dst_surface.first->MarkAsModified(true, Tick());
}
- TSurface TryFindFramebufferSurface(VAddr addr) {
+ TSurface TryFindFramebufferSurface(VAddr addr) const {
if (!addr) {
return nullptr;
}
const VAddr page = addr >> registry_page_bits;
- std::vector<TSurface>& list = registry[page];
- for (auto& surface : list) {
- if (surface->GetCpuAddr() == addr) {
- return surface;
- }
+ const auto it = registry.find(page);
+ if (it == registry.end()) {
+ return nullptr;
}
- return nullptr;
+ const auto& list = it->second;
+ const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
+ return surface->GetCpuAddr() == addr;
+ });
+ return found != list.end() ? *found : nullptr;
}
u64 Tick() {
@@ -498,7 +502,7 @@ private:
* @param untopological Indicates to the recycler that the texture has no way
* to match the overlaps due to topological reasons.
**/
- RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
+ RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
if (Settings::IsGPULevelExtreme()) {
return RecycleStrategy::Flush;
@@ -538,9 +542,8 @@ private:
* @param untopological Indicates to the recycler that the texture has no way to match the
* overlaps due to topological reasons.
**/
- std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
- const SurfaceParams& params, const GPUVAddr gpu_addr,
- const bool preserve_contents,
+ std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
+ const GPUVAddr gpu_addr, const bool preserve_contents,
const MatchTopologyResult untopological) {
const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
for (auto& surface : overlaps) {
@@ -650,7 +653,7 @@ private:
* @param params The parameters on the new surface.
* @param gpu_addr The starting address of the new surface.
**/
- std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
+ std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
const SurfaceParams& params,
const GPUVAddr gpu_addr) {
if (params.target == SurfaceTarget::Texture3D) {
@@ -708,7 +711,7 @@ private:
* @param preserve_contents Indicates that the new surface should be loaded from memory or
* left blank.
*/
- std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
+ std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
const SurfaceParams& params,
const GPUVAddr gpu_addr,
const VAddr cpu_addr,
@@ -810,7 +813,7 @@ private:
TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
- std::vector<TSurface> overlaps{current_surface};
+ VectorSurface overlaps{current_surface};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
}
@@ -991,7 +994,9 @@ private:
params.target = target;
params.is_tiled = false;
params.srgb_conversion = false;
- params.is_layered = false;
+ params.is_layered =
+ target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
+ target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
params.block_width = 0;
params.block_height = 0;
params.block_depth = 0;
@@ -1124,23 +1129,25 @@ private:
}
}
- std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
+ VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
if (size == 0) {
return {};
}
const VAddr cpu_addr_end = cpu_addr + size;
- VAddr start = cpu_addr >> registry_page_bits;
const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
- std::vector<TSurface> surfaces;
- while (start <= end) {
- std::vector<TSurface>& list = registry[start];
- for (auto& surface : list) {
- if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
- surface->MarkAsPicked(true);
- surfaces.push_back(surface);
+ VectorSurface surfaces;
+ for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
+ const auto it = registry.find(start);
+ if (it == registry.end()) {
+ continue;
+ }
+ for (auto& surface : it->second) {
+ if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
+ continue;
}
+ surface->MarkAsPicked(true);
+ surfaces.push_back(surface);
}
- start++;
}
for (auto& surface : surfaces) {
surface->MarkAsPicked(false);
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 3d759f77b..1f5e43043 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -106,6 +106,9 @@ public:
format.setVersion(4, 3);
format.setProfile(QSurfaceFormat::CompatibilityProfile);
format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
+ if (Settings::values.renderer_debug) {
+ format.setOption(QSurfaceFormat::FormatOption::DebugContext);
+ }
// TODO: expose a setting for buffer value (ie default/single/double/triple)
format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
format.setSwapInterval(0);
@@ -150,18 +153,19 @@ public:
}
void MakeCurrent() override {
- if (is_current) {
- return;
+ // We can't track the current state of the underlying context in this wrapper class because
+ // Qt may make the underlying context not current for one reason or another. In particular,
+ // the WebBrowser uses GL, so it seems to conflict if we aren't careful.
+ // Instead of always just making the context current (which does not have any caching to
+ // check if the underlying context is already current) we can check for the current context
+ // in the thread local data by calling `currentContext()` and checking if its ours.
+ if (QOpenGLContext::currentContext() != context.get()) {
+ context->makeCurrent(surface);
}
- is_current = context->makeCurrent(surface);
}
void DoneCurrent() override {
- if (!is_current) {
- return;
- }
context->doneCurrent();
- is_current = false;
}
QOpenGLContext* GetShareContext() {
@@ -178,7 +182,6 @@ private:
std::unique_ptr<QOpenGLContext> context;
std::unique_ptr<QOffscreenSurface> offscreen_surface{};
QSurface* surface;
- bool is_current = false;
};
class DummyContext : public Core::Frontend::GraphicsContext {};
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 75c6cf20b..b08b87426 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -643,6 +643,8 @@ void Config::ReadRendererValues() {
Settings::values.use_asynchronous_gpu_emulation =
ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
+ Settings::values.use_assembly_shaders =
+ ReadSetting(QStringLiteral("use_assembly_shaders"), false).toBool();
Settings::values.use_fast_gpu_time =
ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool();
Settings::values.force_30fps_mode =
@@ -687,6 +689,8 @@ void Config::ReadSystemValues() {
Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt();
+ Settings::values.time_zone_index = ReadSetting(QStringLiteral("time_zone_index"), 0).toInt();
+
const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool();
if (rng_seed_enabled) {
Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong();
@@ -1088,6 +1092,8 @@ void Config::SaveRendererValues() {
WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
Settings::values.use_asynchronous_gpu_emulation, false);
WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
+ WriteSetting(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders,
+ false);
WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true);
WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false);
@@ -1126,6 +1132,7 @@ void Config::SaveSystemValues() {
WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0);
WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1);
WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1);
+ WriteSetting(QStringLiteral("time_zone_index"), Settings::values.time_zone_index, 0);
WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false);
WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 5bb2ae555..37aadf7f8 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -12,6 +12,9 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
ui->setupUi(this);
+ // TODO: Remove this after assembly shaders are fully integrated
+ ui->use_assembly_shaders->setVisible(false);
+
SetConfiguration();
}
@@ -22,6 +25,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
ui->use_vsync->setEnabled(runtime_lock);
ui->use_vsync->setChecked(Settings::values.use_vsync);
+ ui->use_assembly_shaders->setEnabled(runtime_lock);
+ ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders);
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time);
ui->force_30fps_mode->setEnabled(runtime_lock);
ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
@@ -33,6 +38,7 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
Settings::values.gpu_accuracy = gpu_accuracy;
Settings::values.use_vsync = ui->use_vsync->isChecked();
+ Settings::values.use_assembly_shaders = ui->use_assembly_shaders->isChecked();
Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked();
Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 770b80c50..0021607ac 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -63,6 +63,16 @@
</widget>
</item>
<item>
+ <widget class="QCheckBox" name="use_assembly_shaders">
+ <property name="toolTip">
+ <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string>
+ </property>
+ <property name="text">
+ <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string>
+ </property>
+ </widget>
+ </item>
+ <item>
<widget class="QCheckBox" name="force_30fps_mode">
<property name="text">
<string>Force 30 FPS mode</string>
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp
index f49cd4c8f..10315e7a6 100644
--- a/src/yuzu/configuration/configure_system.cpp
+++ b/src/yuzu/configuration/configure_system.cpp
@@ -57,6 +57,7 @@ void ConfigureSystem::SetConfiguration() {
ui->combo_language->setCurrentIndex(Settings::values.language_index);
ui->combo_region->setCurrentIndex(Settings::values.region_index);
+ ui->combo_time_zone->setCurrentIndex(Settings::values.time_zone_index);
ui->combo_sound->setCurrentIndex(Settings::values.sound_index);
ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value());
@@ -84,6 +85,7 @@ void ConfigureSystem::ApplyConfiguration() {
Settings::values.language_index = ui->combo_language->currentIndex();
Settings::values.region_index = ui->combo_region->currentIndex();
+ Settings::values.time_zone_index = ui->combo_time_zone->currentIndex();
Settings::values.sound_index = ui->combo_sound->currentIndex();
if (ui->rng_seed_checkbox->isChecked()) {
diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h
index d8fa2d2cc..26d42d5c5 100644
--- a/src/yuzu/configuration/configure_system.h
+++ b/src/yuzu/configuration/configure_system.h
@@ -37,5 +37,6 @@ private:
int language_index = 0;
int region_index = 0;
+ int time_zone_index = 0;
int sound_index = 0;
};
diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui
index 4e2c7e76e..9c8cca6dc 100644
--- a/src/yuzu/configuration/configure_system.ui
+++ b/src/yuzu/configuration/configure_system.ui
@@ -22,14 +22,14 @@
<string>System Settings</string>
</property>
<layout class="QGridLayout" name="gridLayout">
- <item row="2" column="0">
+ <item row="3" column="0">
<widget class="QLabel" name="label_sound">
<property name="text">
<string>Sound output mode</string>
</property>
</widget>
</item>
- <item row="3" column="0">
+ <item row="4" column="0">
<widget class="QLabel" name="label_console_id">
<property name="text">
<string>Console ID:</string>
@@ -174,14 +174,255 @@
</item>
</widget>
</item>
- <item row="5" column="0">
+ <item row="2" column="0">
+ <widget class="QLabel" name="label_timezone">
+ <property name="text">
+ <string>Time Zone:</string>
+ </property>
+ </widget>
+ </item>
+ <item row="2" column="1">
+ <widget class="QComboBox" name="combo_time_zone">
+ <item>
+ <property name="text">
+ <string>Auto</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Default</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>CET</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>CST6CDT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Cuba</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>EET</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Egypt</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Eire</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>EST</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>EST5EDT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GB</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GB-Eire</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GMT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GMT+0</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GMT-0</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GMT0</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Greenwich</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Hongkong</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>HST</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Iceland</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Iran</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Israel</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Jamaica</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Japan</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Kwajalein</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Libya</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>MET</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>MST</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>MST7MDT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Navajo</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>NZ</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>NZ-CHAT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Poland</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Portugal</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>PRC</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>PST8PDT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>ROC</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>ROK</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Singapore</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Turkey</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>UCT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Universal</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>UTC</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>W-SU</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>WET</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Zulu</string>
+ </property>
+ </item>
+ </widget>
+ </item>
+ <item row="6" column="0">
<widget class="QCheckBox" name="rng_seed_checkbox">
<property name="text">
<string>RNG Seed</string>
</property>
</widget>
</item>
- <item row="2" column="1">
+ <item row="3" column="1">
<widget class="QComboBox" name="combo_sound">
<item>
<property name="text">
@@ -207,7 +448,7 @@
</property>
</widget>
</item>
- <item row="3" column="1">
+ <item row="4" column="1">
<widget class="QPushButton" name="button_regenerate_console_id">
<property name="sizePolicy">
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
@@ -223,14 +464,14 @@
</property>
</widget>
</item>
- <item row="4" column="0">
+ <item row="5" column="0">
<widget class="QCheckBox" name="custom_rtc_checkbox">
<property name="text">
<string>Custom RTC</string>
</property>
</widget>
</item>
- <item row="4" column="1">
+ <item row="5" column="1">
<widget class="QDateTimeEdit" name="custom_rtc_edit">
<property name="minimumDate">
<date>
@@ -244,7 +485,7 @@
</property>
</widget>
</item>
- <item row="5" column="1">
+ <item row="6" column="1">
<widget class="QLineEdit" name="rng_seed_edit">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp
index ea0079353..a93733b26 100644
--- a/src/yuzu/discord_impl.cpp
+++ b/src/yuzu/discord_impl.cpp
@@ -18,7 +18,7 @@ DiscordImpl::DiscordImpl() {
// The number is the client ID for yuzu, it's used for images and the
// application name
- Discord_Initialize("471872241299226636", &handlers, 1, nullptr);
+ Discord_Initialize("712465656758665259", &handlers, 1, nullptr);
}
DiscordImpl::~DiscordImpl() {
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index dccbabcbf..bfb600df0 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -488,11 +488,11 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, std::string pat
auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id);
navigate_to_gamedb_entry->setVisible(it != compatibility_list.end() && program_id != 0);
- connect(open_save_location, &QAction::triggered, [this, program_id]() {
- emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData);
+ connect(open_save_location, &QAction::triggered, [this, program_id, path]() {
+ emit OpenFolderRequested(GameListOpenTarget::SaveData, path);
});
- connect(open_lfs_location, &QAction::triggered, [this, program_id]() {
- emit OpenFolderRequested(program_id, GameListOpenTarget::ModData);
+ connect(open_lfs_location, &QAction::triggered, [this, program_id, path]() {
+ emit OpenFolderRequested(GameListOpenTarget::ModData, path);
});
connect(open_transferable_shader_cache, &QAction::triggered,
[this, program_id]() { emit OpenTransferableShaderCacheRequested(program_id); });
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index 878d94413..a38cb2fc3 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -73,7 +73,7 @@ public:
signals:
void GameChosen(QString game_path);
void ShouldCancelWorker();
- void OpenFolderRequested(u64 program_id, GameListOpenTarget target);
+ void OpenFolderRequested(GameListOpenTarget target, const std::string& game_path);
void OpenTransferableShaderCacheRequested(u64 program_id);
void DumpRomFSRequested(u64 program_id, const std::string& game_path);
void CopyTIDRequested(u64 program_id);
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 2a6483370..ae842306c 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -19,6 +19,7 @@
#include <QTime>
#include <QtConcurrent/QtConcurrentRun>
#include "common/logging/log.h"
+#include "core/frontend/framebuffer_layout.h"
#include "core/loader/loader.h"
#include "ui_loading_screen.h"
#include "video_core/rasterizer_interface.h"
@@ -61,7 +62,7 @@ LoadingScreen::LoadingScreen(QWidget* parent)
: QWidget(parent), ui(std::make_unique<Ui::LoadingScreen>()),
previous_stage(VideoCore::LoadCallbackStage::Complete) {
ui->setupUi(this);
- setMinimumSize(1280, 720);
+ setMinimumSize(Layout::MinimumSize::Width, Layout::MinimumSize::Height);
// Create a fade out effect to hide this loading screen widget.
// When fading opacity, it will fade to the parent widgets background color, which is why we
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 86e8a1d49..270cccc77 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -65,6 +65,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "common/logging/backend.h"
#include "common/logging/filter.h"
#include "common/logging/log.h"
+#include "common/memory_detect.h"
#include "common/microprofile.h"
#include "common/scm_rev.h"
#include "common/scope_exit.h"
@@ -219,6 +220,10 @@ GMainWindow::GMainWindow()
LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);
#endif
LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString());
+ LOG_INFO(Frontend, "Host RAM: {:.2f} GB",
+ Common::GetMemInfo().TotalPhysicalMemory / 1024.0f / 1024 / 1024);
+ LOG_INFO(Frontend, "Host Swap: {:.2f} GB",
+ Common::GetMemInfo().TotalSwapMemory / 1024.0f / 1024 / 1024);
UpdateWindowTitle();
show();
@@ -724,13 +729,13 @@ void GMainWindow::InitializeHotkeys() {
}
void GMainWindow::SetDefaultUIGeometry() {
- // geometry: 55% of the window contents are in the upper screen half, 45% in the lower half
+ // geometry: 53% of the window contents are in the upper screen half, 47% in the lower half
const QRect screenRect = QApplication::desktop()->screenGeometry(this);
const int w = screenRect.width() * 2 / 3;
- const int h = screenRect.height() / 2;
+ const int h = screenRect.height() * 2 / 3;
const int x = (screenRect.x() + screenRect.width()) / 2 - w / 2;
- const int y = (screenRect.y() + screenRect.height()) / 2 - h * 55 / 100;
+ const int y = (screenRect.y() + screenRect.height()) / 2 - h * 53 / 100;
setGeometry(x, y, w, h);
}
@@ -831,6 +836,7 @@ void GMainWindow::ConnectMenuEvents() {
&GMainWindow::OnDisplayTitleBars);
connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar);
connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible);
+ connect(ui.action_Reset_Window_Size, &QAction::triggered, this, &GMainWindow::ResetWindowSize);
// Fullscreen
ui.action_Fullscreen->setShortcut(
@@ -1154,39 +1160,61 @@ void GMainWindow::OnGameListLoadFile(QString game_path) {
BootGame(game_path);
}
-void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target) {
+void GMainWindow::OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path) {
std::string path;
QString open_target;
+
+ const auto v_file = Core::GetGameFileFromPath(vfs, game_path);
+ const auto loader = Loader::GetLoader(v_file);
+ FileSys::NACP control{};
+ u64 program_id{};
+
+ loader->ReadControlData(control);
+ loader->ReadProgramId(program_id);
+
+ const bool has_user_save{control.GetDefaultNormalSaveSize() > 0};
+ const bool has_device_save{control.GetDeviceSaveDataSize() > 0};
+
+ ASSERT_MSG(has_user_save != has_device_save, "Game uses both user and device savedata?");
+
switch (target) {
case GameListOpenTarget::SaveData: {
open_target = tr("Save Data");
const std::string nand_dir = FileUtil::GetUserPath(FileUtil::UserPath::NANDDir);
ASSERT(program_id != 0);
- const auto select_profile = [this] {
- QtProfileSelectionDialog dialog(this);
- dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
- Qt::WindowSystemMenuHint | Qt::WindowCloseButtonHint);
- dialog.setWindowModality(Qt::WindowModal);
+ if (has_user_save) {
+ // User save data
+ const auto select_profile = [this] {
+ QtProfileSelectionDialog dialog(this);
+ dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
+ Qt::WindowSystemMenuHint | Qt::WindowCloseButtonHint);
+ dialog.setWindowModality(Qt::WindowModal);
- if (dialog.exec() == QDialog::Rejected) {
- return -1;
- }
+ if (dialog.exec() == QDialog::Rejected) {
+ return -1;
+ }
- return dialog.GetIndex();
- };
+ return dialog.GetIndex();
+ };
- const auto index = select_profile();
- if (index == -1) {
- return;
- }
+ const auto index = select_profile();
+ if (index == -1) {
+ return;
+ }
- Service::Account::ProfileManager manager;
- const auto user_id = manager.GetUser(static_cast<std::size_t>(index));
- ASSERT(user_id);
- path = nand_dir + FileSys::SaveDataFactory::GetFullPath(FileSys::SaveDataSpaceId::NandUser,
- FileSys::SaveDataType::SaveData,
- program_id, user_id->uuid, 0);
+ Service::Account::ProfileManager manager;
+ const auto user_id = manager.GetUser(static_cast<std::size_t>(index));
+ ASSERT(user_id);
+ path = nand_dir + FileSys::SaveDataFactory::GetFullPath(
+ FileSys::SaveDataSpaceId::NandUser,
+ FileSys::SaveDataType::SaveData, program_id, user_id->uuid, 0);
+ } else {
+ // Device save data
+ path = nand_dir + FileSys::SaveDataFactory::GetFullPath(
+ FileSys::SaveDataSpaceId::NandUser,
+ FileSys::SaveDataType::SaveData, program_id, {}, 0);
+ }
if (!FileUtil::Exists(path)) {
FileUtil::CreateFullPath(path);
@@ -1829,6 +1857,20 @@ void GMainWindow::ToggleWindowMode() {
}
}
+void GMainWindow::ResetWindowSize() {
+ const auto aspect_ratio = Layout::EmulationAspectRatio(
+ static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio),
+ static_cast<float>(Layout::ScreenUndocked::Height) / Layout::ScreenUndocked::Width);
+ if (!ui.action_Single_Window_Mode->isChecked()) {
+ render_window->resize(Layout::ScreenUndocked::Height / aspect_ratio,
+ Layout::ScreenUndocked::Height);
+ } else {
+ resize(Layout::ScreenUndocked::Height / aspect_ratio,
+ Layout::ScreenUndocked::Height + menuBar()->height() +
+ (ui.action_Show_Status_Bar->isChecked() ? statusBar()->height() : 0));
+ }
+}
+
void GMainWindow::OnConfigure() {
const auto old_theme = UISettings::values.theme;
const bool old_discord_presence = UISettings::values.enable_discord_presence;
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 60b17c54a..4f4c8ddbe 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -183,7 +183,7 @@ private slots:
void OnMenuReportCompatibility();
/// Called whenever a user selects a game in the game list widget.
void OnGameListLoadFile(QString game_path);
- void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target);
+ void OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path);
void OnTransferableShaderCacheOpenFile(u64 program_id);
void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
void OnGameListCopyTID(u64 program_id);
@@ -208,6 +208,7 @@ private slots:
void ShowFullscreen();
void HideFullscreen();
void ToggleWindowMode();
+ void ResetWindowSize();
void OnCaptureScreenshot();
void OnCoreError(Core::System::ResultStatus, std::string);
void OnReinitializeKeys(ReinitializeKeyBehavior behavior);
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index ae414241e..97c90f50b 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
- <width>1081</width>
- <height>730</height>
+ <width>1280</width>
+ <height>720</height>
</rect>
</property>
<property name="windowTitle">
@@ -44,7 +44,7 @@
<rect>
<x>0</x>
<y>0</y>
- <width>1081</width>
+ <width>1280</width>
<height>21</height>
</rect>
</property>
@@ -96,6 +96,7 @@
<addaction name="action_Display_Dock_Widget_Headers"/>
<addaction name="action_Show_Filter_Bar"/>
<addaction name="action_Show_Status_Bar"/>
+ <addaction name="action_Reset_Window_Size"/>
<addaction name="separator"/>
<addaction name="menu_View_Debugging"/>
</widget>
@@ -215,6 +216,11 @@
<string>Show Status Bar</string>
</property>
</action>
+ <action name="action_Reset_Window_Size">
+ <property name="text">
+ <string>Reset Window Size</string>
+ </property>
+ </action>
<action name="action_Fullscreen">
<property name="checkable">
<bool>true</bool>
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 8476a5a16..c20d48c42 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -367,6 +367,9 @@ void Config::ReadValues() {
Settings::values.custom_rtc = std::nullopt;
}
+ Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
+ Settings::values.time_zone_index = sdl2_config->GetInteger("System", "time_zone_index", 0);
+
// Core
Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
@@ -394,6 +397,8 @@ void Config::ReadValues() {
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
Settings::values.use_vsync =
static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1));
+ Settings::values.use_assembly_shaders =
+ sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false);
Settings::values.use_fast_gpu_time =
sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true);
@@ -409,8 +414,6 @@ void Config::ReadValues() {
Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
- Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
-
// Miscellaneous
Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Trace");
Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 60b1a62fa..abc6e6e65 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -134,6 +134,10 @@ max_anisotropy =
# 0 (default): Off, 1: On
use_vsync =
+# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required.
+# 0 (default): Off, 1: On
+use_assembly_shaders =
+
# Turns on the frame limiter, which will limit frames output to the target game speed
# 0: Off, 1: On (default)
use_frame_limit =
@@ -262,6 +266,10 @@ language_index =
# -1: Auto-select (default), 0: Japan, 1: USA, 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan
region_value =
+# The system time zone that yuzu will use during emulation
+# 0: Auto-select (default), 1: Default (system archive value), Others: Index for specified time zone
+time_zone_index =
+
[Miscellaneous]
# A filter which removes logs below a certain logging level.
# Examples: *:Debug Kernel.SVC:Trace Service.*:Critical
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 411e7e647..09cc0a3b5 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen)
SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
+ if (Settings::values.renderer_debug) {
+ SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
+ }
SDL_GL_SetSwapInterval(0);
std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,