summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
authorFernando Sahmkow <fsahmkow27@gmail.com>2020-01-03 21:16:29 +0100
committerFernandoS27 <fsahmkow27@gmail.com>2020-01-24 21:43:29 +0100
commitc921e496eb47de49a4d6ce62527581b966dca259 (patch)
tree788c71599f0abf53b479bd3f2f3ea730fc9c35c4 /src/video_core
parentMerge pull request #3273 from FernandoS27/txd-array (diff)
downloadyuzu-c921e496eb47de49a4d6ce62527581b966dca259.tar
yuzu-c921e496eb47de49a4d6ce62527581b966dca259.tar.gz
yuzu-c921e496eb47de49a4d6ce62527581b966dca259.tar.bz2
yuzu-c921e496eb47de49a4d6ce62527581b966dca259.tar.lz
yuzu-c921e496eb47de49a4d6ce62527581b966dca259.tar.xz
yuzu-c921e496eb47de49a4d6ce62527581b966dca259.tar.zst
yuzu-c921e496eb47de49a4d6ce62527581b966dca259.zip
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h3
-rw-r--r--src/video_core/engines/kepler_compute.cpp4
-rw-r--r--src/video_core/engines/kepler_compute.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp4
-rw-r--r--src/video_core/engines/maxwell_3d.h2
-rw-r--r--src/video_core/guest_driver.cpp34
-rw-r--r--src/video_core/guest_driver.h37
-rw-r--r--src/video_core/rasterizer_interface.h8
-rw-r--r--src/video_core/shader/const_buffer_locker.h8
-rw-r--r--src/video_core/shader/decode.cpp21
-rw-r--r--src/video_core/shader/shader_ir.cpp1
-rw-r--r--src/video_core/shader/shader_ir.h1
13 files changed, 127 insertions, 0 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index ccfed4f2e..04a25da4f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,6 +29,8 @@ add_library(video_core STATIC
gpu_synch.h
gpu_thread.cpp
gpu_thread.h
+ guest_driver.cpp
+ guest_driver.h
macro_interpreter.cpp
macro_interpreter.h
memory_manager.cpp
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index 44b8b8d22..c29156e34 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
#include "video_core/textures/texture.h"
namespace Tegra::Engines {
@@ -106,6 +107,8 @@ public:
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const = 0;
virtual u32 GetBoundBuffer() const = 0;
+
+ virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
};
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 110406f2f..f177ae938 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -94,6 +94,10 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
return result;
}
+// Forwards the request to the rasterizer and returns the guest driver profile it exposes.
+VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
+ return rasterizer.AccessGuestDriverProfile();
+}
+
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 4ef3e0613..99c82a9af 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -218,6 +218,8 @@ public:
return regs.tex_cb_index;
}
+ VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
+
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 58dfa8033..8167864c0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -784,4 +784,8 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
return result;
}
+// Forwards the request to the rasterizer and returns the guest driver profile it exposes.
+VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
+ return rasterizer.AccessGuestDriverProfile();
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ee79260fc..08ef95410 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1306,6 +1306,8 @@ public:
return regs.tex_cb_index;
}
+ VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
+
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
/// we've seen used.
using MacroMemory = std::array<u32, 0x40000>;
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
new file mode 100644
index 000000000..b1ac254ff
--- /dev/null
+++ b/src/video_core/guest_driver.cpp
@@ -0,0 +1,34 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <limits>
+
+#include "video_core/guest_driver.h"
+
+namespace VideoCore {
+
+/// Tries to deduce the texture handler size from the given bound offsets.
+///
+/// The smallest gap between two distinct offsets is taken as the handler size, expressed in
+/// units of sizeof(u32). The result is only accepted when that gap is at most 2; otherwise, or
+/// when fewer than two offsets are given or all offsets are equal, the current size is kept.
+/// Once a deduction has succeeded, later calls return immediately.
+///
+/// @param bound_offsets Offsets to inspect. The vector is sorted in place.
+void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
+    if (texture_handler_size_deduced) {
+        return;
+    }
+    const std::size_t num_offsets = bound_offsets.size();
+    if (num_offsets < 2) {
+        // A gap cannot be measured with fewer than two offsets.
+        return;
+    }
+    std::sort(bound_offsets.begin(), bound_offsets.end());
+
+    // Smallest non-zero distance between neighbouring offsets; stays at the maximum value when
+    // every offset is identical.
+    u32 min_gap = std::numeric_limits<u32>::max();
+    for (std::size_t i = 1; i < num_offsets; ++i) {
+        const u32 gap = bound_offsets[i] - bound_offsets[i - 1];
+        if (gap == 0) {
+            // Duplicated offsets carry no information about the handler size.
+            continue;
+        }
+        min_gap = std::min(min_gap, gap);
+    }
+    if (min_gap > 2) {
+        // The gap is larger than any accepted handler size, so nothing can be concluded.
+        return;
+    }
+    texture_handler_size_deduced = true;
+    texture_handler_size = static_cast<u32>(sizeof(u32)) * min_gap;
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
new file mode 100644
index 000000000..f64f043af
--- /dev/null
+++ b/src/video_core/guest_driver.h
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace VideoCore {
+
+/**
+ * The GuestDriverProfile class is used to learn about the GPU driver's behavior and to collect
+ * the information required by HLE methods that cannot be avoided, such as shader tracking.
+ */
+class GuestDriverProfile {
+public:
+ /// Returns the texture handler size currently in effect; this is the default size until a
+ /// deduction succeeds.
+ u32 GetTextureHandlerSize() const {
+ return texture_handler_size;
+ }
+
+ /// Returns true once the texture handler size has been deduced.
+ bool TextureHandlerSizeKnown() const {
+ return texture_handler_size_deduced;
+ }
+
+ /// Attempts to deduce the texture handler size from the given bound offsets.
+ void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
+
+private:
+ // The default of 8 bytes goes with the Vulkan and OpenGL standards, but Nvidia GPUs can
+ // easily use 4 bytes instead. Thus, certain drivers may shrink the handler size.
+ static constexpr u32 default_texture_handler_size = 8;
+ // Handler size in bytes; starts at the default and is replaced when a deduction succeeds.
+ u32 texture_handler_size{default_texture_handler_size};
+ // Set once the handler size has been deduced.
+ bool texture_handler_size_deduced{};
+};
+
+} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5b0eca9e2..149f79af3 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
+#include "video_core/guest_driver.h"
namespace Tegra {
class MemoryManager;
@@ -78,5 +79,12 @@ public:
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
const DiskResourceLoadCallback& callback = {}) {}
+
+ /// Returns a mutable reference to the guest driver profile stored in this rasterizer.
+ GuestDriverProfile& AccessGuestDriverProfile() {
+ return guest_driver_profile;
+ }
+
+private:
+ GuestDriverProfile guest_driver_profile{};
};
} // namespace VideoCore
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index d32e2d657..78d9d7037 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -10,6 +10,7 @@
#include "common/hash.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
namespace VideoCommon::Shader {
@@ -71,6 +72,13 @@ public:
return bindless_samplers;
}
+    /// Returns the guest driver profile exposed by the engine, or nullptr when no engine is set.
+    VideoCore::GuestDriverProfile* AccessGuestDriverProfile() {
+        return engine ? &engine->AccessGuestDriverProfile() : nullptr;
+    }
+
private:
const Tegra::Engines::ShaderType stage;
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 22c3e5120..aed35a9b8 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -315,4 +315,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
return pc + 1;
}
+/// Runs after decoding: feeds the offsets of the non-bindless samplers used by this shader to
+/// the guest driver profile so it can deduce the texture handler size, if still unknown.
+void ShaderIR::PostDecode() {
+    auto* const driver_profile = locker.AccessGuestDriverProfile();
+    if (driver_profile == nullptr) {
+        // The locker has no guest driver profile to update.
+        return;
+    }
+    if (driver_profile->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
+        return;
+    }
+
+    std::vector<u32> offsets;
+    for (const auto& sampler : used_samplers) {
+        if (!sampler.IsBindless()) {
+            offsets.emplace_back(sampler.GetOffset());
+        }
+    }
+    // At least two non-bindless samplers are needed before attempting a deduction.
+    if (offsets.size() > 1) {
+        driver_profile->DeduceTextureHandlerSize(std::move(offsets));
+    }
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 31eecb3f4..a186e22b2 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet
ConstBufferLocker& locker)
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
Decode();
+ PostDecode();
}
ShaderIR::~ShaderIR() = default;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ba1db4c11..92c24247d 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -191,6 +191,7 @@ private:
};
void Decode();
+ void PostDecode();
NodeBlock DecodeRange(u32 begin, u32 end);
void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);