summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt16
-rw-r--r--src/video_core/dirty_flags.cpp38
-rw-r--r--src/video_core/dirty_flags.h49
-rw-r--r--src/video_core/dma_pusher.cpp2
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h67
-rw-r--r--src/video_core/engines/kepler_compute.cpp12
-rw-r--r--src/video_core/engines/kepler_memory.cpp2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp189
-rw-r--r--src/video_core/engines/maxwell_3d.h217
-rw-r--r--src/video_core/engines/maxwell_dma.cpp2
-rw-r--r--src/video_core/engines/shader_bytecode.h11
-rw-r--r--src/video_core/gpu.h1
-rw-r--r--src/video_core/guest_driver.cpp7
-rw-r--r--src/video_core/guest_driver.h21
-rw-r--r--src/video_core/memory_manager.h2
-rw-r--r--src/video_core/morton.cpp2
-rw-r--r--src/video_core/rasterizer_interface.h4
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp1040
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h71
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp28
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h25
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp513
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h99
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp431
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h24
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp404
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h153
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp109
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h34
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp43
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h39
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp569
-rw-r--r--src/video_core/renderer_opengl/gl_state.h251
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp247
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h215
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp48
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h10
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h51
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp485
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h14
-rw-r--r--src/video_core/renderer_opengl/utils.cpp13
-rw-r--r--src/video_core/renderer_opengl/utils.h9
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp15
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h8
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp39
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h4
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp8
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp48
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h45
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp131
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h16
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp231
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h22
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp21
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h42
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp170
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h13
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp99
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h79
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp21
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp7
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp126
-rw-r--r--src/video_core/shader/const_buffer_locker.h103
-rw-r--r--src/video_core/shader/control_flow.cpp13
-rw-r--r--src/video_core/shader/control_flow.h3
-rw-r--r--src/video_core/shader/decode.cpp22
-rw-r--r--src/video_core/shader/decode/bfe.cpp69
-rw-r--r--src/video_core/shader/decode/texture.cpp5
-rw-r--r--src/video_core/shader/decode/xmad.cpp63
-rw-r--r--src/video_core/shader/node.h2
-rw-r--r--src/video_core/shader/node_helper.cpp2
-rw-r--r--src/video_core/shader/registry.cpp161
-rw-r--r--src/video_core/shader/registry.h137
-rw-r--r--src/video_core/shader/shader_ir.cpp5
-rw-r--r--src/video_core/shader/shader_ir.h6
-rw-r--r--src/video_core/shader/track.cpp38
-rw-r--r--src/video_core/shader/transform_feedback.cpp115
-rw-r--r--src/video_core/shader/transform_feedback.h23
-rw-r--r--src/video_core/surface.cpp2
-rw-r--r--src/video_core/surface.h142
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp3
-rw-r--r--src/video_core/texture_cache/surface_params.cpp6
-rw-r--r--src/video_core/texture_cache/texture_cache.h59
-rw-r--r--src/video_core/textures/astc.cpp1074
-rw-r--r--src/video_core/textures/texture.h26
93 files changed, 4443 insertions, 4371 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 4b0c6346f..91df062d7 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -2,6 +2,8 @@ add_library(video_core STATIC
buffer_cache/buffer_block.h
buffer_cache/buffer_cache.h
buffer_cache/map_interval.h
+ dirty_flags.cpp
+ dirty_flags.h
dma_pusher.cpp
dma_pusher.h
engines/const_buffer_engine_interface.h
@@ -63,14 +65,12 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_decompiler.h
renderer_opengl/gl_shader_disk_cache.cpp
renderer_opengl/gl_shader_disk_cache.h
- renderer_opengl/gl_shader_gen.cpp
- renderer_opengl/gl_shader_gen.h
renderer_opengl/gl_shader_manager.cpp
renderer_opengl/gl_shader_manager.h
renderer_opengl/gl_shader_util.cpp
renderer_opengl/gl_shader_util.h
- renderer_opengl/gl_state.cpp
- renderer_opengl/gl_state.h
+ renderer_opengl/gl_state_tracker.cpp
+ renderer_opengl/gl_state_tracker.h
renderer_opengl/gl_stream_buffer.cpp
renderer_opengl/gl_stream_buffer.h
renderer_opengl/gl_texture_cache.cpp
@@ -116,8 +116,6 @@ add_library(video_core STATIC
shader/ast.h
shader/compiler_settings.cpp
shader/compiler_settings.h
- shader/const_buffer_locker.cpp
- shader/const_buffer_locker.h
shader/control_flow.cpp
shader/control_flow.h
shader/decode.cpp
@@ -126,9 +124,13 @@ add_library(video_core STATIC
shader/node_helper.cpp
shader/node_helper.h
shader/node.h
+ shader/registry.cpp
+ shader/registry.h
shader/shader_ir.cpp
shader/shader_ir.h
shader/track.cpp
+ shader/transform_feedback.cpp
+ shader/transform_feedback.h
surface.cpp
surface.h
texture_cache/format_lookup_table.cpp
@@ -198,6 +200,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_staging_buffer_pool.cpp
renderer_vulkan/vk_staging_buffer_pool.h
+ renderer_vulkan/vk_state_tracker.cpp
+ renderer_vulkan/vk_state_tracker.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
new file mode 100644
index 000000000..e16075993
--- /dev/null
+++ b/src/video_core/dirty_flags.cpp
@@ -0,0 +1,38 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <cstddef>
+
+#include "common/common_types.h"
+#include "video_core/dirty_flags.h"
+
+#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
+#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32))
+
+namespace VideoCommon::Dirty {
+
+using Tegra::Engines::Maxwell3D;
+
+void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
+ static constexpr std::size_t num_per_rt = NUM(rt[0]);
+ static constexpr std::size_t begin = OFF(rt);
+ static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
+ for (std::size_t rt = 0; rt < Maxwell3D::Regs::NumRenderTargets; ++rt) {
+ FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
+ }
+ FillBlock(tables[1], begin, num, RenderTargets);
+
+ static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
+ for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {
+ const u8 flag = zeta_flags[i];
+ auto& table = tables[i];
+ table[OFF(zeta_enable)] = flag;
+ table[OFF(zeta_width)] = flag;
+ table[OFF(zeta_height)] = flag;
+ FillBlock(table, OFF(zeta), NUM(zeta), flag);
+ }
+}
+
+} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
new file mode 100644
index 000000000..3f6c1d83a
--- /dev/null
+++ b/src/video_core/dirty_flags.h
@@ -0,0 +1,49 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+
+namespace VideoCommon::Dirty {
+
+enum : u8 {
+ NullEntry = 0,
+
+ RenderTargets,
+ ColorBuffer0,
+ ColorBuffer1,
+ ColorBuffer2,
+ ColorBuffer3,
+ ColorBuffer4,
+ ColorBuffer5,
+ ColorBuffer6,
+ ColorBuffer7,
+ ZetaBuffer,
+
+ LastCommonEntry,
+};
+
+template <typename Integer>
+void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Table& table, std::size_t begin,
+ std::size_t num, Integer dirty_index) {
+ const auto it = std::begin(table) + begin;
+ std::fill(it, it + num, static_cast<u8>(dirty_index));
+}
+
+template <typename Integer1, typename Integer2>
+void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_t begin,
+ std::size_t num, Integer1 index_a, Integer2 index_b) {
+ FillBlock(tables[0], begin, num, index_a);
+ FillBlock(tables[1], begin, num, index_b);
+}
+
+void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
+
+} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 0094fd715..713c14182 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls);
// On entering GPU code, assume all memory may be touched by the ARM core.
- gpu.Maxwell3D().dirty.OnMemoryWrite();
+ gpu.Maxwell3D().OnMemoryWrite();
dma_pushbuffer_subindex = 0;
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index d56a47710..724ee0fd6 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -16,11 +16,12 @@ namespace Tegra::Engines {
struct SamplerDescriptor {
union {
- BitField<0, 20, Tegra::Shader::TextureType> texture_type;
- BitField<20, 1, u32> is_array;
- BitField<21, 1, u32> is_buffer;
- BitField<22, 1, u32> is_shadow;
- u32 raw{};
+ u32 raw = 0;
+ BitField<0, 2, Tegra::Shader::TextureType> texture_type;
+ BitField<2, 3, Tegra::Texture::ComponentType> component_type;
+ BitField<5, 1, u32> is_array;
+ BitField<6, 1, u32> is_buffer;
+ BitField<7, 1, u32> is_shadow;
};
bool operator==(const SamplerDescriptor& rhs) const noexcept {
@@ -31,68 +32,48 @@ struct SamplerDescriptor {
return !operator==(rhs);
}
- static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) {
+ static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
+ using Tegra::Shader::TextureType;
SamplerDescriptor result;
- switch (tic_texture_type) {
+
+ // This is going to be used to determine the shading language type.
+ // Because of that we don't care about all component types on color textures.
+ result.component_type.Assign(tic.r_type.Value());
+
+ switch (tic.texture_type.Value()) {
case Tegra::Texture::TextureType::Texture1D:
- result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
- result.is_array.Assign(0);
- result.is_buffer.Assign(0);
- result.is_shadow.Assign(0);
+ result.texture_type.Assign(TextureType::Texture1D);
return result;
case Tegra::Texture::TextureType::Texture2D:
- result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
- result.is_array.Assign(0);
- result.is_buffer.Assign(0);
- result.is_shadow.Assign(0);
+ result.texture_type.Assign(TextureType::Texture2D);
return result;
case Tegra::Texture::TextureType::Texture3D:
- result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D);
- result.is_array.Assign(0);
- result.is_buffer.Assign(0);
- result.is_shadow.Assign(0);
+ result.texture_type.Assign(TextureType::Texture3D);
return result;
case Tegra::Texture::TextureType::TextureCubemap:
- result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
- result.is_array.Assign(0);
- result.is_buffer.Assign(0);
- result.is_shadow.Assign(0);
+ result.texture_type.Assign(TextureType::TextureCube);
return result;
case Tegra::Texture::TextureType::Texture1DArray:
- result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
+ result.texture_type.Assign(TextureType::Texture1D);
result.is_array.Assign(1);
- result.is_buffer.Assign(0);
- result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture2DArray:
- result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
+ result.texture_type.Assign(TextureType::Texture2D);
result.is_array.Assign(1);
- result.is_buffer.Assign(0);
- result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture1DBuffer:
- result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
- result.is_array.Assign(0);
+ result.texture_type.Assign(TextureType::Texture1D);
result.is_buffer.Assign(1);
- result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture2DNoMipmap:
- result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
- result.is_array.Assign(0);
- result.is_buffer.Assign(0);
- result.is_shadow.Assign(0);
+ result.texture_type.Assign(TextureType::Texture2D);
return result;
case Tegra::Texture::TextureType::TextureCubeArray:
- result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
+ result.texture_type.Assign(TextureType::TextureCube);
result.is_array.Assign(1);
- result.is_buffer.Assign(0);
- result.is_shadow.Assign(0);
return result;
default:
- result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
- result.is_array.Assign(0);
- result.is_buffer.Assign(0);
- result.is_shadow.Assign(0);
+ result.texture_type.Assign(TextureType::Texture2D);
return result;
}
}
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 4b824aa4e..368c75a66 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -39,7 +39,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
- system.GPU().Maxwell3D().dirty.OnMemoryWrite();
+ system.GPU().Maxwell3D().OnMemoryWrite();
}
break;
}
@@ -89,7 +89,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
- SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
+ SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
return result;
}
@@ -119,14 +119,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
- const auto r_type{tic_entry.r_type.Value()};
- const auto g_type{tic_entry.g_type.Value()};
- const auto b_type{tic_entry.b_type.Value()};
- const auto a_type{tic_entry.a_type.Value()};
-
- // TODO(Subv): Different data types for separate components are not supported
- DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
-
return tic_entry;
}
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index fa4a7c5c1..597872e43 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
- system.GPU().Maxwell3D().dirty.OnMemoryWrite();
+ system.GPU().Maxwell3D().OnMemoryWrite();
}
break;
}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index b28de1092..ce536e29b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -26,7 +26,8 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
- InitDirtySettings();
+ dirty.flags.flip();
+
InitializeRegisterDefaults();
}
@@ -75,8 +76,8 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.stencil_back_mask = 0xFFFFFFFF;
regs.depth_test_func = Regs::ComparisonOp::Always;
- regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
- regs.cull.cull_face = Regs::Cull::CullFace::Back;
+ regs.front_face = Regs::FrontFace::CounterClockWise;
+ regs.cull_face = Regs::CullFace::Back;
// TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
// register carrying a default value. Assume it's OpenGL's default (1).
@@ -95,7 +96,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.rasterize_enable = 1;
regs.rt_separate_frag_data = 1;
regs.framebuffer_srgb = 1;
- regs.cull.front_face = Maxwell3D::Regs::Cull::FrontFace::ClockWise;
+ regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
@@ -103,164 +104,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
}
-#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name))
-
-void Maxwell3D::InitDirtySettings() {
- const auto set_block = [this](std::size_t start, std::size_t range, u8 position) {
- const auto start_itr = dirty_pointers.begin() + start;
- const auto end_itr = start_itr + range;
- std::fill(start_itr, end_itr, position);
- };
- dirty.regs.fill(true);
-
- // Init Render Targets
- constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
- constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
- constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
- u8 rt_dirty_reg = DIRTY_REGS_POS(render_target);
- for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
- set_block(rt_reg, registers_per_rt, rt_dirty_reg);
- ++rt_dirty_reg;
- }
- constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
- dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
- dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
- dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
- constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
- constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
- set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
-
- // Init Vertex Arrays
- constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
- constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
- constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
- u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
- u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
- for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
- vertex_reg += vertex_array_size) {
- set_block(vertex_reg, 3, va_dirty_reg);
- // The divisor concerns vertex array instances
- dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg;
- ++va_dirty_reg;
- ++vi_dirty_reg;
- }
- constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
- constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
- constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
- va_dirty_reg = DIRTY_REGS_POS(vertex_array);
- for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
- vertex_reg += vertex_limit_size) {
- set_block(vertex_reg, vertex_limit_size, va_dirty_reg);
- va_dirty_reg++;
- }
- constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
- constexpr u32 vertex_instance_size =
- sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
- constexpr u32 vertex_instance_end =
- vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
- vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
- for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
- vertex_reg += vertex_instance_size) {
- set_block(vertex_reg, vertex_instance_size, vi_dirty_reg);
- vi_dirty_reg++;
- }
- set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
- DIRTY_REGS_POS(vertex_attrib_format));
-
- // Init Shaders
- constexpr u32 shader_registers_count =
- sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
- set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
- DIRTY_REGS_POS(shaders));
-
- // State
-
- // Viewport
- constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
- constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
- constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
- set_block(viewport_start, viewport_size, viewport_dirty_reg);
- constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
- constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
- set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
-
- // Viewport transformation
- constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
- constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
- set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
-
- // Cullmode
- constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
- constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
- set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
-
- // Screen y control
- dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
-
- // Primitive Restart
- constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
- constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
- set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
-
- // Depth Test
- constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
- dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
-
- // Stencil Test
- constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
-
- // Color Mask
- constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
- dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
- set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
- color_mask_dirty_reg);
- // Blend State
- constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
- set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
- blend_state_dirty_reg);
- dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
- set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
- set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
- blend_state_dirty_reg);
-
- // Scissor State
- constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
- set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
- scissor_test_dirty_reg);
-
- // Polygon Offset
- constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
- dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
-
- // Depth bounds
- constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
- dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg;
- dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg;
-}
-
void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
// Reset the current macro.
executing_macro = 0;
@@ -319,19 +162,9 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
if (regs.reg_array[method] != method_call.argument) {
regs.reg_array[method] = method_call.argument;
- const std::size_t dirty_reg = dirty_pointers[method];
- if (dirty_reg) {
- dirty.regs[dirty_reg] = true;
- if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
- dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
- dirty.vertex_array_buffers = true;
- } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
- dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
- dirty.vertex_instances = true;
- } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
- dirty_reg < DIRTY_REGS_POS(render_settings)) {
- dirty.render_settings = true;
- }
+
+ for (const auto& table : dirty.tables) {
+ dirty.flags[table[method]] = true;
}
}
@@ -419,7 +252,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
- dirty.OnMemoryWrite();
+ OnMemoryWrite();
}
break;
}
@@ -727,7 +560,7 @@ void Maxwell3D::FinishCBData() {
const u32 id = cb_data_state.id;
memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
- dirty.OnMemoryWrite();
+ OnMemoryWrite();
cb_data_state.id = null_cb_data;
cb_data_state.current = null_cb_data;
@@ -805,7 +638,7 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
- SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
+ SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
return result;
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 6ea7cc6a5..8a9e9992e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
#include <array>
#include <bitset>
+#include <limits>
#include <optional>
#include <type_traits>
#include <unordered_map>
@@ -66,6 +67,7 @@ public:
static constexpr std::size_t NumVaryings = 31;
static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
static constexpr std::size_t NumClipDistances = 8;
+ static constexpr std::size_t NumTransformFeedbackBuffers = 4;
static constexpr std::size_t MaxShaderProgram = 6;
static constexpr std::size_t MaxShaderStage = 5;
// Maximum number of const buffers per shader stage.
@@ -431,21 +433,15 @@ public:
GeneratedPrimitives = 0x1F,
};
- struct Cull {
- enum class FrontFace : u32 {
- ClockWise = 0x0900,
- CounterClockWise = 0x0901,
- };
-
- enum class CullFace : u32 {
- Front = 0x0404,
- Back = 0x0405,
- FrontAndBack = 0x0408,
- };
+ enum class FrontFace : u32 {
+ ClockWise = 0x0900,
+ CounterClockWise = 0x0901,
+ };
- u32 enabled;
- FrontFace front_face;
- CullFace cull_face;
+ enum class CullFace : u32 {
+ Front = 0x0404,
+ Back = 0x0405,
+ FrontAndBack = 0x0408,
};
struct Blend {
@@ -529,6 +525,12 @@ public:
FractionalEven = 2,
};
+ enum class PolygonMode : u32 {
+ Point = 0x1b00,
+ Line = 0x1b01,
+ Fill = 0x1b02,
+ };
+
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
@@ -574,7 +576,7 @@ public:
f32 translate_z;
INSERT_UNION_PADDING_WORDS(2);
- Common::Rectangle<s32> GetRect() const {
+ Common::Rectangle<f32> GetRect() const {
return {
GetX(), // left
GetY() + GetHeight(), // top
@@ -583,20 +585,20 @@ public:
};
};
- s32 GetX() const {
- return static_cast<s32>(std::max(0.0f, translate_x - std::fabs(scale_x)));
+ f32 GetX() const {
+ return std::max(0.0f, translate_x - std::fabs(scale_x));
}
- s32 GetY() const {
- return static_cast<s32>(std::max(0.0f, translate_y - std::fabs(scale_y)));
+ f32 GetY() const {
+ return std::max(0.0f, translate_y - std::fabs(scale_y));
}
- s32 GetWidth() const {
- return static_cast<s32>(translate_x + std::fabs(scale_x)) - GetX();
+ f32 GetWidth() const {
+ return translate_x + std::fabs(scale_x) - GetX();
}
- s32 GetHeight() const {
- return static_cast<s32>(translate_y + std::fabs(scale_y)) - GetY();
+ f32 GetHeight() const {
+ return translate_y + std::fabs(scale_y) - GetY();
}
};
@@ -626,6 +628,29 @@ public:
float depth_range_far;
};
+ struct TransformFeedbackBinding {
+ u32 buffer_enable;
+ u32 address_high;
+ u32 address_low;
+ s32 buffer_size;
+ s32 buffer_offset;
+ INSERT_UNION_PADDING_WORDS(3);
+
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ };
+ static_assert(sizeof(TransformFeedbackBinding) == 32);
+
+ struct TransformFeedbackLayout {
+ u32 stream;
+ u32 varying_count;
+ u32 stride;
+ INSERT_UNION_PADDING_WORDS(1);
+ };
+ static_assert(sizeof(TransformFeedbackLayout) == 16);
+
bool IsShaderConfigEnabled(std::size_t index) const {
// The VertexB is always enabled.
if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -634,6 +659,10 @@ public:
return shader_config[index].enable != 0;
}
+ bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
+ return IsShaderConfigEnabled(static_cast<std::size_t>(type));
+ }
+
union {
struct {
INSERT_UNION_PADDING_WORDS(0x45);
@@ -682,7 +711,13 @@ public:
u32 rasterize_enable;
- INSERT_UNION_PADDING_WORDS(0xF1);
+ std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
+
+ INSERT_UNION_PADDING_WORDS(0xC0);
+
+ std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
+
+ INSERT_UNION_PADDING_WORDS(0x1);
u32 tfb_enabled;
@@ -710,7 +745,12 @@ public:
s32 clear_stencil;
- INSERT_UNION_PADDING_WORDS(0x7);
+ INSERT_UNION_PADDING_WORDS(0x2);
+
+ PolygonMode polygon_mode_front;
+ PolygonMode polygon_mode_back;
+
+ INSERT_UNION_PADDING_WORDS(0x3);
u32 polygon_offset_point_enable;
u32 polygon_offset_line_enable;
@@ -769,7 +809,11 @@ public:
BitField<12, 4, u32> viewport;
} clear_flags;
- INSERT_UNION_PADDING_WORDS(0x19);
+ INSERT_UNION_PADDING_WORDS(0x10);
+
+ u32 fill_rectangle;
+
+ INSERT_UNION_PADDING_WORDS(0x8);
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
@@ -872,16 +916,7 @@ public:
INSERT_UNION_PADDING_WORDS(0x35);
- union {
- BitField<0, 1, u32> c0;
- BitField<1, 1, u32> c1;
- BitField<2, 1, u32> c2;
- BitField<3, 1, u32> c3;
- BitField<4, 1, u32> c4;
- BitField<5, 1, u32> c5;
- BitField<6, 1, u32> c6;
- BitField<7, 1, u32> c7;
- } clip_distance_enabled;
+ u32 clip_distance_enabled;
u32 samplecnt_enable;
@@ -1060,7 +1095,9 @@ public:
INSERT_UNION_PADDING_WORDS(1);
- Cull cull;
+ u32 cull_test_enabled;
+ FrontFace front_face;
+ CullFace cull_face;
u32 pixel_center_integer;
@@ -1199,7 +1236,11 @@ public:
u32 tex_cb_index;
- INSERT_UNION_PADDING_WORDS(0x395);
+ INSERT_UNION_PADDING_WORDS(0x7D);
+
+ std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
+
+ INSERT_UNION_PADDING_WORDS(0x298);
struct {
/// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1238,79 +1279,6 @@ public:
State state{};
- struct DirtyRegs {
- static constexpr std::size_t NUM_REGS = 256;
- static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max());
-
- union {
- struct {
- bool null_dirty;
-
- // Vertex Attributes
- bool vertex_attrib_format;
-
- // Vertex Arrays
- std::array<bool, 32> vertex_array;
-
- bool vertex_array_buffers;
-
- // Vertex Instances
- std::array<bool, 32> vertex_instance;
-
- bool vertex_instances;
-
- // Render Targets
- std::array<bool, 8> render_target;
- bool depth_buffer;
-
- bool render_settings;
-
- // Shaders
- bool shaders;
-
- // Rasterizer State
- bool viewport;
- bool clip_coefficient;
- bool cull_mode;
- bool primitive_restart;
- bool depth_test;
- bool stencil_test;
- bool blend_state;
- bool scissor_test;
- bool transform_feedback;
- bool color_mask;
- bool polygon_offset;
- bool depth_bounds_values;
-
- // Complementary
- bool viewport_transform;
- bool screen_y_control;
-
- bool memory_general;
- };
- std::array<bool, NUM_REGS> regs;
- };
-
- void ResetVertexArrays() {
- vertex_array.fill(true);
- vertex_array_buffers = true;
- }
-
- void ResetRenderTargets() {
- depth_buffer = true;
- render_target.fill(true);
- render_settings = true;
- }
-
- void OnMemoryWrite() {
- shaders = true;
- memory_general = true;
- ResetRenderTargets();
- ResetVertexArrays();
- }
-
- } dirty{};
-
/// Reads a register value located at the input method address
u32 GetRegisterValue(u32 method) const;
@@ -1356,6 +1324,11 @@ public:
return execute_on;
}
+ /// Notify a memory write has happened.
+ void OnMemoryWrite() {
+ dirty.flags |= dirty.on_write_stores;
+ }
+
enum class MMEDrawMode : u32 {
Undefined,
Array,
@@ -1371,6 +1344,16 @@ public:
u32 gl_end_count{};
} mme_draw;
+ struct DirtyState {
+ using Flags = std::bitset<std::numeric_limits<u8>::max()>;
+ using Table = std::array<u8, Regs::NUM_REGS>;
+ using Tables = std::array<Table, 2>;
+
+ Flags flags;
+ Flags on_write_stores;
+ Tables tables{};
+ } dirty;
+
private:
void InitializeRegisterDefaults();
@@ -1417,8 +1400,6 @@ private:
/// Retrieves information about a specific TSC entry from the TSC buffer.
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
- void InitDirtySettings();
-
/**
* Call a macro on this engine.
* @param method Method to call
@@ -1485,6 +1466,8 @@ ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
ASSERT_REG_POSITION(rasterize_enable, 0xDF);
+ASSERT_REG_POSITION(tfb_bindings, 0xE0);
+ASSERT_REG_POSITION(tfb_layouts, 0x1C0);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform, 0x280);
@@ -1494,6 +1477,8 @@ ASSERT_REG_POSITION(depth_mode, 0x35F);
ASSERT_REG_POSITION(clear_color[0], 0x360);
ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(clear_stencil, 0x368);
+ASSERT_REG_POSITION(polygon_mode_front, 0x36B);
+ASSERT_REG_POSITION(polygon_mode_back, 0x36C);
ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
@@ -1507,6 +1492,7 @@ ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(clear_flags, 0x43E);
+ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
@@ -1561,7 +1547,9 @@ ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
-ASSERT_REG_POSITION(cull, 0x646);
+ASSERT_REG_POSITION(cull_test_enabled, 0x646);
+ASSERT_REG_POSITION(front_face, 0x647);
+ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
@@ -1578,6 +1566,7 @@ ASSERT_REG_POSITION(firmware, 0x8C0);
ASSERT_REG_POSITION(const_buffer, 0x8E0);
ASSERT_REG_POSITION(cb_bind[0], 0x904);
ASSERT_REG_POSITION(tex_cb_index, 0x982);
+ASSERT_REG_POSITION(tfb_varying_locs, 0xA00);
ASSERT_REG_POSITION(ssbo_info, 0xD18);
ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ad8453c5f..c2610f992 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -57,7 +57,7 @@ void MaxwellDMA::HandleCopy() {
}
// All copies here update the main memory, so mark all rasterizer states as invalid.
- system.GPU().Maxwell3D().dirty.OnMemoryWrite();
+ system.GPU().Maxwell3D().OnMemoryWrite();
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c9bc83cd7..eba42deb4 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -911,14 +911,9 @@ union Instruction {
} fadd32i;
union {
- BitField<20, 8, u64> shift_position;
- BitField<28, 8, u64> shift_length;
- BitField<48, 1, u64> negate_b;
- BitField<49, 1, u64> negate_a;
-
- u64 GetLeftShiftValue() const {
- return 32 - (shift_position + shift_length);
- }
+ BitField<40, 1, u64> brev;
+ BitField<47, 1, u64> rd_cc;
+ BitField<48, 1, u64> is_signed;
} bfe;
union {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ba8c9d665..64acb17df 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 {
RGBA32_FLOAT = 0xC0,
RGBA32_UINT = 0xC2,
RGBA16_UNORM = 0xC6,
+ RGBA16_SNORM = 0xC7,
RGBA16_UINT = 0xC9,
RGBA16_FLOAT = 0xCA,
RG32_FLOAT = 0xCB,
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
index 6adef459e..f058f2744 100644
--- a/src/video_core/guest_driver.cpp
+++ b/src/video_core/guest_driver.cpp
@@ -4,13 +4,15 @@
#include <algorithm>
#include <limits>
+#include <vector>
+#include "common/common_types.h"
#include "video_core/guest_driver.h"
namespace VideoCore {
-void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
- if (texture_handler_size_deduced) {
+void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
+ if (texture_handler_size) {
return;
}
const std::size_t size = bound_offsets.size();
@@ -29,7 +31,6 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse
if (min_val > 2) {
return;
}
- texture_handler_size_deduced = true;
texture_handler_size = min_texture_handler_size * min_val;
}
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index fc1917347..99450777e 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -4,6 +4,7 @@
#pragma once
+#include <optional>
#include <vector>
#include "common/common_types.h"
@@ -17,25 +18,29 @@ namespace VideoCore {
*/
class GuestDriverProfile {
public:
- void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
+ explicit GuestDriverProfile() = default;
+ explicit GuestDriverProfile(std::optional<u32> texture_handler_size)
+ : texture_handler_size{texture_handler_size} {}
+
+ void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
u32 GetTextureHandlerSize() const {
- return texture_handler_size;
+ return texture_handler_size.value_or(default_texture_handler_size);
}
- bool TextureHandlerSizeKnown() const {
- return texture_handler_size_deduced;
+ bool IsTextureHandlerSizeKnown() const {
+ return texture_handler_size.has_value();
}
private:
// Minimum size of texture handler any driver can use.
static constexpr u32 min_texture_handler_size = 4;
- // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
- // use 4 bytes instead. Thus, certain drivers may squish the size.
+
+ // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
+ // Thus, certain drivers may squish the size.
static constexpr u32 default_texture_handler_size = 8;
- u32 texture_handler_size = default_texture_handler_size;
- bool texture_handler_size_deduced = false;
+ std::optional<u32> texture_handler_size = default_texture_handler_size;
};
} // namespace VideoCore
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index aea010087..073bdb491 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -174,7 +174,7 @@ private:
/// End of address space, based on address space in bits.
static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
- Common::PageTable page_table{page_bits};
+ Common::BackingPageTable page_table{page_bits};
VMAMap vma_map;
VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index f2c83266e..6d522c318 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::R8UI>,
MortonCopy<true, PixelFormat::RGBA16F>,
MortonCopy<true, PixelFormat::RGBA16U>,
+ MortonCopy<true, PixelFormat::RGBA16S>,
MortonCopy<true, PixelFormat::RGBA16UI>,
MortonCopy<true, PixelFormat::R11FG11FB10F>,
MortonCopy<true, PixelFormat::RGBA32UI>,
@@ -131,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::R8U>,
MortonCopy<false, PixelFormat::R8UI>,
MortonCopy<false, PixelFormat::RGBA16F>,
+ MortonCopy<false, PixelFormat::RGBA16S>,
MortonCopy<false, PixelFormat::RGBA16U>,
MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f18eaf4bc..1a68e3caa 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -25,7 +25,6 @@ constexpr std::size_t NumQueryTypes = 1;
enum class LoadCallbackStage {
Prepare,
- Decompile,
Build,
Complete,
};
@@ -89,6 +88,9 @@ public:
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
const DiskResourceLoadCallback& callback = {}) {}
+ /// Initializes renderer dirty flags
+ virtual void SetupDirtyFlags() {}
+
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
GuestDriverProfile& AccessGuestDriverProfile() {
return guest_driver_profile;
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
index 874ed3c6e..b8a512cb6 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
@@ -11,7 +11,6 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
-#include "video_core/renderer_opengl/gl_state.h"
namespace OpenGL {
@@ -36,8 +35,7 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK
framebuffer.Create();
// TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
- local_state.draw.draw_framebuffer = framebuffer.handle;
- local_state.ApplyFramebufferState();
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
if (key.zeta) {
const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
index 02ec80ae9..8f698fee0 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
@@ -13,7 +13,6 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL {
@@ -63,7 +62,6 @@ public:
private:
OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
- OpenGLState local_state;
std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e1965fb21..063f41327 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -28,7 +28,6 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -36,6 +35,7 @@ namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using Tegra::Engines::ShaderType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
@@ -54,10 +54,11 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
namespace {
+constexpr std::size_t NumSupportedVertexAttributes = 16;
+
template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
- Tegra::Engines::ShaderType shader_type,
- std::size_t index = 0) {
+ ShaderType shader_type, std::size_t index = 0) {
if (entry.IsBindless()) {
const Tegra::Texture::TextureHandle tex_handle =
engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
@@ -74,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
}
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
- const GLShader::ConstBufferEntry& entry) {
+ const ConstBufferEntry& entry) {
if (!entry.IsIndirect()) {
return entry.GetSize();
}
@@ -88,18 +89,19 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
return buffer.size;
}
+void oglEnable(GLenum cap, bool state) {
+ (state ? glEnable : glDisable)(cap);
+}
+
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- ScreenInfo& info)
- : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
+ ScreenInfo& info, GLShader::ProgramManager& program_manager,
+ StateTracker& state_tracker)
+ : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
- screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
- shader_program_manager = std::make_unique<GLShader::ProgramManager>();
- state.draw.shader_program = 0;
- state.Apply();
-
- LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
+ screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
+ buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
CheckExtensions();
}
@@ -113,93 +115,72 @@ void RasterizerOpenGL::CheckExtensions() {
}
}
-GLuint RasterizerOpenGL::SetupVertexFormat() {
+void RasterizerOpenGL::SetupVertexFormat() {
auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
-
- if (!gpu.dirty.vertex_attrib_format) {
- return state.draw.vertex_array;
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::VertexFormats]) {
+ return;
}
- gpu.dirty.vertex_attrib_format = false;
+ flags[Dirty::VertexFormats] = false;
MICROPROFILE_SCOPE(OpenGL_VAO);
- auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format);
- auto& vao_entry = iter->second;
-
- if (is_cache_miss) {
- vao_entry.Create();
- const GLuint vao = vao_entry.handle;
-
- // Eventhough we are using DSA to create this vertex array, there is a bug on Intel's blob
- // that fails to properly create the vertex array if it's not bound even after creating it
- // with glCreateVertexArrays
- state.draw.vertex_array = vao;
- state.ApplyVertexArrayState();
-
- // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
- // Enables the first 16 vertex attributes always, as we don't know which ones are actually
- // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
- // for now to avoid OpenGL errors.
- // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
- // assume every shader uses them all.
- for (u32 index = 0; index < 16; ++index) {
- const auto& attrib = regs.vertex_attrib_format[index];
-
- // Ignore invalid attributes.
- if (!attrib.IsValid())
- continue;
-
- const auto& buffer = regs.vertex_array[attrib.buffer];
- LOG_TRACE(Render_OpenGL,
- "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
- index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
- attrib.offset.Value(), attrib.IsNormalized());
-
- ASSERT(buffer.IsEnabled());
-
- glEnableVertexArrayAttrib(vao, index);
- if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
- attrib.type ==
- Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
- glVertexArrayAttribIFormat(vao, index, attrib.ComponentCount(),
- MaxwellToGL::VertexType(attrib), attrib.offset);
- } else {
- glVertexArrayAttribFormat(
- vao, index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
- attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
- }
- glVertexArrayAttribBinding(vao, index, attrib.buffer);
+ // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
+ // the first 16 vertex attributes always, as we don't know which ones are actually used until
+ // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
+ // avoid OpenGL errors.
+ // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
+ // assume every shader uses them all.
+ for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
+ if (!flags[Dirty::VertexFormat0 + index]) {
+ continue;
}
- }
+ flags[Dirty::VertexFormat0 + index] = false;
+
+ const auto attrib = gpu.regs.vertex_attrib_format[index];
+ const auto gl_index = static_cast<GLuint>(index);
- // Rebinding the VAO invalidates the vertex buffer bindings.
- gpu.dirty.ResetVertexArrays();
+ // Ignore invalid attributes.
+ if (!attrib.IsValid()) {
+ glDisableVertexAttribArray(gl_index);
+ continue;
+ }
+ glEnableVertexAttribArray(gl_index);
- state.draw.vertex_array = vao_entry.handle;
- return vao_entry.handle;
+ if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
+ attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
+ glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
+ MaxwellToGL::VertexType(attrib), attrib.offset);
+ } else {
+ glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
+ attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
+ }
+ glVertexAttribBinding(gl_index, attrib.buffer);
+ }
}
-void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
+void RasterizerOpenGL::SetupVertexBuffer() {
auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.dirty.vertex_array_buffers)
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::VertexBuffers]) {
return;
- gpu.dirty.vertex_array_buffers = false;
-
- const auto& regs = gpu.regs;
+ }
+ flags[Dirty::VertexBuffers] = false;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer
- for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (!gpu.dirty.vertex_array[index])
+ const auto& regs = gpu.regs;
+ for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
- gpu.dirty.vertex_array[index] = false;
- gpu.dirty.vertex_instance[index] = false;
+ }
+ flags[Dirty::VertexBuffer0 + index] = false;
const auto& vertex_array = regs.vertex_array[index];
- if (!vertex_array.IsEnabled())
+ if (!vertex_array.IsEnabled()) {
continue;
+ }
const GPUVAddr start = vertex_array.StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
@@ -209,42 +190,30 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
// Bind the vertex array to the buffer at the current offset.
- vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset,
- vertex_array.stride);
-
- if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
- // Enable vertex buffer instancing with the specified divisor.
- glVertexArrayBindingDivisor(vao, index, vertex_array.divisor);
- } else {
- // Disable the vertex buffer instancing.
- glVertexArrayBindingDivisor(vao, index, 0);
- }
+ vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer,
+ vertex_buffer_offset, vertex_array.stride);
}
}
-void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
+void RasterizerOpenGL::SetupVertexInstances() {
auto& gpu = system.GPU().Maxwell3D();
-
- if (!gpu.dirty.vertex_instances)
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::VertexInstances]) {
return;
- gpu.dirty.vertex_instances = false;
+ }
+ flags[Dirty::VertexInstances] = false;
const auto& regs = gpu.regs;
- // Upload all guest vertex arrays sequentially to our buffer
- for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (!gpu.dirty.vertex_instance[index])
+ for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
+ if (!flags[Dirty::VertexInstance0 + index]) {
continue;
-
- gpu.dirty.vertex_instance[index] = false;
-
- if (regs.instanced_arrays.IsInstancingEnabled(index) &&
- regs.vertex_array[index].divisor != 0) {
- // Enable vertex buffer instancing with the specified divisor.
- glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
- } else {
- // Disable the vertex buffer instancing.
- glVertexArrayBindingDivisor(vao, index, 0);
}
+ flags[Dirty::VertexInstance0 + index] = false;
+
+ const auto gl_index = static_cast<GLuint>(index);
+ const bool instancing_enabled = regs.instanced_arrays.IsInstancingEnabled(gl_index);
+ const GLuint divisor = instancing_enabled ? regs.vertex_array[index].divisor : 0;
+ glVertexBindingDivisor(gl_index, divisor);
}
}
@@ -260,8 +229,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = system.GPU().Maxwell3D();
-
- std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+ u32 clip_distances = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
@@ -271,10 +239,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
if (!gpu.regs.IsShaderConfigEnabled(index)) {
switch (program) {
case Maxwell::ShaderProgram::Geometry:
- shader_program_manager->UseTrivialGeometryShader();
+ program_manager.UseGeometryShader(0);
break;
case Maxwell::ShaderProgram::Fragment:
- shader_program_manager->UseTrivialFragmentShader();
+ program_manager.UseFragmentShader(0);
break;
default:
break;
@@ -299,19 +267,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
SetupDrawTextures(stage, shader);
SetupDrawImages(stage, shader);
- const ProgramVariant variant(primitive_mode);
- const auto program_handle = shader->GetHandle(variant);
-
+ const GLuint program_handle = shader->GetHandle();
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
- shader_program_manager->UseProgrammableVertexShader(program_handle);
+ program_manager.UseVertexShader(program_handle);
break;
case Maxwell::ShaderProgram::Geometry:
- shader_program_manager->UseProgrammableGeometryShader(program_handle);
+ program_manager.UseGeometryShader(program_handle);
break;
case Maxwell::ShaderProgram::Fragment:
- shader_program_manager->UseProgrammableFragmentShader(program_handle);
+ program_manager.UseFragmentShader(program_handle);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
@@ -322,9 +288,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
// clip distances only when it's written by a shader stage.
- for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) {
- clip_distances[i] = clip_distances[i] || shader->GetShaderEntries().clip_distances[i];
- }
+ clip_distances |= shader->GetEntries().clip_distances;
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
@@ -334,8 +298,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
SyncClipEnabled(clip_distances);
-
- gpu.dirty.shaders = false;
+ gpu.dirty.flags[Dirty::Shaders] = false;
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -368,20 +331,23 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
shader_cache.LoadDiskCache(stop_loading, callback);
}
+void RasterizerOpenGL::SetupDirtyFlags() {
+ state_tracker.Initialize();
+}
+
void RasterizerOpenGL::ConfigureFramebuffers() {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.dirty.render_settings) {
+ if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
return;
}
- gpu.dirty.render_settings = false;
+ gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
texture_cache.GuardRenderTargets(true);
View depth_surface = texture_cache.GetDepthBufferSurface(true);
const auto& regs = gpu.regs;
- state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// Bind the framebuffer surfaces
@@ -409,14 +375,11 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
texture_cache.GuardRenderTargets(false);
- state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
- SyncViewport(state);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}
-void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
- bool using_depth_fb, bool using_stencil_fb) {
- using VideoCore::Surface::SurfaceType;
-
+void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb,
+ bool using_stencil_fb) {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
@@ -435,80 +398,44 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, boo
key.colors[0] = color_surface;
key.zeta = depth_surface;
- current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
- current_state.ApplyFramebufferState();
+ state_tracker.NotifyFramebuffer();
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}
void RasterizerOpenGL::Clear() {
- const auto& maxwell3d = system.GPU().Maxwell3D();
-
- if (!maxwell3d.ShouldExecute()) {
+ const auto& gpu = system.GPU().Maxwell3D();
+ if (!gpu.ShouldExecute()) {
return;
}
- const auto& regs = maxwell3d.regs;
+ const auto& regs = gpu.regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
- OpenGLState prev_state{OpenGLState::GetCurState()};
- SCOPE_EXIT({
- prev_state.AllDirty();
- prev_state.Apply();
- });
-
- OpenGLState clear_state{OpenGLState::GetCurState()};
- clear_state.SetDefaultViewports();
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
use_color = true;
}
if (use_color) {
- clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
- clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
- clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
- clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
+ state_tracker.NotifyColorMask0();
+ glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
+ regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
+
+ // TODO(Rodrigo): Determine if clamping is used on clears
+ SyncFragmentColorClampState();
+ SyncFramebufferSRGB();
}
if (regs.clear_buffers.Z) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
use_depth = true;
- // Always enable the depth write when clearing the depth buffer. The depth write mask is
- // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to
- // true.
- clear_state.depth.test_enabled = true;
- clear_state.depth.test_func = GL_ALWAYS;
- clear_state.depth.write_mask = GL_TRUE;
+ state_tracker.NotifyDepthMask();
+ glDepthMask(GL_TRUE);
}
if (regs.clear_buffers.S) {
- ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
+ ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!");
use_stencil = true;
- clear_state.stencil.test_enabled = true;
-
- if (regs.clear_flags.stencil) {
- // Stencil affects the clear so fill it with the used masks
- clear_state.stencil.front.test_func = GL_ALWAYS;
- clear_state.stencil.front.test_mask = regs.stencil_front_func_mask;
- clear_state.stencil.front.action_stencil_fail = GL_KEEP;
- clear_state.stencil.front.action_depth_fail = GL_KEEP;
- clear_state.stencil.front.action_depth_pass = GL_KEEP;
- clear_state.stencil.front.write_mask = regs.stencil_front_mask;
- if (regs.stencil_two_side_enable) {
- clear_state.stencil.back.test_func = GL_ALWAYS;
- clear_state.stencil.back.test_mask = regs.stencil_back_func_mask;
- clear_state.stencil.back.action_stencil_fail = GL_KEEP;
- clear_state.stencil.back.action_depth_fail = GL_KEEP;
- clear_state.stencil.back.action_depth_pass = GL_KEEP;
- clear_state.stencil.back.write_mask = regs.stencil_back_mask;
- } else {
- clear_state.stencil.back.test_func = GL_ALWAYS;
- clear_state.stencil.back.test_mask = 0xFFFFFFFF;
- clear_state.stencil.back.write_mask = 0xFFFFFFFF;
- clear_state.stencil.back.action_stencil_fail = GL_KEEP;
- clear_state.stencil.back.action_depth_fail = GL_KEEP;
- clear_state.stencil.back.action_depth_pass = GL_KEEP;
- }
- }
}
if (!use_color && !use_depth && !use_stencil) {
@@ -516,20 +443,18 @@ void RasterizerOpenGL::Clear() {
return;
}
- ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
+ SyncRasterizeEnable();
- SyncViewport(clear_state);
- SyncRasterizeEnable(clear_state);
if (regs.clear_flags.scissor) {
- SyncScissorTest(clear_state);
+ SyncScissorTest();
+ } else {
+ state_tracker.NotifyScissor0();
+ glDisablei(GL_SCISSOR_TEST, 0);
}
- if (regs.clear_flags.viewport) {
- clear_state.EmulateViewportWithScissor();
- }
+ UNIMPLEMENTED_IF(regs.clear_flags.viewport);
- clear_state.AllDirty();
- clear_state.Apply();
+ ConfigureClearFramebuffer(use_color, use_depth, use_stencil);
if (use_color) {
glClearBufferfv(GL_COLOR, 0, regs.clear_color);
@@ -549,25 +474,27 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
query_cache.UpdateCounters();
- SyncRasterizeEnable(state);
+ SyncViewport();
+ SyncRasterizeEnable();
+ SyncPolygonModes();
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
SyncDepthTestState();
+ SyncDepthClamp();
SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
SyncPrimitiveRestart();
- SyncScissorTest(state);
- SyncTransformFeedback();
+ SyncScissorTest();
SyncPointState();
SyncPolygonOffset();
SyncAlphaTest();
+ SyncFramebufferSRGB();
buffer_cache.Acquire();
@@ -591,14 +518,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
buffer_cache.Map(buffer_size);
// Prepare vertex array format.
- const GLuint vao = SetupVertexFormat();
- vertex_array_pushbuffer.Setup(vao);
+ SetupVertexFormat();
+ vertex_array_pushbuffer.Setup();
// Upload vertex and index data.
- SetupVertexBuffer(vao);
- SetupVertexInstances(vao);
-
- GLintptr index_buffer_offset;
+ SetupVertexBuffer();
+ SetupVertexInstances();
+ GLintptr index_buffer_offset = 0;
if (is_indexed) {
index_buffer_offset = SetupIndexBuffer();
}
@@ -624,27 +550,20 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
ConfigureFramebuffers();
// Signal the buffer cache that we are not going to upload more things.
- const bool invalidate = buffer_cache.Unmap();
+ buffer_cache.Unmap();
// Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
vertex_array_pushbuffer.Bind();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
- if (invalidate) {
- // As all cached buffers are invalidated, we need to recheck their state.
- gpu.dirty.ResetVertexArrays();
- }
- gpu.dirty.memory_general = false;
-
- shader_program_manager->ApplyTo(state);
- state.Apply();
+ program_manager.BindGraphicsPipeline();
if (texture_cache.TextureBarrier()) {
glTextureBarrier();
}
- ++num_queued_commands;
+ BeginTransformFeedback(primitive_mode);
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
const GLsizei num_instances =
@@ -683,6 +602,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
+
+ EndTransformFeedback();
+
+ ++num_queued_commands;
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -695,13 +618,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
auto kernel = shader_cache.GetComputeKernel(code_addr);
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
-
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
- const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
- launch_desc.block_dim_z, launch_desc.shared_alloc,
- launch_desc.local_pos_alloc);
- state.draw.shader_program = kernel->GetHandle(variant);
- state.draw.program_pipeline = 0;
+ program_manager.BindComputeShader(kernel->GetHandle());
const std::size_t buffer_size =
Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -719,11 +636,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
- state.ApplyTextures();
- state.ApplyImages();
- state.ApplyShaderProgram();
- state.ApplyProgramPipeline();
-
+ const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@@ -828,7 +741,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
const auto& shader_stage = stages[stage_index];
u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
- for (const auto& entry : shader->GetShaderEntries().const_buffers) {
+ for (const auto& entry : shader->GetEntries().const_buffers) {
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
SetupConstBuffer(binding++, buffer, entry);
}
@@ -839,7 +752,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
u32 binding = 0;
- for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
+ for (const auto& entry : kernel->GetEntries().const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
@@ -851,7 +764,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
}
void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
- const GLShader::ConstBufferEntry& entry) {
+ const ConstBufferEntry& entry) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
@@ -875,7 +788,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
- for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
+ for (const auto& entry : shader->GetEntries().global_memory_entries) {
const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -889,7 +802,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
u32 binding = 0;
- for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
+ for (const auto& entry : kernel->GetEntries().global_memory_entries) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -897,7 +810,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
}
}
-void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry,
+void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, std::size_t size) {
const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] =
@@ -909,16 +822,11 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler;
- for (const auto& entry : shader->GetShaderEntries().samplers) {
- const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
- if (!entry.IsIndexed()) {
- const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
+ for (const auto& entry : shader->GetEntries().samplers) {
+ const auto shader_type = static_cast<ShaderType>(stage_index);
+ for (std::size_t i = 0; i < entry.Size(); ++i) {
+ const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
SetupTexture(binding++, texture, entry);
- } else {
- for (std::size_t i = 0; i < entry.Size(); ++i) {
- const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
- SetupTexture(binding++, texture, entry);
- }
}
}
}
@@ -927,46 +835,39 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
- for (const auto& entry : kernel->GetShaderEntries().samplers) {
- if (!entry.IsIndexed()) {
- const auto texture =
- GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
+ for (const auto& entry : kernel->GetEntries().samplers) {
+ for (std::size_t i = 0; i < entry.Size(); ++i) {
+ const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
SetupTexture(binding++, texture, entry);
- } else {
- for (std::size_t i = 0; i < entry.Size(); ++i) {
- const auto texture =
- GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
- SetupTexture(binding++, texture, entry);
- }
}
}
}
void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
- const GLShader::SamplerEntry& entry) {
+ const SamplerEntry& entry) {
const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
if (!view) {
// Can occur when texture addr is null or its memory is unmapped/invalid
- state.samplers[binding] = 0;
- state.textures[binding] = 0;
+ glBindSampler(binding, 0);
+ glBindTextureUnit(binding, 0);
return;
}
- state.textures[binding] = view->GetTexture();
+ glBindTextureUnit(binding, view->GetTexture());
if (view->GetSurfaceParams().IsBuffer()) {
return;
}
- state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
-
// Apply swizzle to textures that are not buffers.
view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
texture.tic.w_source);
+
+ glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
}
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).image;
- for (const auto& entry : shader->GetShaderEntries().images) {
+ for (const auto& entry : shader->GetEntries().images) {
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
SetupImage(binding++, tic, entry);
@@ -976,17 +877,17 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
- for (const auto& entry : shader->GetShaderEntries().images) {
+ for (const auto& entry : shader->GetEntries().images) {
const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
SetupImage(binding++, tic, entry);
}
}
void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
- const GLShader::ImageEntry& entry) {
+ const ImageEntry& entry) {
const auto view = texture_cache.GetImageSurface(tic, entry);
if (!view) {
- state.images[binding] = 0;
+ glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
return;
}
if (!tic.IsBuffer()) {
@@ -995,55 +896,85 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
if (entry.IsWritten()) {
view->MarkAsModified(texture_cache.Tick());
}
- state.images[binding] = view->GetTexture();
+ glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE,
+ view->GetFormat());
}
-void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
- const auto& regs = system.GPU().Maxwell3D().regs;
- const bool geometry_shaders_enabled =
- regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
- const std::size_t viewport_count =
- geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1;
- for (std::size_t i = 0; i < viewport_count; i++) {
- auto& viewport = current_state.viewports[i];
- const auto& src = regs.viewports[i];
- const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
- viewport.x = viewport_rect.left;
- viewport.y = viewport_rect.bottom;
- viewport.width = viewport_rect.GetWidth();
- viewport.height = viewport_rect.GetHeight();
- viewport.depth_range_far = src.depth_range_far;
- viewport.depth_range_near = src.depth_range_near;
- }
- state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
- state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
-
- bool flip_y = false;
- if (regs.viewport_transform[0].scale_y < 0.0) {
- flip_y = !flip_y;
- }
- if (regs.screen_y_control.y_negate != 0) {
- flip_y = !flip_y;
- }
- state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
- state.clip_control.depth_mode =
- regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne
- ? GL_ZERO_TO_ONE
- : GL_NEGATIVE_ONE_TO_ONE;
+void RasterizerOpenGL::SyncViewport() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ const auto& regs = gpu.regs;
+
+ const bool dirty_viewport = flags[Dirty::Viewports];
+ if (dirty_viewport || flags[Dirty::ClipControl]) {
+ flags[Dirty::ClipControl] = false;
+
+ bool flip_y = false;
+ if (regs.viewport_transform[0].scale_y < 0.0) {
+ flip_y = !flip_y;
+ }
+ if (regs.screen_y_control.y_negate != 0) {
+ flip_y = !flip_y;
+ }
+ glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
+ regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
+ : GL_NEGATIVE_ONE_TO_ONE);
+ }
+
+ if (dirty_viewport) {
+ flags[Dirty::Viewports] = false;
+
+ const bool force = flags[Dirty::ViewportTransform];
+ flags[Dirty::ViewportTransform] = false;
+
+ for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) {
+ if (!force && !flags[Dirty::Viewport0 + i]) {
+ continue;
+ }
+ flags[Dirty::Viewport0 + i] = false;
+
+ const Common::Rectangle<f32> rect{regs.viewport_transform[i].GetRect()};
+ glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
+ rect.GetHeight());
+
+ const auto& src = regs.viewports[i];
+ glDepthRangeIndexed(static_cast<GLuint>(i), static_cast<GLdouble>(src.depth_range_near),
+ static_cast<GLdouble>(src.depth_range_far));
+ }
+ }
}
-void RasterizerOpenGL::SyncClipEnabled(
- const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
+void RasterizerOpenGL::SyncDepthClamp() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::DepthClampEnabled]) {
+ return;
+ }
+ flags[Dirty::DepthClampEnabled] = false;
- const auto& regs = system.GPU().Maxwell3D().regs;
- const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{
- regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0,
- regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0,
- regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0,
- regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0};
+ const auto& state = gpu.regs.view_volume_clip_control;
+ UNIMPLEMENTED_IF_MSG(state.depth_clamp_far != state.depth_clamp_near,
+ "Unimplemented depth clamp separation!");
+
+ oglEnable(GL_DEPTH_CLAMP, state.depth_clamp_far || state.depth_clamp_near);
+}
+
+void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
+ return;
+ }
+ flags[Dirty::ClipDistances] = false;
+
+ clip_mask &= gpu.regs.clip_distance_enabled;
+ if (clip_mask == last_clip_distance_mask) {
+ return;
+ }
+ last_clip_distance_mask = clip_mask;
for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) {
- state.clip_distance[i] = reg_state[i] && clip_mask[i];
+ oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1);
}
}
@@ -1052,247 +983,442 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ const auto& regs = gpu.regs;
- state.cull.enabled = regs.cull.enabled != 0;
- if (state.cull.enabled) {
- state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
+ if (flags[Dirty::CullTest]) {
+ flags[Dirty::CullTest] = false;
+
+ if (regs.cull_test_enabled) {
+ glEnable(GL_CULL_FACE);
+ glCullFace(MaxwellToGL::CullFace(regs.cull_face));
+ } else {
+ glDisable(GL_CULL_FACE);
+ }
}
- state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
+ if (flags[Dirty::FrontFace]) {
+ flags[Dirty::FrontFace] = false;
+ glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
+ }
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::PrimitiveRestart]) {
+ return;
+ }
+ flags[Dirty::PrimitiveRestart] = false;
- state.primitive_restart.enabled = regs.primitive_restart.enabled;
- state.primitive_restart.index = regs.primitive_restart.index;
+ if (gpu.regs.primitive_restart.enabled) {
+ glEnable(GL_PRIMITIVE_RESTART);
+ glPrimitiveRestartIndex(gpu.regs.primitive_restart.index);
+ } else {
+ glDisable(GL_PRIMITIVE_RESTART);
+ }
}
void RasterizerOpenGL::SyncDepthTestState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- state.depth.test_enabled = regs.depth_test_enable != 0;
- state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
- if (!state.depth.test_enabled) {
- return;
+ const auto& regs = gpu.regs;
+ if (flags[Dirty::DepthMask]) {
+ flags[Dirty::DepthMask] = false;
+ glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
}
- state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
+ if (flags[Dirty::DepthTest]) {
+ flags[Dirty::DepthTest] = false;
+ if (regs.depth_test_enable) {
+ glEnable(GL_DEPTH_TEST);
+ glDepthFunc(MaxwellToGL::ComparisonOp(regs.depth_test_func));
+ } else {
+ glDisable(GL_DEPTH_TEST);
+ }
+ }
}
void RasterizerOpenGL::SyncStencilTestState() {
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.stencil_test) {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::StencilTest]) {
return;
}
- maxwell3d.dirty.stencil_test = false;
-
- const auto& regs = maxwell3d.regs;
- state.stencil.test_enabled = regs.stencil_enable != 0;
- state.MarkDirtyStencilState();
+ flags[Dirty::StencilTest] = false;
+ const auto& regs = gpu.regs;
if (!regs.stencil_enable) {
+ glDisable(GL_STENCIL_TEST);
return;
}
- state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func);
- state.stencil.front.test_ref = regs.stencil_front_func_ref;
- state.stencil.front.test_mask = regs.stencil_front_func_mask;
- state.stencil.front.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_fail);
- state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail);
- state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass);
- state.stencil.front.write_mask = regs.stencil_front_mask;
+ glEnable(GL_STENCIL_TEST);
+ glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
+ regs.stencil_front_func_ref, regs.stencil_front_func_mask);
+ glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail),
+ MaxwellToGL::StencilOp(regs.stencil_front_op_zfail),
+ MaxwellToGL::StencilOp(regs.stencil_front_op_zpass));
+ glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask);
+
if (regs.stencil_two_side_enable) {
- state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func);
- state.stencil.back.test_ref = regs.stencil_back_func_ref;
- state.stencil.back.test_mask = regs.stencil_back_func_mask;
- state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail);
- state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail);
- state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass);
- state.stencil.back.write_mask = regs.stencil_back_mask;
+ glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_func_func),
+ regs.stencil_back_func_ref, regs.stencil_back_func_mask);
+ glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op_fail),
+ MaxwellToGL::StencilOp(regs.stencil_back_op_zfail),
+ MaxwellToGL::StencilOp(regs.stencil_back_op_zpass));
+ glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask);
} else {
- state.stencil.back.test_func = GL_ALWAYS;
- state.stencil.back.test_ref = 0;
- state.stencil.back.test_mask = 0xFFFFFFFF;
- state.stencil.back.write_mask = 0xFFFFFFFF;
- state.stencil.back.action_stencil_fail = GL_KEEP;
- state.stencil.back.action_depth_fail = GL_KEEP;
- state.stencil.back.action_depth_pass = GL_KEEP;
+ glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF);
+ glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP);
+ glStencilMaskSeparate(GL_BACK, 0xFFFFFFFF);
}
}
-void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) {
- const auto& regs = system.GPU().Maxwell3D().regs;
- current_state.rasterizer_discard = regs.rasterize_enable == 0;
+void RasterizerOpenGL::SyncRasterizeEnable() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::RasterizeEnable]) {
+ return;
+ }
+ flags[Dirty::RasterizeEnable] = false;
+
+ oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
+}
+
+void RasterizerOpenGL::SyncPolygonModes() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::PolygonModes]) {
+ return;
+ }
+ flags[Dirty::PolygonModes] = false;
+
+ if (gpu.regs.fill_rectangle) {
+ if (!GLAD_GL_NV_fill_rectangle) {
+ LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+ return;
+ }
+
+ flags[Dirty::PolygonModeFront] = true;
+ flags[Dirty::PolygonModeBack] = true;
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV);
+ return;
+ }
+
+ if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
+ flags[Dirty::PolygonModeFront] = false;
+ flags[Dirty::PolygonModeBack] = false;
+ glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ return;
+ }
+
+ if (flags[Dirty::PolygonModeFront]) {
+ flags[Dirty::PolygonModeFront] = false;
+ glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ }
+
+ if (flags[Dirty::PolygonModeBack]) {
+ flags[Dirty::PolygonModeBack] = false;
+ glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
+ }
}
void RasterizerOpenGL::SyncColorMask() {
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.color_mask) {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::ColorMasks]) {
return;
}
- const auto& regs = maxwell3d.regs;
+ flags[Dirty::ColorMasks] = false;
+
+ const bool force = flags[Dirty::ColorMaskCommon];
+ flags[Dirty::ColorMaskCommon] = false;
+
+ const auto& regs = gpu.regs;
+ if (regs.color_mask_common) {
+ if (!force && !flags[Dirty::ColorMask0]) {
+ return;
+ }
+ flags[Dirty::ColorMask0] = false;
- const std::size_t count =
- regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
- for (std::size_t i = 0; i < count; i++) {
- const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i];
- auto& dest = state.color_mask[i];
- dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE;
- dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE;
- dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
- dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
+ auto& mask = regs.color_mask[0];
+ glColorMask(mask.R != 0, mask.B != 0, mask.G != 0, mask.A != 0);
+ return;
}
- state.MarkDirtyColorMask();
- maxwell3d.dirty.color_mask = false;
+ // Path without color_mask_common set
+ for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
+ if (!force && !flags[Dirty::ColorMask0 + i]) {
+ continue;
+ }
+ flags[Dirty::ColorMask0 + i] = false;
+
+ const auto& mask = regs.color_mask[i];
+ glColorMaski(static_cast<GLuint>(i), mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0);
+ }
}
void RasterizerOpenGL::SyncMultiSampleState() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::MultisampleControl]) {
+ return;
+ }
+ flags[Dirty::MultisampleControl] = false;
+
const auto& regs = system.GPU().Maxwell3D().regs;
- state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0;
- state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0;
+ oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
+ oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
}
void RasterizerOpenGL::SyncFragmentColorClampState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
- state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::FragmentClampColor]) {
+ return;
+ }
+ flags[Dirty::FragmentClampColor] = false;
+
+ glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
}
void RasterizerOpenGL::SyncBlendState() {
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.blend_state) {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ const auto& regs = gpu.regs;
+
+ if (flags[Dirty::BlendColor]) {
+ flags[Dirty::BlendColor] = false;
+ glBlendColor(regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
+ regs.blend_color.a);
+ }
+
+ // TODO(Rodrigo): Revisit blending, there are several registers we are not reading
+
+ if (!flags[Dirty::BlendStates]) {
return;
}
- const auto& regs = maxwell3d.regs;
-
- state.blend_color.red = regs.blend_color.r;
- state.blend_color.green = regs.blend_color.g;
- state.blend_color.blue = regs.blend_color.b;
- state.blend_color.alpha = regs.blend_color.a;
-
- state.independant_blend.enabled = regs.independent_blend_enable;
- if (!state.independant_blend.enabled) {
- auto& blend = state.blend[0];
- const auto& src = regs.blend;
- blend.enabled = src.enable[0] != 0;
- if (blend.enabled) {
- blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb);
- blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb);
- blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb);
- blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a);
- blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
- blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
- }
- for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- state.blend[i].enabled = false;
+ flags[Dirty::BlendStates] = false;
+
+ if (!regs.independent_blend_enable) {
+ if (!regs.blend.enable[0]) {
+ glDisable(GL_BLEND);
+ return;
}
- maxwell3d.dirty.blend_state = false;
- state.MarkDirtyBlendState();
+ glEnable(GL_BLEND);
+ glBlendFuncSeparate(MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb),
+ MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb),
+ MaxwellToGL::BlendFunc(regs.blend.factor_source_a),
+ MaxwellToGL::BlendFunc(regs.blend.factor_dest_a));
+ glBlendEquationSeparate(MaxwellToGL::BlendEquation(regs.blend.equation_rgb),
+ MaxwellToGL::BlendEquation(regs.blend.equation_a));
return;
}
- for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- auto& blend = state.blend[i];
- const auto& src = regs.independent_blend[i];
- blend.enabled = regs.blend.enable[i] != 0;
- if (!blend.enabled)
+ const bool force = flags[Dirty::BlendIndependentEnabled];
+ flags[Dirty::BlendIndependentEnabled] = false;
+
+ for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
+ if (!force && !flags[Dirty::BlendState0 + i]) {
continue;
- blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb);
- blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb);
- blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb);
- blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a);
- blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
- blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
- }
+ }
+ flags[Dirty::BlendState0 + i] = false;
+
+ if (!regs.blend.enable[i]) {
+ glDisablei(GL_BLEND, static_cast<GLuint>(i));
+ continue;
+ }
+ glEnablei(GL_BLEND, static_cast<GLuint>(i));
- state.MarkDirtyBlendState();
- maxwell3d.dirty.blend_state = false;
+ const auto& src = regs.independent_blend[i];
+ glBlendFuncSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendFunc(src.factor_source_rgb),
+ MaxwellToGL::BlendFunc(src.factor_dest_rgb),
+ MaxwellToGL::BlendFunc(src.factor_source_a),
+ MaxwellToGL::BlendFunc(src.factor_dest_a));
+ glBlendEquationSeparatei(static_cast<GLuint>(i),
+ MaxwellToGL::BlendEquation(src.equation_rgb),
+ MaxwellToGL::BlendEquation(src.equation_a));
+ }
}
void RasterizerOpenGL::SyncLogicOpState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::LogicOp]) {
+ return;
+ }
+ flags[Dirty::LogicOp] = false;
- state.logic_op.enabled = regs.logic_op.enable != 0;
+ const auto& regs = gpu.regs;
+ if (regs.logic_op.enable) {
+ glEnable(GL_COLOR_LOGIC_OP);
+ glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
+ } else {
+ glDisable(GL_COLOR_LOGIC_OP);
+ }
+}
- if (!state.logic_op.enabled)
+void RasterizerOpenGL::SyncScissorTest() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::Scissors]) {
return;
+ }
+ flags[Dirty::Scissors] = false;
- ASSERT_MSG(regs.blend.enable[0] == 0,
- "Blending and logic op can't be enabled at the same time.");
-
- state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation);
-}
+ const auto& regs = gpu.regs;
+ for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
+ if (!flags[Dirty::Scissor0 + index]) {
+ continue;
+ }
+ flags[Dirty::Scissor0 + index] = false;
-void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
- const auto& regs = system.GPU().Maxwell3D().regs;
- const bool geometry_shaders_enabled =
- regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
- const std::size_t viewport_count =
- geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1;
- for (std::size_t i = 0; i < viewport_count; i++) {
- const auto& src = regs.scissor_test[i];
- auto& dst = current_state.viewports[i].scissor;
- dst.enabled = (src.enable != 0);
- if (dst.enabled == 0) {
- return;
+ const auto& src = regs.scissor_test[index];
+ if (src.enable) {
+ glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
+ glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y,
+ src.max_x - src.min_x, src.max_y - src.min_y);
+ } else {
+ glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
}
- const u32 width = src.max_x - src.min_x;
- const u32 height = src.max_y - src.min_y;
- dst.x = src.min_x;
- dst.y = src.min_y;
- dst.width = width;
- dst.height = height;
}
}
-void RasterizerOpenGL::SyncTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
- UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
-}
-
void RasterizerOpenGL::SyncPointState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::PointSize]) {
+ return;
+ }
+ flags[Dirty::PointSize] = false;
+
+ oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable);
+
+ if (gpu.regs.vp_point_size.enable) {
+ // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
+ glEnable(GL_PROGRAM_POINT_SIZE);
+ return;
+ }
+
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
- state.point.program_control = regs.vp_point_size.enable != 0;
- state.point.sprite = regs.point_sprite_enable != 0;
- state.point.size = std::max(1.0f, regs.point_size);
+ glPointSize(std::max(1.0f, gpu.regs.point_size));
+ glDisable(GL_PROGRAM_POINT_SIZE);
}
void RasterizerOpenGL::SyncPolygonOffset() {
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.polygon_offset) {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::PolygonOffset]) {
return;
}
- const auto& regs = maxwell3d.regs;
-
- state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
- state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
- state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
+ flags[Dirty::PolygonOffset] = false;
- // Hardware divides polygon offset units by two
- state.polygon_offset.units = regs.polygon_offset_units / 2.0f;
- state.polygon_offset.factor = regs.polygon_offset_factor;
- state.polygon_offset.clamp = regs.polygon_offset_clamp;
+ const auto& regs = gpu.regs;
+ oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
+ oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
+ oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
- state.MarkDirtyPolygonOffset();
- maxwell3d.dirty.polygon_offset = false;
+ if (regs.polygon_offset_fill_enable || regs.polygon_offset_line_enable ||
+ regs.polygon_offset_point_enable) {
+ // Hardware divides polygon offset units by two
+ glPolygonOffsetClamp(regs.polygon_offset_factor, regs.polygon_offset_units / 2.0f,
+ regs.polygon_offset_clamp);
+ }
}
void RasterizerOpenGL::SyncAlphaTest() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::AlphaTest]) {
+ return;
+ }
+ flags[Dirty::AlphaTest] = false;
+
+ const auto& regs = gpu.regs;
+ if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
+ LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
+ }
+
+ if (regs.alpha_test_enabled) {
+ glEnable(GL_ALPHA_TEST);
+ glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
+ } else {
+ glDisable(GL_ALPHA_TEST);
+ }
+}
+
+void RasterizerOpenGL::SyncFramebufferSRGB() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::FramebufferSRGB]) {
+ return;
+ }
+ flags[Dirty::FramebufferSRGB] = false;
+
+ oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
+}
+
+void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
const auto& regs = system.GPU().Maxwell3D().regs;
- UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
- "Alpha Testing is enabled with more than one rendertarget");
+ if (regs.tfb_enabled == 0) {
+ return;
+ }
+
+ UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
- state.alpha_test.enabled = regs.alpha_test_enabled;
- if (!state.alpha_test.enabled) {
+ for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
+ const auto& binding = regs.tfb_bindings[index];
+ if (!binding.buffer_enable) {
+ if (enabled_transform_feedback_buffers[index]) {
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
+ 0);
+ }
+ enabled_transform_feedback_buffers[index] = false;
+ continue;
+ }
+ enabled_transform_feedback_buffers[index] = true;
+
+ auto& tfb_buffer = transform_feedback_buffers[index];
+ tfb_buffer.Create();
+
+ const GLuint handle = tfb_buffer.handle;
+ const std::size_t size = binding.buffer_size;
+ glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
+ static_cast<GLsizeiptr>(size));
+ }
+
+ glBeginTransformFeedback(GL_POINTS);
+}
+
+void RasterizerOpenGL::EndTransformFeedback() {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ if (regs.tfb_enabled == 0) {
return;
}
- state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func);
- state.alpha_test.ref = regs.alpha_test_ref;
+
+ glEndTransformFeedback();
+
+ for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
+ const auto& binding = regs.tfb_bindings[index];
+ if (!binding.buffer_enable) {
+ continue;
+ }
+ UNIMPLEMENTED_IF(binding.buffer_offset != 0);
+
+ const GLuint handle = transform_feedback_buffers[index].handle;
+ const GPUVAddr gpu_addr = binding.Address();
+ const std::size_t size = binding.buffer_size;
+ const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+ glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
+ }
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 68abe9a21..2d3be2437 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -30,7 +30,7 @@
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/textures/texture.h"
@@ -55,7 +55,8 @@ struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- ScreenInfo& info);
+ ScreenInfo& info, GLShader::ProgramManager& program_manager,
+ StateTracker& state_tracker);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -76,6 +77,7 @@ public:
u32 pixel_stride) override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
+ void SetupDirtyFlags() override;
/// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const {
@@ -86,8 +88,7 @@ private:
/// Configures the color and depth framebuffer states.
void ConfigureFramebuffers();
- void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
- bool using_depth_fb, bool using_stencil_fb);
+ void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb);
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
@@ -97,7 +98,7 @@ private:
/// Configures a constant buffer.
void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
- const GLShader::ConstBufferEntry& entry);
+ const ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command.
void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
@@ -106,7 +107,7 @@ private:
void SetupComputeGlobalMemory(const Shader& kernel);
/// Configures a constant buffer.
- void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
+ void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
std::size_t size);
/// Configures the current textures to use for the draw command.
@@ -117,7 +118,7 @@ private:
/// Configures a texture.
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
- const GLShader::SamplerEntry& entry);
+ const SamplerEntry& entry);
/// Configures images in a graphics shader.
void SetupDrawImages(std::size_t stage_index, const Shader& shader);
@@ -126,15 +127,16 @@ private:
void SetupComputeImages(const Shader& shader);
/// Configures an image.
- void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
- const GLShader::ImageEntry& entry);
+ void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
/// Syncs the viewport and depth range to match the guest state
- void SyncViewport(OpenGLState& current_state);
+ void SyncViewport();
+
+ /// Syncs the depth clamp state
+ void SyncDepthClamp();
/// Syncs the clip enabled status to match the guest state
- void SyncClipEnabled(
- const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask);
+ void SyncClipEnabled(u32 clip_mask);
/// Syncs the clip coefficients to match the guest state
void SyncClipCoef();
@@ -164,16 +166,16 @@ private:
void SyncMultiSampleState();
/// Syncs the scissor test state to match the guest state
- void SyncScissorTest(OpenGLState& current_state);
-
- /// Syncs the transform feedback state to match the guest state
- void SyncTransformFeedback();
+ void SyncScissorTest();
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs the rasterizer enable state to match the guest state
- void SyncRasterizeEnable(OpenGLState& current_state);
+ void SyncRasterizeEnable();
+
+ /// Syncs polygon modes to match the guest state
+ void SyncPolygonModes();
/// Syncs Color Mask
void SyncColorMask();
@@ -184,6 +186,15 @@ private:
/// Syncs the alpha test state to match the guest state
void SyncAlphaTest();
+ /// Syncs the framebuffer sRGB state to match the guest state
+ void SyncFramebufferSRGB();
+
+ /// Begin a transform feedback
+ void BeginTransformFeedback(GLenum primitive_mode);
+
+ /// End a transform feedback
+ void EndTransformFeedback();
+
/// Check for extension that are not strictly required but are needed for correct emulation
void CheckExtensions();
@@ -191,18 +202,17 @@ private:
std::size_t CalculateIndexBufferSize() const;
- /// Updates and returns a vertex array object representing current vertex format
- GLuint SetupVertexFormat();
+ /// Updates the current vertex format
+ void SetupVertexFormat();
- void SetupVertexBuffer(GLuint vao);
- void SetupVertexInstances(GLuint vao);
+ void SetupVertexBuffer();
+ void SetupVertexInstances();
GLintptr SetupIndexBuffer();
void SetupShaders(GLenum primitive_mode);
const Device device;
- OpenGLState state;
TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache;
@@ -212,22 +222,25 @@ private:
Core::System& system;
ScreenInfo& screen_info;
-
- std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
- std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute,
- Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>,
- OGLVertexArray>
- vertex_array_cache;
+ GLShader::ProgramManager& program_manager;
+ StateTracker& state_tracker;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
- VertexArrayPushBuffer vertex_array_pushbuffer;
+ VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker};
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
+ std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+ transform_feedback_buffers;
+ std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+ enabled_transform_feedback_buffers;
+
/// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0;
+
+ u32 last_clip_distance_mask = 0;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index c0aee770f..97803d480 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -8,7 +8,6 @@
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
-#include "video_core/renderer_opengl/gl_state.h"
MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
@@ -20,7 +19,7 @@ void OGLRenderbuffer::Create() {
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- glGenRenderbuffers(1, &handle);
+ glCreateRenderbuffers(1, &handle);
}
void OGLRenderbuffer::Release() {
@@ -29,7 +28,6 @@ void OGLRenderbuffer::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteRenderbuffers(1, &handle);
- OpenGLState::GetCurState().ResetRenderbuffer(handle).Apply();
handle = 0;
}
@@ -47,7 +45,6 @@ void OGLTexture::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
- OpenGLState::GetCurState().UnbindTexture(handle).Apply();
handle = 0;
}
@@ -65,7 +62,6 @@ void OGLTextureView::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
- OpenGLState::GetCurState().UnbindTexture(handle).Apply();
handle = 0;
}
@@ -83,7 +79,6 @@ void OGLSampler::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteSamplers(1, &handle);
- OpenGLState::GetCurState().ResetSampler(handle).Apply();
handle = 0;
}
@@ -127,7 +122,6 @@ void OGLProgram::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgram(handle);
- OpenGLState::GetCurState().ResetProgram(handle).Apply();
handle = 0;
}
@@ -145,7 +139,6 @@ void OGLPipeline::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramPipelines(1, &handle);
- OpenGLState::GetCurState().ResetPipeline(handle).Apply();
handle = 0;
}
@@ -189,24 +182,6 @@ void OGLSync::Release() {
handle = 0;
}
-void OGLVertexArray::Create() {
- if (handle != 0)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- glCreateVertexArrays(1, &handle);
-}
-
-void OGLVertexArray::Release() {
- if (handle == 0)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
- glDeleteVertexArrays(1, &handle);
- OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
- handle = 0;
-}
-
void OGLFramebuffer::Create() {
if (handle != 0)
return;
@@ -221,7 +196,6 @@ void OGLFramebuffer::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteFramebuffers(1, &handle);
- OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
handle = 0;
}
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 995a4e45e..de93f4212 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -241,31 +241,6 @@ public:
GLsync handle = 0;
};
-class OGLVertexArray : private NonCopyable {
-public:
- OGLVertexArray() = default;
-
- OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
-
- ~OGLVertexArray() {
- Release();
- }
-
- OGLVertexArray& operator=(OGLVertexArray&& o) noexcept {
- Release();
- handle = std::exchange(o.handle, 0);
- return *this;
- }
-
- /// Creates a new internal OpenGL resource and stores the handle
- void Create();
-
- /// Deletes the internal OpenGL resource
- void Release();
-
- GLuint handle = 0;
-};
-
class OGLFramebuffer : private NonCopyable {
public:
OGLFramebuffer() = default;
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
index 3ded5ecea..5c174879a 100644
--- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
@@ -38,7 +38,7 @@ OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
} else if (GLAD_GL_EXT_texture_filter_anisotropic) {
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
- } else if (tsc.GetMaxAnisotropy() != 1) {
+ } else {
LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 489eb143c..e3d31c3eb 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -2,12 +2,16 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <atomic>
+#include <functional>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <unordered_set>
+
#include <boost/functional/hash.hpp>
+
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
@@ -22,14 +26,16 @@
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::ConstBufferLocker;
using VideoCommon::Shader::ProgramCode;
+using VideoCommon::Shader::Registry;
using VideoCommon::Shader::ShaderIR;
namespace {
@@ -55,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
}
/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
+std::size_t CalculateProgramSize(const ProgramCode& program) {
constexpr std::size_t start_offset = 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
@@ -108,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
}
}
-/// Describes primitive behavior on geometry shaders
-constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
- switch (primitive_mode) {
- case GL_POINTS:
- return {"points", 1};
- case GL_LINES:
- case GL_LINE_STRIP:
- return {"lines", 2};
- case GL_LINES_ADJACENCY:
- case GL_LINE_STRIP_ADJACENCY:
- return {"lines_adjacency", 4};
- case GL_TRIANGLES:
- case GL_TRIANGLE_STRIP:
- case GL_TRIANGLE_FAN:
- return {"triangles", 3};
- case GL_TRIANGLES_ADJACENCY:
- case GL_TRIANGLE_STRIP_ADJACENCY:
- return {"triangles_adjacency", 6};
- default:
- return {"points", 1};
- }
-}
-
/// Hashes one (or two) program streams
u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
- const ProgramCode& code_b) {
+ const ProgramCode& code_b = {}) {
u64 unique_identifier = boost::hash_value(code);
if (is_a) {
// VertexA programs include two programs
@@ -142,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co
return unique_identifier;
}
-/// Creates an unspecialized program from code streams
-std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir,
- const std::optional<ShaderIR>& ir_b) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
- case ShaderType::Geometry:
- return GLShader::GenerateGeometryShader(device, ir);
- case ShaderType::Fragment:
- return GLShader::GenerateFragmentShader(device, ir);
- case ShaderType::Compute:
- return GLShader::GenerateComputeShader(device, ir);
- default:
- UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type));
- return {};
- }
-}
-
constexpr const char* GetShaderTypeName(ShaderType shader_type) {
switch (shader_type) {
case ShaderType::Vertex:
@@ -195,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
return {};
}
-std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) {
+std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
}
-Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system,
- ShaderType shader_type) {
- if (shader_type == ShaderType::Compute) {
- return system.GPU().KeplerCompute();
- } else {
- return system.GPU().Maxwell3D();
- }
-}
-
-std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) {
- return std::make_unique<ConstBufferLocker>(shader_type,
- GetConstBufferEngineInterface(system, shader_type));
-}
-
-void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
- locker.SetBoundBuffer(usage.bound_buffer);
- for (const auto& key : usage.keys) {
- const auto [buffer, offset] = key.first;
- locker.InsertKey(buffer, offset, key.second);
+std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
+ const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
+ const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
+ entry.graphics_info, entry.compute_info};
+ const auto registry = std::make_shared<Registry>(entry.type, info);
+ for (const auto& [address, value] : entry.keys) {
+ const auto [buffer, offset] = address;
+ registry->InsertKey(buffer, offset, value);
}
- for (const auto& [offset, sampler] : usage.bound_samplers) {
- locker.InsertBoundSampler(offset, sampler);
+ for (const auto& [offset, sampler] : entry.bound_samplers) {
+ registry->InsertBoundSampler(offset, sampler);
}
- for (const auto& [key, sampler] : usage.bindless_samplers) {
+ for (const auto& [key, sampler] : entry.bindless_samplers) {
const auto [buffer, offset] = key;
- locker.InsertBindlessSampler(buffer, offset, sampler);
+ registry->InsertBindlessSampler(buffer, offset, sampler);
}
+ return registry;
}
-CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type,
- const ProgramCode& code, const ProgramCode& code_b,
- ConstBufferLocker& locker, const ProgramVariant& variant,
- bool hint_retrievable = false) {
- LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type));
-
- const bool is_compute = shader_type == ShaderType::Compute;
- const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker);
- std::optional<ShaderIR> ir_b;
- if (!code_b.empty()) {
- ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
- }
-
- std::string source = fmt::format(R"(// {}
-#version 430 core
-#extension GL_ARB_separate_shader_objects : enable
-)",
- GetShaderId(unique_identifier, shader_type));
- if (device.HasShaderBallot()) {
- source += "#extension GL_ARB_shader_ballot : require\n";
- }
- if (device.HasVertexViewportLayer()) {
- source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
- }
- if (device.HasImageLoadFormatted()) {
- source += "#extension GL_EXT_shader_image_load_formatted : require\n";
- }
- if (device.HasWarpIntrinsics()) {
- source += "#extension GL_NV_gpu_shader5 : require\n"
- "#extension GL_NV_shader_thread_group : require\n"
- "#extension GL_NV_shader_thread_shuffle : require\n";
- }
- // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations)
- // on places where we don't want to.
- // Thanks to Ryujinx for finding this workaround.
- source += "#pragma optionNV(fastmath off)\n";
-
- if (shader_type == ShaderType::Geometry) {
- const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
- source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
- source += fmt::format("layout ({}) in;\n", glsl_topology);
- }
- if (shader_type == ShaderType::Compute) {
- if (variant.local_memory_size > 0) {
- source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
- Common::AlignUp(variant.local_memory_size, 4) / 4);
- }
- source +=
- fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
- variant.block_x, variant.block_y, variant.block_z);
-
- if (variant.shared_memory_size > 0) {
- // shared_memory_size is described in number of words
- source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
- }
- }
-
- source += '\n';
- source += GenerateGLSL(device, shader_type, ir, ir_b);
+std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
+ u64 unique_identifier, const ShaderIR& ir,
+ const Registry& registry, bool hint_retrievable = false) {
+ const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
+ LOG_INFO(Render_OpenGL, "{}", shader_id);
+ const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
OGLShader shader;
- shader.Create(source.c_str(), GetGLShaderType(shader_type));
+ shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
auto program = std::make_shared<OGLProgram>();
program->Create(true, hint_retrievable, shader.handle);
@@ -298,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
}
std::unordered_set<GLenum> GetSupportedFormats() {
- GLint num_formats{};
+ GLint num_formats;
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
std::vector<GLint> formats(num_formats);
@@ -313,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
-CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
- GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
- : RasterizerCacheObject{params.host_ptr}, system{params.system},
- disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
- unique_identifier{params.unique_identifier}, shader_type{shader_type},
- entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
- if (!params.precompiled_variants) {
- return;
- }
- for (const auto& pair : *params.precompiled_variants) {
- auto locker = MakeLocker(system, shader_type);
- const auto& usage = pair->first;
- FillLocker(*locker, usage);
-
- std::unique_ptr<LockerVariant>* locker_variant = nullptr;
- const auto it =
- std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
- return variant->locker->HasEqualKeys(*locker);
- });
- if (it == locker_variants.end()) {
- locker_variant = &locker_variants.emplace_back();
- *locker_variant = std::make_unique<LockerVariant>();
- locker_variant->get()->locker = std::move(locker);
- } else {
- locker_variant = &*it;
- }
- locker_variant->get()->programs.emplace(usage.variant, pair->second);
- }
+CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
+ std::shared_ptr<VideoCommon::Shader::Registry> registry,
+ ShaderEntries entries, std::shared_ptr<OGLProgram> program)
+ : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
+ cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
+
+CachedShader::~CachedShader() = default;
+
+GLuint CachedShader::GetHandle() const {
+ DEBUG_ASSERT(registry->IsConsistent());
+ return program->handle;
}
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
Maxwell::ShaderProgram program_type, ProgramCode code,
ProgramCode code_b) {
const auto shader_type = GetShaderType(program_type);
- params.disk_cache.SaveRaw(
- ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b));
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
- ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D());
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
+ auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
+ const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
// if (!code_b.empty()) {
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
// }
- return std::shared_ptr<CachedShader>(new CachedShader(
- params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b)));
+ auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
+
+ ShaderDiskCacheEntry entry;
+ entry.type = shader_type;
+ entry.code = std::move(code);
+ entry.code_b = std::move(code_b);
+ entry.unique_identifier = params.unique_identifier;
+ entry.bound_buffer = registry->GetBoundBuffer();
+ entry.graphics_info = registry->GetGraphicsInfo();
+ entry.keys = registry->GetKeys();
+ entry.bound_samplers = registry->GetBoundSamplers();
+ entry.bindless_samplers = registry->GetBindlessSamplers();
+ params.disk_cache.SaveEntry(std::move(entry));
+
+ return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
+ size_in_bytes, std::move(registry),
+ MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
- params.disk_cache.SaveRaw(
- ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code));
-
- ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute,
- params.system.GPU().KeplerCompute());
- const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
- return std::shared_ptr<CachedShader>(new CachedShader(
- params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+ auto& engine = params.system.GPU().KeplerCompute();
+ auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
+ const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
+ const u64 uid = params.unique_identifier;
+ auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
+
+ ShaderDiskCacheEntry entry;
+ entry.type = ShaderType::Compute;
+ entry.code = std::move(code);
+ entry.unique_identifier = uid;
+ entry.bound_buffer = registry->GetBoundBuffer();
+ entry.compute_info = registry->GetComputeInfo();
+ entry.keys = registry->GetKeys();
+ entry.bound_samplers = registry->GetBoundSamplers();
+ entry.bindless_samplers = registry->GetBindlessSamplers();
+ params.disk_cache.SaveEntry(std::move(entry));
+
+ return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
+ size_in_bytes, std::move(registry),
+ MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
- const UnspecializedShader& unspecialized) {
- return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type,
- unspecialized.entries, unspecialized.code,
- unspecialized.code_b));
-}
-
-GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
- EnsureValidLockerVariant();
-
- const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
- auto& program = entry->second;
- if (!is_cache_miss) {
- return program->handle;
- }
-
- program = BuildShader(device, unique_identifier, shader_type, code, code_b,
- *curr_locker_variant->locker, variant);
- disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
-
- LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
- return program->handle;
-}
-
-bool CachedShader::EnsureValidLockerVariant() {
- const auto previous_variant = curr_locker_variant;
- if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) {
- curr_locker_variant = nullptr;
- }
- if (!curr_locker_variant) {
- for (auto& variant : locker_variants) {
- if (variant->locker->IsConsistent()) {
- curr_locker_variant = variant.get();
- }
- }
- }
- if (!curr_locker_variant) {
- auto& new_variant = locker_variants.emplace_back();
- new_variant = std::make_unique<LockerVariant>();
- new_variant->locker = MakeLocker(system, shader_type);
- curr_locker_variant = new_variant.get();
- }
- return previous_variant == curr_locker_variant;
-}
-
-ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
- const ConstBufferLocker& locker) const {
- return ShaderDiskCacheUsage{unique_identifier, variant,
- locker.GetBoundBuffer(), locker.GetKeys(),
- locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
+ const PrecompiledShader& precompiled_shader,
+ std::size_t size_in_bytes) {
+ return std::shared_ptr<CachedShader>(new CachedShader(
+ params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
+ precompiled_shader.entries, precompiled_shader.program));
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -431,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System&
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
- const auto transferable = disk_cache.LoadTransferable();
+ const std::optional transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
}
- const auto [raws, shader_usages] = *transferable;
- if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
- return;
- }
- const auto dumps = disk_cache.LoadPrecompiled();
+ const std::vector gl_cache = disk_cache.LoadPrecompiled();
const auto supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to
@@ -449,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
// Inform the frontend about shader build initialization
if (callback) {
- callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size());
+ callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
}
std::mutex mutex;
std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
- std::atomic_bool compilation_failed = false;
+ std::atomic_bool gl_cache_failed = false;
- const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
- std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages,
- const ShaderDumpsMap& dumps) {
+ const auto find_precompiled = [&gl_cache](u64 id) {
+ return std::find_if(gl_cache.begin(), gl_cache.end(),
+ [id](const auto& entry) { return entry.unique_identifier == id; });
+ };
+
+ const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
+ std::size_t end) {
context->MakeCurrent();
SCOPE_EXIT({ return context->DoneCurrent(); });
for (std::size_t i = begin; i < end; ++i) {
- if (stop_loading || compilation_failed) {
+ if (stop_loading) {
return;
}
- const auto& usage{shader_usages[i]};
- const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
- const auto dump{dumps.find(usage)};
-
- CachedProgram shader;
- if (dump != dumps.end()) {
- // If the shader is dumped, attempt to load it with
- shader = GeneratePrecompiledProgram(dump->second, supported_formats);
- if (!shader) {
- compilation_failed = true;
- return;
+ const auto& entry = (*transferable)[i];
+ const u64 uid = entry.unique_identifier;
+ const auto it = find_precompiled(uid);
+ const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
+
+ const bool is_compute = entry.type == ShaderType::Compute;
+ const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
+ auto registry = MakeRegistry(entry);
+ const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
+
+ std::shared_ptr<OGLProgram> program;
+ if (precompiled_entry) {
+ // If the shader is precompiled, attempt to load it with
+ program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
+ if (!program) {
+ gl_cache_failed = true;
}
}
- if (!shader) {
- auto locker{MakeLocker(system, unspecialized.type)};
- FillLocker(*locker, usage);
-
- shader = BuildShader(device, usage.unique_identifier, unspecialized.type,
- unspecialized.code, unspecialized.code_b, *locker,
- usage.variant, true);
+ if (!program) {
+ // Otherwise compile it from GLSL
+ program = BuildShader(device, entry.type, uid, ir, *registry, true);
}
+ PrecompiledShader shader;
+ shader.program = std::move(program);
+ shader.registry = std::move(registry);
+ shader.entries = MakeEntries(ir);
+
std::scoped_lock lock{mutex};
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
- shader_usages.size());
+ transferable->size());
}
-
- precompiled_programs.emplace(usage, std::move(shader));
-
- // TODO(Rodrigo): Is there a better way to do this?
- precompiled_variants[usage.unique_identifier].push_back(
- precompiled_programs.find(usage));
+ runtime_cache.emplace(entry.unique_identifier, std::move(shader));
}
};
const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
- const std::size_t bucket_size{shader_usages.size() / num_workers};
+ const std::size_t bucket_size{transferable->size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
- const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size};
+ const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
// On some platforms the shared context has to be created from the GUI thread
contexts[i] = emu_window.CreateSharedContext();
- threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps);
+ threads[i] = std::thread(worker, contexts[i].get(), start, end);
}
for (auto& thread : threads) {
thread.join();
}
- if (compilation_failed) {
+ if (gl_cache_failed) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
precompiled_cache_altered = true;
@@ -532,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
// before precompiling them
- for (std::size_t i = 0; i < shader_usages.size(); ++i) {
- const auto& usage{shader_usages[i]};
- if (dumps.find(usage) == dumps.end()) {
- const auto& program{precompiled_programs.at(usage)};
- disk_cache.SaveDump(usage, program->handle);
+ for (std::size_t i = 0; i < transferable->size(); ++i) {
+ const u64 id = (*transferable)[i].unique_identifier;
+ const auto it = find_precompiled(id);
+ if (it == gl_cache.end()) {
+ const GLuint program = runtime_cache.at(id).program->handle;
+ disk_cache.SavePrecompiled(id, program);
precompiled_cache_altered = true;
}
}
@@ -546,84 +416,33 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
-const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const {
- const auto it = precompiled_variants.find(unique_identifier);
- return it == precompiled_variants.end() ? nullptr : &it->second;
-}
-
-CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
- const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
- if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
- LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
+std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
+ const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
+ const std::unordered_set<GLenum>& supported_formats) {
+ if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
+ LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
return {};
}
- CachedProgram shader = std::make_shared<OGLProgram>();
- shader->handle = glCreateProgram();
- glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
- glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(),
- static_cast<GLsizei>(dump.binary.size()));
-
- GLint link_status{};
- glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status);
+ auto program = std::make_shared<OGLProgram>();
+ program->handle = glCreateProgram();
+ glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+ glProgramBinary(program->handle, precompiled_entry.binary_format,
+ precompiled_entry.binary.data(),
+ static_cast<GLsizei>(precompiled_entry.binary.size()));
+
+ GLint link_status;
+ glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
if (link_status == GL_FALSE) {
- LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
+ LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
return {};
}
- return shader;
-}
-
-bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
- const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
- const std::vector<ShaderDiskCacheRaw>& raws) {
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
- }
-
- for (std::size_t i = 0; i < raws.size(); ++i) {
- if (stop_loading) {
- return false;
- }
- const auto& raw{raws[i]};
- const u64 unique_identifier{raw.GetUniqueIdentifier()};
- const u64 calculated_hash{
- GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())};
- if (unique_identifier != calculated_hash) {
- LOG_ERROR(Render_OpenGL,
- "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
- "removing shader cache",
- raw.GetUniqueIdentifier(), calculated_hash);
- disk_cache.InvalidateTransferable();
- return false;
- }
-
- const u32 main_offset =
- raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- ConstBufferLocker locker(raw.GetType());
- const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker);
- // TODO(Rodrigo): Handle VertexA shaders
- // std::optional<ShaderIR> ir_b;
- // if (raw.HasProgramA()) {
- // ir_b.emplace(raw.GetProgramCodeB(), main_offset);
- // }
-
- UnspecializedShader unspecialized;
- unspecialized.entries = GLShader::GetEntries(ir);
- unspecialized.type = raw.GetType();
- unspecialized.code = raw.GetCode();
- unspecialized.code_b = raw.GetCodeB();
- unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
-
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
- }
- }
- return true;
+ return program;
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
- if (!system.GPU().Maxwell3D().dirty.shaders) {
+ if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
return last_shaders[static_cast<std::size_t>(program)];
}
@@ -647,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
- const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
- const ShaderParameters params{system, disk_cache, precompiled_variants, device,
+ const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
- const auto found = unspecialized_shaders.find(unique_identifier);
- if (found == unspecialized_shaders.end()) {
+ const auto found = runtime_cache.find(unique_identifier);
+ if (found == runtime_cache.end()) {
shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
std::move(code_b));
} else {
- shader = CachedShader::CreateFromCache(params, found->second);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+ shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
}
Register(shader);
@@ -672,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
return kernel;
}
- // No kernel found - create a new one
+ // No kernel found, create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
- const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})};
- const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
+ const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
- const ShaderParameters params{system, disk_cache, precompiled_variants, device,
+ const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
- const auto found = unspecialized_shaders.find(unique_identifier);
- if (found == unspecialized_shaders.end()) {
+ const auto found = runtime_cache.find(unique_identifier);
+ if (found == runtime_cache.end()) {
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
} else {
- kernel = CachedShader::CreateFromCache(params, found->second);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+ kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
}
Register(kernel);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 7b1470db3..4935019fc 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -22,7 +22,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/shader/const_buffer_locker.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace Core {
@@ -41,22 +41,17 @@ class RasterizerOpenGL;
struct UnspecializedShader;
using Shader = std::shared_ptr<CachedShader>;
-using CachedProgram = std::shared_ptr<OGLProgram>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
-using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
-
-struct UnspecializedShader {
- GLShader::ShaderEntries entries;
- Tegra::Engines::ShaderType type;
- ProgramCode code;
- ProgramCode code_b;
+
+struct PrecompiledShader {
+ std::shared_ptr<OGLProgram> program;
+ std::shared_ptr<VideoCommon::Shader::Registry> registry;
+ ShaderEntries entries;
};
struct ShaderParameters {
Core::System& system;
ShaderDiskCacheOpenGL& disk_cache;
- const PrecompiledVariants* precompiled_variants;
const Device& device;
VAddr cpu_addr;
u8* host_ptr;
@@ -65,61 +60,45 @@ struct ShaderParameters {
class CachedShader final : public RasterizerCacheObject {
public:
- static Shader CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type,
- ProgramCode program_code, ProgramCode program_code_b);
- static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
+ ~CachedShader();
- static Shader CreateFromCache(const ShaderParameters& params,
- const UnspecializedShader& unspecialized);
+ /// Gets the GL program handle for the shader
+ GLuint GetHandle() const;
+ /// Returns the guest CPU address of the shader
VAddr GetCpuAddr() const override {
return cpu_addr;
}
+ /// Returns the size in bytes of the shader
std::size_t GetSizeInBytes() const override {
- return code.size() * sizeof(u64);
+ return size_in_bytes;
}
/// Gets the shader entries for the shader
- const GLShader::ShaderEntries& GetShaderEntries() const {
+ const ShaderEntries& GetEntries() const {
return entries;
}
- /// Gets the GL program handle for the shader
- GLuint GetHandle(const ProgramVariant& variant);
-
-private:
- struct LockerVariant {
- std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
- std::unordered_map<ProgramVariant, CachedProgram> programs;
- };
-
- explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type,
- GLShader::ShaderEntries entries, ProgramCode program_code,
- ProgramCode program_code_b);
-
- bool EnsureValidLockerVariant();
-
- ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
- const VideoCommon::Shader::ConstBufferLocker& locker) const;
-
- Core::System& system;
- ShaderDiskCacheOpenGL& disk_cache;
- const Device& device;
-
- VAddr cpu_addr{};
-
- u64 unique_identifier{};
- Tegra::Engines::ShaderType shader_type{};
-
- GLShader::ShaderEntries entries;
+ static Shader CreateStageFromMemory(const ShaderParameters& params,
+ Maxwell::ShaderProgram program_type,
+ ProgramCode program_code, ProgramCode program_code_b);
+ static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
- ProgramCode code;
- ProgramCode code_b;
+ static Shader CreateFromCache(const ShaderParameters& params,
+ const PrecompiledShader& precompiled_shader,
+ std::size_t size_in_bytes);
- LockerVariant* curr_locker_variant = nullptr;
- std::vector<std::unique_ptr<LockerVariant>> locker_variants;
+private:
+ explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
+ std::shared_ptr<VideoCommon::Shader::Registry> registry,
+ ShaderEntries entries, std::shared_ptr<OGLProgram> program);
+
+ std::shared_ptr<VideoCommon::Shader::Registry> registry;
+ ShaderEntries entries;
+ VAddr cpu_addr = 0;
+ std::size_t size_in_bytes = 0;
+ std::shared_ptr<OGLProgram> program;
};
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -142,25 +121,15 @@ protected:
void FlushObjectInner(const Shader& object) override {}
private:
- bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback,
- const std::vector<ShaderDiskCacheRaw>& raws);
-
- CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
- const std::unordered_set<GLenum>& supported_formats);
-
- const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
+ std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
+ const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
+ const std::unordered_set<GLenum>& supported_formats);
Core::System& system;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
-
ShaderDiskCacheOpenGL disk_cache;
-
- PrecompiledPrograms precompiled_programs;
- std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
-
- std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
+ std::unordered_map<u64, PrecompiledShader> runtime_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4735000b5..2c38f57fd 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -23,8 +23,9 @@
#include "video_core/shader/ast.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader/transform_feedback.h"
-namespace OpenGL::GLShader {
+namespace OpenGL {
namespace {
@@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::Register;
+using VideoCommon::Shader::BuildTransformFeedback;
+using VideoCommon::Shader::Registry;
using namespace std::string_literals;
using namespace VideoCommon::Shader;
@@ -48,6 +51,11 @@ class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
+constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
+
+constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
+constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
+
struct TextureOffset {};
struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
@@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
+constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
+#define ftou floatBitsToUint
+#define itof intBitsToFloat
+#define utof uintBitsToFloat
+
+bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
+ bvec2 is_nan1 = isnan(pair1);
+ bvec2 is_nan2 = isnan(pair2);
+ return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
+}}
+
+const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
+const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
+
+layout (std140, binding = {}) uniform vs_config {{
+ float y_direction;
+}};
+)";
+
class ShaderWriter final {
public:
void AddExpression(std::string_view text) {
@@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
}
}
+/// Describes primitive behavior on geometry shaders
+std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
+ switch (topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ return {"points", 1};
+ case Maxwell::PrimitiveTopology::Lines:
+ case Maxwell::PrimitiveTopology::LineStrip:
+ return {"lines", 2};
+ case Maxwell::PrimitiveTopology::LinesAdjacency:
+ case Maxwell::PrimitiveTopology::LineStripAdjacency:
+ return {"lines_adjacency", 4};
+ case Maxwell::PrimitiveTopology::Triangles:
+ case Maxwell::PrimitiveTopology::TriangleStrip:
+ case Maxwell::PrimitiveTopology::TriangleFan:
+ return {"triangles", 3};
+ case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+ case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+ return {"triangles_adjacency", 6};
+ default:
+ UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
+ return {"points", 1};
+ }
+}
+
/// Generates code to use for a swizzle operation.
-constexpr const char* GetSwizzle(u32 element) {
+constexpr const char* GetSwizzle(std::size_t element) {
constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
return swizzle.at(element);
}
+constexpr const char* GetColorSwizzle(std::size_t element) {
+ constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
+ return swizzle.at(element);
+}
+
/// Translate topology
std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
@@ -337,15 +393,66 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
-[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) {
- return stage == ShaderType::Vertex;
-}
+struct GenericVaryingDescription {
+ std::string name;
+ u8 first_element = 0;
+ bool is_scalar = false;
+};
class GLSLDecompiler final {
public:
- explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
- std::string suffix)
- : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
+ explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier, std::string_view suffix)
+ : device{device}, ir{ir}, registry{registry}, stage{stage},
+ identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
+ if (stage != ShaderType::Compute) {
+ transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
+ }
+ }
+
+ void Decompile() {
+ DeclareHeader();
+ DeclareVertex();
+ DeclareGeometry();
+ DeclareFragment();
+ DeclareCompute();
+ DeclareInputAttributes();
+ DeclareOutputAttributes();
+ DeclareImages();
+ DeclareSamplers();
+ DeclareGlobalMemory();
+ DeclareConstantBuffers();
+ DeclareLocalMemory();
+ DeclareRegisters();
+ DeclarePredicates();
+ DeclareInternalFlags();
+ DeclareCustomVariables();
+ DeclarePhysicalAttributeReader();
+
+ code.AddLine("void main() {{");
+ ++code.scope;
+
+ if (stage == ShaderType::Vertex) {
+ code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
+ }
+
+ if (ir.IsDecompiled()) {
+ DecompileAST();
+ } else {
+ DecompileBranchMode();
+ }
+
+ --code.scope;
+ code.AddLine("}}");
+ }
+
+ std::string GetResult() {
+ return code.GetResult();
+ }
+
+private:
+ friend class ASTDecompiler;
+ friend class ExprDecompiler;
void DecompileBranchMode() {
// VM's program counter
@@ -387,46 +494,40 @@ public:
void DecompileAST();
- void Decompile() {
- DeclareVertex();
- DeclareGeometry();
- DeclareRegisters();
- DeclareCustomVariables();
- DeclarePredicates();
- DeclareLocalMemory();
- DeclareInternalFlags();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareConstantBuffers();
- DeclareGlobalMemory();
- DeclareSamplers();
- DeclareImages();
- DeclarePhysicalAttributeReader();
-
- code.AddLine("void execute_{}() {{", suffix);
- ++code.scope;
-
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
+ void DeclareHeader() {
+ if (!identifier.empty()) {
+ code.AddLine("// {}", identifier);
+ }
+ code.AddLine("#version 440 core");
+ code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
+ if (device.HasShaderBallot()) {
+ code.AddLine("#extension GL_ARB_shader_ballot : require");
+ }
+ if (device.HasVertexViewportLayer()) {
+ code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
}
+ if (device.HasImageLoadFormatted()) {
+ code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
+ }
+ if (device.HasWarpIntrinsics()) {
+ code.AddLine("#extension GL_NV_gpu_shader5 : require");
+ code.AddLine("#extension GL_NV_shader_thread_group : require");
+ code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
+ }
+ // This pragma stops Nvidia's driver from over optimizing math (probably using fp16
+ // operations) on places where we don't want to.
+ // Thanks to Ryujinx for finding this workaround.
+ code.AddLine("#pragma optionNV(fastmath off)");
- --code.scope;
- code.AddLine("}}");
- }
+ code.AddNewLine();
- std::string GetResult() {
- return code.GetResult();
+ code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
}
-private:
- friend class ASTDecompiler;
- friend class ExprDecompiler;
-
void DeclareVertex() {
- if (!IsVertexShader(stage))
+ if (stage != ShaderType::Vertex) {
return;
+ }
DeclareVertexRedeclarations();
}
@@ -436,9 +537,15 @@ private:
return;
}
+ const auto& info = registry.GetGraphicsInfo();
+ const auto input_topology = info.primitive_topology;
+ const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
+ max_input_vertices = max_vertices;
+ code.AddLine("layout ({}) in;", glsl_topology);
+
const auto topology = GetTopologyName(header.common3.output_topology);
- const auto max_vertices = header.common4.max_output_vertices.Value();
- code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
+ const auto max_output_vertices = header.common4.max_output_vertices.Value();
+ code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
code.AddNewLine();
code.AddLine("in gl_PerVertex {{");
@@ -450,11 +557,40 @@ private:
DeclareVertexRedeclarations();
}
+ void DeclareFragment() {
+ if (stage != ShaderType::Fragment) {
+ return;
+ }
+ for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
+ code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
+ }
+ }
+
+ void DeclareCompute() {
+ if (stage != ShaderType::Compute) {
+ return;
+ }
+ const auto& info = registry.GetComputeInfo();
+ if (const u32 size = info.shared_memory_size_in_words; size > 0) {
+ code.AddLine("shared uint smem[{}];", size);
+ code.AddNewLine();
+ }
+ code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
+ info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
+ code.AddNewLine();
+ }
+
void DeclareVertexRedeclarations() {
code.AddLine("out gl_PerVertex {{");
++code.scope;
- code.AddLine("vec4 gl_Position;");
+ auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
+ if (!pos_xfb.empty()) {
+ pos_xfb = fmt::format("layout ({}) ", pos_xfb);
+ }
+ const char* pos_type =
+ FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
+ code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
for (const auto attribute : ir.GetOutputAttributes()) {
if (attribute == Attribute::Index::ClipDistances0123 ||
@@ -463,14 +599,14 @@ private:
break;
}
}
- if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {
+ if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) {
if (ir.UsesLayer()) {
code.AddLine("int gl_Layer;");
}
if (ir.UsesViewportIndex()) {
code.AddLine("int gl_ViewportIndex;");
}
- } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&
+ } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
!device.HasVertexViewportLayer()) {
LOG_ERROR(
Render_OpenGL,
@@ -525,18 +661,16 @@ private:
}
void DeclareLocalMemory() {
+ u64 local_memory_size = 0;
if (stage == ShaderType::Compute) {
- code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
- code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
- code.AddLine("#endif");
- return;
+ local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
+ } else {
+ local_memory_size = header.GetLocalMemorySize();
}
-
- const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
- const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
+ const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
@@ -589,7 +723,7 @@ private:
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
const u32 location{GetGenericAttributeIndex(index)};
- std::string name{GetInputAttribute(index)};
+ std::string name{GetGenericInputAttribute(index)};
if (stage == ShaderType::Geometry) {
name = "gs_" + name + "[]";
}
@@ -626,9 +760,59 @@ private:
}
}
+ std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
+ }
+ return it->second.components;
+ }
+
+ std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
+ }
+
+ const VaryingTFB& tfb = it->second;
+ return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
+ tfb.offset, tfb.stride);
+ }
+
void DeclareOutputAttribute(Attribute::Index index) {
- const u32 location{GetGenericAttributeIndex(index)};
- code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
+ static constexpr std::string_view swizzle = "xyzw";
+ u8 element = 0;
+ while (element < 4) {
+ auto xfb = GetTransformFeedbackDecoration(index, element);
+ if (!xfb.empty()) {
+ xfb = fmt::format(", {}", xfb);
+ }
+ const std::size_t remainder = 4 - element;
+ const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
+ const char* const type = FLOAT_TYPES.at(num_components - 1);
+
+ const u32 location = GetGenericAttributeIndex(index);
+
+ GenericVaryingDescription description;
+ description.first_element = static_cast<u8>(element);
+ description.is_scalar = num_components == 1;
+ description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
+ if (element != 0 || num_components != 4) {
+ const std::string_view name_swizzle = swizzle.substr(element, num_components);
+ description.name = fmt::format("{}_{}", description.name, name_swizzle);
+ }
+ for (std::size_t i = 0; i < num_components; ++i) {
+ const u8 offset = static_cast<u8>(location * 4 + element + i);
+ varying_description.insert({offset, description});
+ }
+
+ code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
+ xfb, type, description.name);
+
+ element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
+ }
}
void DeclareConstantBuffers() {
@@ -925,7 +1109,8 @@ private:
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
- return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
+ return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
+ max_input_vertices.value());
}
return std::string(name);
};
@@ -959,7 +1144,7 @@ private:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
// shader.
- ASSERT(IsVertexShader(stage));
+ ASSERT(stage == ShaderType::Vertex);
switch (element) {
case 2:
// Config pack's first value is instance_id.
@@ -980,7 +1165,7 @@ private:
return {"0", Type::Int};
default:
if (IsGenericAttribute(attribute)) {
- return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
+ return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
Type::Float};
}
break;
@@ -1030,12 +1215,12 @@ private:
UNIMPLEMENTED();
return {};
case 1:
- if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
+ if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
return {};
}
return {{"gl_Layer", Type::Int}};
case 2:
- if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
+ if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
return {};
}
return {{"gl_ViewportIndex", Type::Int}};
@@ -1049,8 +1234,7 @@ private:
return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
default:
if (IsGenericAttribute(attribute)) {
- return {
- {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
+ return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
@@ -1822,16 +2006,19 @@ private:
expr += GetSampler(meta->sampler);
expr += ", ";
- expr += constructors.at(operation.GetOperandsCount() - 1);
+ expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
- expr += VisitOperand(operation, i).AsInt();
- const std::size_t next = i + 1;
- if (next == count)
- expr += ')';
- else if (next < count)
+ if (i > 0) {
expr += ", ";
+ }
+ expr += VisitOperand(operation, i).AsInt();
+ }
+ if (meta->array) {
+ expr += ", ";
+ expr += Visit(meta->array).AsInt();
}
+ expr += ')';
if (meta->lod && !meta->sampler.IsBuffer()) {
expr += ", ";
@@ -1945,7 +2132,7 @@ private:
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- code.AddLine("FragColor{}[{}] = {};", render_target, component,
+ code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
SafeGetRegister(current_reg).AsFloat());
++current_reg;
}
@@ -2261,27 +2448,34 @@ private:
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
std::string GetRegister(u32 index) const {
- return GetDeclarationWithSuffix(index, "gpr");
+ return AppendSuffix(index, "gpr");
}
std::string GetCustomVariable(u32 index) const {
- return GetDeclarationWithSuffix(index, "custom_var");
+ return AppendSuffix(index, "custom_var");
}
std::string GetPredicate(Tegra::Shader::Pred pred) const {
- return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
+ return AppendSuffix(static_cast<u32>(pred), "pred");
}
- std::string GetInputAttribute(Attribute::Index attribute) const {
- return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
+ std::string GetGenericInputAttribute(Attribute::Index attribute) const {
+ return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
}
- std::string GetOutputAttribute(Attribute::Index attribute) const {
- return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
+ std::unordered_map<u8, GenericVaryingDescription> varying_description;
+
+ std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
+ const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
+ const auto& description = varying_description.at(offset);
+ if (description.is_scalar) {
+ return description.name;
+ }
+ return fmt::format("{}[{}]", description.name, element - description.first_element);
}
std::string GetConstBuffer(u32 index) const {
- return GetDeclarationWithSuffix(index, "cbuf");
+ return AppendSuffix(index, "cbuf");
}
std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
@@ -2294,11 +2488,15 @@ private:
}
std::string GetConstBufferBlock(u32 index) const {
- return GetDeclarationWithSuffix(index, "cbuf_block");
+ return AppendSuffix(index, "cbuf_block");
}
std::string GetLocalMemory() const {
- return "lmem_" + suffix;
+ if (suffix.empty()) {
+ return "lmem";
+ } else {
+ return "lmem_" + std::string{suffix};
+ }
}
std::string GetInternalFlag(InternalFlag flag) const {
@@ -2307,23 +2505,31 @@ private:
const auto index = static_cast<u32>(flag);
ASSERT(index < static_cast<u32>(InternalFlag::Amount));
- return fmt::format("{}_{}", InternalFlagNames[index], suffix);
+ if (suffix.empty()) {
+ return InternalFlagNames[index];
+ } else {
+ return fmt::format("{}_{}", InternalFlagNames[index], suffix);
+ }
}
std::string GetSampler(const Sampler& sampler) const {
- return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
+ return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
}
std::string GetImage(const Image& image) const {
- return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
+ return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
}
- std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
- return fmt::format("{}_{}_{}", name, index, suffix);
+ std::string AppendSuffix(u32 index, std::string_view name) const {
+ if (suffix.empty()) {
+ return fmt::format("{}{}", name, index);
+ } else {
+ return fmt::format("{}{}_{}", name, index, suffix);
+ }
}
u32 GetNumPhysicalInputAttributes() const {
- return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
+ return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
}
u32 GetNumPhysicalAttributes() const {
@@ -2334,17 +2540,31 @@ private:
return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
+ bool IsRenderTargetEnabled(u32 render_target) const {
+ for (u32 component = 0; component < 4; ++component) {
+ if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
const Device& device;
const ShaderIR& ir;
+ const Registry& registry;
const ShaderType stage;
- const std::string suffix;
+ const std::string_view identifier;
+ const std::string_view suffix;
const Header header;
+ std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
+
+ std::optional<u32> max_input_vertices;
};
-std::string GetFlowVariable(u32 i) {
- return fmt::format("flow_var_{}", i);
+std::string GetFlowVariable(u32 index) {
+ return fmt::format("flow_var{}", index);
}
class ExprDecompiler {
@@ -2531,7 +2751,7 @@ void GLSLDecompiler::DecompileAST() {
} // Anonymous namespace
-ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
+ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2547,33 +2767,20 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
for (const auto& image : ir.GetImages()) {
entries.images.emplace_back(image);
}
- entries.clip_distances = ir.GetClipDistances();
+ const auto clip_distances = ir.GetClipDistances();
+ for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
+ entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
+ }
entries.shader_length = ir.GetLength();
return entries;
}
-std::string GetCommonDeclarations() {
- return R"(#define ftoi floatBitsToInt
-#define ftou floatBitsToUint
-#define itof intBitsToFloat
-#define utof uintBitsToFloat
-
-bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
- bvec2 is_nan1 = isnan(pair1);
- bvec2 is_nan2 = isnan(pair2);
- return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
-}
-
-const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
-const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
-)";
-}
-
-std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage,
- const std::string& suffix) {
- GLSLDecompiler decompiler(device, ir, stage, suffix);
+std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier,
+ std::string_view suffix) {
+ GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
decompiler.Decompile();
return decompiler.GetResult();
}
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 7876f48d6..e7dbd810c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -6,22 +6,18 @@
#include <array>
#include <string>
+#include <string_view>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
-namespace VideoCommon::Shader {
-class ShaderIR;
-}
-
namespace OpenGL {
-class Device;
-}
-namespace OpenGL::GLShader {
+class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using SamplerEntry = VideoCommon::Shader::Sampler;
@@ -74,15 +70,15 @@ struct ShaderEntries {
std::vector<GlobalMemoryEntry> global_memory_entries;
std::vector<SamplerEntry> samplers;
std::vector<ImageEntry> images;
- std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+ u32 clip_distances{};
std::size_t shader_length{};
};
-ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
-
-std::string GetCommonDeclarations();
+ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
-std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage, const std::string& suffix);
+std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ const VideoCommon::Shader::Registry& registry,
+ Tegra::Engines::ShaderType stage, std::string_view identifier,
+ std::string_view suffix = {});
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 1fc204f6f..9e95a122b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -31,32 +31,24 @@ namespace {
using ShaderCacheVersionHash = std::array<u8, 64>;
-enum class TransferableEntryKind : u32 {
- Raw,
- Usage,
-};
-
struct ConstBufferKey {
- u32 cbuf{};
- u32 offset{};
- u32 value{};
+ u32 cbuf = 0;
+ u32 offset = 0;
+ u32 value = 0;
};
struct BoundSamplerKey {
- u32 offset{};
- Tegra::Engines::SamplerDescriptor sampler{};
+ u32 offset = 0;
+ Tegra::Engines::SamplerDescriptor sampler;
};
struct BindlessSamplerKey {
- u32 cbuf{};
- u32 offset{};
- Tegra::Engines::SamplerDescriptor sampler{};
+ u32 cbuf = 0;
+ u32 offset = 0;
+ Tegra::Engines::SamplerDescriptor sampler;
};
-constexpr u32 NativeVersion = 12;
-
-// Making sure sizes doesn't change by accident
-static_assert(sizeof(ProgramVariant) == 20);
+constexpr u32 NativeVersion = 20;
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
} // Anonymous namespace
-ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code,
- ProgramCode code_b)
- : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move(
- code_b)} {}
+ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
-ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
+ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
-ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
-
-bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
- if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
- file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
+bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
+ if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
return false;
}
- u32 code_size{};
- u32 code_size_b{};
+ u32 code_size;
+ u32 code_size_b;
if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
return false;
}
-
code.resize(code_size);
code_b.resize(code_size_b);
- if (file.ReadArray(code.data(), code_size) != code_size)
+ if (file.ReadArray(code.data(), code_size) != code_size) {
return false;
-
+ }
if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
return false;
}
+
+ u8 is_texture_handler_size_known;
+ u32 texture_handler_size_value;
+ u32 num_keys;
+ u32 num_bound_samplers;
+ u32 num_bindless_samplers;
+ if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
+ file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
+ file.ReadArray(&texture_handler_size_value, 1) != 1 ||
+ file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
+ file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
+ file.ReadArray(&num_bindless_samplers, 1) != 1) {
+ return false;
+ }
+ if (is_texture_handler_size_known) {
+ texture_handler_size = texture_handler_size_value;
+ }
+
+ std::vector<ConstBufferKey> flat_keys(num_keys);
+ std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
+ std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
+ if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
+ file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
+ flat_bound_samplers.size() ||
+ file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
+ flat_bindless_samplers.size()) {
+ return false;
+ }
+ for (const auto& key : flat_keys) {
+ keys.insert({{key.cbuf, key.offset}, key.value});
+ }
+ for (const auto& key : flat_bound_samplers) {
+ bound_samplers.emplace(key.offset, key.sampler);
+ }
+ for (const auto& key : flat_bindless_samplers) {
+ bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
+ }
+
return true;
}
-bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
- if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 ||
+bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
+ if (file.WriteObject(static_cast<u32>(type)) != 1 ||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
return false;
}
-
- if (file.WriteArray(code.data(), code.size()) != code.size())
+ if (file.WriteArray(code.data(), code.size()) != code.size()) {
return false;
-
+ }
if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) {
return false;
}
- return true;
+
+ if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
+ file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
+ file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
+ file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
+ file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
+ return false;
+ }
+
+ std::vector<ConstBufferKey> flat_keys;
+ flat_keys.reserve(keys.size());
+ for (const auto& [address, value] : keys) {
+ flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
+ }
+
+ std::vector<BoundSamplerKey> flat_bound_samplers;
+ flat_bound_samplers.reserve(bound_samplers.size());
+ for (const auto& [address, sampler] : bound_samplers) {
+ flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
+ }
+
+ std::vector<BindlessSamplerKey> flat_bindless_samplers;
+ flat_bindless_samplers.reserve(bindless_samplers.size());
+ for (const auto& [address, sampler] : bindless_samplers) {
+ flat_bindless_samplers.push_back(
+ BindlessSamplerKey{address.first, address.second, sampler});
+ }
+
+ return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
+ file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
+ flat_bound_samplers.size() &&
+ file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
+ flat_bindless_samplers.size();
}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
-std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
-ShaderDiskCacheOpenGL::LoadTransferable() {
+std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
if (!Settings::values.use_disk_shader_cache || !has_title_id) {
@@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
FileUtil::IOFile file(GetTransferablePath(), "rb");
if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
- GetTitleID());
+ LOG_INFO(Render_OpenGL, "No transferable shader cache found");
is_usable = true;
return {};
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
- LOG_ERROR(Render_OpenGL,
- "Failed to get transferable cache version for title id={}, skipping",
- GetTitleID());
+ LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
return {};
}
@@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
}
// Version is valid, load the shaders
- constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping";
- std::vector<ShaderDiskCacheRaw> raws;
- std::vector<ShaderDiskCacheUsage> usages;
+ std::vector<ShaderDiskCacheEntry> entries;
while (file.Tell() < file.GetSize()) {
- TransferableEntryKind kind{};
- if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
- LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping");
- return {};
- }
-
- switch (kind) {
- case TransferableEntryKind::Raw: {
- ShaderDiskCacheRaw entry;
- if (!entry.Load(file)) {
- LOG_ERROR(Render_OpenGL, error_loading);
- return {};
- }
- transferable.insert({entry.GetUniqueIdentifier(), {}});
- raws.push_back(std::move(entry));
- break;
- }
- case TransferableEntryKind::Usage: {
- ShaderDiskCacheUsage usage;
-
- u32 num_keys{};
- u32 num_bound_samplers{};
- u32 num_bindless_samplers{};
- if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
- file.ReadArray(&usage.variant, 1) != 1 ||
- file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
- file.ReadArray(&num_bound_samplers, 1) != 1 ||
- file.ReadArray(&num_bindless_samplers, 1) != 1) {
- LOG_ERROR(Render_OpenGL, error_loading);
- return {};
- }
-
- std::vector<ConstBufferKey> keys(num_keys);
- std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
- std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
- if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
- file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
- bound_samplers.size() ||
- file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
- bindless_samplers.size()) {
- LOG_ERROR(Render_OpenGL, error_loading);
- return {};
- }
- for (const auto& key : keys) {
- usage.keys.insert({{key.cbuf, key.offset}, key.value});
- }
- for (const auto& key : bound_samplers) {
- usage.bound_samplers.emplace(key.offset, key.sampler);
- }
- for (const auto& key : bindless_samplers) {
- usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
- }
-
- usages.push_back(std::move(usage));
- break;
- }
- default:
- LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
- static_cast<u32>(kind));
+ ShaderDiskCacheEntry& entry = entries.emplace_back();
+ if (!entry.Load(file)) {
+ LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
return {};
}
}
is_usable = true;
- return {{std::move(raws), std::move(usages)}};
+ return {std::move(entries)};
}
-std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>
-ShaderDiskCacheOpenGL::LoadPrecompiled() {
+std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!is_usable) {
return {};
}
- std::string path = GetPrecompiledPath();
- FileUtil::IOFile file(path, "rb");
+ FileUtil::IOFile file(GetPrecompiledPath(), "rb");
if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
- GetTitleID());
+ LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
return {};
}
- const auto result = LoadPrecompiledFile(file);
- if (!result) {
- LOG_INFO(Render_OpenGL,
- "Failed to load precompiled cache for game with title id={}, removing",
- GetTitleID());
- file.Close();
- InvalidatePrecompiled();
- return {};
+ if (const auto result = LoadPrecompiledFile(file)) {
+ return *result;
}
- return *result;
+
+ LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
+ file.Close();
+ InvalidatePrecompiled();
+ return {};
}
-std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
-ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
+std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
+ FileUtil::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
file.ReadBytes(compressed.data(), compressed.size());
@@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- ShaderDumpsMap dumps;
+ std::vector<ShaderDiskCachePrecompiled> entries;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
- u32 num_keys{};
- u32 num_bound_samplers{};
- u32 num_bindless_samplers{};
- ShaderDiskCacheUsage usage;
- if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
- !LoadObjectFromPrecompiled(usage.variant) ||
- !LoadObjectFromPrecompiled(usage.bound_buffer) ||
- !LoadObjectFromPrecompiled(num_keys) ||
- !LoadObjectFromPrecompiled(num_bound_samplers) ||
- !LoadObjectFromPrecompiled(num_bindless_samplers)) {
- return {};
- }
- std::vector<ConstBufferKey> keys(num_keys);
- std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
- std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
- if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
- !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) !=
- bound_samplers.size() ||
- !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) !=
- bindless_samplers.size()) {
- return {};
- }
- for (const auto& key : keys) {
- usage.keys.insert({{key.cbuf, key.offset}, key.value});
- }
- for (const auto& key : bound_samplers) {
- usage.bound_samplers.emplace(key.offset, key.sampler);
- }
- for (const auto& key : bindless_samplers) {
- usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
- }
-
- ShaderDiskCacheDump dump;
- if (!LoadObjectFromPrecompiled(dump.binary_format)) {
- return {};
- }
-
- u32 binary_length{};
- if (!LoadObjectFromPrecompiled(binary_length)) {
+ u32 binary_size;
+ auto& entry = entries.emplace_back();
+ if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
+ !LoadObjectFromPrecompiled(entry.binary_format) ||
+ !LoadObjectFromPrecompiled(binary_size)) {
return {};
}
- dump.binary.resize(binary_length);
- if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
+ entry.binary.resize(binary_size);
+ if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
return {};
}
-
- dumps.emplace(std::move(usage), dump);
}
- return dumps;
+ return entries;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
}
}
-void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
+void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
if (!is_usable) {
return;
}
- const u64 id = entry.GetUniqueIdentifier();
- if (transferable.find(id) != transferable.end()) {
+ const u64 id = entry.unique_identifier;
+ if (stored_transferable.find(id) != stored_transferable.end()) {
// The shader already exists
return;
}
@@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
if (!file.IsOpen()) {
return;
}
- if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
+ if (!entry.Save(file)) {
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
file.Close();
InvalidateTransferable();
return;
}
- transferable.insert({id, {}});
-}
-void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
- if (!is_usable) {
- return;
- }
-
- const auto it = transferable.find(usage.unique_identifier);
- ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
-
- auto& usages{it->second};
- if (usages.find(usage) != usages.end()) {
- // Skip this variant since the shader is already stored.
- return;
- }
- usages.insert(usage);
-
- FileUtil::IOFile file = AppendTransferableFile();
- if (!file.IsOpen())
- return;
- const auto Close = [&] {
- LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
- file.Close();
- InvalidateTransferable();
- };
-
- if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
- file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
- file.WriteObject(usage.bound_buffer) != 1 ||
- file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
- file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
- file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
- Close();
- return;
- }
- for (const auto& [pair, value] : usage.keys) {
- const auto [cbuf, offset] = pair;
- if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
- Close();
- return;
- }
- }
- for (const auto& [offset, sampler] : usage.bound_samplers) {
- if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
- Close();
- return;
- }
- }
- for (const auto& [pair, sampler] : usage.bindless_samplers) {
- const auto [cbuf, offset] = pair;
- if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
- Close();
- return;
- }
- }
+ stored_transferable.insert(id);
}
-void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
+void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
if (!is_usable) {
return;
}
@@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
SavePrecompiledHeaderToVirtualPrecompiledCache();
}
- GLint binary_length{};
+ GLint binary_length;
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
- GLenum binary_format{};
+ GLenum binary_format;
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
- const auto Close = [&] {
+ if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
+ !SaveArrayToPrecompiled(binary.data(), binary.size())) {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
- usage.unique_identifier);
+ unique_identifier);
InvalidatePrecompiled();
- };
-
- if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
- !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
- !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
- !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
- !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
- Close();
- return;
- }
- for (const auto& [pair, value] : usage.keys) {
- const auto [cbuf, offset] = pair;
- if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) {
- Close();
- return;
- }
- }
- for (const auto& [offset, sampler] : usage.bound_samplers) {
- if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) {
- Close();
- return;
- }
- }
- for (const auto& [pair, sampler] : usage.bindless_samplers) {
- const auto [cbuf, offset] = pair;
- if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
- Close();
- return;
- }
- }
- if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
- !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
- !SaveArrayToPrecompiled(binary.data(), binary.size())) {
- Close();
}
}
@@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
precompiled_path);
- return;
}
}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index ef2371f6d..d5be52e40 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -19,8 +19,7 @@
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
-#include "video_core/shader/const_buffer_locker.h"
+#include "video_core/shader/registry.h"
namespace Core {
class System;
@@ -32,139 +31,39 @@ class IOFile;
namespace OpenGL {
-struct ShaderDiskCacheUsage;
-struct ShaderDiskCacheDump;
-
using ProgramCode = std::vector<u64>;
-using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
-
-/// Describes the different variants a program can be compiled with.
-struct ProgramVariant final {
- ProgramVariant() = default;
-
- /// Graphics constructor.
- explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
- : primitive_mode{primitive_mode} {}
-
- /// Compute constructor.
- explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
- u32 local_memory_size) noexcept
- : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
- shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
-
- // Graphics specific parameters.
- GLenum primitive_mode{};
-
- // Compute specific parameters.
- u32 block_x{};
- u16 block_y{};
- u16 block_z{};
- u32 shared_memory_size{};
- u32 local_memory_size{};
-
- bool operator==(const ProgramVariant& rhs) const noexcept {
- return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
- local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
- rhs.block_z, rhs.shared_memory_size,
- rhs.local_memory_size);
- }
-
- bool operator!=(const ProgramVariant& rhs) const noexcept {
- return !operator==(rhs);
- }
-};
-static_assert(std::is_trivially_copyable_v<ProgramVariant>);
-
-/// Describes how a shader is used.
-struct ShaderDiskCacheUsage {
- u64 unique_identifier{};
- ProgramVariant variant;
- u32 bound_buffer{};
- VideoCommon::Shader::KeyMap keys;
- VideoCommon::Shader::BoundSamplerMap bound_samplers;
- VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
-
- bool operator==(const ShaderDiskCacheUsage& rhs) const {
- return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
- std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
- rhs.bindless_samplers);
- }
-
- bool operator!=(const ShaderDiskCacheUsage& rhs) const {
- return !operator==(rhs);
- }
-};
-
-} // namespace OpenGL
-
-namespace std {
-
-template <>
-struct hash<OpenGL::ProgramVariant> {
- std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
- return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
- static_cast<std::size_t>(variant.block_x) ^
- (static_cast<std::size_t>(variant.block_y) << 32) ^
- (static_cast<std::size_t>(variant.block_z) << 48) ^
- (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
- (static_cast<std::size_t>(variant.local_memory_size) << 36);
- }
-};
-
-template <>
-struct hash<OpenGL::ShaderDiskCacheUsage> {
- std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
- return static_cast<std::size_t>(usage.unique_identifier) ^
- std::hash<OpenGL::ProgramVariant>{}(usage.variant);
- }
-};
-
-} // namespace std
-
-namespace OpenGL {
-/// Describes a shader how it's used by the guest GPU
-class ShaderDiskCacheRaw {
-public:
- explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type,
- ProgramCode code, ProgramCode code_b = {});
- ShaderDiskCacheRaw();
- ~ShaderDiskCacheRaw();
+/// Describes a shader and how it's used by the guest GPU
+struct ShaderDiskCacheEntry {
+ ShaderDiskCacheEntry();
+ ~ShaderDiskCacheEntry();
bool Load(FileUtil::IOFile& file);
bool Save(FileUtil::IOFile& file) const;
- u64 GetUniqueIdentifier() const {
- return unique_identifier;
- }
-
bool HasProgramA() const {
return !code.empty() && !code_b.empty();
}
- Tegra::Engines::ShaderType GetType() const {
- return type;
- }
-
- const ProgramCode& GetCode() const {
- return code;
- }
-
- const ProgramCode& GetCodeB() const {
- return code_b;
- }
-
-private:
- u64 unique_identifier{};
Tegra::Engines::ShaderType type{};
ProgramCode code;
ProgramCode code_b;
+
+ u64 unique_identifier = 0;
+ std::optional<u32> texture_handler_size;
+ u32 bound_buffer = 0;
+ VideoCommon::Shader::GraphicsInfo graphics_info;
+ VideoCommon::Shader::ComputeInfo compute_info;
+ VideoCommon::Shader::KeyMap keys;
+ VideoCommon::Shader::BoundSamplerMap bound_samplers;
+ VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
};
/// Contains an OpenGL dumped binary program
-struct ShaderDiskCacheDump {
- GLenum binary_format{};
+struct ShaderDiskCachePrecompiled {
+ u64 unique_identifier = 0;
+ GLenum binary_format = 0;
std::vector<u8> binary;
};
@@ -174,11 +73,10 @@ public:
~ShaderDiskCacheOpenGL();
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
- std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
- LoadTransferable();
+ std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
/// Loads current game's precompiled cache. Invalidates on failure.
- std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled();
+ std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
void InvalidateTransferable();
@@ -187,21 +85,18 @@ public:
void InvalidatePrecompiled();
/// Saves a raw dump to the transferable file. Checks for collisions.
- void SaveRaw(const ShaderDiskCacheRaw& entry);
-
- /// Saves shader usage to the transferable file. Does not check for collisions.
- void SaveUsage(const ShaderDiskCacheUsage& usage);
+ void SaveEntry(const ShaderDiskCacheEntry& entry);
/// Saves a dump entry to the precompiled file. Does not check for collisions.
- void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
+ void SavePrecompiled(u64 unique_identifier, GLuint program);
/// Serializes virtual precompiled shader cache file to real file
void SaveVirtualPrecompiledFile();
private:
/// Loads the transferable cache. Returns empty on failure.
- std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
- LoadPrecompiledFile(FileUtil::IOFile& file);
+ std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
+ FileUtil::IOFile& file);
/// Opens current game's transferable file and write it's header if it doesn't exist
FileUtil::IOFile AppendTransferableFile() const;
@@ -270,7 +165,7 @@ private:
std::size_t precompiled_cache_virtual_file_offset = 0;
// Stored transferable shaders
- std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
+ std::unordered_set<u64> stored_transferable;
// The cache has been loaded at boot
bool is_usable{};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
deleted file mode 100644
index 34946fb47..000000000
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <string>
-
-#include <fmt/format.h>
-
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace OpenGL::GLShader {
-
-using Tegra::Engines::Maxwell3D;
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::CompileDepth;
-using VideoCommon::Shader::CompilerSettings;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::ShaderIR;
-
-std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
- std::string out = GetCommonDeclarations();
- out += fmt::format(R"(
-layout (std140, binding = {}) uniform vs_config {{
- float y_direction;
-}};
-
-)",
- EmulationUniformBlockBinding);
- out += Decompile(device, ir, ShaderType::Vertex, "vertex");
- if (ir_b) {
- out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
- }
-
- out += R"(
-void main() {
- gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);
- execute_vertex();
-)";
- if (ir_b) {
- out += " execute_vertex_b();";
- }
- out += "}\n";
- return out;
-}
-
-std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
- std::string out = GetCommonDeclarations();
- out += fmt::format(R"(
-layout (std140, binding = {}) uniform gs_config {{
- float y_direction;
-}};
-
-)",
- EmulationUniformBlockBinding);
- out += Decompile(device, ir, ShaderType::Geometry, "geometry");
-
- out += R"(
-void main() {
- execute_geometry();
-}
-)";
- return out;
-}
-
-std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
- std::string out = GetCommonDeclarations();
- out += fmt::format(R"(
-layout (location = 0) out vec4 FragColor0;
-layout (location = 1) out vec4 FragColor1;
-layout (location = 2) out vec4 FragColor2;
-layout (location = 3) out vec4 FragColor3;
-layout (location = 4) out vec4 FragColor4;
-layout (location = 5) out vec4 FragColor5;
-layout (location = 6) out vec4 FragColor6;
-layout (location = 7) out vec4 FragColor7;
-
-layout (std140, binding = {}) uniform fs_config {{
- float y_direction;
-}};
-
-)",
- EmulationUniformBlockBinding);
- out += Decompile(device, ir, ShaderType::Fragment, "fragment");
-
- out += R"(
-void main() {
- execute_fragment();
-}
-)";
- return out;
-}
-
-std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
- std::string out = GetCommonDeclarations();
- out += Decompile(device, ir, ShaderType::Compute, "compute");
- out += R"(
-void main() {
- execute_compute();
-}
-)";
- return out;
-}
-
-} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
deleted file mode 100644
index cba2be9f9..000000000
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace OpenGL {
-class Device;
-}
-
-namespace OpenGL::GLShader {
-
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::ShaderIR;
-
-/// Generates the GLSL vertex shader program source code for the given VS program
-std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
-
-/// Generates the GLSL geometry shader program source code for the given GS program
-std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
-
-/// Generates the GLSL fragment shader program source code for the given FS program
-std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
-
-/// Generates the GLSL compute shader program source code for the given CS program
-std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
-
-} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 75d3fac04..9c7b0adbd 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,45 +2,52 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <glad/glad.h>
+
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL::GLShader {
-using Tegra::Engines::Maxwell3D;
-
-ProgramManager::ProgramManager() {
- pipeline.Create();
-}
+ProgramManager::ProgramManager() = default;
ProgramManager::~ProgramManager() = default;
-void ProgramManager::ApplyTo(OpenGLState& state) {
- UpdatePipeline();
- state.draw.shader_program = 0;
- state.draw.program_pipeline = pipeline.handle;
+void ProgramManager::Create() {
+ graphics_pipeline.Create();
+ glBindProgramPipeline(graphics_pipeline.handle);
}
-void ProgramManager::UpdatePipeline() {
+void ProgramManager::BindGraphicsPipeline() {
+ if (!is_graphics_bound) {
+ is_graphics_bound = true;
+ glUseProgram(0);
+ }
+
// Avoid updating the pipeline when values have no changed
if (old_state == current_state) {
return;
}
// Workaround for AMD bug
- constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
- GL_FRAGMENT_SHADER_BIT};
- glUseProgramStages(pipeline.handle, all_used_stages, 0);
-
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
- glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
+ static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
+ GL_FRAGMENT_SHADER_BIT};
+ const GLuint handle = graphics_pipeline.handle;
+ glUseProgramStages(handle, all_used_stages, 0);
+ glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
+ glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
+ glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
old_state = current_state;
}
-void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) {
+void ProgramManager::BindComputeShader(GLuint program) {
+ is_graphics_bound = false;
+ glUseProgram(program);
+}
+
+void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
const auto& regs = maxwell.regs;
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 478c165ce..d2e47f2a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -9,7 +9,6 @@
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL::GLShader {
@@ -32,49 +31,47 @@ public:
explicit ProgramManager();
~ProgramManager();
- void ApplyTo(OpenGLState& state);
+ void Create();
- void UseProgrammableVertexShader(GLuint program) {
+ /// Updates the graphics pipeline and binds it.
+ void BindGraphicsPipeline();
+
+ /// Binds a compute shader.
+ void BindComputeShader(GLuint program);
+
+ void UseVertexShader(GLuint program) {
current_state.vertex_shader = program;
}
- void UseProgrammableGeometryShader(GLuint program) {
+ void UseGeometryShader(GLuint program) {
current_state.geometry_shader = program;
}
- void UseProgrammableFragmentShader(GLuint program) {
+ void UseFragmentShader(GLuint program) {
current_state.fragment_shader = program;
}
- void UseTrivialGeometryShader() {
- current_state.geometry_shader = 0;
- }
-
- void UseTrivialFragmentShader() {
- current_state.fragment_shader = 0;
- }
-
private:
struct PipelineState {
- bool operator==(const PipelineState& rhs) const {
+ bool operator==(const PipelineState& rhs) const noexcept {
return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
geometry_shader == rhs.geometry_shader;
}
- bool operator!=(const PipelineState& rhs) const {
+ bool operator!=(const PipelineState& rhs) const noexcept {
return !operator==(rhs);
}
- GLuint vertex_shader{};
- GLuint fragment_shader{};
- GLuint geometry_shader{};
+ GLuint vertex_shader = 0;
+ GLuint fragment_shader = 0;
+ GLuint geometry_shader = 0;
};
- void UpdatePipeline();
-
- OGLPipeline pipeline;
+ OGLPipeline graphics_pipeline;
+ OGLPipeline compute_pipeline;
PipelineState current_state;
PipelineState old_state;
+ bool is_graphics_bound = true;
};
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
deleted file mode 100644
index 7d3bc1a1f..000000000
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ /dev/null
@@ -1,569 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <iterator>
-#include <glad/glad.h>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/microprofile.h"
-#include "video_core/renderer_opengl/gl_state.h"
-
-MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
-
-namespace OpenGL {
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-OpenGLState OpenGLState::cur_state;
-
-namespace {
-
-template <typename T>
-bool UpdateValue(T& current_value, const T new_value) {
- const bool changed = current_value != new_value;
- current_value = new_value;
- return changed;
-}
-
-template <typename T1, typename T2>
-bool UpdateTie(T1 current_value, const T2 new_value) {
- const bool changed = current_value != new_value;
- current_value = new_value;
- return changed;
-}
-
-template <typename T>
-std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) {
- std::optional<std::size_t> first;
- std::size_t last;
- for (std::size_t i = 0; i < std::size(current_values); ++i) {
- if (!UpdateValue(current_values[i], new_values[i])) {
- continue;
- }
- if (!first) {
- first = i;
- }
- last = i;
- }
- if (!first) {
- return std::nullopt;
- }
- return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1));
-}
-
-void Enable(GLenum cap, bool enable) {
- if (enable) {
- glEnable(cap);
- } else {
- glDisable(cap);
- }
-}
-
-void Enable(GLenum cap, GLuint index, bool enable) {
- if (enable) {
- glEnablei(cap, index);
- } else {
- glDisablei(cap, index);
- }
-}
-
-void Enable(GLenum cap, bool& current_value, bool new_value) {
- if (UpdateValue(current_value, new_value)) {
- Enable(cap, new_value);
- }
-}
-
-void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
- if (UpdateValue(current_value, new_value)) {
- Enable(cap, index, new_value);
- }
-}
-
-} // Anonymous namespace
-
-OpenGLState::OpenGLState() = default;
-
-void OpenGLState::SetDefaultViewports() {
- viewports.fill(Viewport{});
-
- depth_clamp.far_plane = false;
- depth_clamp.near_plane = false;
-}
-
-void OpenGLState::ApplyFramebufferState() {
- if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
- glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
- }
- if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
- }
-}
-
-void OpenGLState::ApplyVertexArrayState() {
- if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
- glBindVertexArray(draw.vertex_array);
- }
-}
-
-void OpenGLState::ApplyShaderProgram() {
- if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
- glUseProgram(draw.shader_program);
- }
-}
-
-void OpenGLState::ApplyProgramPipeline() {
- if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
- glBindProgramPipeline(draw.program_pipeline);
- }
-}
-
-void OpenGLState::ApplyClipDistances() {
- for (std::size_t i = 0; i < clip_distance.size(); ++i) {
- Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
- clip_distance[i]);
- }
-}
-
-void OpenGLState::ApplyPointSize() {
- Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
- Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite);
- if (UpdateValue(cur_state.point.size, point.size)) {
- glPointSize(point.size);
- }
-}
-
-void OpenGLState::ApplyFragmentColorClamp() {
- if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
- glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
- fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
- }
-}
-
-void OpenGLState::ApplyMultisample() {
- Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
- multisample_control.alpha_to_coverage);
- Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
- multisample_control.alpha_to_one);
-}
-
-void OpenGLState::ApplyDepthClamp() {
- if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
- depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
- return;
- }
- cur_state.depth_clamp = depth_clamp;
-
- UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
- "Unimplemented Depth Clamp Separation!");
-
- Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
-}
-
-void OpenGLState::ApplySRgb() {
- if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
- return;
- cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
- if (framebuffer_srgb.enabled) {
- glEnable(GL_FRAMEBUFFER_SRGB);
- } else {
- glDisable(GL_FRAMEBUFFER_SRGB);
- }
-}
-
-void OpenGLState::ApplyCulling() {
- Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
-
- if (UpdateValue(cur_state.cull.mode, cull.mode)) {
- glCullFace(cull.mode);
- }
-
- if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
- glFrontFace(cull.front_face);
- }
-}
-
-void OpenGLState::ApplyRasterizerDiscard() {
- Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
-}
-
-void OpenGLState::ApplyColorMask() {
- if (!dirty.color_mask) {
- return;
- }
- dirty.color_mask = false;
-
- for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
- const auto& updated = color_mask[i];
- auto& current = cur_state.color_mask[i];
- if (updated.red_enabled != current.red_enabled ||
- updated.green_enabled != current.green_enabled ||
- updated.blue_enabled != current.blue_enabled ||
- updated.alpha_enabled != current.alpha_enabled) {
- current = updated;
- glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
- updated.blue_enabled, updated.alpha_enabled);
- }
- }
-}
-
-void OpenGLState::ApplyDepth() {
- Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
-
- if (cur_state.depth.test_func != depth.test_func) {
- cur_state.depth.test_func = depth.test_func;
- glDepthFunc(depth.test_func);
- }
-
- if (cur_state.depth.write_mask != depth.write_mask) {
- cur_state.depth.write_mask = depth.write_mask;
- glDepthMask(depth.write_mask);
- }
-}
-
-void OpenGLState::ApplyPrimitiveRestart() {
- Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
-
- if (cur_state.primitive_restart.index != primitive_restart.index) {
- cur_state.primitive_restart.index = primitive_restart.index;
- glPrimitiveRestartIndex(primitive_restart.index);
- }
-}
-
-void OpenGLState::ApplyStencilTest() {
- if (!dirty.stencil_state) {
- return;
- }
- dirty.stencil_state = false;
-
- Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
-
- const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
- if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
- current.test_mask != config.test_mask) {
- current.test_func = config.test_func;
- current.test_ref = config.test_ref;
- current.test_mask = config.test_mask;
- glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
- }
- if (current.action_depth_fail != config.action_depth_fail ||
- current.action_depth_pass != config.action_depth_pass ||
- current.action_stencil_fail != config.action_stencil_fail) {
- current.action_depth_fail = config.action_depth_fail;
- current.action_depth_pass = config.action_depth_pass;
- current.action_stencil_fail = config.action_stencil_fail;
- glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
- config.action_depth_pass);
- }
- if (current.write_mask != config.write_mask) {
- current.write_mask = config.write_mask;
- glStencilMaskSeparate(face, config.write_mask);
- }
- };
- ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
- ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
-}
-
-void OpenGLState::ApplyViewport() {
- for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
- const auto& updated = viewports[i];
- auto& current = cur_state.viewports[i];
-
- if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
- current.height != updated.height) {
- current.x = updated.x;
- current.y = updated.y;
- current.width = updated.width;
- current.height = updated.height;
- glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
- static_cast<GLfloat>(updated.width),
- static_cast<GLfloat>(updated.height));
- }
- if (current.depth_range_near != updated.depth_range_near ||
- current.depth_range_far != updated.depth_range_far) {
- current.depth_range_near = updated.depth_range_near;
- current.depth_range_far = updated.depth_range_far;
- glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
- }
-
- Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
-
- if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
- current.scissor.width != updated.scissor.width ||
- current.scissor.height != updated.scissor.height) {
- current.scissor.x = updated.scissor.x;
- current.scissor.y = updated.scissor.y;
- current.scissor.width = updated.scissor.width;
- current.scissor.height = updated.scissor.height;
- glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
- updated.scissor.height);
- }
- }
-}
-
-void OpenGLState::ApplyGlobalBlending() {
- const Blend& updated = blend[0];
- Blend& current = cur_state.blend[0];
-
- Enable(GL_BLEND, current.enabled, updated.enabled);
-
- if (current.src_rgb_func != updated.src_rgb_func ||
- current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
- current.dst_a_func != updated.dst_a_func) {
- current.src_rgb_func = updated.src_rgb_func;
- current.dst_rgb_func = updated.dst_rgb_func;
- current.src_a_func = updated.src_a_func;
- current.dst_a_func = updated.dst_a_func;
- glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
- updated.dst_a_func);
- }
-
- if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
- current.rgb_equation = updated.rgb_equation;
- current.a_equation = updated.a_equation;
- glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
- }
-}
-
-void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) {
- const Blend& updated = blend[target];
- Blend& current = cur_state.blend[target];
-
- if (current.enabled != updated.enabled || force) {
- current.enabled = updated.enabled;
- Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
- }
-
- if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
- current.dst_a_func),
- std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
- updated.dst_a_func))) {
- glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
- updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
- }
-
- if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
- std::tie(updated.rgb_equation, updated.a_equation))) {
- glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
- updated.a_equation);
- }
-}
-
-void OpenGLState::ApplyBlending() {
- if (!dirty.blend_state) {
- return;
- }
- dirty.blend_state = false;
-
- if (independant_blend.enabled) {
- const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
- for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
- ApplyTargetBlending(target, force);
- }
- } else {
- ApplyGlobalBlending();
- }
- cur_state.independant_blend.enabled = independant_blend.enabled;
-
- if (UpdateTie(
- std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
- cur_state.blend_color.blue, cur_state.blend_color.alpha),
- std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
- glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
- }
-}
-
-void OpenGLState::ApplyLogicOp() {
- Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
-
- if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
- glLogicOp(logic_op.operation);
- }
-}
-
-void OpenGLState::ApplyPolygonOffset() {
- if (!dirty.polygon_offset) {
- return;
- }
- dirty.polygon_offset = false;
-
- Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
- polygon_offset.fill_enable);
- Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
- polygon_offset.line_enable);
- Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
- polygon_offset.point_enable);
-
- if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
- cur_state.polygon_offset.clamp),
- std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
- if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
- glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
- } else {
- UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
- "Unimplemented Depth polygon offset clamp.");
- glPolygonOffset(polygon_offset.factor, polygon_offset.units);
- }
- }
-}
-
-void OpenGLState::ApplyAlphaTest() {
- Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
- if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
- std::tie(alpha_test.func, alpha_test.ref))) {
- glAlphaFunc(alpha_test.func, alpha_test.ref);
- }
-}
-
-void OpenGLState::ApplyClipControl() {
- if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
- std::tie(clip_control.origin, clip_control.depth_mode))) {
- glClipControl(clip_control.origin, clip_control.depth_mode);
- }
-}
-
-void OpenGLState::ApplyRenderBuffer() {
- if (cur_state.renderbuffer != renderbuffer) {
- cur_state.renderbuffer = renderbuffer;
- glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
- }
-}
-
-void OpenGLState::ApplyTextures() {
- const std::size_t size = std::size(textures);
- for (std::size_t i = 0; i < size; ++i) {
- if (UpdateValue(cur_state.textures[i], textures[i])) {
- // BindTextureUnit doesn't support binding null textures, skip those binds.
- // TODO(Rodrigo): Stop using null textures
- if (textures[i] != 0) {
- glBindTextureUnit(static_cast<GLuint>(i), textures[i]);
- }
- }
- }
-}
-
-void OpenGLState::ApplySamplers() {
- const std::size_t size = std::size(samplers);
- for (std::size_t i = 0; i < size; ++i) {
- if (UpdateValue(cur_state.samplers[i], samplers[i])) {
- glBindSampler(static_cast<GLuint>(i), samplers[i]);
- }
- }
-}
-
-void OpenGLState::ApplyImages() {
- if (const auto update = UpdateArray(cur_state.images, images)) {
- glBindImageTextures(update->first, update->second, images.data() + update->first);
- }
-}
-
-void OpenGLState::Apply() {
- MICROPROFILE_SCOPE(OpenGL_State);
- ApplyFramebufferState();
- ApplyVertexArrayState();
- ApplyShaderProgram();
- ApplyProgramPipeline();
- ApplyClipDistances();
- ApplyPointSize();
- ApplyFragmentColorClamp();
- ApplyMultisample();
- ApplyRasterizerDiscard();
- ApplyColorMask();
- ApplyDepthClamp();
- ApplyViewport();
- ApplyStencilTest();
- ApplySRgb();
- ApplyCulling();
- ApplyDepth();
- ApplyPrimitiveRestart();
- ApplyBlending();
- ApplyLogicOp();
- ApplyTextures();
- ApplySamplers();
- ApplyImages();
- ApplyPolygonOffset();
- ApplyAlphaTest();
- ApplyClipControl();
- ApplyRenderBuffer();
-}
-
-void OpenGLState::EmulateViewportWithScissor() {
- auto& current = viewports[0];
- if (current.scissor.enabled) {
- const GLint left = std::max(current.x, current.scissor.x);
- const GLint right =
- std::max(current.x + current.width, current.scissor.x + current.scissor.width);
- const GLint bottom = std::max(current.y, current.scissor.y);
- const GLint top =
- std::max(current.y + current.height, current.scissor.y + current.scissor.height);
- current.scissor.x = std::max(left, 0);
- current.scissor.y = std::max(bottom, 0);
- current.scissor.width = std::max(right - left, 0);
- current.scissor.height = std::max(top - bottom, 0);
- } else {
- current.scissor.enabled = true;
- current.scissor.x = current.x;
- current.scissor.y = current.y;
- current.scissor.width = current.width;
- current.scissor.height = current.height;
- }
-}
-
-OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
- for (auto& texture : textures) {
- if (texture == handle) {
- texture = 0;
- }
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetSampler(GLuint handle) {
- for (auto& sampler : samplers) {
- if (sampler == handle) {
- sampler = 0;
- }
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetProgram(GLuint handle) {
- if (draw.shader_program == handle) {
- draw.shader_program = 0;
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetPipeline(GLuint handle) {
- if (draw.program_pipeline == handle) {
- draw.program_pipeline = 0;
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) {
- if (draw.vertex_array == handle) {
- draw.vertex_array = 0;
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) {
- if (draw.read_framebuffer == handle) {
- draw.read_framebuffer = 0;
- }
- if (draw.draw_framebuffer == handle) {
- draw.draw_framebuffer = 0;
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetRenderbuffer(GLuint handle) {
- if (renderbuffer == handle) {
- renderbuffer = 0;
- }
- return *this;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
deleted file mode 100644
index bce662f2c..000000000
--- a/src/video_core/renderer_opengl/gl_state.h
+++ /dev/null
@@ -1,251 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <type_traits>
-#include <glad/glad.h>
-#include "video_core/engines/maxwell_3d.h"
-
-namespace OpenGL {
-
-class OpenGLState {
-public:
- struct {
- bool enabled = false; // GL_FRAMEBUFFER_SRGB
- } framebuffer_srgb;
-
- struct {
- bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE
- bool alpha_to_one = false; // GL_ALPHA_TO_ONE
- } multisample_control;
-
- struct {
- bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB
- } fragment_color_clamp;
-
- struct {
- bool far_plane = false;
- bool near_plane = false;
- } depth_clamp; // GL_DEPTH_CLAMP
-
- struct {
- bool enabled = false; // GL_CULL_FACE
- GLenum mode = GL_BACK; // GL_CULL_FACE_MODE
- GLenum front_face = GL_CCW; // GL_FRONT_FACE
- } cull;
-
- struct {
- bool test_enabled = false; // GL_DEPTH_TEST
- GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK
- GLenum test_func = GL_LESS; // GL_DEPTH_FUNC
- } depth;
-
- struct {
- bool enabled = false;
- GLuint index = 0;
- } primitive_restart; // GL_PRIMITIVE_RESTART
-
- bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
-
- struct ColorMask {
- GLboolean red_enabled = GL_TRUE;
- GLboolean green_enabled = GL_TRUE;
- GLboolean blue_enabled = GL_TRUE;
- GLboolean alpha_enabled = GL_TRUE;
- };
- std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
- color_mask; // GL_COLOR_WRITEMASK
-
- struct {
- bool test_enabled = false; // GL_STENCIL_TEST
- struct {
- GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC
- GLint test_ref = 0; // GL_STENCIL_REF
- GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK
- GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK
- GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL
- GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL
- GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS
- } front, back;
- } stencil;
-
- struct Blend {
- bool enabled = false; // GL_BLEND
- GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB
- GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA
- GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB
- GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB
- GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA
- GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA
- };
- std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;
-
- struct {
- bool enabled = false;
- } independant_blend;
-
- struct {
- GLclampf red = 0.0f;
- GLclampf green = 0.0f;
- GLclampf blue = 0.0f;
- GLclampf alpha = 0.0f;
- } blend_color; // GL_BLEND_COLOR
-
- struct {
- bool enabled = false; // GL_LOGIC_OP_MODE
- GLenum operation = GL_COPY;
- } logic_op;
-
- static constexpr std::size_t NumSamplers = 32 * 5;
- static constexpr std::size_t NumImages = 8 * 5;
- std::array<GLuint, NumSamplers> textures = {};
- std::array<GLuint, NumSamplers> samplers = {};
- std::array<GLuint, NumImages> images = {};
-
- struct {
- GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING
- GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING
- GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING
- GLuint shader_program = 0; // GL_CURRENT_PROGRAM
- GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING
- } draw;
-
- struct Viewport {
- GLint x = 0;
- GLint y = 0;
- GLint width = 0;
- GLint height = 0;
- GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE
- GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE
- struct {
- bool enabled = false; // GL_SCISSOR_TEST
- GLint x = 0;
- GLint y = 0;
- GLsizei width = 0;
- GLsizei height = 0;
- } scissor;
- };
- std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
-
- struct {
- bool program_control = false; // GL_PROGRAM_POINT_SIZE
- bool sprite = false; // GL_POINT_SPRITE
- GLfloat size = 1.0f; // GL_POINT_SIZE
- } point;
-
- struct {
- bool point_enable = false;
- bool line_enable = false;
- bool fill_enable = false;
- GLfloat units = 0.0f;
- GLfloat factor = 0.0f;
- GLfloat clamp = 0.0f;
- } polygon_offset;
-
- struct {
- bool enabled = false; // GL_ALPHA_TEST
- GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC
- GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF
- } alpha_test;
-
- std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE
-
- struct {
- GLenum origin = GL_LOWER_LEFT;
- GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
- } clip_control;
-
- GLuint renderbuffer{}; // GL_RENDERBUFFER_BINDING
-
- OpenGLState();
-
- /// Get the currently active OpenGL state
- static OpenGLState GetCurState() {
- return cur_state;
- }
-
- void SetDefaultViewports();
- /// Apply this state as the current OpenGL state
- void Apply();
-
- void ApplyFramebufferState();
- void ApplyVertexArrayState();
- void ApplyShaderProgram();
- void ApplyProgramPipeline();
- void ApplyClipDistances();
- void ApplyPointSize();
- void ApplyFragmentColorClamp();
- void ApplyMultisample();
- void ApplySRgb();
- void ApplyCulling();
- void ApplyRasterizerDiscard();
- void ApplyColorMask();
- void ApplyDepth();
- void ApplyPrimitiveRestart();
- void ApplyStencilTest();
- void ApplyViewport();
- void ApplyTargetBlending(std::size_t target, bool force);
- void ApplyGlobalBlending();
- void ApplyBlending();
- void ApplyLogicOp();
- void ApplyTextures();
- void ApplySamplers();
- void ApplyImages();
- void ApplyDepthClamp();
- void ApplyPolygonOffset();
- void ApplyAlphaTest();
- void ApplyClipControl();
- void ApplyRenderBuffer();
-
- /// Resets any references to the given resource
- OpenGLState& UnbindTexture(GLuint handle);
- OpenGLState& ResetSampler(GLuint handle);
- OpenGLState& ResetProgram(GLuint handle);
- OpenGLState& ResetPipeline(GLuint handle);
- OpenGLState& ResetVertexArray(GLuint handle);
- OpenGLState& ResetFramebuffer(GLuint handle);
- OpenGLState& ResetRenderbuffer(GLuint handle);
-
- /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
- void EmulateViewportWithScissor();
-
- void MarkDirtyBlendState() {
- dirty.blend_state = true;
- }
-
- void MarkDirtyStencilState() {
- dirty.stencil_state = true;
- }
-
- void MarkDirtyPolygonOffset() {
- dirty.polygon_offset = true;
- }
-
- void MarkDirtyColorMask() {
- dirty.color_mask = true;
- }
-
- void AllDirty() {
- dirty.blend_state = true;
- dirty.stencil_state = true;
- dirty.polygon_offset = true;
- dirty.color_mask = true;
- }
-
-private:
- static OpenGLState cur_state;
-
- struct {
- bool blend_state;
- bool stencil_state;
- bool viewport_state;
- bool polygon_offset;
- bool color_mask;
- } dirty{};
-};
-static_assert(std::is_trivially_copyable_v<OpenGLState>);
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
new file mode 100644
index 000000000..255ac3147
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -0,0 +1,247 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
+
+#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
+#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
+
+namespace OpenGL {
+
+namespace {
+
+using namespace Dirty;
+using namespace VideoCommon::Dirty;
+using Tegra::Engines::Maxwell3D;
+using Regs = Maxwell3D::Regs;
+using Tables = Maxwell3D::DirtyState::Tables;
+using Table = Maxwell3D::DirtyState::Table;
+
+void SetupDirtyColorMasks(Tables& tables) {
+ tables[0][OFF(color_mask_common)] = ColorMaskCommon;
+ for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) {
+ const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]);
+ FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt);
+ }
+
+ FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
+}
+
+void SetupDirtyVertexArrays(Tables& tables) {
+ static constexpr std::size_t num_array = 3;
+ static constexpr std::size_t instance_base_offset = 3;
+ for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
+ const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
+ const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
+
+ FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
+ FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
+
+ const std::size_t instance_array_offset = array_offset + instance_base_offset;
+ tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
+ tables[1][instance_array_offset] = VertexInstances;
+
+ const std::size_t instance_offset = OFF(instanced_arrays) + i;
+ tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i);
+ tables[1][instance_offset] = VertexInstances;
+ }
+}
+
+void SetupDirtyVertexFormat(Tables& tables) {
+ for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
+ const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
+ FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i);
+ }
+
+ FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats);
+}
+
+void SetupDirtyViewports(Tables& tables) {
+ for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
+ const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]);
+ const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]);
+
+ FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i);
+ FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i);
+ }
+
+ FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports);
+ FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports);
+
+ tables[0][OFF(viewport_transform_enabled)] = ViewportTransform;
+ tables[1][OFF(viewport_transform_enabled)] = Viewports;
+}
+
+void SetupDirtyScissors(Tables& tables) {
+ for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
+ const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]);
+ FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i);
+ }
+ FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
+}
+
+void SetupDirtyShaders(Tables& tables) {
+ FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
+ Shaders);
+}
+
+void SetupDirtyPolygonModes(Tables& tables) {
+ tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
+ tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
+
+ tables[1][OFF(polygon_mode_front)] = PolygonModes;
+ tables[1][OFF(polygon_mode_back)] = PolygonModes;
+ tables[0][OFF(fill_rectangle)] = PolygonModes;
+}
+
+void SetupDirtyDepthTest(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(depth_test_enable)] = DepthTest;
+ table[OFF(depth_write_enabled)] = DepthMask;
+ table[OFF(depth_test_func)] = DepthTest;
+}
+
+void SetupDirtyStencilTest(Tables& tables) {
+ static constexpr std::array offsets = {
+ OFF(stencil_enable), OFF(stencil_front_func_func), OFF(stencil_front_func_ref),
+ OFF(stencil_front_func_mask), OFF(stencil_front_op_fail), OFF(stencil_front_op_zfail),
+ OFF(stencil_front_op_zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable),
+ OFF(stencil_back_func_func), OFF(stencil_back_func_ref), OFF(stencil_back_func_mask),
+ OFF(stencil_back_op_fail), OFF(stencil_back_op_zfail), OFF(stencil_back_op_zpass),
+ OFF(stencil_back_mask)};
+ for (const auto offset : offsets) {
+ tables[0][offset] = StencilTest;
+ }
+}
+
+void SetupDirtyAlphaTest(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(alpha_test_ref)] = AlphaTest;
+ table[OFF(alpha_test_func)] = AlphaTest;
+ table[OFF(alpha_test_enabled)] = AlphaTest;
+}
+
+void SetupDirtyBlend(Tables& tables) {
+ FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor);
+
+ tables[0][OFF(independent_blend_enable)] = BlendIndependentEnabled;
+
+ for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) {
+ const std::size_t offset = OFF(independent_blend) + i * NUM(independent_blend[0]);
+ FillBlock(tables[0], offset, NUM(independent_blend[0]), BlendState0 + i);
+
+ tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i);
+ }
+ FillBlock(tables[1], OFF(independent_blend), NUM(independent_blend), BlendStates);
+ FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates);
+}
+
+void SetupDirtyPrimitiveRestart(Tables& tables) {
+ FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart);
+}
+
+void SetupDirtyPolygonOffset(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(polygon_offset_fill_enable)] = PolygonOffset;
+ table[OFF(polygon_offset_line_enable)] = PolygonOffset;
+ table[OFF(polygon_offset_point_enable)] = PolygonOffset;
+ table[OFF(polygon_offset_factor)] = PolygonOffset;
+ table[OFF(polygon_offset_units)] = PolygonOffset;
+ table[OFF(polygon_offset_clamp)] = PolygonOffset;
+}
+
+void SetupDirtyMultisampleControl(Tables& tables) {
+ FillBlock(tables[0], OFF(multisample_control), NUM(multisample_control), MultisampleControl);
+}
+
+void SetupDirtyRasterizeEnable(Tables& tables) {
+ tables[0][OFF(rasterize_enable)] = RasterizeEnable;
+}
+
+void SetupDirtyFramebufferSRGB(Tables& tables) {
+ tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB;
+}
+
+void SetupDirtyLogicOp(Tables& tables) {
+ FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp);
+}
+
+void SetupDirtyFragmentClampColor(Tables& tables) {
+ tables[0][OFF(frag_color_clamp)] = FragmentClampColor;
+}
+
+void SetupDirtyPointSize(Tables& tables) {
+ tables[0][OFF(vp_point_size)] = PointSize;
+ tables[0][OFF(point_size)] = PointSize;
+ tables[0][OFF(point_sprite_enable)] = PointSize;
+}
+
+void SetupDirtyClipControl(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(screen_y_control)] = ClipControl;
+ table[OFF(depth_mode)] = ClipControl;
+}
+
+void SetupDirtyDepthClampEnabled(Tables& tables) {
+ tables[0][OFF(view_volume_clip_control)] = DepthClampEnabled;
+}
+
+void SetupDirtyMisc(Tables& tables) {
+ auto& table = tables[0];
+
+ table[OFF(clip_distance_enabled)] = ClipDistances;
+
+ table[OFF(front_face)] = FrontFace;
+
+ table[OFF(cull_test_enabled)] = CullTest;
+ table[OFF(cull_face)] = CullTest;
+}
+
+} // Anonymous namespace
+
+StateTracker::StateTracker(Core::System& system) : system{system} {}
+
+void StateTracker::Initialize() {
+ auto& dirty = system.GPU().Maxwell3D().dirty;
+ auto& tables = dirty.tables;
+ SetupDirtyRenderTargets(tables);
+ SetupDirtyColorMasks(tables);
+ SetupDirtyViewports(tables);
+ SetupDirtyScissors(tables);
+ SetupDirtyVertexArrays(tables);
+ SetupDirtyVertexFormat(tables);
+ SetupDirtyShaders(tables);
+ SetupDirtyPolygonModes(tables);
+ SetupDirtyDepthTest(tables);
+ SetupDirtyStencilTest(tables);
+ SetupDirtyAlphaTest(tables);
+ SetupDirtyBlend(tables);
+ SetupDirtyPrimitiveRestart(tables);
+ SetupDirtyPolygonOffset(tables);
+ SetupDirtyMultisampleControl(tables);
+ SetupDirtyRasterizeEnable(tables);
+ SetupDirtyFramebufferSRGB(tables);
+ SetupDirtyLogicOp(tables);
+ SetupDirtyFragmentClampColor(tables);
+ SetupDirtyPointSize(tables);
+ SetupDirtyClipControl(tables);
+ SetupDirtyDepthClampEnabled(tables);
+ SetupDirtyMisc(tables);
+
+ auto& store = dirty.on_write_stores;
+ store[VertexBuffers] = true;
+ for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
+ store[VertexBuffer0 + i] = true;
+ }
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
new file mode 100644
index 000000000..b882d75c3
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -0,0 +1,215 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <limits>
+
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/maxwell_3d.h"
+
+namespace Core {
+class System;
+}
+
+namespace OpenGL {
+
+namespace Dirty {
+
+enum : u8 {
+ First = VideoCommon::Dirty::LastCommonEntry,
+
+ VertexFormats,
+ VertexFormat0,
+ VertexFormat31 = VertexFormat0 + 31,
+
+ VertexBuffers,
+ VertexBuffer0,
+ VertexBuffer31 = VertexBuffer0 + 31,
+
+ VertexInstances,
+ VertexInstance0,
+ VertexInstance31 = VertexInstance0 + 31,
+
+ ViewportTransform,
+ Viewports,
+ Viewport0,
+ Viewport15 = Viewport0 + 15,
+
+ Scissors,
+ Scissor0,
+ Scissor15 = Scissor0 + 15,
+
+ ColorMaskCommon,
+ ColorMasks,
+ ColorMask0,
+ ColorMask7 = ColorMask0 + 7,
+
+ BlendColor,
+ BlendIndependentEnabled,
+ BlendStates,
+ BlendState0,
+ BlendState7 = BlendState0 + 7,
+
+ Shaders,
+ ClipDistances,
+
+ PolygonModes,
+ PolygonModeFront,
+ PolygonModeBack,
+
+ ColorMask,
+ FrontFace,
+ CullTest,
+ DepthMask,
+ DepthTest,
+ StencilTest,
+ AlphaTest,
+ PrimitiveRestart,
+ PolygonOffset,
+ MultisampleControl,
+ RasterizeEnable,
+ FramebufferSRGB,
+ LogicOp,
+ FragmentClampColor,
+ PointSize,
+ ClipControl,
+ DepthClampEnabled,
+
+ Last
+};
+static_assert(Last <= std::numeric_limits<u8>::max());
+
+} // namespace Dirty
+
+class StateTracker {
+public:
+ explicit StateTracker(Core::System& system);
+
+ void Initialize();
+
+ void BindIndexBuffer(GLuint new_index_buffer) {
+ if (index_buffer == new_index_buffer) {
+ return;
+ }
+ index_buffer = new_index_buffer;
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
+ }
+
+ void NotifyScreenDrawVertexArray() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::VertexFormats] = true;
+ flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
+ flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
+
+ flags[OpenGL::Dirty::VertexBuffers] = true;
+ flags[OpenGL::Dirty::VertexBuffer0] = true;
+
+ flags[OpenGL::Dirty::VertexInstances] = true;
+ flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
+ flags[OpenGL::Dirty::VertexInstance0 + 1] = true;
+ }
+
+ void NotifyPolygonModes() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::PolygonModes] = true;
+ flags[OpenGL::Dirty::PolygonModeFront] = true;
+ flags[OpenGL::Dirty::PolygonModeBack] = true;
+ }
+
+ void NotifyViewport0() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::Viewports] = true;
+ flags[OpenGL::Dirty::Viewport0] = true;
+ }
+
+ void NotifyScissor0() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::Scissors] = true;
+ flags[OpenGL::Dirty::Scissor0] = true;
+ }
+
+ void NotifyColorMask0() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::ColorMasks] = true;
+ flags[OpenGL::Dirty::ColorMask0] = true;
+ }
+
+ void NotifyBlend0() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::BlendStates] = true;
+ flags[OpenGL::Dirty::BlendState0] = true;
+ }
+
+ void NotifyFramebuffer() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[VideoCommon::Dirty::RenderTargets] = true;
+ }
+
+ void NotifyFrontFace() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::FrontFace] = true;
+ }
+
+ void NotifyCullTest() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::CullTest] = true;
+ }
+
+ void NotifyDepthMask() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::DepthMask] = true;
+ }
+
+ void NotifyDepthTest() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::DepthTest] = true;
+ }
+
+ void NotifyStencilTest() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::StencilTest] = true;
+ }
+
+ void NotifyPolygonOffset() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::PolygonOffset] = true;
+ }
+
+ void NotifyRasterizeEnable() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::RasterizeEnable] = true;
+ }
+
+ void NotifyFramebufferSRGB() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::FramebufferSRGB] = true;
+ }
+
+ void NotifyLogicOp() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::LogicOp] = true;
+ }
+
+ void NotifyClipControl() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::ClipControl] = true;
+ }
+
+ void NotifyAlphaTest() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::AlphaTest] = true;
+ }
+
+private:
+ Core::System& system;
+
+ GLuint index_buffer = 0;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 35ba334e4..6ec328c53 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -7,7 +7,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
-#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index cf934b0d8..f424e3000 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -10,7 +10,7 @@
#include "core/core.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/texture_cache/surface_base.h"
@@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
+ {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI
@@ -397,6 +398,7 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
const bool is_proxy)
: VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
target = GetTextureTarget(params.target);
+ format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format;
if (!is_proxy) {
texture_view = CreateTextureView();
}
@@ -467,25 +469,20 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou
}
OGLTextureView CachedSurfaceView::CreateTextureView() const {
- const auto& owner_params = surface.GetSurfaceParams();
OGLTextureView texture_view;
texture_view.Create();
- const GLuint handle{texture_view.handle};
- const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)};
-
- glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
+ glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
params.num_levels, params.base_layer, params.num_layers);
-
- ApplyTextureDefaults(owner_params, handle);
+ ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
return texture_view;
}
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
VideoCore::RasterizerInterface& rasterizer,
- const Device& device)
- : TextureCacheBase{system, rasterizer} {
+ const Device& device, StateTracker& state_tracker)
+ : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} {
src_framebuffer.Create();
dst_framebuffer.Create();
}
@@ -519,25 +516,26 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) {
const auto& src_params{src_view->GetSurfaceParams()};
const auto& dst_params{dst_view->GetSurfaceParams()};
+ UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
+ UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
- OpenGLState prev_state{OpenGLState::GetCurState()};
- SCOPE_EXIT({
- prev_state.AllDirty();
- prev_state.Apply();
- });
-
- OpenGLState state;
- state.draw.read_framebuffer = src_framebuffer.handle;
- state.draw.draw_framebuffer = dst_framebuffer.handle;
- state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
- state.AllDirty();
- state.Apply();
+ state_tracker.NotifyScissor0();
+ state_tracker.NotifyFramebuffer();
+ state_tracker.NotifyRasterizeEnable();
+ state_tracker.NotifyFramebufferSRGB();
- u32 buffers{};
+ if (dst_params.srgb_conversion) {
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ } else {
+ glDisable(GL_FRAMEBUFFER_SRGB);
+ }
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisablei(GL_SCISSOR_TEST, 0);
- UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
- UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
+ GLenum buffers = 0;
if (src_params.type == SurfaceType::ColorTexture) {
src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 8e13ab38b..6658c6ffd 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -27,6 +27,7 @@ using VideoCommon::ViewParams;
class CachedSurfaceView;
class CachedSurface;
class TextureCacheOpenGL;
+class StateTracker;
using Surface = std::shared_ptr<CachedSurface>;
using View = std::shared_ptr<CachedSurfaceView>;
@@ -96,6 +97,10 @@ public:
return texture_view.handle;
}
+ GLenum GetFormat() const {
+ return format;
+ }
+
const SurfaceParams& GetSurfaceParams() const {
return surface.GetSurfaceParams();
}
@@ -113,6 +118,7 @@ private:
CachedSurface& surface;
GLenum target{};
+ GLenum format{};
OGLTextureView texture_view;
u32 swizzle{};
@@ -122,7 +128,7 @@ private:
class TextureCacheOpenGL final : public TextureCacheBase {
public:
explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const Device& device);
+ const Device& device, StateTracker& state_tracker);
~TextureCacheOpenGL();
protected:
@@ -139,6 +145,8 @@ protected:
private:
GLuint FetchPBO(std::size_t buffer_size);
+ StateTracker& state_tracker;
+
OGLFramebuffer src_framebuffer;
OGLFramebuffer dst_framebuffer;
std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 7ed505628..89f0e04ef 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -92,8 +92,32 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
}
case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return GL_UNSIGNED_SHORT;
+ default:
+ LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+ return {};
+ }
+ case Maxwell::VertexAttribute::Type::SignedScaled:
+ switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return GL_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return GL_SHORT;
default:
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
return {};
@@ -401,24 +425,24 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
return GL_KEEP;
}
-inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) {
+inline GLenum FrontFace(Maxwell::FrontFace front_face) {
switch (front_face) {
- case Maxwell::Cull::FrontFace::ClockWise:
+ case Maxwell::FrontFace::ClockWise:
return GL_CW;
- case Maxwell::Cull::FrontFace::CounterClockWise:
+ case Maxwell::FrontFace::CounterClockWise:
return GL_CCW;
}
LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
return GL_CCW;
}
-inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) {
+inline GLenum CullFace(Maxwell::CullFace cull_face) {
switch (cull_face) {
- case Maxwell::Cull::CullFace::Front:
+ case Maxwell::CullFace::Front:
return GL_FRONT;
- case Maxwell::Cull::CullFace::Back:
+ case Maxwell::CullFace::Back:
return GL_BACK;
- case Maxwell::Cull::CullFace::FrontAndBack:
+ case Maxwell::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
@@ -464,5 +488,18 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
return GL_COPY;
}
+inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
+ switch (polygon_mode) {
+ case Maxwell::PolygonMode::Point:
+ return GL_POINT;
+ case Maxwell::PolygonMode::Line:
+ return GL_LINE;
+ case Maxwell::PolygonMode::Fill:
+ return GL_FILL;
+ }
+ UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode));
+ return GL_FILL;
+}
+
} // namespace MaxwellToGL
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a4340b502..fca5e3ec0 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,8 +5,11 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
+#include <cstring>
#include <memory>
+
#include <glad/glad.h>
+
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
@@ -20,10 +23,13 @@
#include "core/telemetry_session.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
namespace OpenGL {
+namespace {
+
// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
// to wait on available presentation frames.
constexpr std::size_t SWAP_CHAIN_SIZE = 3;
@@ -40,133 +46,13 @@ struct Frame {
bool is_srgb{}; /// Framebuffer is sRGB or RGB
};
-/**
- * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
- * but also make sure that rendering happens at the pace that the frontend dictates. This is a
- * helper class that the renderer uses to sync frames between the render thread and the presentation
- * thread
- */
-class FrameMailbox {
-public:
- std::mutex swap_chain_lock;
- std::condition_variable present_cv;
- std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
- std::queue<Frame*> free_queue;
- std::deque<Frame*> present_queue;
- Frame* previous_frame{};
-
- FrameMailbox() {
- for (auto& frame : swap_chain) {
- free_queue.push(&frame);
- }
- }
-
- ~FrameMailbox() {
- // lock the mutex and clear out the present and free_queues and notify any people who are
- // blocked to prevent deadlock on shutdown
- std::scoped_lock lock{swap_chain_lock};
- std::queue<Frame*>().swap(free_queue);
- present_queue.clear();
- present_cv.notify_all();
- }
-
- void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
- frame->present.Release();
- frame->present.Create();
- GLint previous_draw_fbo{};
- glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
- glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
- frame->color.handle);
- if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
- LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
- }
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
- frame->color_reloaded = false;
- }
-
- void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
- OpenGLState prev_state = OpenGLState::GetCurState();
- OpenGLState state = OpenGLState::GetCurState();
-
- // Recreate the color texture attachment
- frame->color.Release();
- frame->color.Create();
- state.renderbuffer = frame->color.handle;
- state.Apply();
- glRenderbufferStorage(GL_RENDERBUFFER, frame->is_srgb ? GL_SRGB8 : GL_RGB8, width, height);
-
- // Recreate the FBO for the render target
- frame->render.Release();
- frame->render.Create();
- state.draw.read_framebuffer = frame->render.handle;
- state.draw.draw_framebuffer = frame->render.handle;
- state.Apply();
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
- frame->color.handle);
- if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
- LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
- }
- prev_state.Apply();
- frame->width = width;
- frame->height = height;
- frame->color_reloaded = true;
- }
-
- Frame* GetRenderFrame() {
- std::unique_lock lock{swap_chain_lock};
-
- // If theres no free frames, we will reuse the oldest render frame
- if (free_queue.empty()) {
- auto frame = present_queue.back();
- present_queue.pop_back();
- return frame;
- }
-
- Frame* frame = free_queue.front();
- free_queue.pop();
- return frame;
- }
-
- void ReleaseRenderFrame(Frame* frame) {
- std::unique_lock lock{swap_chain_lock};
- present_queue.push_front(frame);
- present_cv.notify_one();
- }
-
- Frame* TryGetPresentFrame(int timeout_ms) {
- std::unique_lock lock{swap_chain_lock};
- // wait for new entries in the present_queue
- present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
- [&] { return !present_queue.empty(); });
- if (present_queue.empty()) {
- // timed out waiting for a frame to draw so return the previous frame
- return previous_frame;
- }
-
- // free the previous frame and add it back to the free queue
- if (previous_frame) {
- free_queue.push(previous_frame);
- }
+constexpr char VERTEX_SHADER[] = R"(
+#version 430 core
- // the newest entries are pushed to the front of the queue
- Frame* frame = present_queue.front();
- present_queue.pop_front();
- // remove all old entries from the present queue and move them back to the free_queue
- for (auto f : present_queue) {
- free_queue.push(f);
- }
- present_queue.clear();
- previous_frame = frame;
- return frame;
- }
+out gl_PerVertex {
+ vec4 gl_Position;
};
-namespace {
-
-constexpr char vertex_shader[] = R"(
-#version 430 core
-
layout (location = 0) in vec2 vert_position;
layout (location = 1) in vec2 vert_tex_coord;
layout (location = 0) out vec2 frag_tex_coord;
@@ -187,7 +73,7 @@ void main() {
}
)";
-constexpr char fragment_shader[] = R"(
+constexpr char FRAGMENT_SHADER[] = R"(
#version 430 core
layout (location = 0) in vec2 frag_tex_coord;
@@ -196,7 +82,7 @@ layout (location = 0) out vec4 color;
layout (binding = 0) uniform sampler2D color_texture;
void main() {
- color = texture(color_texture, frag_tex_coord);
+ color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
}
)";
@@ -205,13 +91,31 @@ constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
struct ScreenRectVertex {
- constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v)
- : position{{x, y}}, tex_coord{{u, v}} {}
+ constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v)
+ : position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {}
std::array<GLfloat, 2> position;
std::array<GLfloat, 2> tex_coord;
};
+/// Returns true if any debug tool is attached
+bool HasDebugTool() {
+ const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
+ if (nsight) {
+ return true;
+ }
+
+ GLint num_extensions;
+ glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
+ for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
+ const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
+ if (!std::strcmp(name, "GL_EXT_debug_tool")) {
+ return true;
+ }
+ }
+ return false;
+}
+
/**
* Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
* corner and (width, height) on the lower-bottom.
@@ -295,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
} // Anonymous namespace
+/**
+ * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
+ * but also make sure that rendering happens at the pace that the frontend dictates. This is a
+ * helper class that the renderer uses to sync frames between the render thread and the presentation
+ * thread
+ */
+class FrameMailbox {
+public:
+ std::mutex swap_chain_lock;
+ std::condition_variable present_cv;
+ std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
+ std::queue<Frame*> free_queue;
+ std::deque<Frame*> present_queue;
+ Frame* previous_frame{};
+
+ FrameMailbox() : has_debug_tool{HasDebugTool()} {
+ for (auto& frame : swap_chain) {
+ free_queue.push(&frame);
+ }
+ }
+
+ ~FrameMailbox() {
+ // lock the mutex and clear out the present and free_queues and notify any people who are
+ // blocked to prevent deadlock on shutdown
+ std::scoped_lock lock{swap_chain_lock};
+ std::queue<Frame*>().swap(free_queue);
+ present_queue.clear();
+ present_cv.notify_all();
+ }
+
+ void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
+ frame->present.Release();
+ frame->present.Create();
+ GLint previous_draw_fbo{};
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
+ glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+ frame->color.handle);
+ if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+ LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
+ }
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
+ frame->color_reloaded = false;
+ }
+
+ void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
+ // Recreate the color texture attachment
+ frame->color.Release();
+ frame->color.Create();
+ const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
+ glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
+
+ // Recreate the FBO for the render target
+ frame->render.Release();
+ frame->render.Create();
+ glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+ frame->color.handle);
+ if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+ LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
+ }
+
+ frame->width = width;
+ frame->height = height;
+ frame->color_reloaded = true;
+ }
+
+ Frame* GetRenderFrame() {
+ std::unique_lock lock{swap_chain_lock};
+
+ // If theres no free frames, we will reuse the oldest render frame
+ if (free_queue.empty()) {
+ auto frame = present_queue.back();
+ present_queue.pop_back();
+ return frame;
+ }
+
+ Frame* frame = free_queue.front();
+ free_queue.pop();
+ return frame;
+ }
+
+ void ReleaseRenderFrame(Frame* frame) {
+ std::unique_lock lock{swap_chain_lock};
+ present_queue.push_front(frame);
+ present_cv.notify_one();
+
+ DebugNotifyNextFrame();
+ }
+
+ Frame* TryGetPresentFrame(int timeout_ms) {
+ DebugWaitForNextFrame();
+
+ std::unique_lock lock{swap_chain_lock};
+ // wait for new entries in the present_queue
+ present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
+ [&] { return !present_queue.empty(); });
+ if (present_queue.empty()) {
+ // timed out waiting for a frame to draw so return the previous frame
+ return previous_frame;
+ }
+
+ // free the previous frame and add it back to the free queue
+ if (previous_frame) {
+ free_queue.push(previous_frame);
+ }
+
+ // the newest entries are pushed to the front of the queue
+ Frame* frame = present_queue.front();
+ present_queue.pop_front();
+ // remove all old entries from the present queue and move them back to the free_queue
+ for (auto f : present_queue) {
+ free_queue.push(f);
+ }
+ present_queue.clear();
+ previous_frame = frame;
+ return frame;
+ }
+
+private:
+ std::mutex debug_synch_mutex;
+ std::condition_variable debug_synch_condition;
+ std::atomic_int frame_for_debug{};
+ const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
+
+ /// Signal that a new frame is available (called from GPU thread)
+ void DebugNotifyNextFrame() {
+ if (!has_debug_tool) {
+ return;
+ }
+ frame_for_debug++;
+ std::lock_guard lock{debug_synch_mutex};
+ debug_synch_condition.notify_one();
+ }
+
+ /// Wait for a new frame to be available (called from presentation thread)
+ void DebugWaitForNextFrame() {
+ if (!has_debug_tool) {
+ return;
+ }
+ const int last_frame = frame_for_debug;
+ std::unique_lock lock{debug_synch_mutex};
+ debug_synch_condition.wait(lock,
+ [this, last_frame] { return frame_for_debug > last_frame; });
+ }
+};
+
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
frame_mailbox{std::make_unique<FrameMailbox>()} {}
@@ -311,11 +362,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
return;
}
- // Maintain the rasterizer's state as a priority
- OpenGLState prev_state = OpenGLState::GetCurState();
- state.AllDirty();
- state.Apply();
-
PrepareRendertarget(framebuffer);
RenderScreenshot();
@@ -358,8 +404,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
frame->is_srgb = screen_info.display_srgb;
frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height);
}
- state.draw.draw_framebuffer = frame->render.handle;
- state.Apply();
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle);
DrawScreen(layout);
// Create a fence for the frontend to wait on and swap this frame to OffTex
frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
@@ -368,10 +413,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
m_current_frame++;
rasterizer->TickFrame();
}
-
- // Restore the rasterizer state
- prev_state.AllDirty();
- prev_state.Apply();
}
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
@@ -442,31 +483,24 @@ void RendererOpenGL::InitOpenGLObjects() {
glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
0.0f);
- // Link shaders and get variable locations
- shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
- state.draw.shader_program = shader.handle;
- state.AllDirty();
- state.Apply();
+ // Create shader programs
+ OGLShader vertex_shader;
+ vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER);
+
+ OGLShader fragment_shader;
+ fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER);
+
+ vertex_program.Create(true, false, vertex_shader.handle);
+ fragment_program.Create(true, false, fragment_shader.handle);
+
+ // Create program pipeline
+ program_manager.Create();
// Generate VBO handle for drawing
vertex_buffer.Create();
- // Generate VAO
- vertex_array.Create();
- state.draw.vertex_array = vertex_array.handle;
-
// Attach vertex data to VAO
glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
- glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE,
- offsetof(ScreenRectVertex, position));
- glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
- offsetof(ScreenRectVertex, tex_coord));
- glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0);
- glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0);
- glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation);
- glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation);
- glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
- sizeof(ScreenRectVertex));
// Allocate textures for the screen
screen_info.texture.resource.Create(GL_TEXTURE_2D);
@@ -499,7 +533,8 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
- rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
+ rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info,
+ program_manager, state_tracker);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@@ -538,8 +573,19 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
}
-void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
- float h) {
+void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
+ if (renderer_settings.set_background_color) {
+ // Update background color before drawing
+ glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
+ 0.0f);
+ }
+
+ // Set projection matrix
+ const std::array ortho_matrix =
+ MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
+ glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
+ std::data(ortho_matrix));
+
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
auto right = texcoords.right;
@@ -571,46 +617,79 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
static_cast<f32>(screen_info.texture.height);
}
+ const auto& screen = layout.screen;
const std::array vertices = {
- ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v),
- ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v),
- ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v),
- ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
+ ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, left * scale_v),
+ ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, left * scale_v),
+ ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, right * scale_v),
+ ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, right * scale_v),
};
-
- state.textures[0] = screen_info.display_texture;
- state.framebuffer_srgb.enabled = screen_info.display_srgb;
- state.AllDirty();
- state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
- glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
- // Restore default state
- state.framebuffer_srgb.enabled = false;
- state.textures[0] = 0;
- state.AllDirty();
- state.Apply();
-}
-void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
- if (renderer_settings.set_background_color) {
- // Update background color before drawing
- glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
- 0.0f);
+ // TODO: Signal state tracker about these changes
+ state_tracker.NotifyScreenDrawVertexArray();
+ state_tracker.NotifyPolygonModes();
+ state_tracker.NotifyViewport0();
+ state_tracker.NotifyScissor0();
+ state_tracker.NotifyColorMask0();
+ state_tracker.NotifyBlend0();
+ state_tracker.NotifyFramebuffer();
+ state_tracker.NotifyFrontFace();
+ state_tracker.NotifyCullTest();
+ state_tracker.NotifyDepthTest();
+ state_tracker.NotifyStencilTest();
+ state_tracker.NotifyPolygonOffset();
+ state_tracker.NotifyRasterizeEnable();
+ state_tracker.NotifyFramebufferSRGB();
+ state_tracker.NotifyLogicOp();
+ state_tracker.NotifyClipControl();
+ state_tracker.NotifyAlphaTest();
+
+ program_manager.UseVertexShader(vertex_program.handle);
+ program_manager.UseGeometryShader(0);
+ program_manager.UseFragmentShader(fragment_program.handle);
+ program_manager.BindGraphicsPipeline();
+
+ glEnable(GL_CULL_FACE);
+ if (screen_info.display_srgb) {
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ } else {
+ glDisable(GL_FRAMEBUFFER_SRGB);
}
+ glDisable(GL_COLOR_LOGIC_OP);
+ glDisable(GL_DEPTH_TEST);
+ glDisable(GL_STENCIL_TEST);
+ glDisable(GL_POLYGON_OFFSET_FILL);
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisable(GL_ALPHA_TEST);
+ glDisablei(GL_BLEND, 0);
+ glDisablei(GL_SCISSOR_TEST, 0);
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+ glCullFace(GL_BACK);
+ glFrontFace(GL_CW);
+ glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
+ glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
+ static_cast<GLfloat>(layout.height));
+ glDepthRangeIndexed(0, 0.0, 0.0);
+
+ glEnableVertexAttribArray(PositionLocation);
+ glEnableVertexAttribArray(TexCoordLocation);
+ glVertexAttribDivisor(PositionLocation, 0);
+ glVertexAttribDivisor(TexCoordLocation, 0);
+ glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE,
+ offsetof(ScreenRectVertex, position));
+ glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
+ offsetof(ScreenRectVertex, tex_coord));
+ glVertexAttribBinding(PositionLocation, 0);
+ glVertexAttribBinding(TexCoordLocation, 0);
+ glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
+
+ glBindTextureUnit(0, screen_info.display_texture);
+ glBindSampler(0, 0);
- const auto& screen = layout.screen;
-
- glViewport(0, 0, layout.width, layout.height);
glClear(GL_COLOR_BUFFER_BIT);
-
- // Set projection matrix
- const std::array ortho_matrix =
- MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
- glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
-
- DrawScreenTriangles(screen_info, static_cast<float>(screen.left),
- static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()),
- static_cast<float>(screen.GetHeight()));
+ glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
void RendererOpenGL::TryPresent(int timeout_ms) {
@@ -653,13 +732,14 @@ void RendererOpenGL::RenderScreenshot() {
return;
}
+ GLint old_read_fb;
+ GLint old_draw_fb;
+ glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
+
// Draw the current frame to the screenshot framebuffer
screenshot_framebuffer.Create();
- GLuint old_read_fb = state.draw.read_framebuffer;
- GLuint old_draw_fb = state.draw.draw_framebuffer;
- state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
- state.AllDirty();
- state.Apply();
+ glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -676,12 +756,11 @@ void RendererOpenGL::RenderScreenshot() {
renderer_settings.screenshot_bits);
screenshot_framebuffer.Release();
- state.draw.read_framebuffer = old_read_fb;
- state.draw.draw_framebuffer = old_draw_fb;
- state.AllDirty();
- state.Apply();
glDeleteRenderbuffers(1, &renderbuffer);
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+
renderer_settings.screenshot_complete_callback();
renderer_settings.screenshot_requested = false;
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index d45e69cbc..33073ce5b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,7 +10,8 @@
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
class System;
@@ -76,8 +77,6 @@ private:
/// Draws the emulated screens to the emulator window.
void DrawScreen(const Layout::FramebufferLayout& layout);
- void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
-
void RenderScreenshot();
/// Loads framebuffer from emulated memory into the active OpenGL texture.
@@ -93,17 +92,20 @@ private:
Core::Frontend::EmuWindow& emu_window;
Core::System& system;
- OpenGLState state;
+ StateTracker state_tracker{system};
// OpenGL object IDs
- OGLVertexArray vertex_array;
OGLBuffer vertex_buffer;
- OGLProgram shader;
+ OGLProgram vertex_program;
+ OGLProgram fragment_program;
OGLFramebuffer screenshot_framebuffer;
/// Display information for Switch screen
ScreenInfo screen_info;
+ /// Global dummy shader pipeline
+ GLShader::ProgramManager program_manager;
+
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index ac99e6385..b751086fa 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -9,6 +9,7 @@
#include <glad/glad.h>
#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
@@ -20,12 +21,12 @@ struct VertexArrayPushBuffer::Entry {
GLsizei stride{};
};
-VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
+VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker)
+ : state_tracker{state_tracker} {}
VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
-void VertexArrayPushBuffer::Setup(GLuint vao_) {
- vao = vao_;
+void VertexArrayPushBuffer::Setup() {
index_buffer = nullptr;
vertex_buffers.clear();
}
@@ -41,13 +42,11 @@ void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint*
void VertexArrayPushBuffer::Bind() {
if (index_buffer) {
- glVertexArrayElementBuffer(vao, *index_buffer);
+ state_tracker.BindIndexBuffer(*index_buffer);
}
- // TODO(Rodrigo): Find a way to ARB_multi_bind this
for (const auto& entry : vertex_buffers) {
- glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset,
- entry.stride);
+ glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride);
}
}
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 3ad7c02d4..47ee3177b 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -11,12 +11,14 @@
namespace OpenGL {
+class StateTracker;
+
class VertexArrayPushBuffer final {
public:
- explicit VertexArrayPushBuffer();
+ explicit VertexArrayPushBuffer(StateTracker& state_tracker);
~VertexArrayPushBuffer();
- void Setup(GLuint vao_);
+ void Setup();
void SetIndexBuffer(const GLuint* buffer);
@@ -28,7 +30,8 @@ public:
private:
struct Entry;
- GLuint vao{};
+ StateTracker& state_tracker;
+
const GLuint* index_buffer{};
std::vector<Entry> vertex_buffers;
};
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 4e3ff231e..2bb376555 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -112,19 +112,18 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
const auto& clip = regs.view_volume_clip_control;
const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
- Maxwell::Cull::FrontFace front_face = regs.cull.front_face;
+ Maxwell::FrontFace front_face = regs.front_face;
if (regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y > 0.0f) {
- if (front_face == Maxwell::Cull::FrontFace::CounterClockWise)
- front_face = Maxwell::Cull::FrontFace::ClockWise;
- else if (front_face == Maxwell::Cull::FrontFace::ClockWise)
- front_face = Maxwell::Cull::FrontFace::CounterClockWise;
+ if (front_face == Maxwell::FrontFace::CounterClockWise)
+ front_face = Maxwell::FrontFace::ClockWise;
+ else if (front_face == Maxwell::FrontFace::ClockWise)
+ front_face = Maxwell::FrontFace::CounterClockWise;
}
const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
- return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled,
- depth_clamp_enabled, gl_ndc, regs.cull.cull_face,
- front_face);
+ return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled,
+ depth_clamp_enabled, gl_ndc, regs.cull_face, front_face);
}
} // Anonymous namespace
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 87056ef37..4c8ba7f90 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -171,8 +171,8 @@ struct FixedPipelineState {
struct Rasterizer {
constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
- bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face,
- Maxwell::Cull::FrontFace front_face)
+ bool ndc_minus_one_to_one, Maxwell::CullFace cull_face,
+ Maxwell::FrontFace front_face)
: cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
cull_face{cull_face}, front_face{front_face} {}
@@ -182,8 +182,8 @@ struct FixedPipelineState {
bool depth_bias_enable;
bool depth_clamp_enable;
bool ndc_minus_one_to_one;
- Maxwell::Cull::CullFace cull_face;
- Maxwell::Cull::FrontFace front_face;
+ Maxwell::CullFace cull_face;
+ Maxwell::FrontFace front_face;
std::size_t Hash() const noexcept;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index ef66dd141..f93447610 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -120,11 +120,12 @@ struct FormatTuple {
{vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI
{vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U
{vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U
- {vk::Format::eA1R5G5B5UnormPack16, Attachable | Storage}, // A1B5G5R5U (flipped with swizzle)
+ {vk::Format::eA1R5G5B5UnormPack16, Attachable}, // A1B5G5R5U (flipped with swizzle)
{vk::Format::eR8Unorm, Attachable | Storage}, // R8U
{vk::Format::eR8Uint, Attachable | Storage}, // R8UI
{vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F
{vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U
+ {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S
{vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI
{vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F
{vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI
@@ -256,6 +257,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
return vk::ShaderStageFlagBits::eGeometry;
case Tegra::Engines::ShaderType::Fragment:
return vk::ShaderStageFlagBits::eFragment;
+ case Tegra::Engines::ShaderType::Compute:
+ return vk::ShaderStageFlagBits::eCompute;
}
UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
return {};
@@ -331,6 +334,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
return vk::Format::eR16G16B16Unorm;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return vk::Format::eR16G16B16A16Unorm;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return vk::Format::eA2B10G10R10UnormPack32;
default:
break;
}
@@ -364,6 +369,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
return vk::Format::eR8G8B8A8Uint;
case Maxwell::VertexAttribute::Size::Size_32:
return vk::Format::eR32Uint;
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ return vk::Format::eR32G32Uint;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return vk::Format::eR32G32B32Uint;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return vk::Format::eR32G32B32A32Uint;
default:
@@ -371,8 +380,22 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
}
case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return vk::Format::eR8Uscaled;
case Maxwell::VertexAttribute::Size::Size_8_8:
return vk::Format::eR8G8Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return vk::Format::eR8G8B8Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return vk::Format::eR8G8B8A8Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return vk::Format::eR16Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return vk::Format::eR16G16Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return vk::Format::eR16G16B16Uscaled;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return vk::Format::eR16G16B16A16Uscaled;
default:
break;
}
@@ -572,24 +595,24 @@ vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
return {};
}
-vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
+vk::FrontFace FrontFace(Maxwell::FrontFace front_face) {
switch (front_face) {
- case Maxwell::Cull::FrontFace::ClockWise:
+ case Maxwell::FrontFace::ClockWise:
return vk::FrontFace::eClockwise;
- case Maxwell::Cull::FrontFace::CounterClockWise:
+ case Maxwell::FrontFace::CounterClockWise:
return vk::FrontFace::eCounterClockwise;
}
UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
return {};
}
-vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
+vk::CullModeFlags CullFace(Maxwell::CullFace cull_face) {
switch (cull_face) {
- case Maxwell::Cull::CullFace::Front:
+ case Maxwell::CullFace::Front:
return vk::CullModeFlagBits::eFront;
- case Maxwell::Cull::CullFace::Back:
+ case Maxwell::CullFace::Back:
return vk::CullModeFlagBits::eBack;
- case Maxwell::Cull::CullFace::FrontAndBack:
+ case Maxwell::CullFace::FrontAndBack:
return vk::CullModeFlagBits::eFrontAndBack;
}
UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7e9678b7b..24f6ab544 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -54,9 +54,9 @@ vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
-vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
+vk::FrontFace FrontFace(Maxwell::FrontFace front_face);
-vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
+vk::CullModeFlags CullFace(Maxwell::CullFace cull_face);
vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index ddc62bc97..42bb01418 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -27,6 +27,7 @@
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
namespace Vulkan {
@@ -177,10 +178,13 @@ bool RendererVulkan::Init() {
swapchain = std::make_unique<VKSwapchain>(surface, *device);
swapchain->Create(framebuffer.width, framebuffer.height, false);
- scheduler = std::make_unique<VKScheduler>(*device, *resource_manager);
+ state_tracker = std::make_unique<StateTracker>(system);
+
+ scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker);
rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
- *resource_manager, *memory_manager, *scheduler);
+ *resource_manager, *memory_manager,
+ *state_tracker, *scheduler);
blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
*resource_manager, *memory_manager, *swapchain,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index f513397f0..3da08d2e4 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -4,8 +4,10 @@
#pragma once
+#include <memory>
#include <optional>
#include <vector>
+
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -15,6 +17,7 @@ class System;
namespace Vulkan {
+class StateTracker;
class VKBlitScreen;
class VKDevice;
class VKFence;
@@ -61,6 +64,7 @@ private:
std::unique_ptr<VKSwapchain> swapchain;
std::unique_ptr<VKMemoryManager> memory_manager;
std::unique_ptr<VKResourceManager> resource_manager;
+ std::unique_ptr<StateTracker> state_tracker;
std::unique_ptr<VKScheduler> scheduler;
std::unique_ptr<VKBlitScreen> blit_screen;
};
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 9d5b8de7a..60f57d83e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -73,7 +73,7 @@ UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate
std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0;
u32 offset = 0;
- FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries);
+ FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return UniqueDescriptorUpdateTemplate{};
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 886bde3b9..28d2fbc4f 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.occlusionQueryPrecise = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
- features.shaderStorageImageReadWithoutFormat =
- is_shader_storage_img_read_without_format_supported;
+ features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
features.shaderStorageImageWriteWithoutFormat = true;
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
@@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
}
+ vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
+ if (ext_transform_feedback) {
+ transform_feedback.transformFeedback = true;
+ transform_feedback.geometryStreams = true;
+ SetNext(next, transform_feedback);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
+ }
+
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
};
- extensions.reserve(14);
+ extensions.reserve(15);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
[[maybe_unused]] const bool nsight =
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
- bool khr_shader_float16_int8{};
- bool ext_subgroup_size_control{};
+ bool has_khr_shader_float16_int8{};
+ bool has_ext_subgroup_size_control{};
+ bool has_ext_transform_feedback{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
Test(extension, khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
- Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
+ Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
+ false);
Test(extension, ext_depth_range_unrestricted,
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
Test(extension, ext_shader_viewport_index_layer,
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
- Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
+ Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
+ false);
+ Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
false);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostic_checkpoints,
@@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
}
- if (khr_shader_float16_int8) {
+ if (has_khr_shader_float16_int8) {
is_float16_supported =
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
- if (ext_subgroup_size_control) {
+ if (has_ext_subgroup_size_control) {
const auto features =
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
const auto properties =
@@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
is_warp_potentially_bigger = true;
}
+ if (has_ext_transform_feedback) {
+ const auto features =
+ GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
+ const auto properties =
+ GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
+
+ if (features.transformFeedback && features.geometryStreams &&
+ properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
+ properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
+ extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
+ ext_transform_feedback = true;
+ }
+ }
+
return extensions;
}
@@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
- is_shader_storage_img_read_without_format_supported =
- supported_features.shaderStorageImageReadWithoutFormat;
+ is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
}
@@ -510,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32Uint,
vk::Format::eR16G16B16A16Uint,
+ vk::Format::eR16G16B16A16Snorm,
vk::Format::eR16G16B16A16Unorm,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16Snorm,
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 2c27ad730..6e656517f 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,11 +122,6 @@ public:
return properties.limits.maxPushConstantsSize;
}
- /// Returns true if Shader storage Image Read Without Format supported.
- bool IsShaderStorageImageReadWithoutFormatSupported() const {
- return is_shader_storage_img_read_without_format_supported;
- }
-
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -147,6 +142,11 @@ public:
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
}
+ /// Returns true if formatless image load is supported.
+ bool IsFormatlessImageLoadSupported() const {
+ return is_formatless_image_load_supported;
+ }
+
/// Returns true if the device supports VK_EXT_scalar_block_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
@@ -167,6 +167,11 @@ public:
return ext_shader_viewport_index_layer;
}
+ /// Returns true if the device supports VK_EXT_transform_feedback.
+ bool IsExtTransformFeedbackSupported() const {
+ return ext_transform_feedback;
+ }
+
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
bool IsNvDeviceDiagnosticCheckpoints() const {
return nv_device_diagnostic_checkpoints;
@@ -214,26 +219,26 @@ private:
static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
- const vk::PhysicalDevice physical; ///< Physical device.
- vk::DispatchLoaderDynamic dld; ///< Device function pointers.
- vk::PhysicalDeviceProperties properties; ///< Device properties.
- UniqueDevice logical; ///< Logical device.
- vk::Queue graphics_queue; ///< Main graphics queue.
- vk::Queue present_queue; ///< Main present queue.
- u32 graphics_family{}; ///< Main graphics queue family index.
- u32 present_family{}; ///< Main present queue family index.
- vk::DriverIdKHR driver_id{}; ///< Driver ID.
- vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
- bool is_optimal_astc_supported{}; ///< Support for native ASTC.
- bool is_float16_supported{}; ///< Support for float16 arithmetics.
- bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
+ const vk::PhysicalDevice physical; ///< Physical device.
+ vk::DispatchLoaderDynamic dld; ///< Device function pointers.
+ vk::PhysicalDeviceProperties properties; ///< Device properties.
+ UniqueDevice logical; ///< Logical device.
+ vk::Queue graphics_queue; ///< Main graphics queue.
+ vk::Queue present_queue; ///< Main present queue.
+ u32 graphics_family{}; ///< Main graphics queue family index.
+ u32 present_family{}; ///< Main present queue family index.
+ vk::DriverIdKHR driver_id{}; ///< Driver ID.
+ vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
+ bool is_optimal_astc_supported{}; ///< Support for native ASTC.
+ bool is_float16_supported{}; ///< Support for float16 arithmetics.
+ bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
+ bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
+ bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
- bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
- ///< image read without format
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index b155dfb49..6a02403c1 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -97,8 +97,7 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat
u32 offset = 0;
for (const auto& stage : program) {
if (stage) {
- FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset,
- template_entries);
+ FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries);
}
}
if (template_entries.empty()) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 7ddf7d3ee..557b9d662 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -36,6 +36,13 @@ using Tegra::Engines::ShaderType;
namespace {
+// C++20's using enum
+constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer;
+constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer;
+constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer;
+constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler;
+constexpr auto eStorageImage = vk::DescriptorType::eStorageImage;
+
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
VideoCommon::Shader::CompileDepth::FullDecompile};
@@ -119,23 +126,32 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) {
}
}
+template <vk::DescriptorType descriptor_type, class Container>
+void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding,
+ vk::ShaderStageFlags stage_flags, const Container& container) {
+ const u32 num_entries = static_cast<u32>(std::size(container));
+ for (std::size_t i = 0; i < num_entries; ++i) {
+ u32 count = 1;
+ if constexpr (descriptor_type == eCombinedImageSampler) {
+ // Combined image samplers can be arrayed.
+ count = container[i].Size();
+ }
+ bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr);
+ }
+}
+
u32 FillDescriptorLayout(const ShaderEntries& entries,
std::vector<vk::DescriptorSetLayoutBinding>& bindings,
Maxwell::ShaderProgram program_type, u32 base_binding) {
const ShaderType stage = GetStageFromProgram(program_type);
- const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);
+ const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
u32 binding = base_binding;
- const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
- for (std::size_t i = 0; i < num_entries; ++i) {
- bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
- }
- };
- AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
- AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
- AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
- AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
- AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
+ AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers);
+ AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers);
+ AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers);
+ AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers);
+ AddBindings<eStorageImage>(bindings, binding, flags, entries.images);
return binding;
}
@@ -145,8 +161,8 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
ProgramCode program_code, u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
- program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
- shader_ir{this->program_code, main_offset, compiler_settings, locker},
+ program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
+ shader_ir{this->program_code, main_offset, compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default;
@@ -163,24 +179,19 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue)
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ VKRenderPassCache& renderpass_cache)
: RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
- renderpass_cache(device) {}
+ renderpass_cache{renderpass_cache} {}
VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const auto& gpu = system.GPU().Maxwell3D();
- auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
- if (!dirty) {
- return last_shaders;
- }
- dirty = false;
std::array<Shader, Maxwell::MaxShaderProgram> shaders;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto& shader_config = gpu.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
@@ -262,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
specialization.workgroup_size = key.workgroup_size;
specialization.shared_memory_size = key.shared_memory_size;
- const SPIRVShader spirv_shader{
- Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
- shader->GetEntries()};
+ const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
+ shader->GetRegistry(), specialization),
+ shader->GetEntries()};
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, spirv_shader);
return *entry;
@@ -313,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
- specialization.primitive_topology = fixed_state.input_assembly.topology;
- if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
+ if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
specialization.point_size = fixed_state.input_assembly.point_size;
}
@@ -322,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
- specialization.tessellation.primitive = fixed_state.tessellation.primitive;
- specialization.tessellation.spacing = fixed_state.tessellation.spacing;
- specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
SPIRVProgram program;
std::vector<vk::DescriptorSetLayoutBinding> bindings;
@@ -345,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
const auto program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
- program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
- entries};
+ program[stage] = {
+ Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
+ entries};
if (program_enum == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
@@ -361,32 +369,45 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
return {std::move(program), std::move(bindings)};
}
-void FillDescriptorUpdateTemplateEntries(
- const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
- std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
- static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
- const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) {
- const u32 count = static_cast<u32>(count_);
- if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer &&
- device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
- // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
- // crash.
- for (u32 i = 0; i < count; ++i) {
- template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
- offset + i * entry_size, entry_size);
- }
- } else if (count != 0) {
- template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
+template <vk::DescriptorType descriptor_type, class Container>
+void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding,
+ u32& offset, const Container& container) {
+ static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
+ const u32 count = static_cast<u32>(std::size(container));
+
+ if constexpr (descriptor_type == eCombinedImageSampler) {
+ for (u32 i = 0; i < count; ++i) {
+ const u32 num_samplers = container[i].Size();
+ template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset,
+ entry_size);
+ ++binding;
+ offset += num_samplers * entry_size;
}
- offset += count * entry_size;
- binding += count;
- };
+ return;
+ }
- AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
- AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
- AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
- AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
- AddEntry(vk::DescriptorType::eStorageImage, entries.images.size());
+ if constexpr (descriptor_type == eUniformTexelBuffer) {
+ // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
+ // crash.
+ for (u32 i = 0; i < count; ++i) {
+ template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
+ offset + i * entry_size, entry_size);
+ }
+ } else if (count > 0) {
+ template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
+ }
+ offset += count * entry_size;
+ binding += count;
+}
+
+void FillDescriptorUpdateTemplateEntries(
+ const ShaderEntries& entries, u32& binding, u32& offset,
+ std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
+ AddEntry<eUniformBuffer>(template_entries, offset, binding, entries.const_buffers);
+ AddEntry<eStorageBuffer>(template_entries, offset, binding, entries.global_buffers);
+ AddEntry<eUniformTexelBuffer>(template_entries, offset, binding, entries.texel_buffers);
+ AddEntry<eCombinedImageSampler>(template_entries, offset, binding, entries.samplers);
+ AddEntry<eStorageImage>(template_entries, offset, binding, entries.images);
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 8678fc9c3..c4c112290 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -25,7 +25,7 @@
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/shader/const_buffer_locker.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"
@@ -132,6 +132,10 @@ public:
return shader_ir;
}
+ const VideoCommon::Shader::Registry& GetRegistry() const {
+ return registry;
+ }
+
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
@@ -147,7 +151,7 @@ private:
GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code;
- VideoCommon::Shader::ConstBufferLocker locker;
+ VideoCommon::Shader::Registry registry;
VideoCommon::Shader::ShaderIR shader_ir;
ShaderEntries entries;
};
@@ -157,7 +161,8 @@ public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue);
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ VKRenderPassCache& renderpass_cache);
~VKPipelineCache();
std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
@@ -180,8 +185,7 @@ private:
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
-
- VKRenderPassCache renderpass_cache;
+ VKRenderPassCache& renderpass_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
@@ -194,7 +198,7 @@ private:
};
void FillDescriptorUpdateTemplateEntries(
- const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
+ const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 3bf86da87..58c69b786 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -36,6 +36,7 @@
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
@@ -105,17 +106,20 @@ void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlag
template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
- std::size_t stage) {
+ std::size_t stage, std::size_t index = 0) {
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
if (entry.IsBindless()) {
const Tegra::Texture::TextureHandle tex_handle =
engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset());
return engine.GetTextureInfo(tex_handle);
}
+ const auto& gpu_profile = engine.AccessGuestDriverProfile();
+ const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
+ const u32 offset = entry.GetOffset() + entry_offset;
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
- return engine.GetStageTexture(stage_type, entry.GetOffset());
+ return engine.GetStageTexture(stage_type, offset);
} else {
- return engine.GetTexture(entry.GetOffset());
+ return engine.GetTexture(offset);
}
}
@@ -277,17 +281,19 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf,
RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
VKScreenInfo& screen_info, const VKDevice& device,
VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, VKScheduler& scheduler)
+ VKMemoryManager& memory_manager, StateTracker& state_tracker,
+ VKScheduler& scheduler)
: RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
screen_info{screen_info}, device{device}, resource_manager{resource_manager},
- memory_manager{memory_manager}, scheduler{scheduler},
+ memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
- update_descriptor_queue(device, scheduler),
+ update_descriptor_queue(device, scheduler), renderpass_cache(device),
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
staging_pool),
- pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
+ pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
+ renderpass_cache),
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
sampler_cache(device), query_cache(system, *this, device, scheduler) {
scheduler.SetQueryCache(query_cache);
@@ -342,6 +348,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
[&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
}
+ BeginTransformFeedback();
+
const auto pipeline_layout = pipeline.GetLayout();
const auto descriptor_set = pipeline.CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
@@ -351,18 +359,23 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
}
draw_params.Draw(cmdbuf, dld);
});
+
+ EndTransformFeedback();
}
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
- query_cache.UpdateCounters();
-
const auto& gpu = system.GPU().Maxwell3D();
if (!system.GPU().Maxwell3D().ShouldExecute()) {
return;
}
+ sampled_views.clear();
+ image_views.clear();
+
+ query_cache.UpdateCounters();
+
const auto& regs = gpu.regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
@@ -371,52 +384,54 @@ void RasterizerVulkan::Clear() {
if (!use_color && !use_depth && !use_stencil) {
return;
}
- // Clearing images requires to be out of a renderpass
- scheduler.RequestOutsideRenderPassOperationContext();
- // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass.
+ [[maybe_unused]] const auto texceptions = UpdateAttachments();
+ DEBUG_ASSERT(texceptions.none());
+ SetupImageTransitions(0, color_attachments, zeta_attachment);
- if (use_color) {
- View color_view;
- {
- MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false);
- }
+ const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0));
+ const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
+ scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr});
- color_view->Transition(vk::ImageLayout::eTransferDstOptimal,
- vk::PipelineStageFlagBits::eTransfer,
- vk::AccessFlagBits::eTransferWrite);
+ const auto& scissor = regs.scissor_test[0];
+ const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y);
+ vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y};
+ scissor_extent.width = std::min(scissor_extent.width, render_area.width);
+ scissor_extent.height = std::min(scissor_extent.height, render_area.height);
+ const u32 layer = regs.clear_buffers.layer;
+ const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1);
+
+ if (use_color) {
const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
regs.clear_color[2], regs.clear_color[3]};
- const vk::ClearColorValue clear(clear_color);
- scheduler.Record([image = color_view->GetImage(),
- subresource = color_view->GetImageSubresourceRange(),
- clear](auto cmdbuf, auto& dld) {
- cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource,
- dld);
+ const vk::ClearValue clear_value{clear_color};
+ const u32 color_attachment = regs.clear_buffers.RT;
+ scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) {
+ const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment,
+ clear_value);
+ cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
});
}
- if (use_depth || use_stencil) {
- View zeta_surface;
- {
- MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- zeta_surface = texture_cache.GetDepthBufferSurface(false);
- }
- zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal,
- vk::PipelineStageFlagBits::eTransfer,
- vk::AccessFlagBits::eTransferWrite);
-
- const vk::ClearDepthStencilValue clear(regs.clear_depth,
- static_cast<u32>(regs.clear_stencil));
- scheduler.Record([image = zeta_surface->GetImage(),
- subresource = zeta_surface->GetImageSubresourceRange(),
- clear](auto cmdbuf, auto& dld) {
- cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
- subresource, dld);
- });
+ if (!use_depth && !use_stencil) {
+ return;
+ }
+ vk::ImageAspectFlags aspect_flags;
+ if (use_depth) {
+ aspect_flags |= vk::ImageAspectFlagBits::eDepth;
+ }
+ if (use_stencil) {
+ aspect_flags |= vk::ImageAspectFlagBits::eStencil;
}
+
+ scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
+ clear_rect, aspect_flags](auto cmdbuf, auto& dld) {
+ const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil);
+ const vk::ClearValue clear_value{clear_zeta};
+ const vk::ClearAttachment attachment(aspect_flags, 0, clear_value);
+ cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
+ });
}
void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
@@ -533,8 +548,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// Verify that the cached surface is the same size and format as the requested framebuffer
const auto& params{surface->GetSurfaceParams()};
- const auto& pixel_format{
- VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
@@ -545,6 +558,10 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
+void RasterizerVulkan::SetupDirtyFlags() {
+ state_tracker.Initialize();
+}
+
void RasterizerVulkan::FlushWork() {
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
@@ -568,9 +585,9 @@ void RasterizerVulkan::FlushWork() {
RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- auto& dirty = system.GPU().Maxwell3D().dirty;
- const bool update_rendertargets = dirty.render_settings;
- dirty.render_settings = false;
+ auto& dirty = system.GPU().Maxwell3D().dirty.flags;
+ const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
+ dirty[VideoCommon::Dirty::RenderTargets] = false;
texture_cache.GuardRenderTargets(true);
@@ -720,13 +737,51 @@ void RasterizerVulkan::SetupImageTransitions(
}
void RasterizerVulkan::UpdateDynamicStates() {
- auto& gpu = system.GPU().Maxwell3D();
- UpdateViewportsState(gpu);
- UpdateScissorsState(gpu);
- UpdateDepthBias(gpu);
- UpdateBlendConstants(gpu);
- UpdateDepthBounds(gpu);
- UpdateStencilFaces(gpu);
+ auto& regs = system.GPU().Maxwell3D().regs;
+ UpdateViewportsState(regs);
+ UpdateScissorsState(regs);
+ UpdateDepthBias(regs);
+ UpdateBlendConstants(regs);
+ UpdateDepthBounds(regs);
+ UpdateStencilFaces(regs);
+}
+
+void RasterizerVulkan::BeginTransformFeedback() {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ if (regs.tfb_enabled == 0) {
+ return;
+ }
+
+ UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
+
+ UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
+ UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
+ UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
+
+ const auto& binding = regs.tfb_bindings[0];
+ UNIMPLEMENTED_IF(binding.buffer_enable == 0);
+ UNIMPLEMENTED_IF(binding.buffer_offset != 0);
+
+ const GPUVAddr gpu_addr = binding.Address();
+ const std::size_t size = binding.buffer_size;
+ const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+
+ scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
+ cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
+ cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
+ });
+}
+
+void RasterizerVulkan::EndTransformFeedback() {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ if (regs.tfb_enabled == 0) {
+ return;
+ }
+
+ scheduler.Record(
+ [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
}
void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
@@ -836,14 +891,16 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.samplers) {
- const auto texture = GetTextureInfo(gpu, entry, stage);
- SetupTexture(texture, entry);
+ for (std::size_t i = 0; i < entry.Size(); ++i) {
+ const auto texture = GetTextureInfo(gpu, entry, stage, i);
+ SetupTexture(texture, entry);
+ }
}
}
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images);
- const auto& gpu = system.GPU().KeplerCompute();
+ const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.images) {
const auto tic = GetTextureInfo(gpu, entry, stage).tic;
SetupImage(tic, entry);
@@ -886,8 +943,10 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.samplers) {
- const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex);
- SetupTexture(texture, entry);
+ for (std::size_t i = 0; i < entry.Size(); ++i) {
+ const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i);
+ SetupTexture(texture, entry);
+ }
}
}
@@ -902,6 +961,13 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer) {
+ if (!buffer.enabled) {
+ // Set values to zero to unbind buffers
+ update_descriptor_queue.AddBuffer(buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
+ sizeof(float));
+ return;
+ }
+
// Align the size to avoid bad std140 interactions
const std::size_t size =
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
@@ -972,12 +1038,10 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
image_views.push_back(ImageView{std::move(view), image_layout});
}
-void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) {
- if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) {
+void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchViewports()) {
return;
}
- gpu.dirty.viewport_transform = false;
- const auto& regs = gpu.regs;
const std::array viewports{
GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
@@ -992,12 +1056,10 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) {
});
}
-void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) {
- if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) {
+void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchScissors()) {
return;
}
- gpu.dirty.scissor_test = false;
- const auto& regs = gpu.regs;
const std::array scissors = {
GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
@@ -1010,46 +1072,39 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) {
});
}
-void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) {
- if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) {
+void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthBias()) {
return;
}
- gpu.dirty.polygon_offset = false;
- const auto& regs = gpu.regs;
scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) {
cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld);
});
}
-void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) {
- if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) {
+void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchBlendConstants()) {
return;
}
- gpu.dirty.blend_state = false;
- const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g,
- gpu.regs.blend_color.b, gpu.regs.blend_color.a};
+ const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
+ regs.blend_color.a};
scheduler.Record([blend_color](auto cmdbuf, auto& dld) {
cmdbuf.setBlendConstants(blend_color.data(), dld);
});
}
-void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) {
- if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) {
+void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchDepthBounds()) {
return;
}
- gpu.dirty.depth_bounds_values = false;
- const auto& regs = gpu.regs;
scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); });
}
-void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) {
- if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) {
+void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchStencilProperties()) {
return;
}
- gpu.dirty.stencil_test = false;
- const auto& regs = gpu.regs;
if (regs.stencil_two_side_enable) {
// Separate values per face
scheduler.Record(
@@ -1100,7 +1155,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
// This implementation assumes that all attributes are used in the shader.
const GPUVAddr start{regs.vertex_array[index].StartAddress()};
const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
- DEBUG_ASSERT(end > start);
+ DEBUG_ASSERT(end >= start);
size += (end - start + 1) * regs.vertex_array[index].enable;
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 4dc8af6e8..3185868e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -96,6 +96,7 @@ struct hash<Vulkan::FramebufferCacheKey> {
namespace Vulkan {
+class StateTracker;
class BufferBindings;
struct ImageView {
@@ -108,7 +109,7 @@ public:
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
VKScreenInfo& screen_info, const VKDevice& device,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKScheduler& scheduler);
+ StateTracker& state_tracker, VKScheduler& scheduler);
~RasterizerVulkan() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -127,6 +128,7 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
+ void SetupDirtyFlags() override;
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000;
@@ -167,6 +169,10 @@ private:
void UpdateDynamicStates();
+ void BeginTransformFeedback();
+
+ void EndTransformFeedback();
+
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
@@ -215,12 +221,12 @@ private:
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
- void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu);
- void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu);
- void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu);
- void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu);
- void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu);
- void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu);
+ void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
@@ -241,11 +247,13 @@ private:
const VKDevice& device;
VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
+ StateTracker& state_tracker;
VKScheduler& scheduler;
VKStagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
+ VKRenderPassCache renderpass_cache;
QuadArrayPass quad_array_pass;
Uint8Pass uint8_pass;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 92bd6c344..b61d4fe63 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -2,6 +2,12 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <thread>
+#include <utility>
+
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -9,6 +15,7 @@
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
namespace Vulkan {
@@ -29,9 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
last = nullptr;
}
-VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
- : device{device}, resource_manager{resource_manager}, next_fence{
- &resource_manager.CommitFence()} {
+VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
+ StateTracker& state_tracker)
+ : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker},
+ next_fence{&resource_manager.CommitFence()} {
AcquireNewChunk();
AllocateNewContext();
worker_thread = std::thread(&VKScheduler::WorkerThread, this);
@@ -157,12 +165,7 @@ void VKScheduler::AllocateNewContext() {
void VKScheduler::InvalidateState() {
state.graphics_pipeline = nullptr;
- state.viewports = false;
- state.scissors = false;
- state.depth_bias = false;
- state.blend_constants = false;
- state.depth_bounds = false;
- state.stencil_values = false;
+ state_tracker.InvalidateCommandBufferState();
}
void VKScheduler::EndPendingOperations() {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 62fd7858b..c7cc291c3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -17,6 +17,7 @@
namespace Vulkan {
+class StateTracker;
class VKDevice;
class VKFence;
class VKQueryCache;
@@ -43,7 +44,8 @@ private:
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
public:
- explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
+ explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
+ StateTracker& state_tracker);
~VKScheduler();
/// Sends the current execution context to the GPU.
@@ -74,36 +76,6 @@ public:
query_cache = &query_cache_;
}
- /// Returns true when viewports have been set in the current command buffer.
- bool TouchViewports() {
- return std::exchange(state.viewports, true);
- }
-
- /// Returns true when scissors have been set in the current command buffer.
- bool TouchScissors() {
- return std::exchange(state.scissors, true);
- }
-
- /// Returns true when depth bias have been set in the current command buffer.
- bool TouchDepthBias() {
- return std::exchange(state.depth_bias, true);
- }
-
- /// Returns true when blend constants have been set in the current command buffer.
- bool TouchBlendConstants() {
- return std::exchange(state.blend_constants, true);
- }
-
- /// Returns true when depth bounds have been set in the current command buffer.
- bool TouchDepthBounds() {
- return std::exchange(state.depth_bounds, true);
- }
-
- /// Returns true when stencil values have been set in the current command buffer.
- bool TouchStencilValues() {
- return std::exchange(state.stencil_values, true);
- }
-
/// Send work to a separate thread.
template <typename T>
void Record(T&& command) {
@@ -217,6 +189,8 @@ private:
const VKDevice& device;
VKResourceManager& resource_manager;
+ StateTracker& state_tracker;
+
VKQueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
@@ -226,12 +200,6 @@ private:
struct State {
std::optional<vk::RenderPassBeginInfo> renderpass;
vk::Pipeline graphics_pipeline;
- bool viewports = false;
- bool scissors = false;
- bool depth_bias = false;
- bool blend_constants = false;
- bool depth_bounds = false;
- bool stencil_values = false;
} state;
std::unique_ptr<CommandChunk> chunk;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 2da622d15..51ecb5567 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -5,7 +5,9 @@
#include <functional>
#include <limits>
#include <map>
+#include <optional>
#include <type_traits>
+#include <unordered_map>
#include <utility>
#include <fmt/format.h>
@@ -24,6 +26,7 @@
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader/transform_feedback.h"
namespace Vulkan {
@@ -69,8 +72,9 @@ struct TexelBuffer {
struct SampledImage {
Id image_type{};
- Id sampled_image_type{};
- Id sampler{};
+ Id sampler_type{};
+ Id sampler_pointer_type{};
+ Id variable{};
};
struct StorageImage {
@@ -92,6 +96,12 @@ struct VertexIndices {
std::optional<u32> clip_distances;
};
+struct GenericVaryingDescription {
+ Id id = nullptr;
+ u32 first_element = 0;
+ bool is_scalar = false;
+};
+
spv::Dim GetSamplerDim(const Sampler& sampler) {
ASSERT(!sampler.IsBuffer());
switch (sampler.GetType()) {
@@ -265,9 +275,13 @@ bool IsPrecise(Operation operand) {
class SPIRVDecompiler final : public Sirit::Module {
public:
explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
- const Specialization& specialization)
+ const Registry& registry, const Specialization& specialization)
: Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
- specialization{specialization} {
+ registry{registry}, specialization{specialization} {
+ if (stage != ShaderType::Compute) {
+ transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
+ }
+
AddCapability(spv::Capability::Shader);
AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
AddCapability(spv::Capability::ImageQuery);
@@ -285,6 +299,15 @@ public:
AddExtension("SPV_KHR_variable_pointers");
AddExtension("SPV_KHR_shader_draw_parameters");
+ if (!transform_feedback.empty()) {
+ if (device.IsExtTransformFeedbackSupported()) {
+ AddCapability(spv::Capability::TransformFeedback);
+ } else {
+ LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
+ "supported on this device");
+ }
+ }
+
if (ir.UsesLayer() || ir.UsesViewportIndex()) {
if (ir.UsesViewportIndex()) {
AddCapability(spv::Capability::MultiViewport);
@@ -295,7 +318,7 @@ public:
}
}
- if (device.IsShaderStorageImageReadWithoutFormatSupported()) {
+ if (device.IsFormatlessImageLoadSupported()) {
AddCapability(spv::Capability::StorageImageReadWithoutFormat);
}
@@ -317,25 +340,29 @@ public:
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
header.common2.threads_per_input_primitive);
break;
- case ShaderType::TesselationEval:
+ case ShaderType::TesselationEval: {
+ const auto& info = registry.GetGraphicsInfo();
AddCapability(spv::Capability::Tessellation);
AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
- AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive));
- AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing));
- AddExecutionMode(main, specialization.tessellation.clockwise
+ AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
+ AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
+ AddExecutionMode(main, info.tessellation_clockwise
? spv::ExecutionMode::VertexOrderCw
: spv::ExecutionMode::VertexOrderCcw);
break;
- case ShaderType::Geometry:
+ }
+ case ShaderType::Geometry: {
+ const auto& info = registry.GetGraphicsInfo();
AddCapability(spv::Capability::Geometry);
AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
- AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology));
+ AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
header.common4.max_output_vertices);
// TODO(Rodrigo): Where can we get this info from?
AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
break;
+ }
case ShaderType::Fragment:
AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
@@ -544,7 +571,8 @@ private:
if (stage != ShaderType::Geometry) {
return;
}
- const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
+ const auto& info = registry.GetGraphicsInfo();
+ const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
DeclareInputVertexArray(num_input);
DeclareOutputVertex();
}
@@ -741,12 +769,34 @@ private:
}
void DeclareOutputAttributes() {
+ if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
+ return;
+ }
+
+ UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
for (const auto index : ir.GetOutputAttributes()) {
if (!IsGenericAttribute(index)) {
continue;
}
- const u32 location = GetGenericAttributeLocation(index);
- Id type = t_float4;
+ DeclareOutputAttribute(index);
+ }
+ }
+
+ void DeclareOutputAttribute(Attribute::Index index) {
+ static constexpr std::string_view swizzle = "xyzw";
+
+ const u32 location = GetGenericAttributeLocation(index);
+ u8 element = 0;
+ while (element < 4) {
+ const std::size_t remainder = 4 - element;
+
+ std::size_t num_components = remainder;
+ const std::optional tfb = GetTransformFeedbackInfo(index, element);
+ if (tfb) {
+ num_components = tfb->components;
+ }
+
+ Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
Id varying_default = v_varying_default;
if (IsOutputAttributeArray()) {
const u32 num = GetNumOutputVertices();
@@ -759,15 +809,47 @@ private:
}
type = TypePointer(spv::StorageClass::Output, type);
+ std::string name = fmt::format("out_attr{}", location);
+ if (num_components < 4 || element > 0) {
+ name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
+ }
+
const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
- Name(AddGlobalVariable(id), fmt::format("out_attr{}", location));
- output_attributes.emplace(index, id);
+ Name(AddGlobalVariable(id), name);
+
+ GenericVaryingDescription description;
+ description.id = id;
+ description.first_element = element;
+ description.is_scalar = num_components == 1;
+ for (u32 i = 0; i < num_components; ++i) {
+ const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
+ output_attributes.emplace(offset, description);
+ }
interfaces.push_back(id);
Decorate(id, spv::Decoration::Location, location);
+ if (element > 0) {
+ Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
+ }
+ if (tfb && device.IsExtTransformFeedbackSupported()) {
+ Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
+ Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
+ Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
+ }
+
+ element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
}
}
+ std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
+ }
+ return it->second;
+ }
+
u32 DeclareConstantBuffers(u32 binding) {
for (const auto& [index, size] : ir.GetConstantBuffers()) {
const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
@@ -833,16 +915,20 @@ private:
constexpr int sampled = 1;
constexpr auto format = spv::ImageFormat::Unknown;
const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
- const Id sampled_image_type = TypeSampledImage(image_type);
- const Id pointer_type =
- TypePointer(spv::StorageClass::UniformConstant, sampled_image_type);
+ const Id sampler_type = TypeSampledImage(image_type);
+ const Id sampler_pointer_type =
+ TypePointer(spv::StorageClass::UniformConstant, sampler_type);
+ const Id type = sampler.IsIndexed()
+ ? TypeArray(sampler_type, Constant(t_uint, sampler.Size()))
+ : sampler_type;
+ const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type);
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex())));
Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- sampled_images.emplace(sampler.GetIndex(),
- SampledImage{image_type, sampled_image_type, id});
+ sampled_images.emplace(sampler.GetIndex(), SampledImage{image_type, sampler_type,
+ sampler_pointer_type, id});
}
return binding;
}
@@ -893,7 +979,7 @@ private:
u32 GetNumInputVertices() const {
switch (stage) {
case ShaderType::Geometry:
- return GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
+ return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
case ShaderType::TesselationControl:
case ShaderType::TesselationEval:
return NumInputPatches;
@@ -1341,8 +1427,14 @@ private:
}
default:
if (IsGenericAttribute(attribute)) {
- const Id composite = output_attributes.at(attribute);
- return {ArrayPass(t_out_float, composite, {element}), Type::Float};
+ const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
+ const GenericVaryingDescription description = output_attributes.at(offset);
+ const Id composite = description.id;
+ std::vector<u32> indices;
+ if (!description.is_scalar) {
+ indices.push_back(element - description.first_element);
+ }
+ return {ArrayPass(t_out_float, composite, indices), Type::Float};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
static_cast<u32>(attribute));
@@ -1525,7 +1617,12 @@ private:
ASSERT(!meta.sampler.IsBuffer());
const auto& entry = sampled_images.at(meta.sampler.GetIndex());
- return OpLoad(entry.sampled_image_type, entry.sampler);
+ Id sampler = entry.variable;
+ if (meta.sampler.IsIndexed()) {
+ const Id index = AsInt(Visit(meta.index));
+ sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index);
+ }
+ return OpLoad(entry.sampler_type, sampler);
}
Id GetTextureImage(Operation operation) {
@@ -1783,7 +1880,7 @@ private:
}
Expression ImageLoad(Operation operation) {
- if (!device.IsShaderStorageImageReadWithoutFormatSupported()) {
+ if (!device.IsFormatlessImageLoadSupported()) {
return {v_float_zero, Type::Float};
}
@@ -2211,16 +2308,14 @@ private:
switch (specialization.attribute_types.at(location)) {
case Maxwell::VertexAttribute::Type::SignedNorm:
case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
case Maxwell::VertexAttribute::Type::Float:
return {Type::Float, t_in_float, t_in_float4};
case Maxwell::VertexAttribute::Type::SignedInt:
return {Type::Int, t_in_int, t_in_int4};
case Maxwell::VertexAttribute::Type::UnsignedInt:
return {Type::Uint, t_in_uint, t_in_uint4};
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
- case Maxwell::VertexAttribute::Type::SignedScaled:
- UNIMPLEMENTED();
- return {Type::Float, t_in_float, t_in_float4};
default:
UNREACHABLE();
return {Type::Float, t_in_float, t_in_float4};
@@ -2250,11 +2345,11 @@ private:
std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
switch (type) {
case Type::Float:
- return {nullptr, t_float2, t_float3, t_float4};
+ return {t_float, t_float2, t_float3, t_float4};
case Type::Int:
- return {nullptr, t_int2, t_int3, t_int4};
+ return {t_int, t_int2, t_int3, t_int4};
case Type::Uint:
- return {nullptr, t_uint2, t_uint3, t_uint4};
+ return {t_uint, t_uint2, t_uint3, t_uint4};
default:
UNIMPLEMENTED();
return {};
@@ -2487,7 +2582,9 @@ private:
const ShaderIR& ir;
const ShaderType stage;
const Tegra::Shader::Header header;
+ const Registry& registry;
const Specialization& specialization;
+ std::unordered_map<u8, VaryingTFB> transform_feedback;
const Id t_void = Name(TypeVoid(), "void");
@@ -2576,7 +2673,7 @@ private:
Id shared_memory{};
std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
std::map<Attribute::Index, Id> input_attributes;
- std::map<Attribute::Index, Id> output_attributes;
+ std::unordered_map<u8, GenericVaryingDescription> output_attributes;
std::map<u32, Id> constant_buffers;
std::map<GlobalMemoryBase, Id> global_buffers;
std::map<u32, TexelBuffer> texel_buffers;
@@ -2862,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
}
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
- ShaderType stage, const Specialization& specialization) {
- return SPIRVDecompiler(device, ir, stage, specialization).Assemble();
+ ShaderType stage, const VideoCommon::Shader::Registry& registry,
+ const Specialization& specialization) {
+ return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f5dc14d9e..ffea4709e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,6 +15,7 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace Vulkan {
@@ -91,17 +92,9 @@ struct Specialization final {
u32 shared_memory_size{};
// Graphics specific
- Maxwell::PrimitiveTopology primitive_topology{};
std::optional<float> point_size{};
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
-
- // Tessellation specific
- struct {
- Maxwell::TessellationPrimitive primitive{};
- Maxwell::TessellationSpacing spacing{};
- bool clockwise{};
- } tessellation;
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -114,6 +107,8 @@ struct SPIRVShader {
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage, const Specialization& specialization);
+ Tegra::Engines::ShaderType stage,
+ const VideoCommon::Shader::Registry& registry,
+ const Specialization& specialization);
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 171d78afc..374959f82 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -73,7 +73,8 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_
VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
const auto usage =
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
- vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer;
+ vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer |
+ vk::BufferUsageFlagBits::eIndexBuffer;
const u32 log2 = Common::Log2Ceil64(size);
const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0,
nullptr);
@@ -99,7 +100,6 @@ void VKStagingBufferPool::ReleaseCache(bool host_visible) {
}
u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
- static constexpr u64 epochs_to_destroy = 180;
static constexpr std::size_t deletions_per_tick = 16;
auto& staging = cache[log2];
@@ -107,6 +107,7 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo
const std::size_t old_size = entries.size();
const auto is_deleteable = [this](const auto& entry) {
+ static constexpr u64 epochs_to_destroy = 180;
return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
};
const std::size_t begin_offset = staging.delete_index;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
new file mode 100644
index 000000000..94a89e388
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -0,0 +1,99 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
+
+#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
+#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
+
+namespace Vulkan {
+
+namespace {
+
+using namespace Dirty;
+using namespace VideoCommon::Dirty;
+using Tegra::Engines::Maxwell3D;
+using Regs = Maxwell3D::Regs;
+using Tables = Maxwell3D::DirtyState::Tables;
+using Table = Maxwell3D::DirtyState::Table;
+using Flags = Maxwell3D::DirtyState::Flags;
+
+Flags MakeInvalidationFlags() {
+ Flags flags{};
+ flags[Viewports] = true;
+ flags[Scissors] = true;
+ flags[DepthBias] = true;
+ flags[BlendConstants] = true;
+ flags[DepthBounds] = true;
+ flags[StencilProperties] = true;
+ return flags;
+}
+
+void SetupDirtyViewports(Tables& tables) {
+ FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
+ FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
+ tables[0][OFF(viewport_transform_enabled)] = Viewports;
+}
+
+void SetupDirtyScissors(Tables& tables) {
+ FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors);
+}
+
+void SetupDirtyDepthBias(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(polygon_offset_units)] = DepthBias;
+ table[OFF(polygon_offset_clamp)] = DepthBias;
+ table[OFF(polygon_offset_factor)] = DepthBias;
+}
+
+void SetupDirtyBlendConstants(Tables& tables) {
+ FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants);
+}
+
+void SetupDirtyDepthBounds(Tables& tables) {
+ FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds);
+}
+
+void SetupDirtyStencilProperties(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(stencil_two_side_enable)] = StencilProperties;
+ table[OFF(stencil_front_func_ref)] = StencilProperties;
+ table[OFF(stencil_front_mask)] = StencilProperties;
+ table[OFF(stencil_front_func_mask)] = StencilProperties;
+ table[OFF(stencil_back_func_ref)] = StencilProperties;
+ table[OFF(stencil_back_mask)] = StencilProperties;
+ table[OFF(stencil_back_func_mask)] = StencilProperties;
+}
+
+} // Anonymous namespace
+
+StateTracker::StateTracker(Core::System& system)
+ : system{system}, invalidation_flags{MakeInvalidationFlags()} {}
+
+void StateTracker::Initialize() {
+ auto& dirty = system.GPU().Maxwell3D().dirty;
+ auto& tables = dirty.tables;
+ SetupDirtyRenderTargets(tables);
+ SetupDirtyViewports(tables);
+ SetupDirtyScissors(tables);
+ SetupDirtyDepthBias(tables);
+ SetupDirtyBlendConstants(tables);
+ SetupDirtyDepthBounds(tables);
+ SetupDirtyStencilProperties(tables);
+}
+
+void StateTracker::InvalidateCommandBufferState() {
+ system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
new file mode 100644
index 000000000..03bc415b2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -0,0 +1,79 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <limits>
+
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/maxwell_3d.h"
+
+namespace Vulkan {
+
+namespace Dirty {
+
+enum : u8 {
+ First = VideoCommon::Dirty::LastCommonEntry,
+
+ Viewports,
+ Scissors,
+ DepthBias,
+ BlendConstants,
+ DepthBounds,
+ StencilProperties,
+
+ Last
+};
+static_assert(Last <= std::numeric_limits<u8>::max());
+
+} // namespace Dirty
+
+class StateTracker {
+public:
+ explicit StateTracker(Core::System& system);
+
+ void Initialize();
+
+ void InvalidateCommandBufferState();
+
+ bool TouchViewports() {
+ return Exchange(Dirty::Viewports, false);
+ }
+
+ bool TouchScissors() {
+ return Exchange(Dirty::Scissors, false);
+ }
+
+ bool TouchDepthBias() {
+ return Exchange(Dirty::DepthBias, false);
+ }
+
+ bool TouchBlendConstants() {
+ return Exchange(Dirty::BlendConstants, false);
+ }
+
+ bool TouchDepthBounds() {
+ return Exchange(Dirty::DepthBounds, false);
+ }
+
+ bool TouchStencilProperties() {
+ return Exchange(Dirty::StencilProperties, false);
+ }
+
+private:
+ bool Exchange(std::size_t id, bool new_value) const noexcept {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ const bool is_dirty = flags[id];
+ flags[id] = new_value;
+ return is_dirty;
+ }
+
+ Core::System& system;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index f47b691a8..9e73fa9cd 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -141,11 +141,6 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
- extent = ChooseSwapExtent(capabilities, width, height);
-
- current_width = extent.width;
- current_height = extent.height;
- current_srgb = srgb;
u32 requested_image_count{capabilities.minImageCount + 1};
if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
@@ -153,10 +148,9 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
}
vk::SwapchainCreateInfoKHR swapchain_ci(
- {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace,
- extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {},
- capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false,
- {});
+ {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, {}, 1,
+ vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, capabilities.currentTransform,
+ vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, {});
const u32 graphics_family{device.GetGraphicsFamily()};
const u32 present_family{device.GetPresentFamily()};
@@ -169,9 +163,18 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
}
+ // Request the size again to reduce the possibility of a TOCTOU race condition.
+ const auto updated_capabilities = physical_device.getSurfaceCapabilitiesKHR(surface, dld);
+ swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
+ // Don't add code within this and the swapchain creation.
const auto dev{device.GetLogical()};
swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
+ extent = swapchain_ci.imageExtent;
+ current_width = extent.width;
+ current_height = extent.height;
+ current_srgb = srgb;
+
images = dev.getSwapchainImagesKHR(*swapchain, dld);
image_count = static_cast<u32>(images.size());
image_format = surface_format.format;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 51b0d38a6..26175921b 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -22,6 +22,7 @@
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/surface.h"
@@ -51,6 +52,9 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) {
return vk::ImageType::e2D;
case SurfaceTarget::Texture3D:
return vk::ImageType::e3D;
+ case SurfaceTarget::TextureBuffer:
+ UNREACHABLE();
+ return {};
}
UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target));
return {};
@@ -272,7 +276,6 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
for (u32 level = 0; level < params.num_levels; ++level) {
vk::BufferImageCopy copy = GetBufferImageCopy(level);
- const auto& dld = device.GetDispatchLoader();
if (image->GetAspectMask() ==
(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
vk::BufferImageCopy depth = copy;
@@ -421,7 +424,6 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer,
vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal);
- const auto& dld{device.GetDispatchLoader()};
const vk::ImageSubresourceLayers src_subresource(
src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers);
const vk::ImageSubresourceLayers dst_subresource(
@@ -457,7 +459,6 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right});
const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
- const auto& dld{device.GetDispatchLoader()};
scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
is_linear](auto cmdbuf, auto& dld) {
cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
deleted file mode 100644
index 0638be8cb..000000000
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <tuple>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/const_buffer_locker.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Engines::SamplerDescriptor;
-
-ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
- : stage{shader_stage} {}
-
-ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine)
- : stage{shader_stage}, engine{&engine} {}
-
-ConstBufferLocker::~ConstBufferLocker() = default;
-
-std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
- const std::pair<u32, u32> key = {buffer, offset};
- const auto iter = keys.find(key);
- if (iter != keys.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
- keys.emplace(key, value);
- return value;
-}
-
-std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
- const u32 key = offset;
- const auto iter = bound_samplers.find(key);
- if (iter != bound_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
- bound_samplers.emplace(key, value);
- return value;
-}
-
-std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
- u32 buffer, u32 offset) {
- const std::pair key = {buffer, offset};
- const auto iter = bindless_samplers.find(key);
- if (iter != bindless_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
- bindless_samplers.emplace(key, value);
- return value;
-}
-
-std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
- if (bound_buffer_saved) {
- return bound_buffer;
- }
- if (!engine) {
- return std::nullopt;
- }
- bound_buffer_saved = true;
- bound_buffer = engine->GetBoundBuffer();
- return bound_buffer;
-}
-
-void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
- keys.insert_or_assign({buffer, offset}, value);
-}
-
-void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
- bound_samplers.insert_or_assign(offset, sampler);
-}
-
-void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
- bindless_samplers.insert_or_assign({buffer, offset}, sampler);
-}
-
-void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
- bound_buffer_saved = true;
- bound_buffer = buffer;
-}
-
-bool ConstBufferLocker::IsConsistent() const {
- if (!engine) {
- return false;
- }
- return std::all_of(keys.begin(), keys.end(),
- [this](const auto& pair) {
- const auto [cbuf, offset] = pair.first;
- const auto value = pair.second;
- return value == engine->AccessConstBuffer32(stage, cbuf, offset);
- }) &&
- std::all_of(bound_samplers.begin(), bound_samplers.end(),
- [this](const auto& sampler) {
- const auto [key, value] = sampler;
- return value == engine->AccessBoundSampler(stage, key);
- }) &&
- std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
- [this](const auto& sampler) {
- const auto [cbuf, offset] = sampler.first;
- const auto value = sampler.second;
- return value == engine->AccessBindlessSampler(stage, cbuf, offset);
- });
-}
-
-bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
- return std::tie(keys, bound_samplers, bindless_samplers) ==
- std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
deleted file mode 100644
index d3ea11087..000000000
--- a/src/video_core/shader/const_buffer_locker.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <optional>
-#include <unordered_map>
-#include "common/common_types.h"
-#include "common/hash.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/guest_driver.h"
-
-namespace VideoCommon::Shader {
-
-using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
-using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
-using BindlessSamplerMap =
- std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
-
-/**
- * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader
- * compiler. with it, the shader can obtain required data from GPU state and store it for disk
- * shader compilation.
- */
-class ConstBufferLocker {
-public:
- explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
-
- explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine);
-
- ~ConstBufferLocker();
-
- /// Retrieves a key from the locker, if it's registered, it will give the registered value, if
- /// not it will obtain it from maxwell3d and register it.
- std::optional<u32> ObtainKey(u32 buffer, u32 offset);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
-
- std::optional<u32> ObtainBoundBuffer();
-
- /// Inserts a key.
- void InsertKey(u32 buffer, u32 offset, u32 value);
-
- /// Inserts a bound sampler key.
- void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
-
- /// Inserts a bindless sampler key.
- void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
-
- /// Set the bound buffer for this locker.
- void SetBoundBuffer(u32 buffer);
-
- /// Checks keys and samplers against engine's current const buffers. Returns true if they are
- /// the same value, false otherwise;
- bool IsConsistent() const;
-
- /// Returns true if the keys are equal to the other ones in the locker.
- bool HasEqualKeys(const ConstBufferLocker& rhs) const;
-
- /// Gives an getter to the const buffer keys in the database.
- const KeyMap& GetKeys() const {
- return keys;
- }
-
- /// Gets samplers database.
- const BoundSamplerMap& GetBoundSamplers() const {
- return bound_samplers;
- }
-
- /// Gets bindless samplers database.
- const BindlessSamplerMap& GetBindlessSamplers() const {
- return bindless_samplers;
- }
-
- /// Gets bound buffer used on this shader
- u32 GetBoundBuffer() const {
- return bound_buffer;
- }
-
- /// Obtains access to the guest driver's profile.
- VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
- if (engine) {
- return &engine->AccessGuestDriverProfile();
- }
- return nullptr;
- }
-
-private:
- const Tegra::Engines::ShaderType stage;
- Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
- KeyMap keys;
- BoundSamplerMap bound_samplers;
- BindlessSamplerMap bindless_samplers;
- bool bound_buffer_saved{};
- u32 bound_buffer{};
-};
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 0229733b6..2e2711350 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -13,6 +13,7 @@
#include "common/common_types.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/control_flow.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -64,11 +65,11 @@ struct BlockInfo {
};
struct CFGRebuildState {
- explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
- : program_code{program_code}, locker{locker}, start{start} {}
+ explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry)
+ : program_code{program_code}, registry{registry}, start{start} {}
const ProgramCode& program_code;
- ConstBufferLocker& locker;
+ Registry& registry;
u32 start{};
std::vector<BlockInfo> block_info;
std::list<u32> inspect_queries;
@@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
const s32 pc_target = offset + result.relative_position;
std::vector<CaseBranch> branches;
for (u32 i = 0; i < result.entries; i++) {
- auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+ auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
if (!key) {
return {ParseResult::AbnormalFlow, parse_info};
}
@@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
- ConstBufferLocker& locker) {
+ Registry& registry) {
auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) {
result_out->settings.depth = CompileDepth::BruteForce;
return result_out;
}
- CFGRebuildState state{program_code, start_address, locker};
+ CFGRebuildState state{program_code, start_address, registry};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 5304998b9..62a3510d8 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -12,6 +12,7 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -111,6 +112,6 @@ struct ShaderCharacteristics {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
- ConstBufferLocker& locker);
+ Registry& registry);
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 6b697ed5d..87ac9ac6c 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
-void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
+void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
const std::list<Sampler>& used_samplers) {
- if (gpu_driver == nullptr) {
- LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
- return;
- }
- if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
+ if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
return;
}
u32 count{};
@@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
bound_offsets.emplace_back(sampler.GetOffset());
}
if (count > 1) {
- gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
+ gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
}
}
std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
- VideoCore::GuestDriverProfile* gpu_driver,
+ VideoCore::GuestDriverProfile& gpu_driver,
const std::list<Sampler>& used_samplers) {
- if (gpu_driver == nullptr) {
- LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
- return std::nullopt;
- }
const u32 base_offset = sampler_to_deduce.GetOffset();
u32 max_offset{std::numeric_limits<u32>::max()};
for (const auto& sampler : used_samplers) {
@@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
if (max_offset == std::numeric_limits<u32>::max()) {
return std::nullopt;
}
- return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
+ return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
}
} // Anonymous namespace
@@ -149,7 +141,7 @@ void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false;
- auto info = ScanFlow(program_code, main_offset, settings, locker);
+ auto info = ScanFlow(program_code, main_offset, settings, registry);
auto& shader_info = *info;
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
@@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
void ShaderIR::PostDecode() {
// Deduce texture handler size if needed
- auto gpu_driver = locker.AccessGuestDriverProfile();
+ auto gpu_driver = registry.AccessGuestDriverProfile();
DeduceTextureHandlerSize(gpu_driver, used_samplers);
// Deduce Indexed Samplers
if (!uses_indexed_samplers) {
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index e02bcd097..8e3b46e8e 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- UNIMPLEMENTED_IF(instr.bfe.negate_b);
-
Node op_a = GetRegister(instr.gpr8);
- op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::BFE_IMM: {
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in BFE is not implemented");
+ Node op_b = [&] {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::BFE_R:
+ return GetRegister(instr.gpr20);
+ case OpCode::Id::BFE_C:
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+ case OpCode::Id::BFE_IMM:
+ return Immediate(instr.alu.GetSignedImm20_20());
+ default:
+ UNREACHABLE();
+ return Immediate(0);
+ }
+ }();
- const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
- const Node outer_shift_imm =
- Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
+ UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");
- const Node inner_shift =
- Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
- const Node outer_shift =
- Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
+ const bool is_signed = instr.bfe.is_signed;
- SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
- SetRegister(bb, instr.gpr0, outer_shift);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
+ // using reverse parallel method in
+ // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+ // note for later if possible to implement faster method.
+ if (instr.bfe.brev) {
+ const auto swap = [&](u32 s, u32 mask) {
+ Node v1 =
+ SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
+ if (mask != 0) {
+ v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
+ Immediate(mask));
+ }
+ Node v2 = op_a;
+ if (mask != 0) {
+ v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
+ Immediate(mask));
+ }
+ v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
+ Immediate(s));
+ return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
+ std::move(v2));
+ };
+ op_a = swap(1, 0x55555555U);
+ op_a = swap(2, 0x33333333U);
+ op_a = swap(4, 0x0F0F0F0FU);
+ op_a = swap(8, 0x00FF00FFU);
+ op_a = swap(16, 0);
}
+ const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
+ Immediate(0), Immediate(8));
+ const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
+ Immediate(8), Immediate(8));
+ auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
+ SetRegister(bb, instr.gpr0, std::move(result));
+
return pc;
}
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index bee7d8cad..48350e042 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -12,6 +12,7 @@
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
if (sampler_info) {
return *sampler_info;
}
- const auto sampler =
- buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset);
+ const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
+ : registry.ObtainBoundSampler(offset);
if (!sampler) {
LOG_WARNING(HW_GPU, "Unknown sampler info");
return SamplerInfo{TextureType::Texture2D, false, false, false};
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 206961909..fbd7e9a17 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -12,6 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
@@ -63,15 +64,18 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
}
}();
- op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
+ op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
+ instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
const Node original_b = op_b;
- op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16);
+ op_b = SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b),
+ is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
- // TODO(Rodrigo): Use an appropiate sign for this operation
- Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
+ // we already check sign_a and sign_b is difference or not before so just use one in here.
+ Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
if (is_psl) {
- product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
+ product =
+ SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
}
SetTemporary(bb, 0, product);
product = GetTemporary(0);
@@ -88,12 +92,40 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
return BitfieldExtract(original_c, 16, 16);
case Tegra::Shader::XmadMode::CBcc: {
const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
- NO_PRECISE, original_b, Immediate(16));
- return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c,
- shifted_b);
+ original_b, Immediate(16));
+ return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
+ }
+ case Tegra::Shader::XmadMode::CSfu: {
+ const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a,
+ op_a, Immediate(0));
+ const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b,
+ op_b, Immediate(0));
+ const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
+
+ const Node comp_minus_a = GetPredicateComparisonInteger(
+ PredCondition::NotEqual, is_signed_a,
+ SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
+ Immediate(0x80000000)),
+ Immediate(0));
+ const Node comp_minus_b = GetPredicateComparisonInteger(
+ PredCondition::NotEqual, is_signed_b,
+ SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
+ Immediate(0x80000000)),
+ Immediate(0));
+
+ Node new_c = Operation(
+ OperationCode::Select, comp_minus_a,
+ SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
+ original_c);
+ new_c = Operation(
+ OperationCode::Select, comp_minus_b,
+ SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
+ std::move(new_c));
+
+ return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
}
default:
- UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
+ UNREACHABLE();
return Immediate(0);
}
}();
@@ -102,18 +134,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
op_c = GetTemporary(1);
// TODO(Rodrigo): Use an appropiate sign for this operation
- Node sum = Operation(OperationCode::IAdd, product, op_c);
+ Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
SetTemporary(bb, 2, sum);
sum = GetTemporary(2);
if (is_merge) {
- const Node a = BitfieldExtract(sum, 0, 16);
- const Node b =
- Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
- sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
+ const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
+ Immediate(0), Immediate(16));
+ const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
+ Immediate(16));
+ sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
}
SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
- SetRegister(bb, instr.gpr0, sum);
+ SetRegister(bb, instr.gpr0, std::move(sum));
return pc;
}
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index a0a7b9111..a1828546e 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -299,7 +299,7 @@ private:
u32 index{}; ///< Emulated index given for the this sampler.
u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
- u32 size{}; ///< Size of the sampler if indexed.
+ u32 size{1}; ///< Size of the sampler.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index b3dcd291c..76c56abb5 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
return OperationCode::UBitwiseXor;
case OperationCode::IBitwiseNot:
return OperationCode::UBitwiseNot;
+ case OperationCode::IBitfieldExtract:
+ return OperationCode::UBitfieldExtract;
case OperationCode::IBitfieldInsert:
return OperationCode::UBitfieldInsert;
case OperationCode::IBitCount:
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
new file mode 100644
index 000000000..af70b3f35
--- /dev/null
+++ b/src/video_core/shader/registry.cpp
@@ -0,0 +1,161 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
+#include "video_core/shader/registry.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Engines::ConstBufferEngineInterface;
+using Tegra::Engines::SamplerDescriptor;
+using Tegra::Engines::ShaderType;
+
+namespace {
+
+GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
+ if (shader_stage == ShaderType::Compute) {
+ return {};
+ }
+ auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
+
+ GraphicsInfo info;
+ info.tfb_layouts = graphics.regs.tfb_layouts;
+ info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
+ info.primitive_topology = graphics.regs.draw.topology;
+ info.tessellation_primitive = graphics.regs.tess_mode.prim;
+ info.tessellation_spacing = graphics.regs.tess_mode.spacing;
+ info.tfb_enabled = graphics.regs.tfb_enabled;
+ info.tessellation_clockwise = graphics.regs.tess_mode.cw;
+ return info;
+}
+
+ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
+ if (shader_stage != ShaderType::Compute) {
+ return {};
+ }
+ auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
+ const auto& launch = compute.launch_description;
+
+ ComputeInfo info;
+ info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
+ info.local_memory_size_in_words = launch.local_pos_alloc;
+ info.shared_memory_size_in_words = launch.shared_alloc;
+ return info;
+}
+
+} // Anonymous namespace
+
+Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
+ : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
+ bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
+
+Registry::Registry(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine)
+ : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
+ graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
+ shader_stage, engine)} {}
+
+Registry::~Registry() = default;
+
+std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
+ const std::pair<u32, u32> key = {buffer, offset};
+ const auto iter = keys.find(key);
+ if (iter != keys.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+ const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
+ keys.emplace(key, value);
+ return value;
+}
+
+std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
+ const u32 key = offset;
+ const auto iter = bound_samplers.find(key);
+ if (iter != bound_samplers.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+ const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
+ bound_samplers.emplace(key, value);
+ return value;
+}
+
+std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
+ u32 offset) {
+ const std::pair key = {buffer, offset};
+ const auto iter = bindless_samplers.find(key);
+ if (iter != bindless_samplers.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+ const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
+ bindless_samplers.emplace(key, value);
+ return value;
+}
+
+void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
+ keys.insert_or_assign({buffer, offset}, value);
+}
+
+void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
+ bound_samplers.insert_or_assign(offset, sampler);
+}
+
+void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
+ bindless_samplers.insert_or_assign({buffer, offset}, sampler);
+}
+
+bool Registry::IsConsistent() const {
+ if (!engine) {
+ return true;
+ }
+ return std::all_of(keys.begin(), keys.end(),
+ [this](const auto& pair) {
+ const auto [cbuf, offset] = pair.first;
+ const auto value = pair.second;
+ return value == engine->AccessConstBuffer32(stage, cbuf, offset);
+ }) &&
+ std::all_of(bound_samplers.begin(), bound_samplers.end(),
+ [this](const auto& sampler) {
+ const auto [key, value] = sampler;
+ return value == engine->AccessBoundSampler(stage, key);
+ }) &&
+ std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
+ [this](const auto& sampler) {
+ const auto [cbuf, offset] = sampler.first;
+ const auto value = sampler.second;
+ return value == engine->AccessBindlessSampler(stage, cbuf, offset);
+ });
+}
+
+bool Registry::HasEqualKeys(const Registry& rhs) const {
+ return std::tie(keys, bound_samplers, bindless_samplers) ==
+ std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
+}
+
+const GraphicsInfo& Registry::GetGraphicsInfo() const {
+ ASSERT(stage != Tegra::Engines::ShaderType::Compute);
+ return graphics_info;
+}
+
+const ComputeInfo& Registry::GetComputeInfo() const {
+ ASSERT(stage == Tegra::Engines::ShaderType::Compute);
+ return compute_info;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
new file mode 100644
index 000000000..0c80d35fd
--- /dev/null
+++ b/src/video_core/shader/registry.h
@@ -0,0 +1,137 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <optional>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+
+#include "common/common_types.h"
+#include "common/hash.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
+
+namespace VideoCommon::Shader {
+
+using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
+using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using BindlessSamplerMap =
+ std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
+
+struct GraphicsInfo {
+ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+ std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
+ tfb_layouts{};
+ std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
+ Maxwell::PrimitiveTopology primitive_topology{};
+ Maxwell::TessellationPrimitive tessellation_primitive{};
+ Maxwell::TessellationSpacing tessellation_spacing{};
+ bool tfb_enabled = false;
+ bool tessellation_clockwise = false;
+};
+static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
+ std::is_standard_layout_v<GraphicsInfo>);
+
+struct ComputeInfo {
+ std::array<u32, 3> workgroup_size{};
+ u32 shared_memory_size_in_words = 0;
+ u32 local_memory_size_in_words = 0;
+};
+static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
+
+struct SerializedRegistryInfo {
+ VideoCore::GuestDriverProfile guest_driver_profile;
+ u32 bound_buffer = 0;
+ GraphicsInfo graphics;
+ ComputeInfo compute;
+};
+
+/**
+ * The Registry is a class use to interface the 3D and compute engines with the shader compiler.
+ * With it, the shader can obtain required data from GPU state and store it for disk shader
+ * compilation.
+ */
+class Registry {
+public:
+ explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
+
+ explicit Registry(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine);
+
+ ~Registry();
+
+ /// Retrieves a key from the registry, if it's registered, it will give the registered value, if
+ /// not it will obtain it from maxwell3d and register it.
+ std::optional<u32> ObtainKey(u32 buffer, u32 offset);
+
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
+
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
+
+ /// Inserts a key.
+ void InsertKey(u32 buffer, u32 offset, u32 value);
+
+ /// Inserts a bound sampler key.
+ void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Inserts a bindless sampler key.
+ void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Checks keys and samplers against engine's current const buffers.
+ /// Returns true if they are the same value, false otherwise.
+ bool IsConsistent() const;
+
+ /// Returns true if the keys are equal to the other ones in the registry.
+ bool HasEqualKeys(const Registry& rhs) const;
+
+ /// Returns graphics information from this shader
+ const GraphicsInfo& GetGraphicsInfo() const;
+
+ /// Returns compute information from this shader
+ const ComputeInfo& GetComputeInfo() const;
+
+ /// Gives an getter to the const buffer keys in the database.
+ const KeyMap& GetKeys() const {
+ return keys;
+ }
+
+ /// Gets samplers database.
+ const BoundSamplerMap& GetBoundSamplers() const {
+ return bound_samplers;
+ }
+
+ /// Gets bindless samplers database.
+ const BindlessSamplerMap& GetBindlessSamplers() const {
+ return bindless_samplers;
+ }
+
+ /// Gets bound buffer used on this shader
+ u32 GetBoundBuffer() const {
+ return bound_buffer;
+ }
+
+ /// Obtains access to the guest driver's profile.
+ VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
+ return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
+ }
+
+private:
+ const Tegra::Engines::ShaderType stage;
+ VideoCore::GuestDriverProfile stored_guest_driver_profile;
+ Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
+ KeyMap keys;
+ BoundSamplerMap bound_samplers;
+ BindlessSamplerMap bindless_samplers;
+ u32 bound_buffer;
+ GraphicsInfo graphics_info;
+ ComputeInfo compute_info;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 3a5d280a9..425927777 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -11,6 +11,7 @@
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
- ConstBufferLocker& locker)
- : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
+ Registry& registry)
+ : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
Decode();
PostDecode();
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index b0851c3be..dde036b40 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,8 +18,8 @@
#include "video_core/engines/shader_header.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/node.h"
+#include "video_core/shader/registry.h"
namespace VideoCommon::Shader {
@@ -69,7 +69,7 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
- ConstBufferLocker& locker);
+ Registry& registry);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -414,7 +414,7 @@ private:
const ProgramCode& program_code;
const u32 main_offset;
const CompilerSettings settings;
- ConstBufferLocker& locker;
+ Registry& registry;
bool decompiled{};
bool disable_flow_stack{};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index face8c943..10739b37d 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
return {tracked, track};
} else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
- auto bound_buffer = locker.ObtainBoundBuffer();
- if (!bound_buffer) {
+ const u32 bound_buffer = registry.GetBoundBuffer();
+ if (bound_buffer != cbuf->GetIndex()) {
return {};
}
- if (*bound_buffer != cbuf->GetIndex()) {
- return {};
- }
- auto pair = DecoupleIndirectRead(*operation);
+ const auto pair = DecoupleIndirectRead(*operation);
if (!pair) {
return {};
}
auto [gpr, base_offset] = *pair;
const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
- auto gpu_driver = locker.AccessGuestDriverProfile();
- if (gpu_driver == nullptr) {
- return {};
- }
+ const auto& gpu_driver = registry.AccessGuestDriverProfile();
const u32 bindless_cv = NewCustomVariable();
- const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
- Immediate(gpu_driver->GetTextureHandlerSize()));
+ const Node op =
+ Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
const Node cv_node = GetCustomVariable(bindless_cv);
Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
@@ -157,13 +151,21 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co
if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
return {};
}
- // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
- // register that it uses as operand
- const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
- if (!source) {
- return {};
+ s64 current_cursor = cursor;
+ while (current_cursor > 0) {
+ // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
+ // register that it uses as operand
+ const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1);
+ current_cursor = new_cursor;
+ if (!source) {
+ continue;
+ }
+ const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor);
+ if (base_address != nullptr) {
+ return {base_address, index, offset};
+ }
}
- return TrackCbuf(source, code, new_cursor);
+ return {};
}
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
new file mode 100644
index 000000000..22a933761
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.cpp
@@ -0,0 +1,115 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/registry.h"
+#include "video_core/shader/transform_feedback.h"
+
+namespace VideoCommon::Shader {
+
+namespace {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
+
+/// Attribute offsets that describe a vector
+constexpr std::array VECTORS = {
+ 28, // gl_Position
+ 32, // Generic 0
+ 36, // Generic 1
+ 40, // Generic 2
+ 44, // Generic 3
+ 48, // Generic 4
+ 52, // Generic 5
+ 56, // Generic 6
+ 60, // Generic 7
+ 64, // Generic 8
+ 68, // Generic 9
+ 72, // Generic 10
+ 76, // Generic 11
+ 80, // Generic 12
+ 84, // Generic 13
+ 88, // Generic 14
+ 92, // Generic 15
+ 96, // Generic 16
+ 100, // Generic 17
+ 104, // Generic 18
+ 108, // Generic 19
+ 112, // Generic 20
+ 116, // Generic 21
+ 120, // Generic 22
+ 124, // Generic 23
+ 128, // Generic 24
+ 132, // Generic 25
+ 136, // Generic 26
+ 140, // Generic 27
+ 144, // Generic 28
+ 148, // Generic 29
+ 152, // Generic 30
+ 156, // Generic 31
+ 160, // gl_FrontColor
+ 164, // gl_FrontSecondaryColor
+ 160, // gl_BackColor
+ 164, // gl_BackSecondaryColor
+ 192, // gl_TexCoord[0]
+ 196, // gl_TexCoord[1]
+ 200, // gl_TexCoord[2]
+ 204, // gl_TexCoord[3]
+ 208, // gl_TexCoord[4]
+ 212, // gl_TexCoord[5]
+ 216, // gl_TexCoord[6]
+ 220, // gl_TexCoord[7]
+};
+} // namespace
+
+std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
+
+ std::unordered_map<u8, VaryingTFB> tfb;
+
+ for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
+ const auto& locations = info.tfb_varying_locs[buffer];
+ const auto& layout = info.tfb_layouts[buffer];
+ const std::size_t varying_count = layout.varying_count;
+
+ std::size_t highest = 0;
+
+ for (std::size_t offset = 0; offset < varying_count; ++offset) {
+ const std::size_t base_offset = offset;
+ const u8 location = locations[offset];
+
+ VaryingTFB varying;
+ varying.buffer = layout.stream;
+ varying.stride = layout.stride;
+ varying.offset = offset * sizeof(u32);
+ varying.components = 1;
+
+ if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
+ UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
+
+ const u8 base_index = location / 4;
+ while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
+ ++offset;
+ ++varying.components;
+ }
+ }
+
+ [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
+ UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
+
+ highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
+ }
+
+ UNIMPLEMENTED_IF(highest != layout.stride);
+ }
+ return tfb;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
new file mode 100644
index 000000000..77d05f64c
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.h
@@ -0,0 +1,23 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+
+#include "common/common_types.h"
+#include "video_core/shader/registry.h"
+
+namespace VideoCommon::Shader {
+
+struct VaryingTFB {
+ std::size_t buffer;
+ std::size_t stride;
+ std::size_t offset;
+ std::size_t components;
+};
+
+std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 9707c353d..cc7181229 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
return PixelFormat::RGBA16F;
case Tegra::RenderTargetFormat::RGBA16_UNORM:
return PixelFormat::RGBA16U;
+ case Tegra::RenderTargetFormat::RGBA16_SNORM:
+ return PixelFormat::RGBA16S;
case Tegra::RenderTargetFormat::RGBA16_UINT:
return PixelFormat::RGBA16UI;
case Tegra::RenderTargetFormat::RGBA32_FLOAT:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index d88109e5a..ae8817465 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -25,82 +25,83 @@ enum class PixelFormat {
R8UI = 7,
RGBA16F = 8,
RGBA16U = 9,
- RGBA16UI = 10,
- R11FG11FB10F = 11,
- RGBA32UI = 12,
- DXT1 = 13,
- DXT23 = 14,
- DXT45 = 15,
- DXN1 = 16, // This is also known as BC4
- DXN2UNORM = 17,
- DXN2SNORM = 18,
- BC7U = 19,
- BC6H_UF16 = 20,
- BC6H_SF16 = 21,
- ASTC_2D_4X4 = 22,
- BGRA8 = 23,
- RGBA32F = 24,
- RG32F = 25,
- R32F = 26,
- R16F = 27,
- R16U = 28,
- R16S = 29,
- R16UI = 30,
- R16I = 31,
- RG16 = 32,
- RG16F = 33,
- RG16UI = 34,
- RG16I = 35,
- RG16S = 36,
- RGB32F = 37,
- RGBA8_SRGB = 38,
- RG8U = 39,
- RG8S = 40,
- RG32UI = 41,
- RGBX16F = 42,
- R32UI = 43,
- R32I = 44,
- ASTC_2D_8X8 = 45,
- ASTC_2D_8X5 = 46,
- ASTC_2D_5X4 = 47,
- BGRA8_SRGB = 48,
- DXT1_SRGB = 49,
- DXT23_SRGB = 50,
- DXT45_SRGB = 51,
- BC7U_SRGB = 52,
- R4G4B4A4U = 53,
- ASTC_2D_4X4_SRGB = 54,
- ASTC_2D_8X8_SRGB = 55,
- ASTC_2D_8X5_SRGB = 56,
- ASTC_2D_5X4_SRGB = 57,
- ASTC_2D_5X5 = 58,
- ASTC_2D_5X5_SRGB = 59,
- ASTC_2D_10X8 = 60,
- ASTC_2D_10X8_SRGB = 61,
- ASTC_2D_6X6 = 62,
- ASTC_2D_6X6_SRGB = 63,
- ASTC_2D_10X10 = 64,
- ASTC_2D_10X10_SRGB = 65,
- ASTC_2D_12X12 = 66,
- ASTC_2D_12X12_SRGB = 67,
- ASTC_2D_8X6 = 68,
- ASTC_2D_8X6_SRGB = 69,
- ASTC_2D_6X5 = 70,
- ASTC_2D_6X5_SRGB = 71,
- E5B9G9R9F = 72,
+ RGBA16S = 10,
+ RGBA16UI = 11,
+ R11FG11FB10F = 12,
+ RGBA32UI = 13,
+ DXT1 = 14,
+ DXT23 = 15,
+ DXT45 = 16,
+ DXN1 = 17, // This is also known as BC4
+ DXN2UNORM = 18,
+ DXN2SNORM = 19,
+ BC7U = 20,
+ BC6H_UF16 = 21,
+ BC6H_SF16 = 22,
+ ASTC_2D_4X4 = 23,
+ BGRA8 = 24,
+ RGBA32F = 25,
+ RG32F = 26,
+ R32F = 27,
+ R16F = 28,
+ R16U = 29,
+ R16S = 30,
+ R16UI = 31,
+ R16I = 32,
+ RG16 = 33,
+ RG16F = 34,
+ RG16UI = 35,
+ RG16I = 36,
+ RG16S = 37,
+ RGB32F = 38,
+ RGBA8_SRGB = 39,
+ RG8U = 40,
+ RG8S = 41,
+ RG32UI = 42,
+ RGBX16F = 43,
+ R32UI = 44,
+ R32I = 45,
+ ASTC_2D_8X8 = 46,
+ ASTC_2D_8X5 = 47,
+ ASTC_2D_5X4 = 48,
+ BGRA8_SRGB = 49,
+ DXT1_SRGB = 50,
+ DXT23_SRGB = 51,
+ DXT45_SRGB = 52,
+ BC7U_SRGB = 53,
+ R4G4B4A4U = 54,
+ ASTC_2D_4X4_SRGB = 55,
+ ASTC_2D_8X8_SRGB = 56,
+ ASTC_2D_8X5_SRGB = 57,
+ ASTC_2D_5X4_SRGB = 58,
+ ASTC_2D_5X5 = 59,
+ ASTC_2D_5X5_SRGB = 60,
+ ASTC_2D_10X8 = 61,
+ ASTC_2D_10X8_SRGB = 62,
+ ASTC_2D_6X6 = 63,
+ ASTC_2D_6X6_SRGB = 64,
+ ASTC_2D_10X10 = 65,
+ ASTC_2D_10X10_SRGB = 66,
+ ASTC_2D_12X12 = 67,
+ ASTC_2D_12X12_SRGB = 68,
+ ASTC_2D_8X6 = 69,
+ ASTC_2D_8X6_SRGB = 70,
+ ASTC_2D_6X5 = 71,
+ ASTC_2D_6X5_SRGB = 72,
+ E5B9G9R9F = 73,
MaxColorFormat,
// Depth formats
- Z32F = 73,
- Z16 = 74,
+ Z32F = 74,
+ Z16 = 75,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 75,
- S8Z24 = 76,
- Z32FS8 = 77,
+ Z24S8 = 76,
+ S8Z24 = 77,
+ Z32FS8 = 78,
MaxDepthStencilFormat,
@@ -138,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
0, // R8UI
0, // RGBA16F
0, // RGBA16U
+ 0, // RGBA16S
0, // RGBA16UI
0, // R11FG11FB10F
0, // RGBA32UI
@@ -235,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
1, // R8UI
1, // RGBA16F
1, // RGBA16U
+ 1, // RGBA16S
1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
@@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
1, // R8UI
1, // RGBA16F
1, // RGBA16U
+ 1, // RGBA16S
1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
@@ -413,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
8, // R8UI
64, // RGBA16F
64, // RGBA16U
+ 64, // RGBA16S
64, // RGBA16UI
32, // R11FG11FB10F
128, // RGBA32UI
@@ -517,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::None, // R8UI
SurfaceCompression::None, // RGBA16F
SurfaceCompression::None, // RGBA16U
+ SurfaceCompression::None, // RGBA16S
SurfaceCompression::None, // RGBA16UI
SurfaceCompression::None, // R11FG11FB10F
SurfaceCompression::None, // RGBA32UI
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index cc3ad8417..e151c26c4 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
-constexpr std::array<Table, 75> DefinitionTable = {{
+constexpr std::array<Table, 76> DefinitionTable = {{
{TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
{TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
{TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -61,6 +61,7 @@ constexpr std::array<Table, 75> DefinitionTable = {{
{TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
{TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
+ {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
{TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
{TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
{TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index f00839313..9931c5ef7 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -113,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
params.height = tic.Height();
params.depth = tic.Depth();
params.pitch = params.is_tiled ? 0 : tic.Pitch();
- if (params.target == SurfaceTarget::TextureCubemap ||
- params.target == SurfaceTarget::TextureCubeArray) {
+ if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
+ params.depth = 1;
+ } else if (params.target == SurfaceTarget::TextureCubemap ||
+ params.target == SurfaceTarget::TextureCubeArray) {
params.depth *= 6;
}
params.num_levels = tic.max_mip_level + 1;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c70e4aec2..6cdbe63d0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -22,6 +22,7 @@
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
+#include "video_core/dirty_flags.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
@@ -103,6 +104,11 @@ public:
if (!cache_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
+
+ if (!IsTypeCompatible(tic.texture_type, entry)) {
+ return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
+ }
+
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
if (guard_samplers) {
@@ -142,11 +148,10 @@ public:
TView GetDepthBufferSurface(bool preserve_contents) {
std::lock_guard lock{mutex};
auto& maxwell3d = system.GPU().Maxwell3D();
-
- if (!maxwell3d.dirty.depth_buffer) {
+ if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
return depth_buffer.view;
}
- maxwell3d.dirty.depth_buffer = false;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
const auto& regs{maxwell3d.regs};
const auto gpu_addr{regs.zeta.Address()};
@@ -175,10 +180,10 @@ public:
std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.render_target[index]) {
+ if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
return render_targets[index].view;
}
- maxwell3d.dirty.render_target[index] = false;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
const auto& regs{maxwell3d.regs};
if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@@ -320,14 +325,14 @@ protected:
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
void ManageRenderTargetUnregister(TSurface& surface) {
- auto& maxwell3d = system.GPU().Maxwell3D();
+ auto& dirty = system.GPU().Maxwell3D().dirty;
const u32 index = surface->GetRenderTarget();
if (index == DEPTH_RT) {
- maxwell3d.dirty.depth_buffer = true;
+ dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
} else {
- maxwell3d.dirty.render_target[index] = true;
+ dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true;
}
- maxwell3d.dirty.render_settings = true;
+ dirty.flags[VideoCommon::Dirty::RenderTargets] = true;
}
void Register(TSurface surface) {
@@ -914,13 +919,15 @@ private:
params.width = 1;
params.height = 1;
params.depth = 1;
+ if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
+ params.depth = 6;
+ }
params.pitch = 4;
params.num_levels = 1;
params.emulated_levels = 1;
- params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F;
+ params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
params.type = VideoCore::Surface::SurfaceType::ColorTexture;
auto surface = CreateSurface(0ULL, params);
- invalid_memory.clear();
invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
surface->UploadTexture(invalid_memory);
surface->MarkAsModified(false, Tick());
@@ -1082,6 +1089,36 @@ private:
return siblings_table[static_cast<std::size_t>(format)];
}
+ /// Returns true the shader sampler entry is compatible with the TIC texture type.
+ static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
+ const VideoCommon::Shader::Sampler& entry) {
+ const auto shader_type = entry.GetType();
+ switch (tic_type) {
+ case Tegra::Texture::TextureType::Texture1D:
+ case Tegra::Texture::TextureType::Texture1DArray:
+ return shader_type == Tegra::Shader::TextureType::Texture1D;
+ case Tegra::Texture::TextureType::Texture1DBuffer:
+ // TODO(Rodrigo): Assume as valid for now
+ return true;
+ case Tegra::Texture::TextureType::Texture2D:
+ case Tegra::Texture::TextureType::Texture2DNoMipmap:
+ return shader_type == Tegra::Shader::TextureType::Texture2D;
+ case Tegra::Texture::TextureType::Texture2DArray:
+ return shader_type == Tegra::Shader::TextureType::Texture2D ||
+ shader_type == Tegra::Shader::TextureType::TextureCube;
+ case Tegra::Texture::TextureType::Texture3D:
+ return shader_type == Tegra::Shader::TextureType::Texture3D;
+ case Tegra::Texture::TextureType::TextureCubeArray:
+ case Tegra::Texture::TextureType::TextureCubemap:
+ if (shader_type == Tegra::Shader::TextureType::TextureCube) {
+ return true;
+ }
+ return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray();
+ }
+ UNREACHABLE();
+ return true;
+ }
+
struct FramebufferTargetInfo {
TSurface target;
TView view;
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 33bd31865..062b4f252 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -17,26 +17,37 @@
#include <algorithm>
#include <cassert>
-#include <cstdint>
#include <cstring>
#include <vector>
+#include "common/common_types.h"
+
#include "video_core/textures/astc.h"
+namespace {
+
+/// Count the number of bits set in a number.
+constexpr u32 Popcnt(u32 n) {
+ u32 c = 0;
+ for (; n; c++) {
+ n &= n - 1;
+ }
+ return c;
+}
+
+} // Anonymous namespace
+
class InputBitStream {
public:
- explicit InputBitStream(const unsigned char* ptr, int start_offset = 0)
+ explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
: m_CurByte(ptr), m_NextBit(start_offset % 8) {}
- ~InputBitStream() = default;
-
- int GetBitsRead() const {
+ std::size_t GetBitsRead() const {
return m_BitsRead;
}
- int ReadBit() {
-
- int bit = *m_CurByte >> m_NextBit++;
+ u32 ReadBit() {
+ u32 bit = *m_CurByte >> m_NextBit++;
while (m_NextBit >= 8) {
m_NextBit -= 8;
m_CurByte++;
@@ -46,57 +57,66 @@ public:
return bit & 1;
}
- unsigned int ReadBits(unsigned int nBits) {
- unsigned int ret = 0;
- for (unsigned int i = 0; i < nBits; i++) {
+ u32 ReadBits(std::size_t nBits) {
+ u32 ret = 0;
+ for (std::size_t i = 0; i < nBits; ++i) {
+ ret |= (ReadBit() & 1) << i;
+ }
+ return ret;
+ }
+
+ template <std::size_t nBits>
+ u32 ReadBits() {
+ u32 ret = 0;
+ for (std::size_t i = 0; i < nBits; ++i) {
ret |= (ReadBit() & 1) << i;
}
return ret;
}
private:
- const unsigned char* m_CurByte;
- int m_NextBit = 0;
- int m_BitsRead = 0;
+ const u8* m_CurByte;
+ std::size_t m_NextBit = 0;
+ std::size_t m_BitsRead = 0;
};
class OutputBitStream {
public:
- explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
+ explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0)
: m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
~OutputBitStream() = default;
- int GetBitsWritten() const {
+ s32 GetBitsWritten() const {
return m_BitsWritten;
}
- void WriteBitsR(unsigned int val, unsigned int nBits) {
- for (unsigned int i = 0; i < nBits; i++) {
+ void WriteBitsR(u32 val, u32 nBits) {
+ for (u32 i = 0; i < nBits; i++) {
WriteBit((val >> (nBits - i - 1)) & 1);
}
}
- void WriteBits(unsigned int val, unsigned int nBits) {
- for (unsigned int i = 0; i < nBits; i++) {
+ void WriteBits(u32 val, u32 nBits) {
+ for (u32 i = 0; i < nBits; i++) {
WriteBit((val >> i) & 1);
}
}
private:
- void WriteBit(int b) {
+ void WriteBit(s32 b) {
if (done)
return;
- const unsigned int mask = 1 << m_NextBit++;
+ const u32 mask = 1 << m_NextBit++;
// clear the bit
- *m_CurByte &= static_cast<unsigned char>(~mask);
+ *m_CurByte &= static_cast<u8>(~mask);
// Write the bit, if necessary
if (b)
- *m_CurByte |= static_cast<unsigned char>(mask);
+ *m_CurByte |= static_cast<u8>(mask);
// Next byte?
if (m_NextBit >= 8) {
@@ -107,10 +127,10 @@ private:
done = done || ++m_BitsWritten >= m_NumBits;
}
- int m_BitsWritten = 0;
- const int m_NumBits;
- unsigned char* m_CurByte;
- int m_NextBit = 0;
+ s32 m_BitsWritten = 0;
+ const s32 m_NumBits;
+ u8* m_CurByte;
+ s32 m_NextBit = 0;
bool done = false;
};
@@ -123,20 +143,20 @@ public:
Bits(const Bits&) = delete;
Bits& operator=(const Bits&) = delete;
- uint8_t operator[](uint32_t bitPos) const {
- return static_cast<uint8_t>((m_Bits >> bitPos) & 1);
+ u8 operator[](u32 bitPos) const {
+ return static_cast<u8>((m_Bits >> bitPos) & 1);
}
- IntType operator()(uint32_t start, uint32_t end) const {
+ IntType operator()(u32 start, u32 end) const {
if (start == end) {
return (*this)[start];
} else if (start > end) {
- uint32_t t = start;
+ u32 t = start;
start = end;
end = t;
}
- uint64_t mask = (1 << (end - start + 1)) - 1;
+ u64 mask = (1 << (end - start + 1)) - 1;
return (m_Bits >> start) & static_cast<IntType>(mask);
}
@@ -144,273 +164,236 @@ private:
const IntType& m_Bits;
};
-enum EIntegerEncoding { eIntegerEncoding_JustBits, eIntegerEncoding_Quint, eIntegerEncoding_Trit };
-
-class IntegerEncodedValue {
-private:
- const EIntegerEncoding m_Encoding;
- const uint32_t m_NumBits;
- uint32_t m_BitValue;
- union {
- uint32_t m_QuintValue;
- uint32_t m_TritValue;
- };
+enum class IntegerEncoding { JustBits, Qus32, Trit };
-public:
- // Jank, but we're not doing any heavy lifting in this class, so it's
- // probably OK. It allows us to use these in std::vectors...
- IntegerEncodedValue& operator=(const IntegerEncodedValue& other) {
- new (this) IntegerEncodedValue(other);
- return *this;
- }
+struct IntegerEncodedValue {
+ constexpr IntegerEncodedValue() = default;
- IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits)
- : m_Encoding(encoding), m_NumBits(numBits) {}
+ constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
+ : encoding{encoding_}, num_bits{num_bits_} {}
- EIntegerEncoding GetEncoding() const {
- return m_Encoding;
- }
- uint32_t BaseBitLength() const {
- return m_NumBits;
- }
-
- uint32_t GetBitValue() const {
- return m_BitValue;
- }
- void SetBitValue(uint32_t val) {
- m_BitValue = val;
- }
-
- uint32_t GetTritValue() const {
- return m_TritValue;
- }
- void SetTritValue(uint32_t val) {
- m_TritValue = val;
- }
-
- uint32_t GetQuintValue() const {
- return m_QuintValue;
- }
- void SetQuintValue(uint32_t val) {
- m_QuintValue = val;
- }
-
- bool MatchesEncoding(const IntegerEncodedValue& other) const {
- return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits;
+ constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
+ return encoding == other.encoding && num_bits == other.num_bits;
}
// Returns the number of bits required to encode nVals values.
- uint32_t GetBitLength(uint32_t nVals) const {
- uint32_t totalBits = m_NumBits * nVals;
- if (m_Encoding == eIntegerEncoding_Trit) {
+ u32 GetBitLength(u32 nVals) const {
+ u32 totalBits = num_bits * nVals;
+ if (encoding == IntegerEncoding::Trit) {
totalBits += (nVals * 8 + 4) / 5;
- } else if (m_Encoding == eIntegerEncoding_Quint) {
+ } else if (encoding == IntegerEncoding::Qus32) {
totalBits += (nVals * 7 + 2) / 3;
}
return totalBits;
}
- // Count the number of bits set in a number.
- static inline uint32_t Popcnt(uint32_t n) {
- uint32_t c;
- for (c = 0; n; c++) {
- n &= n - 1;
+ IntegerEncoding encoding{};
+ u32 num_bits = 0;
+ u32 bit_value = 0;
+ union {
+ u32 qus32_value = 0;
+ u32 trit_value;
+ };
+};
+
+static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
+ u32 nBitsPerValue) {
+ // Implement the algorithm in section C.2.12
+ u32 m[5];
+ u32 t[5];
+ u32 T;
+
+ // Read the trit encoded block according to
+ // table C.2.14
+ m[0] = bits.ReadBits(nBitsPerValue);
+ T = bits.ReadBits<2>();
+ m[1] = bits.ReadBits(nBitsPerValue);
+ T |= bits.ReadBits<2>() << 2;
+ m[2] = bits.ReadBits(nBitsPerValue);
+ T |= bits.ReadBit() << 4;
+ m[3] = bits.ReadBits(nBitsPerValue);
+ T |= bits.ReadBits<2>() << 5;
+ m[4] = bits.ReadBits(nBitsPerValue);
+ T |= bits.ReadBit() << 7;
+
+ u32 C = 0;
+
+ Bits<u32> Tb(T);
+ if (Tb(2, 4) == 7) {
+ C = (Tb(5, 7) << 2) | Tb(0, 1);
+ t[4] = t[3] = 2;
+ } else {
+ C = Tb(0, 4);
+ if (Tb(5, 6) == 3) {
+ t[4] = 2;
+ t[3] = Tb[7];
+ } else {
+ t[4] = Tb[7];
+ t[3] = Tb(5, 6);
}
- return c;
}
- // Returns a new instance of this struct that corresponds to the
- // can take no more than maxval values
- static IntegerEncodedValue CreateEncoding(uint32_t maxVal) {
- while (maxVal > 0) {
- uint32_t check = maxVal + 1;
-
- // Is maxVal a power of two?
- if (!(check & (check - 1))) {
- return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal));
- }
-
- // Is maxVal of the type 3*2^n - 1?
- if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
- return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1));
- }
+ Bits<u32> Cb(C);
+ if (Cb(0, 1) == 3) {
+ t[2] = 2;
+ t[1] = Cb[4];
+ t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]);
+ } else if (Cb(2, 3) == 3) {
+ t[2] = 2;
+ t[1] = 2;
+ t[0] = Cb(0, 1);
+ } else {
+ t[2] = Cb[4];
+ t[1] = Cb(2, 3);
+ t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]);
+ }
- // Is maxVal of the type 5*2^n - 1?
- if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
- return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1));
- }
+ for (std::size_t i = 0; i < 5; ++i) {
+ IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue);
+ val.bit_value = m[i];
+ val.trit_value = t[i];
+ }
+}
- // Apparently it can't be represented with a bounded integer sequence...
- // just iterate.
- maxVal--;
+static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
+ u32 nBitsPerValue) {
+ // Implement the algorithm in section C.2.12
+ u32 m[3];
+ u32 q[3];
+ u32 Q;
+
+ // Read the trit encoded block according to
+ // table C.2.15
+ m[0] = bits.ReadBits(nBitsPerValue);
+ Q = bits.ReadBits<3>();
+ m[1] = bits.ReadBits(nBitsPerValue);
+ Q |= bits.ReadBits<2>() << 3;
+ m[2] = bits.ReadBits(nBitsPerValue);
+ Q |= bits.ReadBits<2>() << 5;
+
+ Bits<u32> Qb(Q);
+ if (Qb(1, 2) == 3 && Qb(5, 6) == 0) {
+ q[0] = q[1] = 4;
+ q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]);
+ } else {
+ u32 C = 0;
+ if (Qb(1, 2) == 3) {
+ q[2] = 4;
+ C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0];
+ } else {
+ q[2] = Qb(5, 6);
+ C = Qb(0, 4);
}
- return IntegerEncodedValue(eIntegerEncoding_JustBits, 0);
- }
-
- // Fills result with the values that are encoded in the given
- // bitstream. We must know beforehand what the maximum possible
- // value is, and how many values we're decoding.
- static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
- InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
- // Determine encoding parameters
- IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
-
- // Start decoding
- uint32_t nValsDecoded = 0;
- while (nValsDecoded < nValues) {
- switch (val.GetEncoding()) {
- case eIntegerEncoding_Quint:
- DecodeQuintBlock(bits, result, val.BaseBitLength());
- nValsDecoded += 3;
- break;
- case eIntegerEncoding_Trit:
- DecodeTritBlock(bits, result, val.BaseBitLength());
- nValsDecoded += 5;
- break;
-
- case eIntegerEncoding_JustBits:
- val.SetBitValue(bits.ReadBits(val.BaseBitLength()));
- result.push_back(val);
- nValsDecoded++;
- break;
- }
+ Bits<u32> Cb(C);
+ if (Cb(0, 2) == 5) {
+ q[1] = 4;
+ q[0] = Cb(3, 4);
+ } else {
+ q[1] = Cb(3, 4);
+ q[0] = Cb(0, 2);
}
}
-private:
- static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
- uint32_t nBitsPerValue) {
- // Implement the algorithm in section C.2.12
- uint32_t m[5];
- uint32_t t[5];
- uint32_t T;
-
- // Read the trit encoded block according to
- // table C.2.14
- m[0] = bits.ReadBits(nBitsPerValue);
- T = bits.ReadBits(2);
- m[1] = bits.ReadBits(nBitsPerValue);
- T |= bits.ReadBits(2) << 2;
- m[2] = bits.ReadBits(nBitsPerValue);
- T |= bits.ReadBit() << 4;
- m[3] = bits.ReadBits(nBitsPerValue);
- T |= bits.ReadBits(2) << 5;
- m[4] = bits.ReadBits(nBitsPerValue);
- T |= bits.ReadBit() << 7;
-
- uint32_t C = 0;
-
- Bits<uint32_t> Tb(T);
- if (Tb(2, 4) == 7) {
- C = (Tb(5, 7) << 2) | Tb(0, 1);
- t[4] = t[3] = 2;
- } else {
- C = Tb(0, 4);
- if (Tb(5, 6) == 3) {
- t[4] = 2;
- t[3] = Tb[7];
- } else {
- t[4] = Tb[7];
- t[3] = Tb(5, 6);
- }
+ for (std::size_t i = 0; i < 3; ++i) {
+ IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Qus32, nBitsPerValue);
+ val.bit_value = m[i];
+ val.qus32_value = q[i];
+ }
+}
+
+// Returns a new instance of this struct that corresponds to the
+// can take no more than maxval values
+static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) {
+ while (maxVal > 0) {
+ u32 check = maxVal + 1;
+
+ // Is maxVal a power of two?
+ if (!(check & (check - 1))) {
+ return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal));
}
- Bits<uint32_t> Cb(C);
- if (Cb(0, 1) == 3) {
- t[2] = 2;
- t[1] = Cb[4];
- t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]);
- } else if (Cb(2, 3) == 3) {
- t[2] = 2;
- t[1] = 2;
- t[0] = Cb(0, 1);
- } else {
- t[2] = Cb[4];
- t[1] = Cb(2, 3);
- t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]);
+ // Is maxVal of the type 3*2^n - 1?
+ if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
+ return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1));
}
- for (uint32_t i = 0; i < 5; i++) {
- IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue);
- val.SetBitValue(m[i]);
- val.SetTritValue(t[i]);
- result.push_back(val);
+ // Is maxVal of the type 5*2^n - 1?
+ if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
+ return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1));
}
+
+ // Apparently it can't be represented with a bounded integer sequence...
+ // just iterate.
+ maxVal--;
}
+ return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
+}
- static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
- uint32_t nBitsPerValue) {
- // Implement the algorithm in section C.2.12
- uint32_t m[3];
- uint32_t q[3];
- uint32_t Q;
-
- // Read the trit encoded block according to
- // table C.2.15
- m[0] = bits.ReadBits(nBitsPerValue);
- Q = bits.ReadBits(3);
- m[1] = bits.ReadBits(nBitsPerValue);
- Q |= bits.ReadBits(2) << 3;
- m[2] = bits.ReadBits(nBitsPerValue);
- Q |= bits.ReadBits(2) << 5;
-
- Bits<uint32_t> Qb(Q);
- if (Qb(1, 2) == 3 && Qb(5, 6) == 0) {
- q[0] = q[1] = 4;
- q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]);
- } else {
- uint32_t C = 0;
- if (Qb(1, 2) == 3) {
- q[2] = 4;
- C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0];
- } else {
- q[2] = Qb(5, 6);
- C = Qb(0, 4);
- }
+static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
+ std::array<IntegerEncodedValue, 256> encodings{};
+ for (std::size_t i = 0; i < encodings.size(); ++i) {
+ encodings[i] = CreateEncoding(static_cast<u32>(i));
+ }
+ return encodings;
+}
- Bits<uint32_t> Cb(C);
- if (Cb(0, 2) == 5) {
- q[1] = 4;
- q[0] = Cb(3, 4);
- } else {
- q[1] = Cb(3, 4);
- q[0] = Cb(0, 2);
- }
- }
+static constexpr std::array EncodingsValues = MakeEncodedValues();
+
+// Fills result with the values that are encoded in the given
+// bitstream. We must know beforehand what the maximum possible
+// value is, and how many values we're decoding.
+static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits,
+ u32 maxRange, u32 nValues) {
+ // Determine encoding parameters
+ IntegerEncodedValue val = EncodingsValues[maxRange];
+
+ // Start decoding
+ u32 nValsDecoded = 0;
+ while (nValsDecoded < nValues) {
+ switch (val.encoding) {
+ case IntegerEncoding::Qus32:
+ DecodeQus32Block(bits, result, val.num_bits);
+ nValsDecoded += 3;
+ break;
+
+ case IntegerEncoding::Trit:
+ DecodeTritBlock(bits, result, val.num_bits);
+ nValsDecoded += 5;
+ break;
- for (uint32_t i = 0; i < 3; i++) {
- IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue);
- val.m_BitValue = m[i];
- val.m_QuintValue = q[i];
+ case IntegerEncoding::JustBits:
+ val.bit_value = bits.ReadBits(val.num_bits);
result.push_back(val);
+ nValsDecoded++;
+ break;
}
}
-};
+}
namespace ASTCC {
struct TexelWeightParams {
- uint32_t m_Width = 0;
- uint32_t m_Height = 0;
+ u32 m_Width = 0;
+ u32 m_Height = 0;
bool m_bDualPlane = false;
- uint32_t m_MaxWeight = 0;
+ u32 m_MaxWeight = 0;
bool m_bError = false;
bool m_bVoidExtentLDR = false;
bool m_bVoidExtentHDR = false;
- uint32_t GetPackedBitSize() const {
+ u32 GetPackedBitSize() const {
// How many indices do we have?
- uint32_t nIdxs = m_Height * m_Width;
+ u32 nIdxs = m_Height * m_Width;
if (m_bDualPlane) {
nIdxs *= 2;
}
- return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs);
+ return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs);
}
- uint32_t GetNumWeightValues() const {
- uint32_t ret = m_Width * m_Height;
+ u32 GetNumWeightValues() const {
+ u32 ret = m_Width * m_Height;
if (m_bDualPlane) {
ret *= 2;
}
@@ -422,7 +405,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
TexelWeightParams params;
// Read the entire block mode all at once
- uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11));
+ u16 modeBits = static_cast<u16>(strm.ReadBits<11>());
// Does this match the void extent block mode?
if ((modeBits & 0x01FF) == 0x1FC) {
@@ -457,7 +440,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
// of the block mode. Layout is determined by a number
// between 0 and 9 corresponding to table C.2.8 of the
// ASTC spec.
- uint32_t layout = 0;
+ u32 layout = 0;
if ((modeBits & 0x1) || (modeBits & 0x2)) {
// layout is in [0-4]
@@ -509,7 +492,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
assert(layout < 10);
// Determine R
- uint32_t R = !!(modeBits & 0x10);
+ u32 R = !!(modeBits & 0x10);
if (layout < 5) {
R |= (modeBits & 0x3) << 1;
} else {
@@ -520,54 +503,54 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
// Determine width & height
switch (layout) {
case 0: {
- uint32_t A = (modeBits >> 5) & 0x3;
- uint32_t B = (modeBits >> 7) & 0x3;
+ u32 A = (modeBits >> 5) & 0x3;
+ u32 B = (modeBits >> 7) & 0x3;
params.m_Width = B + 4;
params.m_Height = A + 2;
break;
}
case 1: {
- uint32_t A = (modeBits >> 5) & 0x3;
- uint32_t B = (modeBits >> 7) & 0x3;
+ u32 A = (modeBits >> 5) & 0x3;
+ u32 B = (modeBits >> 7) & 0x3;
params.m_Width = B + 8;
params.m_Height = A + 2;
break;
}
case 2: {
- uint32_t A = (modeBits >> 5) & 0x3;
- uint32_t B = (modeBits >> 7) & 0x3;
+ u32 A = (modeBits >> 5) & 0x3;
+ u32 B = (modeBits >> 7) & 0x3;
params.m_Width = A + 2;
params.m_Height = B + 8;
break;
}
case 3: {
- uint32_t A = (modeBits >> 5) & 0x3;
- uint32_t B = (modeBits >> 7) & 0x1;
+ u32 A = (modeBits >> 5) & 0x3;
+ u32 B = (modeBits >> 7) & 0x1;
params.m_Width = A + 2;
params.m_Height = B + 6;
break;
}
case 4: {
- uint32_t A = (modeBits >> 5) & 0x3;
- uint32_t B = (modeBits >> 7) & 0x1;
+ u32 A = (modeBits >> 5) & 0x3;
+ u32 B = (modeBits >> 7) & 0x1;
params.m_Width = B + 2;
params.m_Height = A + 2;
break;
}
case 5: {
- uint32_t A = (modeBits >> 5) & 0x3;
+ u32 A = (modeBits >> 5) & 0x3;
params.m_Width = 12;
params.m_Height = A + 2;
break;
}
case 6: {
- uint32_t A = (modeBits >> 5) & 0x3;
+ u32 A = (modeBits >> 5) & 0x3;
params.m_Width = A + 2;
params.m_Height = 12;
break;
@@ -586,15 +569,15 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
}
case 9: {
- uint32_t A = (modeBits >> 5) & 0x3;
- uint32_t B = (modeBits >> 9) & 0x3;
+ u32 A = (modeBits >> 5) & 0x3;
+ u32 B = (modeBits >> 9) & 0x3;
params.m_Width = A + 6;
params.m_Height = B + 6;
break;
}
default:
- assert(!"Don't know this layout...");
+ assert(false && "Don't know this layout...");
params.m_bError = true;
break;
}
@@ -605,10 +588,10 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
bool H = (layout != 9) && (modeBits & 0x200);
if (H) {
- const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31};
+ const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31};
params.m_MaxWeight = maxWeights[R - 2];
} else {
- const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7};
+ const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7};
params.m_MaxWeight = maxWeights[R - 2];
}
@@ -617,32 +600,32 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
return params;
}
-static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
- uint32_t blockHeight) {
+static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth,
+ u32 blockHeight) {
// Don't actually care about the void extent, just read the bits...
- for (int i = 0; i < 4; ++i) {
- strm.ReadBits(13);
+ for (s32 i = 0; i < 4; ++i) {
+ strm.ReadBits<13>();
}
// Decode the RGBA components and renormalize them to the range [0, 255]
- uint16_t r = static_cast<uint16_t>(strm.ReadBits(16));
- uint16_t g = static_cast<uint16_t>(strm.ReadBits(16));
- uint16_t b = static_cast<uint16_t>(strm.ReadBits(16));
- uint16_t a = static_cast<uint16_t>(strm.ReadBits(16));
+ u16 r = static_cast<u16>(strm.ReadBits<16>());
+ u16 g = static_cast<u16>(strm.ReadBits<16>());
+ u16 b = static_cast<u16>(strm.ReadBits<16>());
+ u16 a = static_cast<u16>(strm.ReadBits<16>());
- uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 |
- (static_cast<uint32_t>(a) & 0xFF00) << 16;
+ u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
+ (static_cast<u32>(a) & 0xFF00) << 16;
- for (uint32_t j = 0; j < blockHeight; j++) {
- for (uint32_t i = 0; i < blockWidth; i++) {
+ for (u32 j = 0; j < blockHeight; j++) {
+ for (u32 i = 0; i < blockWidth; i++) {
outBuf[j * blockWidth + i] = rgba;
}
}
}
-static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) {
- for (uint32_t j = 0; j < blockHeight; j++) {
- for (uint32_t i = 0; i < blockWidth; i++) {
+static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
+ for (u32 j = 0; j < blockHeight; j++) {
+ for (u32 i = 0; i < blockWidth; i++) {
outBuf[j * blockWidth + i] = 0xFFFF00FF;
}
}
@@ -651,18 +634,18 @@ static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeigh
// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
// is the same as [(numBits - 1):0] and repeats all the way down.
template <typename IntType>
-static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
+static IntType Replicate(IntType val, u32 numBits, u32 toBit) {
if (numBits == 0)
return 0;
if (toBit == 0)
return 0;
IntType v = val & static_cast<IntType>((1 << numBits) - 1);
IntType res = v;
- uint32_t reslen = numBits;
+ u32 reslen = numBits;
while (reslen < toBit) {
- uint32_t comp = 0;
+ u32 comp = 0;
if (numBits > toBit - reslen) {
- uint32_t newshift = toBit - reslen;
+ u32 newshift = toBit - reslen;
comp = numBits - newshift;
numBits = newshift;
}
@@ -675,14 +658,14 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
class Pixel {
protected:
- using ChannelType = int16_t;
- uint8_t m_BitDepth[4] = {8, 8, 8, 8};
- int16_t color[4] = {};
+ using ChannelType = s16;
+ u8 m_BitDepth[4] = {8, 8, 8, 8};
+ s16 color[4] = {};
public:
Pixel() = default;
- Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8)
- : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)},
+ Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8)
+ : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)},
color{static_cast<ChannelType>(a), static_cast<ChannelType>(r),
static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {}
@@ -691,22 +674,22 @@ public:
// significant bits when going from larger to smaller bit depth
// or by repeating the most significant bits when going from
// smaller to larger bit depths.
- void ChangeBitDepth(const uint8_t (&depth)[4]) {
- for (uint32_t i = 0; i < 4; i++) {
+ void ChangeBitDepth(const u8 (&depth)[4]) {
+ for (u32 i = 0; i < 4; i++) {
Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]);
m_BitDepth[i] = depth[i];
}
}
template <typename IntType>
- static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) {
+ static float ConvertChannelToFloat(IntType channel, u8 bitDepth) {
float denominator = static_cast<float>((1 << bitDepth) - 1);
return static_cast<float>(channel) / denominator;
}
// Changes the bit depth of a single component. See the comment
// above for how we do this.
- static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) {
+ static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) {
assert(newDepth <= 8);
assert(oldDepth <= 8);
@@ -722,16 +705,15 @@ public:
if (newDepth == 0) {
return 0xFF;
} else {
- uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth);
- uint16_t v = static_cast<uint16_t>(val);
- v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
- v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v),
- static_cast<uint16_t>((1 << newDepth) - 1));
- return static_cast<uint8_t>(v);
+ u8 bitsWasted = static_cast<u8>(oldDepth - newDepth);
+ u16 v = static_cast<u16>(val);
+ v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
+ v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1));
+ return static_cast<u8>(v);
}
}
- assert(!"We shouldn't get here.");
+ assert(false && "We shouldn't get here.");
return 0;
}
@@ -759,15 +741,15 @@ public:
ChannelType& B() {
return color[3];
}
- const ChannelType& Component(uint32_t idx) const {
+ const ChannelType& Component(u32 idx) const {
return color[idx];
}
- ChannelType& Component(uint32_t idx) {
+ ChannelType& Component(u32 idx) {
return color[idx];
}
- void GetBitDepth(uint8_t (&outDepth)[4]) const {
- for (int i = 0; i < 4; i++) {
+ void GetBitDepth(u8 (&outDepth)[4]) const {
+ for (s32 i = 0; i < 4; i++) {
outDepth[i] = m_BitDepth[i];
}
}
@@ -776,12 +758,12 @@ public:
// and then pack each channel into an R8G8B8A8 32-bit integer. We assume
// that the architecture is little-endian, so the alpha channel will end
// up in the most-significant byte.
- uint32_t Pack() const {
+ u32 Pack() const {
Pixel eightBit(*this);
- const uint8_t eightBitDepth[4] = {8, 8, 8, 8};
+ const u8 eightBitDepth[4] = {8, 8, 8, 8};
eightBit.ChangeBitDepth(eightBitDepth);
- uint32_t r = 0;
+ u32 r = 0;
r |= eightBit.A();
r <<= 8;
r |= eightBit.B();
@@ -794,7 +776,7 @@ public:
// Clamps the pixel to the range [0,255]
void ClampByte() {
- for (uint32_t i = 0; i < 4; i++) {
+ for (u32 i = 0; i < 4; i++) {
color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]);
}
}
@@ -804,24 +786,24 @@ public:
}
};
-static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* modes,
- const uint32_t nPartitions, const uint32_t nBitsForColorData) {
+static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions,
+ const u32 nBitsForColorData) {
// First figure out how many color values we have
- uint32_t nValues = 0;
- for (uint32_t i = 0; i < nPartitions; i++) {
+ u32 nValues = 0;
+ for (u32 i = 0; i < nPartitions; i++) {
nValues += ((modes[i] >> 2) + 1) << 1;
}
// Then based on the number of values and the remaining number of bits,
// figure out the max value for each of them...
- uint32_t range = 256;
+ u32 range = 256;
while (--range > 0) {
- IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range);
- uint32_t bitLength = val.GetBitLength(nValues);
+ IntegerEncodedValue val = EncodingsValues[range];
+ u32 bitLength = val.GetBitLength(nValues);
if (bitLength <= nBitsForColorData) {
// Find the smallest possible range that matches the given encoding
while (--range > 0) {
- IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range);
+ IntegerEncodedValue newval = EncodingsValues[range];
if (!newval.MatchesEncoding(val)) {
break;
}
@@ -835,12 +817,14 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
// We now have enough to decode our integer sequence.
std::vector<IntegerEncodedValue> decodedColorValues;
+ decodedColorValues.reserve(32);
+
InputBitStream colorStream(data);
- IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
+ DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
// Once we have the decoded values, we need to dequantize them to the 0-255 range
// This procedure is outlined in ASTC spec C.2.13
- uint32_t outIdx = 0;
+ u32 outIdx = 0;
for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) {
// Have we already decoded all that we need?
if (outIdx >= nValues) {
@@ -848,25 +832,25 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
}
const IntegerEncodedValue& val = *itr;
- uint32_t bitlen = val.BaseBitLength();
- uint32_t bitval = val.GetBitValue();
+ u32 bitlen = val.num_bits;
+ u32 bitval = val.bit_value;
assert(bitlen >= 1);
- uint32_t A = 0, B = 0, C = 0, D = 0;
+ u32 A = 0, B = 0, C = 0, D = 0;
// A is just the lsb replicated 9 times.
A = Replicate(bitval & 1, 1, 9);
- switch (val.GetEncoding()) {
+ switch (val.encoding) {
// Replicate bits
- case eIntegerEncoding_JustBits:
+ case IntegerEncoding::JustBits:
out[outIdx++] = Replicate(bitval, bitlen, 8);
break;
// Use algorithm in C.2.13
- case eIntegerEncoding_Trit: {
+ case IntegerEncoding::Trit: {
- D = val.GetTritValue();
+ D = val.trit_value;
switch (bitlen) {
case 1: {
@@ -876,48 +860,48 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
case 2: {
C = 93;
// B = b000b0bb0
- uint32_t b = (bitval >> 1) & 1;
+ u32 b = (bitval >> 1) & 1;
B = (b << 8) | (b << 4) | (b << 2) | (b << 1);
} break;
case 3: {
C = 44;
// B = cb000cbcb
- uint32_t cb = (bitval >> 1) & 3;
+ u32 cb = (bitval >> 1) & 3;
B = (cb << 7) | (cb << 2) | cb;
} break;
case 4: {
C = 22;
// B = dcb000dcb
- uint32_t dcb = (bitval >> 1) & 7;
+ u32 dcb = (bitval >> 1) & 7;
B = (dcb << 6) | dcb;
} break;
case 5: {
C = 11;
// B = edcb000ed
- uint32_t edcb = (bitval >> 1) & 0xF;
+ u32 edcb = (bitval >> 1) & 0xF;
B = (edcb << 5) | (edcb >> 2);
} break;
case 6: {
C = 5;
// B = fedcb000f
- uint32_t fedcb = (bitval >> 1) & 0x1F;
+ u32 fedcb = (bitval >> 1) & 0x1F;
B = (fedcb << 4) | (fedcb >> 4);
} break;
default:
- assert(!"Unsupported trit encoding for color values!");
+ assert(false && "Unsupported trit encoding for color values!");
break;
} // switch(bitlen)
- } // case eIntegerEncoding_Trit
+ } // case IntegerEncoding::Trit
break;
- case eIntegerEncoding_Quint: {
+ case IntegerEncoding::Qus32: {
- D = val.GetQuintValue();
+ D = val.qus32_value;
switch (bitlen) {
case 1: {
@@ -927,41 +911,41 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
case 2: {
C = 54;
// B = b0000bb00
- uint32_t b = (bitval >> 1) & 1;
+ u32 b = (bitval >> 1) & 1;
B = (b << 8) | (b << 3) | (b << 2);
} break;
case 3: {
C = 26;
// B = cb0000cbc
- uint32_t cb = (bitval >> 1) & 3;
+ u32 cb = (bitval >> 1) & 3;
B = (cb << 7) | (cb << 1) | (cb >> 1);
} break;
case 4: {
C = 13;
// B = dcb0000dc
- uint32_t dcb = (bitval >> 1) & 7;
+ u32 dcb = (bitval >> 1) & 7;
B = (dcb << 6) | (dcb >> 1);
} break;
case 5: {
C = 6;
// B = edcb0000e
- uint32_t edcb = (bitval >> 1) & 0xF;
+ u32 edcb = (bitval >> 1) & 0xF;
B = (edcb << 5) | (edcb >> 3);
} break;
default:
- assert(!"Unsupported quint encoding for color values!");
+ assert(false && "Unsupported quint encoding for color values!");
break;
} // switch(bitlen)
- } // case eIntegerEncoding_Quint
+ } // case IntegerEncoding::Qus32
break;
- } // switch(val.GetEncoding())
+ } // switch(val.encoding)
- if (val.GetEncoding() != eIntegerEncoding_JustBits) {
- uint32_t T = D * C + B;
+ if (val.encoding != IntegerEncoding::JustBits) {
+ u32 T = D * C + B;
T ^= A;
T = (A & 0x80) | (T >> 2);
out[outIdx++] = T;
@@ -969,31 +953,31 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
}
// Make sure that each of our values is in the proper range...
- for (uint32_t i = 0; i < nValues; i++) {
+ for (u32 i = 0; i < nValues; i++) {
assert(out[i] <= 255);
}
}
-static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
- uint32_t bitval = val.GetBitValue();
- uint32_t bitlen = val.BaseBitLength();
+static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
+ u32 bitval = val.bit_value;
+ u32 bitlen = val.num_bits;
- uint32_t A = Replicate(bitval & 1, 1, 7);
- uint32_t B = 0, C = 0, D = 0;
+ u32 A = Replicate(bitval & 1, 1, 7);
+ u32 B = 0, C = 0, D = 0;
- uint32_t result = 0;
- switch (val.GetEncoding()) {
- case eIntegerEncoding_JustBits:
+ u32 result = 0;
+ switch (val.encoding) {
+ case IntegerEncoding::JustBits:
result = Replicate(bitval, bitlen, 6);
break;
- case eIntegerEncoding_Trit: {
- D = val.GetTritValue();
+ case IntegerEncoding::Trit: {
+ D = val.trit_value;
assert(D < 3);
switch (bitlen) {
case 0: {
- uint32_t results[3] = {0, 32, 63};
+ u32 results[3] = {0, 32, 63};
result = results[D];
} break;
@@ -1003,29 +987,29 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
case 2: {
C = 23;
- uint32_t b = (bitval >> 1) & 1;
+ u32 b = (bitval >> 1) & 1;
B = (b << 6) | (b << 2) | b;
} break;
case 3: {
C = 11;
- uint32_t cb = (bitval >> 1) & 3;
+ u32 cb = (bitval >> 1) & 3;
B = (cb << 5) | cb;
} break;
default:
- assert(!"Invalid trit encoding for texel weight");
+ assert(false && "Invalid trit encoding for texel weight");
break;
}
} break;
- case eIntegerEncoding_Quint: {
- D = val.GetQuintValue();
+ case IntegerEncoding::Qus32: {
+ D = val.qus32_value;
assert(D < 5);
switch (bitlen) {
case 0: {
- uint32_t results[5] = {0, 16, 32, 47, 63};
+ u32 results[5] = {0, 16, 32, 47, 63};
result = results[D];
} break;
@@ -1035,18 +1019,18 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
case 2: {
C = 13;
- uint32_t b = (bitval >> 1) & 1;
+ u32 b = (bitval >> 1) & 1;
B = (b << 6) | (b << 1);
} break;
default:
- assert(!"Invalid quint encoding for texel weight");
+ assert(false && "Invalid quint encoding for texel weight");
break;
}
} break;
}
- if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) {
+ if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) {
// Decode the value...
result = D * C + B;
result ^= A;
@@ -1063,12 +1047,11 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
return result;
}
-static void UnquantizeTexelWeights(uint32_t out[2][144],
- const std::vector<IntegerEncodedValue>& weights,
- const TexelWeightParams& params, const uint32_t blockWidth,
- const uint32_t blockHeight) {
- uint32_t weightIdx = 0;
- uint32_t unquantized[2][144];
+static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights,
+ const TexelWeightParams& params, const u32 blockWidth,
+ const u32 blockHeight) {
+ u32 weightIdx = 0;
+ u32 unquantized[2][144];
for (auto itr = weights.begin(); itr != weights.end(); ++itr) {
unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr);
@@ -1086,34 +1069,34 @@ static void UnquantizeTexelWeights(uint32_t out[2][144],
}
// Do infill if necessary (Section C.2.18) ...
- uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1);
- uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1);
+ u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1);
+ u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1);
- const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U;
- for (uint32_t plane = 0; plane < kPlaneScale; plane++)
- for (uint32_t t = 0; t < blockHeight; t++)
- for (uint32_t s = 0; s < blockWidth; s++) {
- uint32_t cs = Ds * s;
- uint32_t ct = Dt * t;
+ const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U;
+ for (u32 plane = 0; plane < kPlaneScale; plane++)
+ for (u32 t = 0; t < blockHeight; t++)
+ for (u32 s = 0; s < blockWidth; s++) {
+ u32 cs = Ds * s;
+ u32 ct = Dt * t;
- uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6;
- uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6;
+ u32 gs = (cs * (params.m_Width - 1) + 32) >> 6;
+ u32 gt = (ct * (params.m_Height - 1) + 32) >> 6;
- uint32_t js = gs >> 4;
- uint32_t fs = gs & 0xF;
+ u32 js = gs >> 4;
+ u32 fs = gs & 0xF;
- uint32_t jt = gt >> 4;
- uint32_t ft = gt & 0x0F;
+ u32 jt = gt >> 4;
+ u32 ft = gt & 0x0F;
- uint32_t w11 = (fs * ft + 8) >> 4;
- uint32_t w10 = ft - w11;
- uint32_t w01 = fs - w11;
- uint32_t w00 = 16 - fs - ft + w11;
+ u32 w11 = (fs * ft + 8) >> 4;
+ u32 w10 = ft - w11;
+ u32 w01 = fs - w11;
+ u32 w00 = 16 - fs - ft + w11;
- uint32_t v0 = js + jt * params.m_Width;
+ u32 v0 = js + jt * params.m_Width;
#define FIND_TEXEL(tidx, bidx) \
- uint32_t p##bidx = 0; \
+ u32 p##bidx = 0; \
do { \
if ((tidx) < (params.m_Width * params.m_Height)) { \
p##bidx = unquantized[plane][(tidx)]; \
@@ -1133,7 +1116,7 @@ static void UnquantizeTexelWeights(uint32_t out[2][144],
}
// Transfers a bit as described in C.2.14
-static inline void BitTransferSigned(int32_t& a, int32_t& b) {
+static inline void BitTransferSigned(s32& a, s32& b) {
b >>= 1;
b |= a & 0x80;
a >>= 1;
@@ -1144,14 +1127,14 @@ static inline void BitTransferSigned(int32_t& a, int32_t& b) {
// Adds more precision to the blue channel as described
// in C.2.14
-static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) {
- return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1),
- static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b));
+static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) {
+ return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1),
+ static_cast<s16>((g + b) >> 1), static_cast<s16>(b));
}
// Partition selection functions as specified in
// C.2.21
-static inline uint32_t hash52(uint32_t p) {
+static inline u32 hash52(u32 p) {
p ^= p >> 15;
p -= p << 17;
p += p << 7;
@@ -1165,8 +1148,7 @@ static inline uint32_t hash52(uint32_t p) {
return p;
}
-static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
- int32_t partitionCount, int32_t smallBlock) {
+static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) {
if (1 == partitionCount)
return 0;
@@ -1178,34 +1160,34 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
seed += (partitionCount - 1) * 1024;
- uint32_t rnum = hash52(static_cast<uint32_t>(seed));
- uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF);
- uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF);
- uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF);
- uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF);
- uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF);
- uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF);
- uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF);
- uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF);
- uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF);
- uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF);
- uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF);
- uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF);
-
- seed1 = static_cast<uint8_t>(seed1 * seed1);
- seed2 = static_cast<uint8_t>(seed2 * seed2);
- seed3 = static_cast<uint8_t>(seed3 * seed3);
- seed4 = static_cast<uint8_t>(seed4 * seed4);
- seed5 = static_cast<uint8_t>(seed5 * seed5);
- seed6 = static_cast<uint8_t>(seed6 * seed6);
- seed7 = static_cast<uint8_t>(seed7 * seed7);
- seed8 = static_cast<uint8_t>(seed8 * seed8);
- seed9 = static_cast<uint8_t>(seed9 * seed9);
- seed10 = static_cast<uint8_t>(seed10 * seed10);
- seed11 = static_cast<uint8_t>(seed11 * seed11);
- seed12 = static_cast<uint8_t>(seed12 * seed12);
-
- int32_t sh1, sh2, sh3;
+ u32 rnum = hash52(static_cast<u32>(seed));
+ u8 seed1 = static_cast<u8>(rnum & 0xF);
+ u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF);
+ u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF);
+ u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF);
+ u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF);
+ u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF);
+ u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF);
+ u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF);
+ u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF);
+ u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF);
+ u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF);
+ u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF);
+
+ seed1 = static_cast<u8>(seed1 * seed1);
+ seed2 = static_cast<u8>(seed2 * seed2);
+ seed3 = static_cast<u8>(seed3 * seed3);
+ seed4 = static_cast<u8>(seed4 * seed4);
+ seed5 = static_cast<u8>(seed5 * seed5);
+ seed6 = static_cast<u8>(seed6 * seed6);
+ seed7 = static_cast<u8>(seed7 * seed7);
+ seed8 = static_cast<u8>(seed8 * seed8);
+ seed9 = static_cast<u8>(seed9 * seed9);
+ seed10 = static_cast<u8>(seed10 * seed10);
+ seed11 = static_cast<u8>(seed11 * seed11);
+ seed12 = static_cast<u8>(seed12 * seed12);
+
+ s32 sh1, sh2, sh3;
if (seed & 1) {
sh1 = (seed & 2) ? 4 : 5;
sh2 = (partitionCount == 3) ? 6 : 5;
@@ -1215,23 +1197,23 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
}
sh3 = (seed & 0x10) ? sh1 : sh2;
- seed1 = static_cast<uint8_t>(seed1 >> sh1);
- seed2 = static_cast<uint8_t>(seed2 >> sh2);
- seed3 = static_cast<uint8_t>(seed3 >> sh1);
- seed4 = static_cast<uint8_t>(seed4 >> sh2);
- seed5 = static_cast<uint8_t>(seed5 >> sh1);
- seed6 = static_cast<uint8_t>(seed6 >> sh2);
- seed7 = static_cast<uint8_t>(seed7 >> sh1);
- seed8 = static_cast<uint8_t>(seed8 >> sh2);
- seed9 = static_cast<uint8_t>(seed9 >> sh3);
- seed10 = static_cast<uint8_t>(seed10 >> sh3);
- seed11 = static_cast<uint8_t>(seed11 >> sh3);
- seed12 = static_cast<uint8_t>(seed12 >> sh3);
-
- int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
- int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
- int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
- int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
+ seed1 = static_cast<u8>(seed1 >> sh1);
+ seed2 = static_cast<u8>(seed2 >> sh2);
+ seed3 = static_cast<u8>(seed3 >> sh1);
+ seed4 = static_cast<u8>(seed4 >> sh2);
+ seed5 = static_cast<u8>(seed5 >> sh1);
+ seed6 = static_cast<u8>(seed6 >> sh2);
+ seed7 = static_cast<u8>(seed7 >> sh1);
+ seed8 = static_cast<u8>(seed8 >> sh2);
+ seed9 = static_cast<u8>(seed9 >> sh3);
+ seed10 = static_cast<u8>(seed10 >> sh3);
+ seed11 = static_cast<u8>(seed11 >> sh3);
+ seed12 = static_cast<u8>(seed12 >> sh3);
+
+ s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
+ s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
+ s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
+ s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
a &= 0x3F;
b &= 0x3F;
@@ -1252,27 +1234,26 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
return 3;
}
-static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount,
- int32_t smallBlock) {
+static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) {
return SelectPartition(seed, x, y, 0, partitionCount, smallBlock);
}
// Section C.2.14
-static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues,
- uint32_t colorEndpointMode) {
+static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
+ u32 colorEndpos32Mode) {
#define READ_UINT_VALUES(N) \
- uint32_t v[N]; \
- for (uint32_t i = 0; i < N; i++) { \
+ u32 v[N]; \
+ for (u32 i = 0; i < N; i++) { \
v[i] = *(colorValues++); \
}
#define READ_INT_VALUES(N) \
- int32_t v[N]; \
- for (uint32_t i = 0; i < N; i++) { \
- v[i] = static_cast<int32_t>(*(colorValues++)); \
+ s32 v[N]; \
+ for (u32 i = 0; i < N; i++) { \
+ v[i] = static_cast<s32>(*(colorValues++)); \
}
- switch (colorEndpointMode) {
+ switch (colorEndpos32Mode) {
case 0: {
READ_UINT_VALUES(2)
ep1 = Pixel(0xFF, v[0], v[0], v[0]);
@@ -1281,8 +1262,8 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
case 1: {
READ_UINT_VALUES(2)
- uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0);
- uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU);
+ u32 L0 = (v[0] >> 2) | (v[1] & 0xC0);
+ u32 L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU);
ep1 = Pixel(0xFF, L0, L0, L0);
ep2 = Pixel(0xFF, L1, L1, L1);
} break;
@@ -1371,7 +1352,7 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
} break;
default:
- assert(!"Unsupported color endpoint mode (is it HDR?)");
+ assert(false && "Unsupported color endpoint mode (is it HDR?)");
break;
}
@@ -1379,14 +1360,14 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
#undef READ_INT_VALUES
}
-static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
- const uint32_t blockHeight, uint32_t* outBuf) {
+static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight,
+ u32* outBuf) {
InputBitStream strm(inBuf);
TexelWeightParams weightParams = DecodeBlockInfo(strm);
// Was there an error?
if (weightParams.m_bError) {
- assert(!"Invalid block mode");
+ assert(false && "Invalid block mode");
FillError(outBuf, blockWidth, blockHeight);
return;
}
@@ -1397,63 +1378,63 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
}
if (weightParams.m_bVoidExtentHDR) {
- assert(!"HDR void extent blocks are unsupported!");
+ assert(false && "HDR void extent blocks are unsupported!");
FillError(outBuf, blockWidth, blockHeight);
return;
}
if (weightParams.m_Width > blockWidth) {
- assert(!"Texel weight grid width should be smaller than block width");
+ assert(false && "Texel weight grid width should be smaller than block width");
FillError(outBuf, blockWidth, blockHeight);
return;
}
if (weightParams.m_Height > blockHeight) {
- assert(!"Texel weight grid height should be smaller than block height");
+ assert(false && "Texel weight grid height should be smaller than block height");
FillError(outBuf, blockWidth, blockHeight);
return;
}
// Read num partitions
- uint32_t nPartitions = strm.ReadBits(2) + 1;
+ u32 nPartitions = strm.ReadBits<2>() + 1;
assert(nPartitions <= 4);
if (nPartitions == 4 && weightParams.m_bDualPlane) {
- assert(!"Dual plane mode is incompatible with four partition blocks");
+ assert(false && "Dual plane mode is incompatible with four partition blocks");
FillError(outBuf, blockWidth, blockHeight);
return;
}
- // Based on the number of partitions, read the color endpoint mode for
+ // Based on the number of partitions, read the color endpos32 mode for
// each partition.
- // Determine partitions, partition index, and color endpoint modes
- int32_t planeIdx = -1;
- uint32_t partitionIndex;
- uint32_t colorEndpointMode[4] = {0, 0, 0, 0};
+ // Determine partitions, partition index, and color endpos32 modes
+ s32 planeIdx = -1;
+ u32 partitionIndex;
+ u32 colorEndpos32Mode[4] = {0, 0, 0, 0};
// Define color data.
- uint8_t colorEndpointData[16];
- memset(colorEndpointData, 0, sizeof(colorEndpointData));
- OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
+ u8 colorEndpos32Data[16];
+ memset(colorEndpos32Data, 0, sizeof(colorEndpos32Data));
+ OutputBitStream colorEndpos32Stream(colorEndpos32Data, 16 * 8, 0);
// Read extra config data...
- uint32_t baseCEM = 0;
+ u32 baseCEM = 0;
if (nPartitions == 1) {
- colorEndpointMode[0] = strm.ReadBits(4);
+ colorEndpos32Mode[0] = strm.ReadBits<4>();
partitionIndex = 0;
} else {
- partitionIndex = strm.ReadBits(10);
- baseCEM = strm.ReadBits(6);
+ partitionIndex = strm.ReadBits<10>();
+ baseCEM = strm.ReadBits<6>();
}
- uint32_t baseMode = (baseCEM & 3);
+ u32 baseMode = (baseCEM & 3);
- // Remaining bits are color endpoint data...
- uint32_t nWeightBits = weightParams.GetPackedBitSize();
- int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead();
+ // Remaining bits are color endpos32 data...
+ u32 nWeightBits = weightParams.GetPackedBitSize();
+ s32 remainingBits = 128 - nWeightBits - static_cast<s32>(strm.GetBitsRead());
// Consider extra bits prior to texel data...
- uint32_t extraCEMbits = 0;
+ u32 extraCEMbits = 0;
if (baseMode) {
switch (nPartitions) {
case 2:
@@ -1473,18 +1454,18 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
remainingBits -= extraCEMbits;
// Do we have a dual plane situation?
- uint32_t planeSelectorBits = 0;
+ u32 planeSelectorBits = 0;
if (weightParams.m_bDualPlane) {
planeSelectorBits = 2;
}
remainingBits -= planeSelectorBits;
// Read color data...
- uint32_t colorDataBits = remainingBits;
+ u32 colorDataBits = remainingBits;
while (remainingBits > 0) {
- uint32_t nb = std::min(remainingBits, 8);
- uint32_t b = strm.ReadBits(nb);
- colorEndpointStream.WriteBits(b, nb);
+ u32 nb = std::min(remainingBits, 8);
+ u32 b = strm.ReadBits(nb);
+ colorEndpos32Stream.WriteBits(b, nb);
remainingBits -= 8;
}
@@ -1493,64 +1474,64 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
// Read the rest of the CEM
if (baseMode) {
- uint32_t extraCEM = strm.ReadBits(extraCEMbits);
- uint32_t CEM = (extraCEM << 6) | baseCEM;
+ u32 extraCEM = strm.ReadBits(extraCEMbits);
+ u32 CEM = (extraCEM << 6) | baseCEM;
CEM >>= 2;
bool C[4] = {0};
- for (uint32_t i = 0; i < nPartitions; i++) {
+ for (u32 i = 0; i < nPartitions; i++) {
C[i] = CEM & 1;
CEM >>= 1;
}
- uint8_t M[4] = {0};
- for (uint32_t i = 0; i < nPartitions; i++) {
+ u8 M[4] = {0};
+ for (u32 i = 0; i < nPartitions; i++) {
M[i] = CEM & 3;
CEM >>= 2;
assert(M[i] <= 3);
}
- for (uint32_t i = 0; i < nPartitions; i++) {
- colorEndpointMode[i] = baseMode;
+ for (u32 i = 0; i < nPartitions; i++) {
+ colorEndpos32Mode[i] = baseMode;
if (!(C[i]))
- colorEndpointMode[i] -= 1;
- colorEndpointMode[i] <<= 2;
- colorEndpointMode[i] |= M[i];
+ colorEndpos32Mode[i] -= 1;
+ colorEndpos32Mode[i] <<= 2;
+ colorEndpos32Mode[i] |= M[i];
}
} else if (nPartitions > 1) {
- uint32_t CEM = baseCEM >> 2;
- for (uint32_t i = 0; i < nPartitions; i++) {
- colorEndpointMode[i] = CEM;
+ u32 CEM = baseCEM >> 2;
+ for (u32 i = 0; i < nPartitions; i++) {
+ colorEndpos32Mode[i] = CEM;
}
}
// Make sure everything up till here is sane.
- for (uint32_t i = 0; i < nPartitions; i++) {
- assert(colorEndpointMode[i] < 16);
+ for (u32 i = 0; i < nPartitions; i++) {
+ assert(colorEndpos32Mode[i] < 16);
}
assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128);
// Decode both color data and texel weight data
- uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions
- DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions,
+ u32 colorValues[32]; // Four values, two endpos32s, four maximum paritions
+ DecodeColorValues(colorValues, colorEndpos32Data, colorEndpos32Mode, nPartitions,
colorDataBits);
- Pixel endpoints[4][2];
- const uint32_t* colorValuesPtr = colorValues;
- for (uint32_t i = 0; i < nPartitions; i++) {
- ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]);
+ Pixel endpos32s[4][2];
+ const u32* colorValuesPtr = colorValues;
+ for (u32 i = 0; i < nPartitions; i++) {
+ ComputeEndpos32s(endpos32s[i][0], endpos32s[i][1], colorValuesPtr, colorEndpos32Mode[i]);
}
// Read the texel weight data..
- uint8_t texelWeightData[16];
+ u8 texelWeightData[16];
memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
// Reverse everything
- for (uint32_t i = 0; i < 8; i++) {
+ for (u32 i = 0; i < 8; i++) {
// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits
#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32
- unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i]));
- unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i]));
+ u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i]));
+ u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i]));
#undef REVERSE_BYTE
texelWeightData[i] = b;
@@ -1558,50 +1539,51 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
}
// Make sure that higher non-texel bits are set to zero
- const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
+ const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
texelWeightData[clearByteStart - 1] =
texelWeightData[clearByteStart - 1] &
- static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
+ static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
std::vector<IntegerEncodedValue> texelWeightValues;
+ texelWeightValues.reserve(64);
+
InputBitStream weightStream(texelWeightData);
- IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
- weightParams.m_MaxWeight,
- weightParams.GetNumWeightValues());
+ DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
+ weightParams.GetNumWeightValues());
// Blocks can be at most 12x12, so we can have as many as 144 weights
- uint32_t weights[2][144];
+ u32 weights[2][144];
UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight);
- // Now that we have endpoints and weights, we can interpolate and generate
+ // Now that we have endpos32s and weights, we can s32erpolate and generate
// the proper decoding...
- for (uint32_t j = 0; j < blockHeight; j++)
- for (uint32_t i = 0; i < blockWidth; i++) {
- uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions,
- (blockHeight * blockWidth) < 32);
+ for (u32 j = 0; j < blockHeight; j++)
+ for (u32 i = 0; i < blockWidth; i++) {
+ u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions,
+ (blockHeight * blockWidth) < 32);
assert(partition < nPartitions);
Pixel p;
- for (uint32_t c = 0; c < 4; c++) {
- uint32_t C0 = endpoints[partition][0].Component(c);
+ for (u32 c = 0; c < 4; c++) {
+ u32 C0 = endpos32s[partition][0].Component(c);
C0 = Replicate(C0, 8, 16);
- uint32_t C1 = endpoints[partition][1].Component(c);
+ u32 C1 = endpos32s[partition][1].Component(c);
C1 = Replicate(C1, 8, 16);
- uint32_t plane = 0;
+ u32 plane = 0;
if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {
plane = 1;
}
- uint32_t weight = weights[plane][j * blockWidth + i];
- uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64;
+ u32 weight = weights[plane][j * blockWidth + i];
+ u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64;
if (C == 65535) {
p.Component(c) = 255;
} else {
double Cf = static_cast<double>(C);
- p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5);
+ p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5);
}
}
@@ -1613,26 +1595,26 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
namespace Tegra::Texture::ASTC {
-std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
- uint32_t depth, uint32_t block_width, uint32_t block_height) {
- uint32_t blockIdx = 0;
+std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width,
+ u32 block_height) {
+ u32 blockIdx = 0;
std::size_t depth_offset = 0;
- std::vector<uint8_t> outData(height * width * depth * 4);
- for (uint32_t k = 0; k < depth; k++) {
- for (uint32_t j = 0; j < height; j += block_height) {
- for (uint32_t i = 0; i < width; i += block_width) {
+ std::vector<u8> outData(height * width * depth * 4);
+ for (u32 k = 0; k < depth; k++) {
+ for (u32 j = 0; j < height; j += block_height) {
+ for (u32 i = 0; i < width; i += block_width) {
- const uint8_t* blockPtr = data + blockIdx * 16;
+ const u8* blockPtr = data + blockIdx * 16;
// Blocks can be at most 12x12
- uint32_t uncompData[144];
+ u32 uncompData[144];
ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
- uint32_t decompWidth = std::min(block_width, width - i);
- uint32_t decompHeight = std::min(block_height, height - j);
+ u32 decompWidth = std::min(block_width, width - i);
+ u32 decompHeight = std::min(block_height, height - j);
- uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4;
- for (uint32_t jj = 0; jj < decompHeight; jj++) {
+ u8* outRow = depth_offset + outData.data() + (j * width + i) * 4;
+ for (u32 jj = 0; jj < decompHeight; jj++) {
memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
}
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 8e82c6748..7edc4abe1 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -8,6 +8,7 @@
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
+#include "core/settings.h"
namespace Tegra::Texture {
@@ -294,6 +295,14 @@ enum class TextureMipmapFilter : u32 {
Linear = 3,
};
+enum class Anisotropy {
+ Default,
+ Filter2x,
+ Filter4x,
+ Filter8x,
+ Filter16x,
+};
+
struct TSCEntry {
union {
struct {
@@ -328,7 +337,22 @@ struct TSCEntry {
};
float GetMaxAnisotropy() const {
- return static_cast<float>(1U << max_anisotropy);
+ const u32 min_value = [] {
+ switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
+ default:
+ case Anisotropy::Default:
+ return 1U;
+ case Anisotropy::Filter2x:
+ return 2U;
+ case Anisotropy::Filter4x:
+ return 4U;
+ case Anisotropy::Filter8x:
+ return 8U;
+ case Anisotropy::Filter16x:
+ return 16U;
+ }
+ }();
+ return static_cast<float>(std::max(1U << max_anisotropy, min_value));
}
float GetMinLod() const {