summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/engines
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- src/video_core/engines/const_buffer_engine_interface.h 119
-rw-r--r-- src/video_core/engines/fermi_2d.h 12
-rw-r--r-- src/video_core/engines/kepler_compute.cpp 30
-rw-r--r-- src/video_core/engines/kepler_compute.h 39
-rw-r--r-- src/video_core/engines/kepler_memory.h 4
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 128
-rw-r--r-- src/video_core/engines/maxwell_3d.h 140
-rw-r--r-- src/video_core/engines/maxwell_dma.h 10
-rw-r--r-- src/video_core/engines/shader_bytecode.h 56
-rw-r--r-- src/video_core/engines/shader_header.h 50
10 files changed, 368 insertions, 220 deletions
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
new file mode 100644
index 000000000..ac27b6cbe
--- /dev/null
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -0,0 +1,119 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra::Engines {
+
+enum class ShaderType : u32 {
+ Vertex = 0,
+ TesselationControl = 1,
+ TesselationEval = 2,
+ Geometry = 3,
+ Fragment = 4,
+ Compute = 5,
+};
+
+struct SamplerDescriptor {
+ union {
+ BitField<0, 20, Tegra::Shader::TextureType> texture_type;
+ BitField<20, 1, u32> is_array;
+ BitField<21, 1, u32> is_buffer;
+ BitField<22, 1, u32> is_shadow;
+ u32 raw{};
+ };
+
+ bool operator==(const SamplerDescriptor& rhs) const noexcept {
+ return raw == rhs.raw;
+ }
+
+ bool operator!=(const SamplerDescriptor& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+
+ static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) {
+ SamplerDescriptor result;
+ switch (tic_texture_type) {
+ case Tegra::Texture::TextureType::Texture1D:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture2D:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture3D:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::TextureCubemap:
+ result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture1DArray:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
+ result.is_array.Assign(1);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture2DArray:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
+ result.is_array.Assign(1);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture1DBuffer:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(1);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture2DNoMipmap:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::TextureCubeArray:
+ result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
+ result.is_array.Assign(1);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ default:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ }
+ }
+};
+static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
+
+class ConstBufferEngineInterface {
+public:
+ virtual ~ConstBufferEngineInterface() = default;
+ virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
+ virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
+ virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+ u64 offset) const = 0;
+ virtual u32 GetBoundBuffer() const = 0;
+};
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0901cf2fa..dba342c70 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -99,19 +99,19 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x80);
+ INSERT_UNION_PADDING_WORDS(0x80);
Surface dst;
- INSERT_PADDING_WORDS(2);
+ INSERT_UNION_PADDING_WORDS(2);
Surface src;
- INSERT_PADDING_WORDS(0x15);
+ INSERT_UNION_PADDING_WORDS(0x15);
Operation operation;
- INSERT_PADDING_WORDS(0x177);
+ INSERT_UNION_PADDING_WORDS(0x177);
union {
u32 raw;
@@ -119,7 +119,7 @@ public:
BitField<4, 1, Filter> filter;
} blit_control;
- INSERT_PADDING_WORDS(0x8);
+ INSERT_UNION_PADDING_WORDS(0x8);
u32 blit_dst_x;
u32 blit_dst_y;
@@ -130,7 +130,7 @@ public:
u64 blit_src_x;
u64 blit_src_y;
- INSERT_PADDING_WORDS(0x21);
+ INSERT_UNION_PADDING_WORDS(0x21);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 63d449135..3a39aeabe 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -50,7 +50,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
}
}
-Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
+Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
ASSERT(cbuf_mask[regs.tex_cb_index]);
@@ -61,22 +61,38 @@ Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) co
ASSERT(address < texinfo.Address() + texinfo.size);
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
- return GetTextureInfo(tex_handle, offset);
+ return GetTextureInfo(tex_handle);
}
-Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle,
- std::size_t offset) const {
- return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id),
- GetTSCEntry(tex_handle.tsc_id)};
+Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
+ return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
}
-u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const {
+u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
+ ASSERT(stage == ShaderType::Compute);
const auto& buffer = launch_description.const_buffer_config[const_buffer];
u32 result;
std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
return result;
}
+SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
+ return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
+}
+
+SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+ u64 offset) const {
+ ASSERT(stage == ShaderType::Compute);
+ const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
+ const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
+
+ const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+ const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
+ SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
+ result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+ return result;
+}
+
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 90cf650d2..5259d92bd 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,6 +10,7 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
#include "video_core/textures/texture.h"
@@ -37,7 +38,7 @@ namespace Tegra::Engines {
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
-class KeplerCompute final {
+class KeplerCompute final : public ConstBufferEngineInterface {
public:
explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
@@ -50,7 +51,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x60);
+ INSERT_UNION_PADDING_WORDS(0x60);
Upload::Registers upload;
@@ -62,7 +63,7 @@ public:
u32 data_upload;
- INSERT_PADDING_WORDS(0x3F);
+ INSERT_UNION_PADDING_WORDS(0x3F);
struct {
u32 address;
@@ -71,11 +72,11 @@ public:
}
} launch_desc_loc;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
u32 launch;
- INSERT_PADDING_WORDS(0x4A7);
+ INSERT_UNION_PADDING_WORDS(0x4A7);
struct {
u32 address_high;
@@ -87,7 +88,7 @@ public:
}
} tsc;
- INSERT_PADDING_WORDS(0x3);
+ INSERT_UNION_PADDING_WORDS(0x3);
struct {
u32 address_high;
@@ -99,7 +100,7 @@ public:
}
} tic;
- INSERT_PADDING_WORDS(0x22);
+ INSERT_UNION_PADDING_WORDS(0x22);
struct {
u32 address_high;
@@ -110,11 +111,11 @@ public:
}
} code_loc;
- INSERT_PADDING_WORDS(0x3FE);
+ INSERT_UNION_PADDING_WORDS(0x3FE);
u32 tex_cb_index;
- INSERT_PADDING_WORDS(0x374);
+ INSERT_UNION_PADDING_WORDS(0x374);
};
std::array<u32, NUM_REGS> reg_array;
};
@@ -178,7 +179,7 @@ public:
};
INSERT_PADDING_WORDS(0x11);
- } launch_description;
+ } launch_description{};
struct {
u32 write_offset = 0;
@@ -195,13 +196,21 @@ public:
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
- Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const;
+ Texture::FullTextureInfo GetTexture(std::size_t offset) const;
- /// Given a Texture Handle, returns the TSC and TIC entries.
- Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
- std::size_t offset) const;
+ /// Given a texture handle, returns the TSC and TIC entries.
+ Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
- u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const;
+ u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
+
+ SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
+
+ SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+ u64 offset) const override;
+
+ u32 GetBoundBuffer() const override {
+ return regs.tex_cb_index;
+ }
private:
Core::System& system;
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index e0e25c321..396fb6e86 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -45,7 +45,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x60);
+ INSERT_UNION_PADDING_WORDS(0x60);
Upload::Registers upload;
@@ -57,7 +57,7 @@ public:
u32 data;
- INSERT_PADDING_WORDS(0x11);
+ INSERT_UNION_PADDING_WORDS(0x11);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7802fd808..2bed6cb38 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -98,10 +98,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
}
-#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
+#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name))
void Maxwell3D::InitDirtySettings() {
- const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
+ const auto set_block = [this](std::size_t start, std::size_t range, u8 position) {
const auto start_itr = dirty_pointers.begin() + start;
const auto end_itr = start_itr + range;
std::fill(start_itr, end_itr, position);
@@ -112,10 +112,10 @@ void Maxwell3D::InitDirtySettings() {
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
- u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
+ u8 rt_dirty_reg = DIRTY_REGS_POS(render_target);
for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
set_block(rt_reg, registers_per_rt, rt_dirty_reg);
- rt_dirty_reg++;
+ ++rt_dirty_reg;
}
constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
@@ -129,35 +129,35 @@ void Maxwell3D::InitDirtySettings() {
constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
- u32 va_reg = DIRTY_REGS_POS(vertex_array);
- u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
+ u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
+ u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
vertex_reg += vertex_array_size) {
- set_block(vertex_reg, 3, va_reg);
+ set_block(vertex_reg, 3, va_dirty_reg);
// The divisor concerns vertex array instances
- dirty_pointers[vertex_reg + 3] = vi_reg;
- va_reg++;
- vi_reg++;
+ dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg;
+ ++va_dirty_reg;
+ ++vi_dirty_reg;
}
constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
- va_reg = DIRTY_REGS_POS(vertex_array);
+ va_dirty_reg = DIRTY_REGS_POS(vertex_array);
for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
vertex_reg += vertex_limit_size) {
- set_block(vertex_reg, vertex_limit_size, va_reg);
- va_reg++;
+ set_block(vertex_reg, vertex_limit_size, va_dirty_reg);
+ va_dirty_reg++;
}
constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
constexpr u32 vertex_instance_size =
sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
constexpr u32 vertex_instance_end =
vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
- vi_reg = DIRTY_REGS_POS(vertex_instance);
+ vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
vertex_reg += vertex_instance_size) {
- set_block(vertex_reg, vertex_instance_size, vi_reg);
- vi_reg++;
+ set_block(vertex_reg, vertex_instance_size, vi_dirty_reg);
+ vi_dirty_reg++;
}
set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
DIRTY_REGS_POS(vertex_attrib_format));
@@ -171,7 +171,7 @@ void Maxwell3D::InitDirtySettings() {
// State
// Viewport
- constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
+ constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
set_block(viewport_start, viewport_size, viewport_dirty_reg);
@@ -198,7 +198,7 @@ void Maxwell3D::InitDirtySettings() {
set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
// Depth Test
- constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
+ constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
@@ -223,12 +223,12 @@ void Maxwell3D::InitDirtySettings() {
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
// Color Mask
- constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
+ constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
color_mask_dirty_reg);
// Blend State
- constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
+ constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
blend_state_dirty_reg);
dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
@@ -237,12 +237,12 @@ void Maxwell3D::InitDirtySettings() {
blend_state_dirty_reg);
// Scissor State
- constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
+ constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
scissor_test_dirty_reg);
// Polygon Offset
- constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
+ constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
@@ -251,7 +251,7 @@ void Maxwell3D::InitDirtySettings() {
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
// Depth bounds
- constexpr u32 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
+ constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg;
}
@@ -478,7 +478,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) {
}
void Maxwell3D::FlushMMEInlineDraw() {
- LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
@@ -760,61 +760,8 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
return tsc_entry;
}
-std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {
- std::vector<Texture::FullTextureInfo> textures;
-
- auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)];
- auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index];
- ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
-
- GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
-
- // Offset into the texture constbuffer where the texture info begins.
- static constexpr std::size_t TextureInfoOffset = 0x20;
-
- for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
- current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
-
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
-
- Texture::FullTextureInfo tex_info{};
- // TODO(Subv): Use the shader to determine which textures are actually accessed.
- tex_info.index =
- static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) /
- sizeof(Texture::TextureHandle);
-
- // Load the TIC data.
- auto tic_entry = GetTICEntry(tex_handle.tic_id);
- // TODO(Subv): Workaround for BitField's move constructor being deleted.
- std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-
- // Load the TSC data
- auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
- // TODO(Subv): Workaround for BitField's move constructor being deleted.
- std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-
- textures.push_back(tex_info);
- }
-
- return textures;
-}
-
-Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle,
- std::size_t offset) const {
- Texture::FullTextureInfo tex_info{};
- tex_info.index = static_cast<u32>(offset);
-
- // Load the TIC data.
- auto tic_entry = GetTICEntry(tex_handle.tic_id);
- // TODO(Subv): Workaround for BitField's move constructor being deleted.
- std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-
- // Load the TSC data
- auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
- // TODO(Subv): Workaround for BitField's move constructor being deleted.
- std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-
- return tex_info;
+Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
+ return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
}
Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
@@ -830,7 +777,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
- return GetTextureInfo(tex_handle, offset);
+ return GetTextureInfo(tex_handle);
}
u32 Maxwell3D::GetRegisterValue(u32 method) const {
@@ -846,7 +793,8 @@ void Maxwell3D::ProcessClearBuffers() {
rasterizer.Clear();
}
-u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const {
+u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
+ ASSERT(stage != ShaderType::Compute);
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& buffer = shader_stage.const_buffers[const_buffer];
u32 result;
@@ -854,4 +802,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6
return result;
}
+SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
+ return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
+}
+
+SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+ u64 offset) const {
+ ASSERT(stage != ShaderType::Compute);
+ const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
+ const auto& tex_info_buffer = shader.const_buffers[const_buffer];
+ const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
+
+ const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+ const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
+ SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
+ result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+ return result;
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index e3f1047d5..1aa7c274f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -15,6 +15,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
@@ -44,7 +45,7 @@ namespace Tegra::Engines {
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
-class Maxwell3D final {
+class Maxwell3D final : public ConstBufferEngineInterface {
public:
explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
@@ -495,7 +496,7 @@ public:
Equation equation_a;
Factor factor_source_a;
Factor factor_dest_a;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
};
struct RenderTargetConfig {
@@ -516,7 +517,7 @@ public:
};
u32 layer_stride;
u32 base_layer;
- INSERT_PADDING_WORDS(7);
+ INSERT_UNION_PADDING_WORDS(7);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
@@ -541,7 +542,7 @@ public:
f32 translate_x;
f32 translate_y;
f32 translate_z;
- INSERT_PADDING_WORDS(2);
+ INSERT_UNION_PADDING_WORDS(2);
Common::Rectangle<s32> GetRect() const {
return {
@@ -605,7 +606,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x45);
+ INSERT_UNION_PADDING_WORDS(0x45);
struct {
u32 upload_address;
@@ -614,7 +615,7 @@ public:
u32 bind;
} macros;
- INSERT_PADDING_WORDS(0x17);
+ INSERT_UNION_PADDING_WORDS(0x17);
Upload::Registers upload;
struct {
@@ -625,7 +626,7 @@ public:
u32 data_upload;
- INSERT_PADDING_WORDS(0x44);
+ INSERT_UNION_PADDING_WORDS(0x44);
struct {
union {
@@ -635,11 +636,11 @@ public:
};
} sync_info;
- INSERT_PADDING_WORDS(0x11E);
+ INSERT_UNION_PADDING_WORDS(0x11E);
u32 tfb_enabled;
- INSERT_PADDING_WORDS(0x2E);
+ INSERT_UNION_PADDING_WORDS(0x2E);
std::array<RenderTargetConfig, NumRenderTargets> rt;
@@ -647,49 +648,49 @@ public:
std::array<ViewPort, NumViewports> viewports;
- INSERT_PADDING_WORDS(0x1D);
+ INSERT_UNION_PADDING_WORDS(0x1D);
struct {
u32 first;
u32 count;
} vertex_buffer;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
float clear_color[4];
float clear_depth;
- INSERT_PADDING_WORDS(0x3);
+ INSERT_UNION_PADDING_WORDS(0x3);
s32 clear_stencil;
- INSERT_PADDING_WORDS(0x7);
+ INSERT_UNION_PADDING_WORDS(0x7);
u32 polygon_offset_point_enable;
u32 polygon_offset_line_enable;
u32 polygon_offset_fill_enable;
- INSERT_PADDING_WORDS(0xD);
+ INSERT_UNION_PADDING_WORDS(0xD);
std::array<ScissorTest, NumViewports> scissor_test;
- INSERT_PADDING_WORDS(0x15);
+ INSERT_UNION_PADDING_WORDS(0x15);
s32 stencil_back_func_ref;
u32 stencil_back_mask;
u32 stencil_back_func_mask;
- INSERT_PADDING_WORDS(0xC);
+ INSERT_UNION_PADDING_WORDS(0xC);
u32 color_mask_common;
- INSERT_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x6);
u32 rt_separate_frag_data;
f32 depth_bounds[2];
- INSERT_PADDING_WORDS(0xA);
+ INSERT_UNION_PADDING_WORDS(0xA);
struct {
u32 address_high;
@@ -709,7 +710,7 @@ public:
}
} zeta;
- INSERT_PADDING_WORDS(0x41);
+ INSERT_UNION_PADDING_WORDS(0x41);
union {
BitField<0, 4, u32> stencil;
@@ -718,11 +719,11 @@ public:
BitField<12, 4, u32> viewport;
} clear_flags;
- INSERT_PADDING_WORDS(0x19);
+ INSERT_UNION_PADDING_WORDS(0x19);
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
- INSERT_PADDING_WORDS(0xF);
+ INSERT_UNION_PADDING_WORDS(0xF);
struct {
union {
@@ -745,16 +746,16 @@ public:
}
} rt_control;
- INSERT_PADDING_WORDS(0x2);
+ INSERT_UNION_PADDING_WORDS(0x2);
u32 zeta_width;
u32 zeta_height;
- INSERT_PADDING_WORDS(0x27);
+ INSERT_UNION_PADDING_WORDS(0x27);
u32 depth_test_enable;
- INSERT_PADDING_WORDS(0x5);
+ INSERT_UNION_PADDING_WORDS(0x5);
u32 independent_blend_enable;
@@ -762,7 +763,7 @@ public:
u32 alpha_test_enabled;
- INSERT_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x6);
u32 d3d_cull_mode;
@@ -776,7 +777,7 @@ public:
float b;
float a;
} blend_color;
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
struct {
u32 separate_alpha;
@@ -785,7 +786,7 @@ public:
Blend::Factor factor_dest_rgb;
Blend::Equation equation_a;
Blend::Factor factor_source_a;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
Blend::Factor factor_dest_a;
u32 enable_common;
@@ -801,7 +802,7 @@ public:
u32 stencil_front_func_mask;
u32 stencil_front_mask;
- INSERT_PADDING_WORDS(0x2);
+ INSERT_UNION_PADDING_WORDS(0x2);
u32 frag_color_clamp;
@@ -810,12 +811,12 @@ public:
BitField<4, 1, u32> triangle_rast_flip;
} screen_y_control;
- INSERT_PADDING_WORDS(0x21);
+ INSERT_UNION_PADDING_WORDS(0x21);
u32 vb_element_base;
u32 vb_base_instance;
- INSERT_PADDING_WORDS(0x35);
+ INSERT_UNION_PADDING_WORDS(0x35);
union {
BitField<0, 1, u32> c0;
@@ -828,11 +829,11 @@ public:
BitField<7, 1, u32> c7;
} clip_distance_enabled;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
float point_size;
- INSERT_PADDING_WORDS(0x7);
+ INSERT_UNION_PADDING_WORDS(0x7);
u32 zeta_enable;
@@ -841,7 +842,7 @@ public:
BitField<4, 1, u32> alpha_to_one;
} multisample_control;
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
struct {
u32 address_high;
@@ -865,11 +866,11 @@ public:
}
} tsc;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
float polygon_offset_factor;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
struct {
u32 tic_address_high;
@@ -882,7 +883,7 @@ public:
}
} tic;
- INSERT_PADDING_WORDS(0x5);
+ INSERT_UNION_PADDING_WORDS(0x5);
u32 stencil_two_side_enable;
StencilOp stencil_back_op_fail;
@@ -890,13 +891,13 @@ public:
StencilOp stencil_back_op_zpass;
ComparisonOp stencil_back_func_func;
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
u32 framebuffer_srgb;
float polygon_offset_units;
- INSERT_PADDING_WORDS(0x11);
+ INSERT_UNION_PADDING_WORDS(0x11);
union {
BitField<2, 1, u32> coord_origin;
@@ -912,7 +913,7 @@ public:
(static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low);
}
} code_address;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
struct {
u32 vertex_end_gl;
@@ -924,14 +925,14 @@ public:
};
} draw;
- INSERT_PADDING_WORDS(0xA);
+ INSERT_UNION_PADDING_WORDS(0xA);
struct {
u32 enabled;
u32 index;
} primitive_restart;
- INSERT_PADDING_WORDS(0x5F);
+ INSERT_UNION_PADDING_WORDS(0x5F);
struct {
u32 start_addr_high;
@@ -972,9 +973,9 @@ public:
}
} index_array;
- INSERT_PADDING_WORDS(0x7);
+ INSERT_UNION_PADDING_WORDS(0x7);
- INSERT_PADDING_WORDS(0x1F);
+ INSERT_UNION_PADDING_WORDS(0x1F);
float polygon_offset_clamp;
@@ -988,17 +989,17 @@ public:
}
} instanced_arrays;
- INSERT_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x6);
Cull cull;
u32 pixel_center_integer;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
u32 viewport_transform_enabled;
- INSERT_PADDING_WORDS(0x3);
+ INSERT_UNION_PADDING_WORDS(0x3);
union {
BitField<0, 1, u32> depth_range_0_1;
@@ -1006,13 +1007,13 @@ public:
BitField<4, 1, u32> depth_clamp_far;
} view_volume_clip_control;
- INSERT_PADDING_WORDS(0x21);
+ INSERT_UNION_PADDING_WORDS(0x21);
struct {
u32 enable;
LogicOperation operation;
} logic_op;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
union {
u32 raw;
@@ -1025,9 +1026,9 @@ public:
BitField<6, 4, u32> RT;
BitField<10, 11, u32> layer;
} clear_buffers;
- INSERT_PADDING_WORDS(0xB);
+ INSERT_UNION_PADDING_WORDS(0xB);
std::array<ColorMask, NumRenderTargets> color_mask;
- INSERT_PADDING_WORDS(0x38);
+ INSERT_UNION_PADDING_WORDS(0x38);
struct {
u32 query_address_high;
@@ -1049,7 +1050,7 @@ public:
}
} query;
- INSERT_PADDING_WORDS(0x3C);
+ INSERT_UNION_PADDING_WORDS(0x3C);
struct {
union {
@@ -1089,10 +1090,10 @@ public:
BitField<4, 4, ShaderProgram> program;
};
u32 offset;
- INSERT_PADDING_WORDS(14);
+ INSERT_UNION_PADDING_WORDS(14);
} shader_config[MaxShaderProgram];
- INSERT_PADDING_WORDS(0x60);
+ INSERT_UNION_PADDING_WORDS(0x60);
u32 firmware[0x20];
@@ -1109,7 +1110,7 @@ public:
}
} const_buffer;
- INSERT_PADDING_WORDS(0x10);
+ INSERT_UNION_PADDING_WORDS(0x10);
struct {
union {
@@ -1117,14 +1118,14 @@ public:
BitField<0, 1, u32> valid;
BitField<4, 5, u32> index;
};
- INSERT_PADDING_WORDS(7);
+ INSERT_UNION_PADDING_WORDS(7);
} cb_bind[MaxShaderStage];
- INSERT_PADDING_WORDS(0x56);
+ INSERT_UNION_PADDING_WORDS(0x56);
u32 tex_cb_index;
- INSERT_PADDING_WORDS(0x395);
+ INSERT_UNION_PADDING_WORDS(0x395);
struct {
/// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1136,14 +1137,14 @@ public:
}
} ssbo_info;
- INSERT_PADDING_WORDS(0x11);
+ INSERT_UNION_PADDING_WORDS(0x11);
struct {
u32 address[MaxShaderStage];
u32 size[MaxShaderStage];
} tex_info_buffers;
- INSERT_PADDING_WORDS(0xCC);
+ INSERT_UNION_PADDING_WORDS(0xCC);
};
std::array<u32, NUM_REGS> reg_array;
};
@@ -1165,6 +1166,8 @@ public:
struct DirtyRegs {
static constexpr std::size_t NUM_REGS = 256;
+ static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max());
+
union {
struct {
bool null_dirty;
@@ -1247,17 +1250,22 @@ public:
void FlushMMEInlineDraw();
- /// Given a Texture Handle, returns the TSC and TIC entries.
- Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
- std::size_t offset) const;
-
- /// Returns a list of enabled textures for the specified shader stage.
- std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
+ /// Given a texture handle, returns the TSC and TIC entries.
+ Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
- u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const;
+ u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
+
+ SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
+
+ SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+ u64 offset) const override;
+
+ u32 GetBoundBuffer() const override {
+ return regs.tex_cb_index;
+ }
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
/// we've seen used.
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 93808a9bb..4f40d1d1f 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -94,7 +94,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0xC0);
+ INSERT_UNION_PADDING_WORDS(0xC0);
struct {
union {
@@ -112,7 +112,7 @@ public:
};
} exec;
- INSERT_PADDING_WORDS(0x3F);
+ INSERT_UNION_PADDING_WORDS(0x3F);
struct {
u32 address_high;
@@ -139,7 +139,7 @@ public:
u32 x_count;
u32 y_count;
- INSERT_PADDING_WORDS(0xB8);
+ INSERT_UNION_PADDING_WORDS(0xB8);
u32 const0;
u32 const1;
@@ -162,11 +162,11 @@ public:
Parameters dst_params;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
Parameters src_params;
- INSERT_PADDING_WORDS(0x13);
+ INSERT_UNION_PADDING_WORDS(0x13);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7a6355ce2..8f6bc76eb 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -574,7 +574,7 @@ enum class ShuffleOperation : u64 {
};
union Instruction {
- Instruction& operator=(const Instruction& instr) {
+ constexpr Instruction& operator=(const Instruction& instr) {
value = instr.value;
return *this;
}
@@ -1238,6 +1238,32 @@ union Instruction {
} tld4;
union {
+ BitField<35, 1, u64> ndv_flag;
+ BitField<49, 1, u64> nodep_flag;
+ BitField<50, 1, u64> dc_flag;
+ BitField<33, 2, u64> info;
+ BitField<37, 2, u64> component;
+
+ bool UsesMiscMode(TextureMiscMode mode) const {
+ switch (mode) {
+ case TextureMiscMode::NDV:
+ return ndv_flag != 0;
+ case TextureMiscMode::NODEP:
+ return nodep_flag != 0;
+ case TextureMiscMode::DC:
+ return dc_flag != 0;
+ case TextureMiscMode::AOFFI:
+ return info == 1;
+ case TextureMiscMode::PTP:
+ return info == 2;
+ default:
+ break;
+ }
+ return false;
+ }
+ } tld4_b;
+
+ union {
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
BitField<51, 1, u64> aoffi_flag;
@@ -1590,7 +1616,8 @@ public:
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
TLD, // Texture Load
TLDS, // Texture Load with scalar/non-vec4 source/destinations
- TLD4, // Texture Load 4
+ TLD4, // Texture Gather 4
+ TLD4_B, // Texture Gather 4 Bindless
TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
TMML_B, // Texture Mip Map Level
TMML, // Texture Mip Map Level
@@ -1760,22 +1787,22 @@ public:
class Matcher {
public:
- Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type)
+ constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type)
: name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
- const char* GetName() const {
+ constexpr const char* GetName() const {
return name;
}
- u16 GetMask() const {
+ constexpr u16 GetMask() const {
return mask;
}
- Id GetId() const {
+ constexpr Id GetId() const {
return id;
}
- Type GetType() const {
+ constexpr Type GetType() const {
return type;
}
@@ -1784,7 +1811,7 @@ public:
* @param instruction The instruction to test
* @returns true if the given instruction matches.
*/
- bool Matches(u16 instruction) const {
+ constexpr bool Matches(u16 instruction) const {
return (instruction & mask) == expected;
}
@@ -1818,7 +1845,7 @@ private:
* A '0' in a bitstring indicates that a zero must be present at that bit position.
* A '1' in a bitstring indicates that a one must be present at that bit position.
*/
- static auto GetMaskAndExpect(const char* const bitstring) {
+ static constexpr auto GetMaskAndExpect(const char* const bitstring) {
u16 mask = 0, expect = 0;
for (std::size_t i = 0; i < opcode_bitsize; i++) {
const std::size_t bit_position = opcode_bitsize - i - 1;
@@ -1835,15 +1862,15 @@ private:
break;
}
}
- return std::make_tuple(mask, expect);
+ return std::make_pair(mask, expect);
}
public:
/// Creates a matcher that can match and parse instructions based on bitstring.
- static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type,
- const char* const name) {
- const auto mask_expect = GetMaskAndExpect(bitstring);
- return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type);
+ static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type,
+ const char* const name) {
+ const auto [mask, expected] = GetMaskAndExpect(bitstring);
+ return Matcher(name, mask, expected, op, type);
}
};
@@ -1881,6 +1908,7 @@ private:
INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
+ INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index e86a7f04a..bc80661d8 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -38,37 +38,37 @@ struct Header {
BitField<26, 1, u32> does_load_or_store;
BitField<27, 1, u32> does_fp64;
BitField<28, 4, u32> stream_out_mask;
- } common0;
+ } common0{};
union {
BitField<0, 24, u32> shader_local_memory_low_size;
BitField<24, 8, u32> per_patch_attribute_count;
- } common1;
+ } common1{};
union {
BitField<0, 24, u32> shader_local_memory_high_size;
BitField<24, 8, u32> threads_per_input_primitive;
- } common2;
+ } common2{};
union {
BitField<0, 24, u32> shader_local_memory_crs_size;
BitField<24, 4, OutputTopology> output_topology;
BitField<28, 4, u32> reserved;
- } common3;
+ } common3{};
union {
BitField<0, 12, u32> max_output_vertices;
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
BitField<24, 4, u32> reserved;
BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
- } common4;
+ } common4{};
union {
struct {
- INSERT_PADDING_BYTES(3); // ImapSystemValuesA
- INSERT_PADDING_BYTES(1); // ImapSystemValuesB
- INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
- INSERT_PADDING_BYTES(2); // ImapColor
+ INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
+ INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
+ INSERT_UNION_PADDING_BYTES(16); // ImapGenericVector[32]
+ INSERT_UNION_PADDING_BYTES(2); // ImapColor
union {
BitField<0, 8, u16> clip_distances;
BitField<8, 1, u16> point_sprite_s;
@@ -79,20 +79,20 @@ struct Header {
BitField<14, 1, u16> instance_id;
BitField<15, 1, u16> vertex_id;
};
- INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
- INSERT_PADDING_BYTES(1); // ImapReserved
- INSERT_PADDING_BYTES(3); // OmapSystemValuesA
- INSERT_PADDING_BYTES(1); // OmapSystemValuesB
- INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
- INSERT_PADDING_BYTES(2); // OmapColor
- INSERT_PADDING_BYTES(2); // OmapSystemValuesC
- INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10]
- INSERT_PADDING_BYTES(1); // OmapReserved
+ INSERT_UNION_PADDING_BYTES(5); // ImapFixedFncTexture[10]
+ INSERT_UNION_PADDING_BYTES(1); // ImapReserved
+ INSERT_UNION_PADDING_BYTES(3); // OmapSystemValuesA
+ INSERT_UNION_PADDING_BYTES(1); // OmapSystemValuesB
+ INSERT_UNION_PADDING_BYTES(16); // OmapGenericVector[32]
+ INSERT_UNION_PADDING_BYTES(2); // OmapColor
+ INSERT_UNION_PADDING_BYTES(2); // OmapSystemValuesC
+ INSERT_UNION_PADDING_BYTES(5); // OmapFixedFncTexture[10]
+ INSERT_UNION_PADDING_BYTES(1); // OmapReserved
} vtg;
struct {
- INSERT_PADDING_BYTES(3); // ImapSystemValuesA
- INSERT_PADDING_BYTES(1); // ImapSystemValuesB
+ INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
+ INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
union {
BitField<0, 2, AttributeUse> x;
BitField<2, 2, AttributeUse> y;
@@ -100,10 +100,10 @@ struct Header {
BitField<6, 2, AttributeUse> z;
u8 raw;
} imap_generic_vector[32];
- INSERT_PADDING_BYTES(2); // ImapColor
- INSERT_PADDING_BYTES(2); // ImapSystemValuesC
- INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
- INSERT_PADDING_BYTES(2); // ImapReserved
+ INSERT_UNION_PADDING_BYTES(2); // ImapColor
+ INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC
+ INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
+ INSERT_UNION_PADDING_BYTES(2); // ImapReserved
struct {
u32 target;
union {
@@ -139,6 +139,8 @@ struct Header {
return result;
}
} ps;
+
+ std::array<u32, 0xF> raw{};
};
u64 GetLocalMemorySize() const {