Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/CMakeLists.txt | 9
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 60
-rw-r--r-- src/video_core/compatible_formats.cpp | 87
-rw-r--r-- src/video_core/compatible_formats.h | 2
-rw-r--r-- src/video_core/engines/fermi_2d.cpp | 10
-rw-r--r-- src/video_core/engines/fermi_2d.h | 9
-rw-r--r-- src/video_core/engines/kepler_compute.cpp | 17
-rw-r--r-- src/video_core/engines/kepler_compute.h | 16
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 35
-rw-r--r-- src/video_core/engines/maxwell_3d.h | 23
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp | 297
-rw-r--r-- src/video_core/engines/maxwell_dma.h | 348
-rw-r--r-- src/video_core/fence_manager.h | 32
-rw-r--r-- src/video_core/gpu.cpp | 33
-rw-r--r-- src/video_core/gpu.h | 113
-rw-r--r-- src/video_core/gpu_asynch.cpp | 10
-rw-r--r-- src/video_core/gpu_asynch.h | 4
-rw-r--r-- src/video_core/gpu_synch.cpp | 8
-rw-r--r-- src/video_core/gpu_synch.h | 6
-rw-r--r-- src/video_core/gpu_thread.cpp | 4
-rw-r--r-- src/video_core/host_shaders/CMakeLists.txt | 43
-rw-r--r-- src/video_core/host_shaders/StringShaderHeader.cmake | 11
-rw-r--r-- src/video_core/host_shaders/opengl_present.frag | 10
-rw-r--r-- src/video_core/host_shaders/opengl_present.vert | 24
-rw-r--r-- src/video_core/host_shaders/source_shader.h.in | 9
-rw-r--r-- src/video_core/macro/macro.h | 3
-rw-r--r-- src/video_core/macro/macro_hle.cpp | 26
-rw-r--r-- src/video_core/macro/macro_interpreter.cpp | 1
-rw-r--r-- src/video_core/macro/macro_jit_x64.cpp | 11
-rw-r--r-- src/video_core/memory_manager.cpp | 540
-rw-r--r-- src/video_core/memory_manager.h | 189
-rw-r--r-- src/video_core/morton.cpp | 276
-rw-r--r-- src/video_core/query_cache.h | 31
-rw-r--r-- src/video_core/rasterizer_interface.h | 7
-rw-r--r-- src/video_core/renderer_base.cpp | 4
-rw-r--r-- src/video_core/renderer_base.h | 17
-rw-r--r-- src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 117
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 7
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h | 3
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp | 4
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h | 10
-rw-r--r-- src/video_core/renderer_opengl/gl_fence_manager.cpp | 12
-rw-r--r-- src/video_core/renderer_opengl/gl_fence_manager.h | 7
-rw-r--r-- src/video_core/renderer_opengl/gl_query_cache.cpp | 9
-rw-r--r-- src/video_core/renderer_opengl/gl_query_cache.h | 3
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 359
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h | 40
-rw-r--r-- src/video_core/renderer_opengl/gl_resource_manager.cpp | 9
-rw-r--r-- src/video_core/renderer_opengl/gl_resource_manager.h | 9
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 232
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h | 59
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 26
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 67
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.h | 26
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.cpp | 71
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h | 17
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_util.cpp | 15
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_util.h | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_state_tracker.cpp | 6
-rw-r--r-- src/video_core/renderer_opengl/gl_state_tracker.h | 28
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 174
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.h | 8
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp | 77
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h | 28
-rw-r--r-- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 1
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 173
-rw-r--r-- src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp | 14
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 25
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.h | 19
-rw-r--r-- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 743
-rw-r--r-- src/video_core/renderer_vulkan/vk_blit_screen.h | 9
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 80
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 7
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 194
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 131
-rw-r--r-- src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | 33
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.cpp | 294
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.h | 13
-rw-r--r-- src/video_core/renderer_vulkan/vk_fence_manager.cpp | 14
-rw-r--r-- src/video_core/renderer_vulkan/vk_fence_manager.h | 8
-rw-r--r-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 432
-rw-r--r-- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 28
-rw-r--r-- src/video_core/renderer_vulkan/vk_image.cpp | 38
-rw-r--r-- src/video_core/renderer_vulkan/vk_memory_manager.cpp | 13
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 190
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 59
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.cpp | 21
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.h | 3
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 251
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h | 23
-rw-r--r-- src/video_core/renderer_vulkan/vk_renderpass_cache.cpp | 129
-rw-r--r-- src/video_core/renderer_vulkan/vk_resource_manager.cpp | 29
-rw-r--r-- src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 52
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.cpp | 57
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 17
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_util.cpp | 14
-rw-r--r-- src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 25
-rw-r--r-- src/video_core/renderer_vulkan/vk_state_tracker.cpp | 20
-rw-r--r-- src/video_core/renderer_vulkan/vk_state_tracker.h | 28
-rw-r--r-- src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 39
-rw-r--r-- src/video_core/renderer_vulkan/vk_swapchain.cpp | 96
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 289
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.h | 16
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.cpp | 124
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.h | 30
-rw-r--r-- src/video_core/shader/async_shaders.cpp | 221
-rw-r--r-- src/video_core/shader/async_shaders.h | 136
-rw-r--r-- src/video_core/shader/control_flow.cpp | 30
-rw-r--r-- src/video_core/shader/decode/arithmetic_integer.cpp | 6
-rw-r--r-- src/video_core/shader/decode/image.cpp | 54
-rw-r--r-- src/video_core/shader/decode/memory.cpp | 9
-rw-r--r-- src/video_core/shader/decode/other.cpp | 3
-rw-r--r-- src/video_core/shader/decode/video.cpp | 19
-rw-r--r-- src/video_core/shader/decode/xmad.cpp | 15
-rw-r--r-- src/video_core/shader/memory_util.cpp | 7
-rw-r--r-- src/video_core/shader/memory_util.h | 6
-rw-r--r-- src/video_core/shader/shader_ir.cpp | 4
-rw-r--r-- src/video_core/shader_cache.h | 4
-rw-r--r-- src/video_core/shader_notify.cpp | 42
-rw-r--r-- src/video_core/shader_notify.h | 29
-rw-r--r-- src/video_core/surface.cpp | 257
-rw-r--r-- src/video_core/surface.h | 770
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.cpp | 149
-rw-r--r-- src/video_core/texture_cache/surface_base.cpp | 2
-rw-r--r-- src/video_core/texture_cache/surface_params.cpp | 126
-rw-r--r-- src/video_core/texture_cache/surface_params.h | 7
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 59
-rw-r--r-- src/video_core/textures/convert.cpp | 6
-rw-r--r-- src/video_core/textures/decoders.cpp | 249
-rw-r--r-- src/video_core/textures/decoders.h | 47
-rw-r--r-- src/video_core/textures/texture.h | 49
-rw-r--r-- src/video_core/video_core.cpp | 35
133 files changed, 5348 insertions, 4526 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 21c46a567..d85f1e9d1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory(host_shaders)
+
add_library(video_core STATIC
buffer_cache/buffer_block.h
buffer_cache/buffer_cache.h
@@ -98,6 +100,8 @@ add_library(video_core STATIC
sampler_cache.cpp
sampler_cache.h
shader_cache.h
+ shader_notify.cpp
+ shader_notify.h
shader/decode/arithmetic.cpp
shader/decode/arithmetic_immediate.cpp
shader/decode/bfe.cpp
@@ -128,6 +132,8 @@ add_library(video_core STATIC
shader/decode/other.cpp
shader/ast.cpp
shader/ast.h
+ shader/async_shaders.cpp
+ shader/async_shaders.h
shader/compiler_settings.cpp
shader/compiler_settings.h
shader/control_flow.cpp
@@ -240,6 +246,9 @@ create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad xbyak)
+add_dependencies(video_core host_shaders)
+target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
+
if (ENABLE_VULKAN)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index dd7ce8c99..e7edd733f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -51,46 +51,43 @@ public:
bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
- auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr_opt) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
return GetEmptyBuffer(size);
}
- const VAddr cpu_addr = *cpu_addr_opt;
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800;
if (use_fast_cbuf || size < max_stream_size) {
- if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
- const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
+ if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
+ const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
if (use_fast_cbuf) {
u8* dest;
if (is_granular) {
- dest = memory_manager.GetPointer(gpu_addr);
+ dest = gpu_memory.GetPointer(gpu_addr);
} else {
staging_buffer.resize(size);
dest = staging_buffer.data();
- memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
+ gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
}
return ConstBufferUpload(dest, size);
}
if (is_granular) {
- u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
std::memcpy(dest, host_ptr, size);
});
} else {
- return StreamBufferUpload(
- size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
- memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
- });
+ return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
+ gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
+ });
}
}
}
- Buffer* const block = GetBlock(cpu_addr, size);
- MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
+ Buffer* const block = GetBlock(*cpu_addr, size);
+ MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
if (!map) {
return GetEmptyBuffer(size);
}
@@ -106,7 +103,7 @@ public:
}
}
- return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
+ return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()};
}
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@@ -262,9 +259,11 @@ public:
virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
protected:
- explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- std::unique_ptr<StreamBuffer> stream_buffer)
- : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
+ explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ std::unique_ptr<StreamBuffer> stream_buffer_)
+ : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
+ stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
~BufferCache() = default;
@@ -326,14 +325,13 @@ private:
MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) {
- auto& memory_manager = system.GPU().MemoryManager();
const VAddr cpu_addr_end = cpu_addr + size;
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- u8* host_ptr = memory_manager.GetPointer(gpu_addr);
+ if (gpu_memory.IsGranularRange(gpu_addr, size)) {
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
block->Upload(block->Offset(cpu_addr), size, host_ptr);
} else {
staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+ gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
}
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
@@ -392,7 +390,7 @@ private:
continue;
}
staging_buffer.resize(size);
- system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+ cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
}
}
@@ -431,7 +429,7 @@ private:
const std::size_t size = map->end - map->start;
staging_buffer.resize(size);
block->Download(block->Offset(map->start), size, staging_buffer.data());
- system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
+ cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
map->MarkAsModified(false, 0);
}
@@ -524,11 +522,8 @@ private:
void MarkRegionAsWritten(VAddr start, VAddr end) {
const u64 page_end = end >> WRITE_PAGE_BIT;
for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
- auto it = written_pages.find(page_start);
- if (it != written_pages.end()) {
- it->second = it->second + 1;
- } else {
- written_pages.insert_or_assign(page_start, 1);
+ if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
+ ++it->second;
}
}
}
@@ -539,7 +534,7 @@ private:
auto it = written_pages.find(page_start);
if (it != written_pages.end()) {
if (it->second > 1) {
- it->second = it->second - 1;
+ --it->second;
} else {
written_pages.erase(it);
}
@@ -570,7 +565,8 @@ private:
}
VideoCore::RasterizerInterface& rasterizer;
- Core::System& system;
+ Tegra::MemoryManager& gpu_memory;
+ Core::Memory::Memory& cpu_memory;
std::unique_ptr<StreamBuffer> stream_buffer;
BufferType stream_buffer_handle;
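
[editor's note] The written-page bookkeeping above replaces a find/insert pair with a single
emplace. A minimal sketch of that counting pattern outside the diff (the map type and the
WRITE_PAGE_BIT value are assumptions for illustration, not taken from this hunk):

    #include <cstdint>
    #include <unordered_map>

    constexpr std::uint64_t WRITE_PAGE_BIT = 11; // assumed page-size shift

    std::unordered_map<std::uint64_t, std::uint32_t> written_pages;

    void MarkRegionAsWritten(std::uint64_t start, std::uint64_t end) {
        const std::uint64_t page_end = end >> WRITE_PAGE_BIT;
        for (std::uint64_t page = start >> WRITE_PAGE_BIT; page <= page_end; ++page) {
            // emplace returns {iterator, inserted}; on collision, bump the refcount
            if (const auto [it, inserted] = written_pages.emplace(page, 1); !inserted) {
                ++it->second;
            }
        }
    }

UnmarkRegionAsWritten (unchanged in this diff apart from the --it->second cleanup) is the
mirror image: decrement, and erase the entry once the count reaches zero.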
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index 6c426b035..b06c32c84 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -17,101 +17,94 @@ namespace {
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
constexpr std::array VIEW_CLASS_128_BITS = {
- PixelFormat::RGBA32F,
- PixelFormat::RGBA32UI,
+ PixelFormat::R32G32B32A32_FLOAT,
+ PixelFormat::R32G32B32A32_UINT,
+ PixelFormat::R32G32B32A32_SINT,
};
-// Missing formats:
-// PixelFormat::RGBA32I
constexpr std::array VIEW_CLASS_96_BITS = {
- PixelFormat::RGB32F,
+ PixelFormat::R32G32B32_FLOAT,
};
// Missing formats:
// PixelFormat::RGB32UI,
// PixelFormat::RGB32I,
constexpr std::array VIEW_CLASS_64_BITS = {
- PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI,
- PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S,
+ PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
+ PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT,
+ PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
+ PixelFormat::R16G16B16A16_UINT, PixelFormat::R16G16B16A16_SINT,
};
-// Missing formats:
-// PixelFormat::RGBA16I
-// PixelFormat::RG32I
// TODO: How should we handle 48 bits?
constexpr std::array VIEW_CLASS_32_BITS = {
- PixelFormat::RG16F, PixelFormat::R11FG11FB10F, PixelFormat::R32F,
- PixelFormat::A2B10G10R10U, PixelFormat::RG16UI, PixelFormat::R32UI,
- PixelFormat::RG16I, PixelFormat::R32I, PixelFormat::ABGR8U,
- PixelFormat::RG16, PixelFormat::ABGR8S, PixelFormat::RG16S,
- PixelFormat::RGBA8_SRGB, PixelFormat::E5B9G9R9F, PixelFormat::BGRA8,
- PixelFormat::BGRA8_SRGB,
+ PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
+ PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
+ PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
+ PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
+ PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::B8G8R8A8_UNORM,
+ PixelFormat::B8G8R8A8_SRGB, PixelFormat::A8B8G8R8_UINT, PixelFormat::A8B8G8R8_SINT,
+ PixelFormat::A2B10G10R10_UINT,
};
-// Missing formats:
-// PixelFormat::RGBA8UI
-// PixelFormat::RGBA8I
-// PixelFormat::RGB10_A2_UI
// TODO: How should we handle 24 bits?
constexpr std::array VIEW_CLASS_16_BITS = {
- PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I,
- PixelFormat::RG8U, PixelFormat::R16U, PixelFormat::RG8S, PixelFormat::R16S,
+ PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT,
+ PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
+ PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT,
};
-// Missing formats:
-// PixelFormat::RG8I
constexpr std::array VIEW_CLASS_8_BITS = {
- PixelFormat::R8UI,
- PixelFormat::R8U,
+ PixelFormat::R8_UINT,
+ PixelFormat::R8_UNORM,
+ PixelFormat::R8_SINT,
+ PixelFormat::R8_SNORM,
};
-// Missing formats:
-// PixelFormat::R8I
-// PixelFormat::R8S
constexpr std::array VIEW_CLASS_RGTC1_RED = {
- PixelFormat::DXN1,
+ PixelFormat::BC4_UNORM,
+ PixelFormat::BC4_SNORM,
};
-// Missing formats:
-// COMPRESSED_SIGNED_RED_RGTC1
constexpr std::array VIEW_CLASS_RGTC2_RG = {
- PixelFormat::DXN2UNORM,
- PixelFormat::DXN2SNORM,
+ PixelFormat::BC5_UNORM,
+ PixelFormat::BC5_SNORM,
};
constexpr std::array VIEW_CLASS_BPTC_UNORM = {
- PixelFormat::BC7U,
- PixelFormat::BC7U_SRGB,
+ PixelFormat::BC7_UNORM,
+ PixelFormat::BC7_SRGB,
};
constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
- PixelFormat::BC6H_SF16,
- PixelFormat::BC6H_UF16,
+ PixelFormat::BC6H_SFLOAT,
+ PixelFormat::BC6H_UFLOAT,
};
// Compatibility table taken from Table 4.X.1 in:
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
constexpr std::array COPY_CLASS_128_BITS = {
- PixelFormat::RGBA32UI, PixelFormat::RGBA32F, PixelFormat::DXT23,
- PixelFormat::DXT23_SRGB, PixelFormat::DXT45, PixelFormat::DXT45_SRGB,
- PixelFormat::DXN2SNORM, PixelFormat::BC7U, PixelFormat::BC7U_SRGB,
- PixelFormat::BC6H_SF16, PixelFormat::BC6H_UF16,
+ PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
+ PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM,
+ PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM,
+ PixelFormat::BC7_UNORM, PixelFormat::BC7_SRGB, PixelFormat::BC6H_SFLOAT,
+ PixelFormat::BC6H_UFLOAT,
};
// Missing formats:
// PixelFormat::RGBA32I
// COMPRESSED_RG_RGTC2
constexpr std::array COPY_CLASS_64_BITS = {
- PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI,
- PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1,
-
+ PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
+ PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
+ PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT,
+ PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_SINT,
+ PixelFormat::BC1_RGBA_UNORM, PixelFormat::BC1_RGBA_SRGB,
};
// Missing formats:
-// PixelFormat::RGBA16I
-// PixelFormat::RG32I,
// COMPRESSED_RGB_S3TC_DXT1_EXT
// COMPRESSED_SRGB_S3TC_DXT1_EXT
// COMPRESSED_RGBA_S3TC_DXT1_EXT
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index d1082566d..51766349b 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#pragma once
+
#include <array>
#include <bitset>
#include <cstddef>
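
[editor's note] The renamed view classes in compatible_formats.cpp mirror the
ARB_texture_view compatibility tables: two formats may alias the same texture memory only if
they belong to the same class. A hedged sketch of how such membership tables are typically
consumed (the enum values and the InClass32 helper are illustrative, not this repository's
actual API):

    #include <array>

    enum class PixelFormat { R32_FLOAT, R32_UINT, R16G16_FLOAT };

    constexpr std::array VIEW_CLASS_32_BITS = {
        PixelFormat::R32_FLOAT,
        PixelFormat::R32_UINT,
        PixelFormat::R16G16_FLOAT,
    };

    constexpr bool InClass32(PixelFormat format) {
        for (const PixelFormat f : VIEW_CLASS_32_BITS) {
            if (f == format) {
                return true;
            }
        }
        return false;
    }

    // Two formats are 32-bit view compatible when both appear in the class.
    static_assert(InClass32(PixelFormat::R32_FLOAT) && InClass32(PixelFormat::R16G16_FLOAT));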
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index ff10ff40d..6e50661a3 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,13 @@
namespace Tegra::Engines {
-Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
+Fermi2D::Fermi2D() = default;
+
+Fermi2D::~Fermi2D() = default;
+
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
@@ -87,7 +93,7 @@ void Fermi2D::HandleSurfaceCopy() {
copy_config.src_rect = src_rect;
copy_config.dst_rect = dst_rect;
- if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
+ if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
UNIMPLEMENTED();
}
}
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 8f37d053f..213abfaae 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -34,8 +34,11 @@ namespace Tegra::Engines {
class Fermi2D final : public EngineInterface {
public:
- explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
- ~Fermi2D() = default;
+ explicit Fermi2D();
+ ~Fermi2D();
+
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
@@ -149,7 +152,7 @@ public:
};
private:
- VideoCore::RasterizerInterface& rasterizer;
+ VideoCore::RasterizerInterface* rasterizer;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
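
[editor's note] Fermi2D (and, below, KeplerCompute and Maxwell3D) switches from taking the
rasterizer in its constructor to a deferred BindRasterizer call, so the GPU and its engines
can be constructed before any renderer exists. A minimal sketch of the pattern with
placeholder names:

    class RasterizerInterface {
    public:
        virtual ~RasterizerInterface() = default;
        virtual void Clear() = 0;
    };

    class Engine {
    public:
        /// Called once, after the rasterizer has been constructed.
        void BindRasterizer(RasterizerInterface& rasterizer_) {
            rasterizer = &rasterizer_;
        }

        void ProcessClear() {
            rasterizer->Clear(); // assumes BindRasterizer already ran
        }

    private:
        RasterizerInterface* rasterizer = nullptr;
    };

The cost is a pointer that stays null until binding; every former reference use becomes
rasterizer->, exactly as the hunks above show.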
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a82b06a38..898370739 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -16,14 +16,15 @@
namespace Tegra::Engines {
-KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
- memory_manager,
- regs.upload} {}
+KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
+ : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
KeplerCompute::~KeplerCompute() = default;
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
+
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerCompute register, increase the size of the Regs structure");
@@ -104,11 +105,11 @@ SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
}
VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
void KeplerCompute::ProcessLaunch() {
@@ -119,7 +120,7 @@ void KeplerCompute::ProcessLaunch() {
const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
- rasterizer.DispatchCompute(code_addr);
+ rasterizer->DispatchCompute(code_addr);
}
Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index b7f668d88..7f2500aab 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -42,10 +42,12 @@ namespace Tegra::Engines {
class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
public:
- explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager);
+ explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
~KeplerCompute();
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+
static constexpr std::size_t NumConstBuffers = 8;
struct Regs {
@@ -230,11 +232,6 @@ public:
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
private:
- Core::System& system;
- VideoCore::RasterizerInterface& rasterizer;
- MemoryManager& memory_manager;
- Upload::State upload_state;
-
void ProcessLaunch();
/// Retrieves information about a specific TIC entry from the TIC buffer.
@@ -242,6 +239,11 @@ private:
/// Retrieves information about a specific TSC entry from the TSC buffer.
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
+
+ Core::System& system;
+ MemoryManager& memory_manager;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+ Upload::State upload_state;
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c01436295..33854445f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,14 +22,19 @@ using VideoCore::QueryType;
/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;
-Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
- macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
+Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
+ : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)},
+ upload_state{memory_manager, regs.upload} {
dirty.flags.flip();
InitializeRegisterDefaults();
}
+Maxwell3D::~Maxwell3D() = default;
+
+void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
+
void Maxwell3D::InitializeRegisterDefaults() {
// Initializes registers to their default values - what games expect them to be at boot. This is
// for certain registers that may not be explicitly set by games.
@@ -192,7 +197,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
switch (method) {
case MAXWELL3D_REG_INDEX(wait_for_idle): {
- rasterizer.WaitForIdle();
+ rasterizer->WaitForIdle();
break;
}
case MAXWELL3D_REG_INDEX(shadow_ram_control): {
@@ -402,7 +407,7 @@ void Maxwell3D::FlushMMEInlineDraw() {
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
if (ShouldExecute()) {
- rasterizer.Draw(is_indexed, true);
+ rasterizer->Draw(is_indexed, true);
}
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -465,7 +470,7 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release:
if (regs.query.query_get.fence == 1) {
- rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
+ rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
} else {
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
}
@@ -533,7 +538,7 @@ void Maxwell3D::ProcessQueryCondition() {
void Maxwell3D::ProcessCounterReset() {
switch (regs.counter_reset) {
case Regs::CounterReset::SampleCnt:
- rasterizer.ResetCounter(QueryType::SamplesPassed);
+ rasterizer->ResetCounter(QueryType::SamplesPassed);
break;
default:
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
@@ -547,7 +552,7 @@ void Maxwell3D::ProcessSyncPoint() {
const u32 increment = regs.sync_info.increment.Value();
[[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
if (increment) {
- rasterizer.SignalSyncPoint(sync_point);
+ rasterizer->SignalSyncPoint(sync_point);
}
}
@@ -570,7 +575,7 @@ void Maxwell3D::DrawArrays() {
const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
if (ShouldExecute()) {
- rasterizer.Draw(is_indexed, false);
+ rasterizer->Draw(is_indexed, false);
}
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -590,8 +595,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
return 0;
case Regs::QuerySelect::SamplesPassed:
// Deferred.
- rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
- system.GPU().GetTicks());
+ rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+ system.GPU().GetTicks());
return {};
default:
LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
@@ -718,7 +723,7 @@ void Maxwell3D::ProcessClearBuffers() {
regs.clear_buffers.R == regs.clear_buffers.B &&
regs.clear_buffers.R == regs.clear_buffers.A);
- rasterizer.Clear();
+ rasterizer->Clear();
}
u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
@@ -752,11 +757,11 @@ SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
}
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
} // namespace Tegra::Engines
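
[editor's note] One detail worth noting in the constructor above: dirty.flags.flip() starts
with every dirty bit set, so the first draw re-uploads all tracked state instead of trusting
host-side defaults. In bitset terms (a trivial illustration, not project code):

    #include <bitset>

    std::bitset<16> flags;  // default-constructed: all bits clear ("clean")
    flags.flip();           // all bits set ("dirty"): first draw uploads everything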
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ef1618990..bc289c55d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -51,9 +51,11 @@ namespace Tegra::Engines {
class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
public:
- explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager);
- ~Maxwell3D() = default;
+ explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
+ ~Maxwell3D();
+
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -647,7 +649,7 @@ public:
GetX() + GetWidth(), // right
GetY() // bottom
};
- };
+ }
f32 GetX() const {
return std::max(0.0f, translate_x - std::fabs(scale_x));
@@ -1418,12 +1420,12 @@ public:
return execute_on;
}
- VideoCore::RasterizerInterface& GetRasterizer() {
- return rasterizer;
+ VideoCore::RasterizerInterface& Rasterizer() {
+ return *rasterizer;
}
- const VideoCore::RasterizerInterface& GetRasterizer() const {
- return rasterizer;
+ const VideoCore::RasterizerInterface& Rasterizer() const {
+ return *rasterizer;
}
/// Notify a memory write has happened.
@@ -1460,11 +1462,10 @@ private:
void InitializeRegisterDefaults();
Core::System& system;
-
- VideoCore::RasterizerInterface& rasterizer;
-
MemoryManager& memory_manager;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+
/// Start offsets of each macro in macro_memory
std::array<u32, 0x80> macro_positions = {};
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 01d7df405..e88290754 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -14,50 +14,45 @@
namespace Tegra::Engines {
+using namespace Texture;
+
MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
: system{system}, memory_manager{memory_manager} {}
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
- ASSERT_MSG(method < Regs::NUM_REGS,
- "Invalid MaxwellDMA register, increase the size of the Regs structure");
+ ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
regs.reg_array[method] = method_argument;
-#define MAXWELLDMA_REG_INDEX(field_name) \
- (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
-
- switch (method) {
- case MAXWELLDMA_REG_INDEX(exec): {
- HandleCopy();
- break;
- }
+ if (method == offsetof(Regs, launch_dma) / sizeof(u32)) {
+ Launch();
}
-
-#undef MAXWELLDMA_REG_INDEX
}
void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) {
- for (std::size_t i = 0; i < amount; i++) {
+ for (size_t i = 0; i < amount; ++i) {
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
}
}
-void MaxwellDMA::HandleCopy() {
- LOG_TRACE(HW_GPU, "Requested a DMA copy");
-
- const GPUVAddr source = regs.src_address.Address();
- const GPUVAddr dest = regs.dst_address.Address();
+void MaxwellDMA::Launch() {
+ LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
+ static_cast<GPUVAddr>(regs.offset_out));
// TODO(Subv): Perform more research and implement all features of this engine.
- ASSERT(regs.exec.enable_swizzle == 0);
- ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
- ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
- ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
- ASSERT(regs.dst_params.pos_x == 0);
- ASSERT(regs.dst_params.pos_y == 0);
-
- if (!regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
+ const LaunchDMA& launch = regs.launch_dma;
+ ASSERT(launch.remap_enable == 0);
+ ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
+ ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
+ ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
+ ASSERT(regs.dst_params.origin.x == 0);
+ ASSERT(regs.dst_params.origin.y == 0);
+
+ const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+ const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+
+ if (!is_src_pitch && !is_dst_pitch) {
// If both the source and the destination are in block layout, assert.
UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented");
return;
@@ -66,144 +61,156 @@ void MaxwellDMA::HandleCopy() {
// All copies here update the main memory, so mark all rasterizer states as invalid.
system.GPU().Maxwell3D().OnMemoryWrite();
- if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
- // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
- // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
- // y_count).
- if (!regs.exec.enable_2d) {
- memory_manager.CopyBlock(dest, source, regs.x_count);
- return;
- }
+ if (is_src_pitch && is_dst_pitch) {
+ CopyPitchToPitch();
+ } else {
+ ASSERT(launch.multi_line_enable == 1);
- // If both the source and the destination are in linear layout, perform a line-by-line
- // copy. We're going to take a subrect of size (x_count, y_count) from the source
- // rectangle. There is no need to manually flush/invalidate the regions because
- // CopyBlock does that for us.
- for (u32 line = 0; line < regs.y_count; ++line) {
- const GPUVAddr source_line = source + line * regs.src_pitch;
- const GPUVAddr dest_line = dest + line * regs.dst_pitch;
- memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
+ if (!is_src_pitch && is_dst_pitch) {
+ CopyBlockLinearToPitch();
+ } else {
+ CopyPitchToBlockLinear();
}
- return;
}
+}
- ASSERT(regs.exec.enable_2d == 1);
-
- if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
-
- ASSERT(regs.src_params.BlockDepth() == 0);
- // Optimized path for micro copies.
- if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) {
- const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
- const std::size_t src_size = Texture::GetGOBSize();
- const std::size_t dst_size = regs.dst_pitch * regs.y_count;
- u32 pos_x = regs.src_params.pos_x;
- u32 pos_y = regs.src_params.pos_y;
- const u64 offset =
- Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y,
- regs.src_params.BlockDepth(), bytes_per_pixel);
- const u32 x_in_gob = 64 / bytes_per_pixel;
- pos_x = pos_x % x_in_gob;
- pos_y = pos_y % 8;
-
- if (read_buffer.size() < src_size) {
- read_buffer.resize(src_size);
- }
-
- if (write_buffer.size() < dst_size) {
- write_buffer.resize(dst_size);
- }
-
- if (Settings::IsGPULevelExtreme()) {
- memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size);
- memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
- } else {
- memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size);
- memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
- }
-
- Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
- regs.src_params.size_x, bytes_per_pixel, read_buffer.data(),
- write_buffer.data(), regs.src_params.BlockHeight(), pos_x,
- pos_y);
-
- memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
-
- return;
- }
- // If the input is tiled and the output is linear, deswizzle the input and copy it over.
- const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
- const std::size_t src_size = Texture::CalculateSize(
- true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
- regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
-
- const std::size_t src_layer_size = Texture::CalculateSize(
- true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1,
- regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
+void MaxwellDMA::CopyPitchToPitch() {
+ // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D
+ // buffer of length `line_length_in`.
+ // Otherwise we copy a 2D image of dimensions (line_length_in, line_count).
+ if (!regs.launch_dma.multi_line_enable) {
+ memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in);
+ return;
+ }
- const std::size_t dst_size = regs.dst_pitch * regs.y_count;
+ // Perform a line-by-line copy.
+ // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle.
+ // There is no need to manually flush/invalidate the regions because CopyBlock does that for us.
+ for (u32 line = 0; line < regs.line_count; ++line) {
+ const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
+ const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
+ memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
+ }
+}
- if (read_buffer.size() < src_size) {
- read_buffer.resize(src_size);
- }
+void MaxwellDMA::CopyBlockLinearToPitch() {
+ UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
+ UNIMPLEMENTED_IF(regs.src_params.layer != 0);
- if (write_buffer.size() < dst_size) {
- write_buffer.resize(dst_size);
- }
+ // Optimized path for micro copies.
+ const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
+ if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) {
+ FastCopyBlockLinearToPitch();
+ return;
+ }
- if (Settings::IsGPULevelExtreme()) {
- memory_manager.ReadBlock(source, read_buffer.data(), src_size);
- memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
- } else {
- memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
- memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
- }
+ // Deswizzle the input and copy it over.
+ const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
+ const Parameters& src_params = regs.src_params;
+ const u32 width = src_params.width;
+ const u32 height = src_params.height;
+ const u32 depth = src_params.depth;
+ const u32 block_height = src_params.block_size.height;
+ const u32 block_depth = src_params.block_size.depth;
+ const size_t src_size =
+ CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
+ const size_t src_layer_size =
+ CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
+
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
- Texture::UnswizzleSubrect(
- regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
- read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(),
- regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y);
+ memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
- memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
- } else {
- ASSERT(regs.dst_params.BlockDepth() == 0);
+ UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel,
+ block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(),
+ read_buffer.data());
- const u32 bytes_per_pixel = regs.src_pitch / regs.x_count;
+ memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+}
- const std::size_t dst_size = Texture::CalculateSize(
- true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
- regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
+void MaxwellDMA::CopyPitchToBlockLinear() {
+ const auto& dst_params = regs.dst_params;
+ const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
+ const u32 width = dst_params.width;
+ const u32 height = dst_params.height;
+ const u32 depth = dst_params.depth;
+ const u32 block_height = dst_params.block_size.height;
+ const u32 block_depth = dst_params.block_size.depth;
+ const size_t dst_size =
+ CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
+ const size_t dst_layer_size =
+ CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
+
+ const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
+
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
- const std::size_t dst_layer_size = Texture::CalculateSize(
- true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
- regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
+ if (Settings::IsGPULevelExtreme()) {
+ memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
+ } else {
+ memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
+ memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
+ }
- const std::size_t src_size = regs.src_pitch * regs.y_count;
+ // If the input is linear and the output is tiled, swizzle the input and copy it over.
+ if (regs.dst_params.block_size.depth > 0) {
+ ASSERT(dst_params.layer == 0);
+ SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height,
+ bytes_per_pixel, block_height, block_depth, dst_params.origin.x,
+ dst_params.origin.y, write_buffer.data(), read_buffer.data());
+ } else {
+ SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel,
+ write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(),
+ block_height, dst_params.origin.x, dst_params.origin.y);
+ }
- if (read_buffer.size() < src_size) {
- read_buffer.resize(src_size);
- }
+ memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+}
- if (write_buffer.size() < dst_size) {
- write_buffer.resize(dst_size);
- }
+void MaxwellDMA::FastCopyBlockLinearToPitch() {
+ const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
+ const size_t src_size = GOB_SIZE;
+ const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
+ u32 pos_x = regs.src_params.origin.x;
+ u32 pos_y = regs.src_params.origin.y;
+ const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y,
+ regs.src_params.block_size.height, bytes_per_pixel);
+ const u32 x_in_gob = 64 / bytes_per_pixel;
+ pos_x = pos_x % x_in_gob;
+ pos_y = pos_y % 8;
+
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
- if (Settings::IsGPULevelExtreme()) {
- memory_manager.ReadBlock(source, read_buffer.data(), src_size);
- memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
- } else {
- memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
- memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
- }
+ if (Settings::IsGPULevelExtreme()) {
+ memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
+ } else {
+ memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size);
+ memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
+ }
- // If the input is linear and the output is tiled, swizzle the input and copy it over.
- Texture::SwizzleSubrect(
- regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel,
- write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(),
- regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y);
+ UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width,
+ bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y,
+ write_buffer.data(), read_buffer.data());
- memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
- }
+ memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
} // namespace Tegra::Engines
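
[editor's note] The micro-copy fast path above hinges on GOB geometry: a GOB (group of
bytes) is the 64-byte-wide, 8-row tile from which block-linear surfaces are built. A short
sketch of the constants the code leans on (values inferred from the GOB_SIZE_X comparison
and the pos_x % (64 / bytes_per_pixel) and pos_y % 8 arithmetic above):

    using u32 = unsigned int;

    constexpr u32 GOB_SIZE_X = 64;                    // bytes per GOB row
    constexpr u32 GOB_SIZE_Y = 8;                     // rows per GOB
    constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y; // 512 bytes total

    // A destination no wider than a GOB row (pitch_out <= GOB_SIZE_X) and
    // smaller than one GOB (pitch_out * line_count < GOB_SIZE) can be
    // unswizzled from a single 512-byte GOB read, which is what
    // FastCopyBlockLinearToPitch exploits.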
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 502dd8509..50f445efc 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -24,160 +24,190 @@ class MemoryManager;
namespace Tegra::Engines {
/**
- * This Engine is known as GK104_Copy. Documentation can be found in:
+ * This engine is known as gk104_copy. Documentation can be found in:
+ * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
* https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
*/
class MaxwellDMA final : public EngineInterface {
public:
- explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
- ~MaxwellDMA() = default;
-
- /// Write the value to the register identified by method.
- void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
-
- /// Write multiple values to the register identified by method.
- void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
- u32 methods_pending) override;
+ struct PackedGPUVAddr {
+ u32 upper;
+ u32 lower;
+
+ constexpr operator GPUVAddr() const noexcept {
+ return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower;
+ }
+ };
+
+ union BlockSize {
+ BitField<0, 4, u32> width;
+ BitField<4, 4, u32> height;
+ BitField<8, 4, u32> depth;
+ BitField<12, 4, u32> gob_height;
+ };
+ static_assert(sizeof(BlockSize) == 4);
+
+ union Origin {
+ BitField<0, 16, u32> x;
+ BitField<16, 16, u32> y;
+ };
+ static_assert(sizeof(Origin) == 4);
+
+ struct Parameters {
+ BlockSize block_size;
+ u32 width;
+ u32 height;
+ u32 depth;
+ u32 layer;
+ Origin origin;
+ };
+ static_assert(sizeof(Parameters) == 24);
+
+ struct Semaphore {
+ PackedGPUVAddr address;
+ u32 payload;
+ };
+ static_assert(sizeof(Semaphore) == 12);
+
+ struct RenderEnable {
+ enum class Mode : u32 {
+ FALSE = 0,
+ TRUE = 1,
+ CONDITIONAL = 2,
+ RENDER_IF_EQUAL = 3,
+ RENDER_IF_NOT_EQUAL = 4,
+ };
- struct Regs {
- static constexpr std::size_t NUM_REGS = 0x1D6;
+ PackedGPUVAddr address;
+ BitField<0, 3, Mode> mode;
+ };
+ static_assert(sizeof(RenderEnable) == 12);
+
+ enum class PhysModeTarget : u32 {
+ LOCAL_FB = 0,
+ COHERENT_SYSMEM = 1,
+ NONCOHERENT_SYSMEM = 2,
+ };
+ using PhysMode = BitField<0, 2, PhysModeTarget>;
+
+ union LaunchDMA {
+ enum class DataTransferType : u32 {
+ NONE = 0,
+ PIPELINED = 1,
+ NON_PIPELINED = 2,
+ };
- struct Parameters {
- union {
- BitField<0, 4, u32> block_depth;
- BitField<4, 4, u32> block_height;
- BitField<8, 4, u32> block_width;
- };
- u32 size_x;
- u32 size_y;
- u32 size_z;
- u32 pos_z;
- union {
- BitField<0, 16, u32> pos_x;
- BitField<16, 16, u32> pos_y;
- };
+ enum class SemaphoreType : u32 {
+ NONE = 0,
+ RELEASE_ONE_WORD_SEMAPHORE = 1,
+ RELEASE_FOUR_WORD_SEMAPHORE = 2,
+ };
- u32 BlockHeight() const {
- return block_height.Value();
- }
+ enum class InterruptType : u32 {
+ NONE = 0,
+ BLOCKING = 1,
+ NON_BLOCKING = 2,
+ };
- u32 BlockDepth() const {
- return block_depth.Value();
- }
+ enum class MemoryLayout : u32 {
+ BLOCKLINEAR = 0,
+ PITCH = 1,
};
- static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
+ enum class Type : u32 {
+ VIRTUAL = 0,
+ PHYSICAL = 1,
+ };
- enum class ComponentMode : u32 {
- Src0 = 0,
- Src1 = 1,
- Src2 = 2,
- Src3 = 3,
- Const0 = 4,
- Const1 = 5,
- Zero = 6,
+ enum class SemaphoreReduction : u32 {
+ IMIN = 0,
+ IMAX = 1,
+ IXOR = 2,
+ IAND = 3,
+ IOR = 4,
+ IADD = 5,
+ INC = 6,
+ DEC = 7,
+ FADD = 0xA,
};
- enum class CopyMode : u32 {
- None = 0,
- Unk1 = 1,
- Unk2 = 2,
+ enum class SemaphoreReductionSign : u32 {
+ SIGNED = 0,
+ UNSIGNED = 1,
};
- enum class QueryMode : u32 {
- None = 0,
- Short = 1,
- Long = 2,
+ enum class BypassL2 : u32 {
+ USE_PTE_SETTING = 0,
+ FORCE_VOLATILE = 1,
};
- enum class QueryIntr : u32 {
- None = 0,
- Block = 1,
- NonBlock = 2,
+ BitField<0, 2, DataTransferType> data_transfer_type;
+ BitField<2, 1, u32> flush_enable;
+ BitField<3, 2, SemaphoreType> semaphore_type;
+ BitField<5, 2, InterruptType> interrupt_type;
+ BitField<7, 1, MemoryLayout> src_memory_layout;
+ BitField<8, 1, MemoryLayout> dst_memory_layout;
+ BitField<9, 1, u32> multi_line_enable;
+ BitField<10, 1, u32> remap_enable;
+ BitField<11, 1, u32> rmwdisable;
+ BitField<12, 1, Type> src_type;
+ BitField<13, 1, Type> dst_type;
+ BitField<14, 4, SemaphoreReduction> semaphore_reduction;
+ BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign;
+ BitField<19, 1, u32> reduction_enable;
+ BitField<20, 1, BypassL2> bypass_l2;
+ };
+ static_assert(sizeof(LaunchDMA) == 4);
+
+ struct RemapConst {
+ enum Swizzle : u32 {
+ SRC_X = 0,
+ SRC_Y = 1,
+ SRC_Z = 2,
+ SRC_W = 3,
+ CONST_A = 4,
+ CONST_B = 5,
+ NO_WRITE = 6,
};
- union {
- struct {
- INSERT_UNION_PADDING_WORDS(0xC0);
-
- struct {
- union {
- BitField<0, 2, CopyMode> copy_mode;
- BitField<2, 1, u32> flush;
-
- BitField<3, 2, QueryMode> query_mode;
- BitField<5, 2, QueryIntr> query_intr;
-
- BitField<7, 1, u32> is_src_linear;
- BitField<8, 1, u32> is_dst_linear;
-
- BitField<9, 1, u32> enable_2d;
- BitField<10, 1, u32> enable_swizzle;
- };
- } exec;
-
- INSERT_UNION_PADDING_WORDS(0x3F);
-
- struct {
- u32 address_high;
- u32 address_low;
-
- GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
- } src_address;
-
- struct {
- u32 address_high;
- u32 address_low;
-
- GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
- } dst_address;
-
- u32 src_pitch;
- u32 dst_pitch;
- u32 x_count;
- u32 y_count;
-
- INSERT_UNION_PADDING_WORDS(0xB8);
-
- u32 const0;
- u32 const1;
- union {
- BitField<0, 4, ComponentMode> component0;
- BitField<4, 4, ComponentMode> component1;
- BitField<8, 4, ComponentMode> component2;
- BitField<12, 4, ComponentMode> component3;
- BitField<16, 2, u32> component_size;
- BitField<20, 3, u32> src_num_components;
- BitField<24, 3, u32> dst_num_components;
-
- u32 SrcBytePerPixel() const {
- return src_num_components.Value() * component_size.Value();
- }
- u32 DstBytePerPixel() const {
- return dst_num_components.Value() * component_size.Value();
- }
- } swizzle_config;
+ PackedGPUVAddr address;
- Parameters dst_params;
+ union {
+ BitField<0, 3, Swizzle> dst_x;
+ BitField<4, 3, Swizzle> dst_y;
+ BitField<8, 3, Swizzle> dst_z;
+ BitField<12, 3, Swizzle> dst_w;
+ BitField<16, 2, u32> component_size_minus_one;
+ BitField<20, 2, u32> num_src_components_minus_one;
+ BitField<24, 2, u32> num_dst_components_minus_one;
+ };
+ };
+ static_assert(sizeof(RemapConst) == 12);
- INSERT_UNION_PADDING_WORDS(1);
+ explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
+ ~MaxwellDMA() = default;
- Parameters src_params;
+ /// Write the value to the register identified by method.
+ void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
- INSERT_UNION_PADDING_WORDS(0x13);
- };
- std::array<u32, NUM_REGS> reg_array;
- };
- } regs{};
+ /// Write multiple values to the register identified by method.
+ void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+ u32 methods_pending) override;
private:
+ /// Performs the copy from the source buffer to the destination buffer as configured in the
+ /// registers.
+ void Launch();
+
+ void CopyPitchToPitch();
+
+ void CopyBlockLinearToPitch();
+
+ void CopyPitchToBlockLinear();
+
+ void FastCopyBlockLinearToPitch();
+
Core::System& system;
MemoryManager& memory_manager;
@@ -185,28 +215,58 @@ private:
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;
- /// Performs the copy from the source buffer to the destination buffer as configured in the
- /// registers.
- void HandleCopy();
-};
+ static constexpr std::size_t NUM_REGS = 0x800;
+ struct Regs {
+ union {
+ struct {
+ u32 reserved[0x40];
+ u32 nop;
+ u32 reserved01[0xf];
+ u32 pm_trigger;
+ u32 reserved02[0x3f];
+ Semaphore semaphore;
+ u32 reserved03[0x2];
+ RenderEnable render_enable;
+ PhysMode src_phys_mode;
+ PhysMode dst_phys_mode;
+ u32 reserved04[0x26];
+ LaunchDMA launch_dma;
+ u32 reserved05[0x3f];
+ PackedGPUVAddr offset_in;
+ PackedGPUVAddr offset_out;
+ u32 pitch_in;
+ u32 pitch_out;
+ u32 line_length_in;
+ u32 line_count;
+ u32 reserved06[0xb8];
+ RemapConst remap_const;
+ Parameters dst_params;
+ u32 reserved07[0x1];
+ Parameters src_params;
+ u32 reserved08[0x275];
+ u32 pm_trigger_end;
+ u32 reserved09[0x3ba];
+ };
+ std::array<u32, NUM_REGS> reg_array;
+ };
+ } regs{};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(exec, 0xC0);
-ASSERT_REG_POSITION(src_address, 0x100);
-ASSERT_REG_POSITION(dst_address, 0x102);
-ASSERT_REG_POSITION(src_pitch, 0x104);
-ASSERT_REG_POSITION(dst_pitch, 0x105);
-ASSERT_REG_POSITION(x_count, 0x106);
-ASSERT_REG_POSITION(y_count, 0x107);
-ASSERT_REG_POSITION(const0, 0x1C0);
-ASSERT_REG_POSITION(const1, 0x1C1);
-ASSERT_REG_POSITION(swizzle_config, 0x1C2);
-ASSERT_REG_POSITION(dst_params, 0x1C3);
-ASSERT_REG_POSITION(src_params, 0x1CA);
+ ASSERT_REG_POSITION(launch_dma, 0xC0);
+ ASSERT_REG_POSITION(offset_in, 0x100);
+ ASSERT_REG_POSITION(offset_out, 0x102);
+ ASSERT_REG_POSITION(pitch_in, 0x104);
+ ASSERT_REG_POSITION(pitch_out, 0x105);
+ ASSERT_REG_POSITION(line_length_in, 0x106);
+ ASSERT_REG_POSITION(line_count, 0x107);
+ ASSERT_REG_POSITION(remap_const, 0x1C0);
+ ASSERT_REG_POSITION(dst_params, 0x1C3);
+ ASSERT_REG_POSITION(src_params, 0x1CA);
#undef ASSERT_REG_POSITION
+};
} // namespace Tegra::Engines
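
[editor's note] PackedGPUVAddr above captures how the engine stores 40-bit GPU virtual
addresses as two 32-bit registers: only the low 8 bits of the upper word are significant.
A self-contained sketch of the conversion:

    #include <cstdint>

    using GPUVAddr = std::uint64_t;

    struct PackedGPUVAddr {
        std::uint32_t upper; // bits 39:32 live in the low byte
        std::uint32_t lower; // bits 31:0
        constexpr operator GPUVAddr() const noexcept {
            return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower;
        }
    };

    // 0x1ff masks down to 0xff: addresses are capped at 40 bits.
    static_assert(static_cast<GPUVAddr>(PackedGPUVAddr{0x1ff, 0xdeadbeef}) ==
                  0xffdeadbeefULL);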
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 8b2a6a42c..de6991ef6 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -5,15 +5,10 @@
#pragma once
#include <algorithm>
-#include <array>
-#include <memory>
#include <queue>
-#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
-#include "core/memory.h"
-#include "core/settings.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -79,8 +74,6 @@ public:
}
void WaitPendingFences() {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait()) {
@@ -88,8 +81,8 @@ public:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
- memory_manager.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
+ gpu_memory.template Write<u32>(current_fence->GetAddress(),
+ current_fence->GetPayload());
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
@@ -98,13 +91,13 @@ public:
}
protected:
- FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- TTextureCache& texture_cache, TTBufferCache& buffer_cache,
- TQueryCache& query_cache)
- : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
- buffer_cache{buffer_cache}, query_cache{query_cache} {}
+ explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
+ TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
+ TQueryCache& query_cache_)
+ : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
+ texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
- virtual ~FenceManager() {}
+ virtual ~FenceManager() = default;
/// Creates a Sync Point Fence Interface; does not create a backend fence if
/// 'is_stubbed' is true.
@@ -118,16 +111,15 @@ protected:
/// Waits until a fence has been signalled by the host GPU.
virtual void WaitFence(TFence& fence) = 0;
- Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
TTextureCache& texture_cache;
TTBufferCache& buffer_cache;
TQueryCache& query_cache;
private:
void TryReleasePendingFences() {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
@@ -135,8 +127,8 @@ private:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
- memory_manager.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
+ gpu_memory.template Write<u32>(current_fence->GetAddress(),
+ current_fence->GetPayload());
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
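
One subtlety the rewrite above keeps is the '.template' disambiguator in
gpu_memory.template Write<u32>(...). It is only mandatory when the object's
type depends on a template parameter, as in this reduced sketch (types are
hypothetical):

    template <typename MemoryManager>
    struct FenceReleaser {
        MemoryManager& mem;

        void Signal(unsigned long long addr, unsigned payload) {
            // 'mem' has a dependent type, so without 'template' the parser
            // would read 'Write < unsigned' as a comparison rather than a
            // member-template call.
            mem.template Write<unsigned>(addr, payload);
        }
    };

Since gpu_memory is now a concrete Tegra::MemoryManager&, the keyword is
harmless but no longer strictly required.
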
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 758bfe148..4bb9256e9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -20,26 +20,35 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
+#include "video_core/shader_notify.h"
#include "video_core/video_core.h"
namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async)
- : system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
- auto& rasterizer{renderer->Rasterizer()};
- memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
- dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this);
- maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
- fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
- kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
- maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
- kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
-}
+GPU::GPU(Core::System& system_, bool is_async_)
+ : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
+ dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
+ maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
+ fermi_2d{std::make_unique<Engines::Fermi2D>()},
+ kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
+ maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
+ kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+ shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
GPU::~GPU() = default;
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+ renderer = std::move(renderer_);
+
+ VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
+ memory_manager->BindRasterizer(rasterizer);
+ maxwell_3d->BindRasterizer(rasterizer);
+ fermi_2d->BindRasterizer(rasterizer);
+ kepler_compute->BindRasterizer(rasterizer);
+}
+
Engines::Maxwell3D& GPU::Maxwell3D() {
return *maxwell_3d;
}
@@ -79,7 +88,7 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) {
}
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{sync_mutex};
- sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
+ sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
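
The capture change in WaitFence above ('[=]' to '[=, this]') follows C++20's
deprecation of implicitly capturing 'this' through '[=]'. A reduced sketch of
the wait/signal pair, assuming the same member names as the class:

    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    struct SyncPointExample {
        std::mutex sync_mutex;
        std::condition_variable sync_cv;
        std::atomic<unsigned> counter{0};

        void Wait(unsigned value) {
            std::unique_lock lock{sync_mutex};
            // The explicit 'this' capture keeps the predicate well-formed
            // under C++20 while still copying 'value'.
            sync_cv.wait(lock, [=, this] { return counter.load() >= value; });
        }

        void Increment() {
            std::lock_guard lock{sync_mutex};
            counter.fetch_add(1);
            sync_cv.notify_all();
        }
    };
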
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 2c42483bd..2d15d1c6f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -33,59 +33,68 @@ class System;
namespace VideoCore {
class RendererBase;
+class ShaderNotify;
} // namespace VideoCore
namespace Tegra {
enum class RenderTargetFormat : u32 {
NONE = 0x0,
- RGBA32_FLOAT = 0xC0,
- RGBA32_UINT = 0xC2,
- RGBA16_UNORM = 0xC6,
- RGBA16_SNORM = 0xC7,
- RGBA16_UINT = 0xC9,
- RGBA16_FLOAT = 0xCA,
- RG32_FLOAT = 0xCB,
- RG32_UINT = 0xCD,
- RGBX16_FLOAT = 0xCE,
- BGRA8_UNORM = 0xCF,
- BGRA8_SRGB = 0xD0,
- RGB10_A2_UNORM = 0xD1,
- RGBA8_UNORM = 0xD5,
- RGBA8_SRGB = 0xD6,
- RGBA8_SNORM = 0xD7,
- RGBA8_UINT = 0xD9,
- RG16_UNORM = 0xDA,
- RG16_SNORM = 0xDB,
- RG16_SINT = 0xDC,
- RG16_UINT = 0xDD,
- RG16_FLOAT = 0xDE,
- R11G11B10_FLOAT = 0xE0,
+ R32B32G32A32_FLOAT = 0xC0,
+ R32G32B32A32_SINT = 0xC1,
+ R32G32B32A32_UINT = 0xC2,
+ R16G16B16A16_UNORM = 0xC6,
+ R16G16B16A16_SNORM = 0xC7,
+ R16G16B16A16_SINT = 0xC8,
+ R16G16B16A16_UINT = 0xC9,
+ R16G16B16A16_FLOAT = 0xCA,
+ R32G32_FLOAT = 0xCB,
+ R32G32_SINT = 0xCC,
+ R32G32_UINT = 0xCD,
+ R16G16B16X16_FLOAT = 0xCE,
+ B8G8R8A8_UNORM = 0xCF,
+ B8G8R8A8_SRGB = 0xD0,
+ A2B10G10R10_UNORM = 0xD1,
+ A2B10G10R10_UINT = 0xD2,
+ A8B8G8R8_UNORM = 0xD5,
+ A8B8G8R8_SRGB = 0xD6,
+ A8B8G8R8_SNORM = 0xD7,
+ A8B8G8R8_SINT = 0xD8,
+ A8B8G8R8_UINT = 0xD9,
+ R16G16_UNORM = 0xDA,
+ R16G16_SNORM = 0xDB,
+ R16G16_SINT = 0xDC,
+ R16G16_UINT = 0xDD,
+ R16G16_FLOAT = 0xDE,
+ B10G11R11_FLOAT = 0xE0,
R32_SINT = 0xE3,
R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
- B5G6R5_UNORM = 0xE8,
- BGR5A1_UNORM = 0xE9,
- RG8_UNORM = 0xEA,
- RG8_SNORM = 0xEB,
- RG8_UINT = 0xED,
+ R5G6B5_UNORM = 0xE8,
+ A1R5G5B5_UNORM = 0xE9,
+ R8G8_UNORM = 0xEA,
+ R8G8_SNORM = 0xEB,
+ R8G8_SINT = 0xEC,
+ R8G8_UINT = 0xED,
R16_UNORM = 0xEE,
R16_SNORM = 0xEF,
R16_SINT = 0xF0,
R16_UINT = 0xF1,
R16_FLOAT = 0xF2,
R8_UNORM = 0xF3,
+ R8_SNORM = 0xF4,
+ R8_SINT = 0xF5,
R8_UINT = 0xF6,
};
enum class DepthFormat : u32 {
- Z32_FLOAT = 0xA,
- Z16_UNORM = 0x13,
- S8_Z24_UNORM = 0x14,
- Z24_X8_UNORM = 0x15,
- Z24_S8_UNORM = 0x16,
- Z24_C8_UNORM = 0x18,
- Z32_S8_X24_FLOAT = 0x19,
+ D32_FLOAT = 0xA,
+ D16_UNORM = 0x13,
+ S8_UINT_Z24_UNORM = 0x14,
+ D24X8_UNORM = 0x15,
+ D24S8_UNORM = 0x16,
+ D24C8_UNORM = 0x18,
+ D32_FLOAT_S8X24_UINT = 0x19,
};
struct CommandListHeader;
@@ -96,9 +105,9 @@ class DebugContext;
*/
struct FramebufferConfig {
enum class PixelFormat : u32 {
- ABGR8 = 1,
- RGB565 = 4,
- BGRA8 = 5,
+ A8B8G8R8_UNORM = 1,
+ RGB565_UNORM = 4,
+ B8G8R8A8_UNORM = 5,
};
VAddr address;
@@ -133,11 +142,6 @@ class MemoryManager;
class GPU {
public:
- explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- bool is_async);
-
- virtual ~GPU();
-
struct MethodCall {
u32 method{};
u32 argument{};
@@ -153,6 +157,12 @@ public:
method_count(method_count) {}
};
+ explicit GPU(Core::System& system, bool is_async);
+ virtual ~GPU();
+
+ /// Binds a renderer to the GPU.
+ void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
+
/// Calls a GPU method.
void CallMethod(const MethodCall& method_call);
@@ -207,6 +217,14 @@ public:
return *renderer;
}
+ VideoCore::ShaderNotify& ShaderNotify() {
+ return *shader_notify;
+ }
+
+ const VideoCore::ShaderNotify& ShaderNotify() const {
+ return *shader_notify;
+ }
+
// Waits for the GPU to finish working
virtual void WaitIdle() const = 0;
@@ -235,7 +253,7 @@ public:
const Tegra::DmaPusher& DmaPusher() const;
struct Regs {
- static constexpr size_t NUM_REGS = 0x100;
+ static constexpr size_t NUM_REGS = 0x40;
union {
struct {
@@ -254,7 +272,7 @@ public:
u32 semaphore_trigger;
INSERT_UNION_PADDING_WORDS(0xC);
- // The puser and the puller share the reference counter, the pusher only has read
+ // The pusher and the puller share the reference counter, the pusher only has read
// access
u32 reference_count;
INSERT_UNION_PADDING_WORDS(0x5);
@@ -328,13 +346,12 @@ private:
bool ExecuteMethodOnEngine(u32 method);
protected:
- std::unique_ptr<Tegra::DmaPusher> dma_pusher;
Core::System& system;
+ std::unique_ptr<Tegra::MemoryManager> memory_manager;
+ std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::unique_ptr<VideoCore::RendererBase> renderer;
private:
- std::unique_ptr<Tegra::MemoryManager> memory_manager;
-
/// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines = {};
/// 3D engine
@@ -347,6 +364,8 @@ private:
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+ /// Shader build notifier
+ std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
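
Removing the renderer from the constructor makes GPU construction two-phase:
the engines are built immediately, and BindRenderer later injects the
rasterizer into the memory manager and each engine. A hedged usage sketch,
where CreateRenderer stands in for whatever factory the frontend uses (it is
not part of this diff):

    auto gpu = std::make_unique<VideoCommon::GPUAsynch>(system);

    // The renderer can now be constructed against a fully-built GPU...
    std::unique_ptr<VideoCore::RendererBase> renderer = CreateRenderer(system, *gpu);

    // ...and binding it rebinds the rasterizer in MemoryManager, Maxwell3D,
    // Fermi2D and KeplerCompute in one call.
    gpu->BindRenderer(std::move(renderer));
    gpu->Start();
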
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 7b855f63e..70a3d5738 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -10,16 +10,14 @@
namespace VideoCommon {
-GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
- : GPU(system, std::move(renderer_), true), gpu_thread{system},
- cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
- gpu_context(std::move(context)) {}
+GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {}
GPUAsynch::~GPUAsynch() = default;
void GPUAsynch::Start() {
- gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
+ gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+ cpu_context = renderer->GetRenderWindow().CreateSharedContext();
+ cpu_context->MakeCurrent();
}
void GPUAsynch::ObtainContext() {
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 15e9f1d38..f89c855a5 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -20,8 +20,7 @@ namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU asynchronously
class GPUAsynch final : public Tegra::GPU {
public:
- explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
+ explicit GPUAsynch(Core::System& system);
~GPUAsynch() override;
void Start() override;
@@ -42,7 +41,6 @@ protected:
private:
GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
- std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context;
};
} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index aaeb9811d..1ca47ddef 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -7,20 +7,18 @@
namespace VideoCommon {
-GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
- : GPU(system, std::move(renderer), false), context{std::move(context)} {}
+GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {}
GPUSynch::~GPUSynch() = default;
void GPUSynch::Start() {}
void GPUSynch::ObtainContext() {
- context->MakeCurrent();
+ renderer->Context().MakeCurrent();
}
void GPUSynch::ReleaseContext() {
- context->DoneCurrent();
+ renderer->Context().DoneCurrent();
}
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 762c20aa5..297258cb1 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -19,8 +19,7 @@ namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
class GPUSynch final : public Tegra::GPU {
public:
- explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
+ explicit GPUSynch(Core::System& system);
~GPUSynch() override;
void Start() override;
@@ -36,9 +35,6 @@ public:
protected:
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
[[maybe_unused]] u32 value) const override {}
-
-private:
- std::unique_ptr<Core::Frontend::GraphicsContext> context;
};
} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 738c6f0c1..bf761abf2 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -44,9 +44,9 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
- } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
+ } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
renderer.Rasterizer().ReleaseFences();
- } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
+ } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
system.GPU().TickWork();
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
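
The change above swaps std::get_if for std::holds_alternative wherever the
returned pointer went unused; a minimal illustration of when each applies:

    #include <variant>

    void Present(int frame);
    void Tick();

    struct SwapBuffersCommand { int frame; };
    struct GPUTickCommand {};
    using CommandData = std::variant<SwapBuffersCommand, GPUTickCommand>;

    void Dispatch(const CommandData& data) {
        if (const auto* swap = std::get_if<SwapBuffersCommand>(&data)) {
            Present(swap->frame); // payload needed: keep get_if
        } else if (std::holds_alternative<GPUTickCommand>(data)) {
            Tick(); // only the alternative's identity matters
        }
    }
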
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
new file mode 100644
index 000000000..aa62363a7
--- /dev/null
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -0,0 +1,43 @@
+set(SHADER_FILES
+ opengl_present.frag
+ opengl_present.vert
+)
+
+set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
+set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
+
+set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
+add_custom_command(
+ OUTPUT
+ ${SHADER_DIR}
+ COMMAND
+ ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR}
+)
+
+set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
+set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
+
+foreach(FILENAME IN ITEMS ${SHADER_FILES})
+ string(REPLACE "." "_" SHADER_NAME ${FILENAME})
+ set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
+ set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
+ add_custom_command(
+ OUTPUT
+ ${HEADER_FILE}
+ COMMAND
+ ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE}
+ MAIN_DEPENDENCY
+ ${SOURCE_FILE}
+ DEPENDS
+ ${HEADER_GENERATOR}
+ ${INPUT_FILE}
+ )
+ set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE})
+endforeach()
+
+add_custom_target(host_shaders
+ DEPENDS
+ ${SHADER_HEADERS}
+ SOURCES
+ ${SHADER_FILES}
+)
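
The target above turns each GLSL source into a generated header exposing a
constexpr std::string_view. Per the naming rules in StringShaderHeader.cmake
below, opengl_present.frag should become HostShaders::OPENGL_PRESENT_FRAG; a
hedged consumption sketch:

    #include <string_view>

    // Generated into ${SHADER_INCLUDE}/video_core/host_shaders/ at build time.
    #include "video_core/host_shaders/opengl_present_frag.h"

    std::string_view PresentFragmentSource() {
        return HostShaders::OPENGL_PRESENT_FRAG;
    }
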
diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake
new file mode 100644
index 000000000..368bce0ed
--- /dev/null
+++ b/src/video_core/host_shaders/StringShaderHeader.cmake
@@ -0,0 +1,11 @@
+set(SOURCE_FILE ${CMAKE_ARGV3})
+set(HEADER_FILE ${CMAKE_ARGV4})
+set(INPUT_FILE ${CMAKE_ARGV5})
+
+get_filename_component(CONTENTS_NAME ${SOURCE_FILE} NAME)
+string(REPLACE "." "_" CONTENTS_NAME ${CONTENTS_NAME})
+string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME)
+
+file(READ ${SOURCE_FILE} CONTENTS)
+
+configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY)
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag
new file mode 100644
index 000000000..8a4cb024b
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present.frag
@@ -0,0 +1,10 @@
+#version 430 core
+
+layout (location = 0) in vec2 frag_tex_coord;
+layout (location = 0) out vec4 color;
+
+layout (binding = 0) uniform sampler2D color_texture;
+
+void main() {
+ color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
+}
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert
new file mode 100644
index 000000000..2235d31a4
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present.vert
@@ -0,0 +1,24 @@
+#version 430 core
+
+out gl_PerVertex {
+ vec4 gl_Position;
+};
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+layout (location = 0) out vec2 frag_tex_coord;
+
+// This is a truncated 3x3 matrix for 2D transformations:
+// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
+// The third column performs translation.
+// The third row could be used for projection, which we don't need in 2D; it is
+// hence implicitly assumed to be [0, 0, 1].
+layout (location = 0) uniform mat3x2 modelview_matrix;
+
+void main() {
+ // Multiply input position by the rotscale part of the matrix and then manually translate by
+ // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
+ // to `vec3(vert_position.xy, 1.0)`
+ gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
+ frag_tex_coord = vert_tex_coord;
+}
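
In equation form, the truncated-matrix trick in main() computes

    \begin{pmatrix} x' \\ y' \end{pmatrix} =
    \begin{pmatrix} m_{00} & m_{01} \\ m_{10} & m_{11} \end{pmatrix}
    \begin{pmatrix} x \\ y \end{pmatrix} +
    \begin{pmatrix} m_{02} \\ m_{12} \end{pmatrix}

where the 2x2 block is mat2(modelview_matrix) and the last column is
modelview_matrix[2]; this matches applying the full 3x3 matrix to (x, y, 1)
and discarding the unused projective row.
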
diff --git a/src/video_core/host_shaders/source_shader.h.in b/src/video_core/host_shaders/source_shader.h.in
new file mode 100644
index 000000000..ccdb0d2a9
--- /dev/null
+++ b/src/video_core/host_shaders/source_shader.h.in
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <string_view>
+
+namespace HostShaders {
+
+constexpr std::string_view @CONTENTS_NAME@ = R"(@CONTENTS@)";
+
+} // namespace HostShaders
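
After StringShaderHeader.cmake substitutes CONTENTS_NAME and CONTENTS, the
template above expands to roughly the following for opengl_present.frag (body
abridged):

    #pragma once

    #include <string_view>

    namespace HostShaders {

    constexpr std::string_view OPENGL_PRESENT_FRAG = R"(#version 430 core
    ...rest of opengl_present.frag verbatim...
    )";

    } // namespace HostShaders
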
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index 4d00b84b0..31ee3440a 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -103,8 +103,9 @@ public:
virtual ~CachedMacro() = default;
/**
* Executes the macro code with the specified input parameters.
- * @param code The macro byte code to execute
+ *
* @param parameters The parameters of the macro
+ * @param method The method to execute
*/
virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0;
};
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 410f99018..df00b57df 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -12,13 +12,11 @@ namespace Tegra {
namespace {
// HLE'd functions
-static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d,
- const std::vector<u32>& parameters) {
+void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
maxwell3d.regs.draw.topology.Assign(
- static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
- ~(0x3ffffff << 26)));
+ static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff));
maxwell3d.regs.vb_base_instance = parameters[5];
maxwell3d.mme_draw.instance_count = instance_count;
maxwell3d.regs.vb_element_base = parameters[3];
@@ -26,15 +24,14 @@ static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d,
maxwell3d.regs.index_array.first = parameters[4];
if (maxwell3d.ShouldExecute()) {
- maxwell3d.GetRasterizer().Draw(true, true);
+ maxwell3d.Rasterizer().Draw(true, true);
}
maxwell3d.regs.index_array.count = 0;
maxwell3d.mme_draw.instance_count = 0;
maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
-static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d,
- const std::vector<u32>& parameters) {
+void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
maxwell3d.regs.vertex_buffer.first = parameters[3];
@@ -45,15 +42,14 @@ static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d,
maxwell3d.mme_draw.instance_count = count;
if (maxwell3d.ShouldExecute()) {
- maxwell3d.GetRasterizer().Draw(false, true);
+ maxwell3d.Rasterizer().Draw(false, true);
}
maxwell3d.regs.vertex_buffer.count = 0;
maxwell3d.mme_draw.instance_count = 0;
maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
-static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
- const std::vector<u32>& parameters) {
+void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
const u32 element_base = parameters[4];
const u32 base_instance = parameters[5];
@@ -69,7 +65,7 @@ static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
maxwell3d.regs.draw.topology.Assign(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
if (maxwell3d.ShouldExecute()) {
- maxwell3d.GetRasterizer().Draw(true, true);
+ maxwell3d.Rasterizer().Draw(true, true);
}
maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
maxwell3d.regs.index_array.count = 0;
@@ -81,12 +77,12 @@ static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
maxwell3d.CallMethodFromMME(0x8e5, 0x0);
maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
-} // namespace
+} // Anonymous namespace
constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
- std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0),
- std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD),
- std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7),
+ {0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
+ {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD},
+ {0x0217920100488FF7, &HLE_0217920100488FF7},
}};
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
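
hle_funcs above keys each native replacement on a 64-bit hash of the uploaded
macro code. A reduced lookup sketch; the surrounding HLEMacro interface is
assumed rather than shown in this diff:

    #include <algorithm>
    #include <cstdint>
    #include <optional>

    std::optional<HLEFunction> FindHLEFunction(std::uint64_t code_hash) {
        const auto it = std::find_if(
            hle_funcs.begin(), hle_funcs.end(),
            [code_hash](const auto& entry) { return entry.first == code_hash; });
        if (it == hle_funcs.end()) {
            return std::nullopt;
        }
        return it->second;
    }
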
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index aa5256419..bd01fd1f2 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -34,7 +34,6 @@ void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 metho
this->parameters = std::make_unique<u32[]>(num_parameters);
}
std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
- this->num_parameters = num_parameters;
// Execute the code until we hit an exit condition.
bool keep_executing = true;
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 07292702f..954b87515 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -14,11 +14,11 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
namespace Tegra {
-static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
-static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
-static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
-static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
-static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
+constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
+constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
+constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
+constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
+constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
STATE,
@@ -419,7 +419,6 @@ void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() {
void MacroJITx64Impl::Compile() {
MICROPROFILE_SCOPE(MacroJitCompile);
- bool keep_executing = true;
labels.fill(Xbyak::Label());
Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
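
The static const to constexpr change for the register constants above works
because Xbyak's register wrappers are literal types; a minimal illustration
with a stand-in type:

    struct Reg64 { int index; }; // stand-in literal type, like Xbyak::Reg64

    static const Reg64 OLD_STATE{3}; // 'const' alone does not guarantee
                                     // compile-time initialization
    constexpr Reg64 NEW_STATE{3};    // guaranteed compile-time constant

    static_assert(NEW_STATE.index == 3, "usable in constant expressions");
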
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index ff5505d12..16b2aaa27 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -4,7 +4,6 @@
#include "common/alignment.h"
#include "common/assert.h"
-#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/process.h"
@@ -15,122 +14,142 @@
namespace Tegra {
-MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
- : rasterizer{rasterizer}, system{system} {
- page_table.Resize(address_space_width, page_bits, false);
-
- // Initialize the map with a single free region covering the entire managed space.
- VirtualMemoryArea initial_vma;
- initial_vma.size = address_space_end;
- vma_map.emplace(initial_vma.base, initial_vma);
-
- UpdatePageTableForVMA(initial_vma);
-}
+MemoryManager::MemoryManager(Core::System& system_)
+ : system{system_}, page_table(page_table_size) {}
MemoryManager::~MemoryManager() = default;
-GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
- const u64 aligned_size{Common::AlignUp(size, page_size)};
- const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
-
- AllocateMemory(gpu_addr, 0, aligned_size);
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
+GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
+ u64 remaining_size{size};
+ for (u64 offset{}; offset < size; offset += page_size) {
+ if (remaining_size < page_size) {
+ SetPageEntry(gpu_addr + offset, page_entry + offset, remaining_size);
+ } else {
+ SetPageEntry(gpu_addr + offset, page_entry + offset);
+ }
+ remaining_size -= page_size;
+ }
return gpu_addr;
}
-GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
- const u64 aligned_size{Common::AlignUp(size, page_size)};
-
- AllocateMemory(gpu_addr, 0, aligned_size);
+GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
+ return UpdateRange(gpu_addr, cpu_addr, size);
+}
- return gpu_addr;
+GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
+ return Map(cpu_addr, *FindFreeRange(size, align), size);
}
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
- const u64 aligned_size{Common::AlignUp(size, page_size)};
- const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
+void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
+ if (!size) {
+ return;
+ }
- MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
- ASSERT(
- system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess());
+ // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+ system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size);
- return gpu_addr;
+ UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
- ASSERT((gpu_addr & page_mask) == 0);
+std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) {
+ for (u64 offset{}; offset < size; offset += page_size) {
+ if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) {
+ return {};
+ }
+ }
- const u64 aligned_size{Common::AlignUp(size, page_size)};
+ return UpdateRange(gpu_addr, PageEntry::State::Allocated, size);
+}
- MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
- ASSERT(
- system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess());
- return gpu_addr;
+GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) {
+ return *AllocateFixed(*FindFreeRange(size, align), size);
}
-GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
- ASSERT((gpu_addr & page_mask) == 0);
+void MemoryManager::TryLockPage(PageEntry page_entry, std::size_t size) {
+ if (!page_entry.IsValid()) {
+ return;
+ }
- const u64 aligned_size{Common::AlignUp(size, page_size)};
- const auto cpu_addr = GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
+ ASSERT(system.CurrentProcess()
+ ->PageTable()
+ .LockForDeviceAddressSpace(page_entry.ToAddress(), size)
+ .IsSuccess());
+}
- // Flush and invalidate through the GPU interface, to be asynchronous if possible.
- system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
+void MemoryManager::TryUnlockPage(PageEntry page_entry, std::size_t size) {
+ if (!page_entry.IsValid()) {
+ return;
+ }
- UnmapRange(gpu_addr, aligned_size);
ASSERT(system.CurrentProcess()
->PageTable()
- .UnlockForDeviceAddressSpace(cpu_addr.value(), size)
+ .UnlockForDeviceAddressSpace(page_entry.ToAddress(), size)
.IsSuccess());
-
- return gpu_addr;
}
-GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const {
- // Find the first Free VMA.
- const VMAHandle vma_handle{
- std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) {
- if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
- return false;
- }
+PageEntry MemoryManager::GetPageEntry(GPUVAddr gpu_addr) const {
+ return page_table[PageEntryIndex(gpu_addr)];
+}
- const VAddr vma_end{vma.second.base + vma.second.size};
- return vma_end > region_start && vma_end >= region_start + size;
- })};
+void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
+ // TODO(bunnei): We should lock/unlock device regions. This currently causes issues due to
+ // improper tracking, but should be fixed in the future.
- if (vma_handle == vma_map.end()) {
- return {};
- }
+ //// Unlock the old page
+ // TryUnlockPage(page_table[PageEntryIndex(gpu_addr)], size);
- return std::max(region_start, vma_handle->second.base);
-}
+ //// Lock the new page
+ // TryLockPage(page_entry, size);
-bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
- return (addr >> page_bits) < page_table.pointers.size();
+ page_table[PageEntryIndex(gpu_addr)] = page_entry;
}
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const {
- if (!IsAddressValid(addr)) {
- return {};
+std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const {
+ if (!align) {
+ align = page_size;
+ } else {
+ align = Common::AlignUp(align, page_size);
}
- const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
- if (cpu_addr) {
- return cpu_addr + (addr & page_mask);
+ u64 available_size{};
+ GPUVAddr gpu_addr{address_space_start};
+ while (gpu_addr + available_size < address_space_size) {
+ if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) {
+ available_size += page_size;
+
+ if (available_size >= size) {
+ return gpu_addr;
+ }
+ } else {
+ gpu_addr += available_size + page_size;
+ available_size = 0;
+
+ const auto remainder{gpu_addr % align};
+ if (remainder) {
+ gpu_addr = (gpu_addr - remainder) + align;
+ }
+ }
}
return {};
}
-template <typename T>
-T MemoryManager::Read(GPUVAddr addr) const {
- if (!IsAddressValid(addr)) {
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
+ const auto page_entry{GetPageEntry(gpu_addr)};
+ if (!page_entry.IsValid()) {
return {};
}
- const u8* page_pointer{GetPointer(addr)};
- if (page_pointer) {
+ return page_entry.ToAddress() + (gpu_addr & page_mask);
+}
+
+template <typename T>
+T MemoryManager::Read(GPUVAddr addr) const {
+ if (auto page_pointer{GetPointer(addr)}; page_pointer) {
// NOTE: Avoid adding any extra logic to this fast-path block
T value;
std::memcpy(&value, page_pointer, sizeof(T));
@@ -144,12 +163,7 @@ T MemoryManager::Read(GPUVAddr addr) const {
template <typename T>
void MemoryManager::Write(GPUVAddr addr, T data) {
- if (!IsAddressValid(addr)) {
- return;
- }
-
- u8* page_pointer{GetPointer(addr)};
- if (page_pointer) {
+ if (auto page_pointer{GetPointer(addr)}; page_pointer) {
// NOTE: Avoid adding any extra logic to this fast-path block
std::memcpy(page_pointer, &data, sizeof(T));
return;
@@ -167,66 +181,49 @@ template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
-u8* MemoryManager::GetPointer(GPUVAddr addr) {
- if (!IsAddressValid(addr)) {
+u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
+ if (!GetPageEntry(gpu_addr).IsValid()) {
return {};
}
- auto& memory = system.Memory();
-
- const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
-
- if (page_addr != 0) {
- return memory.GetPointer(page_addr + (addr & page_mask));
+ const auto address{GpuToCpuAddress(gpu_addr)};
+ if (!address) {
+ return {};
}
- LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
- return {};
+ return system.Memory().GetPointer(*address);
}
-const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
- if (!IsAddressValid(addr)) {
+const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
+ if (!GetPageEntry(gpu_addr).IsValid()) {
return {};
}
- const auto& memory = system.Memory();
-
- const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
-
- if (page_addr != 0) {
- return memory.GetPointer(page_addr + (addr & page_mask));
+ const auto address{GpuToCpuAddress(gpu_addr)};
+ if (!address) {
+ return {};
}
- LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
- return {};
-}
-
-bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
- const std::size_t inner_size = size - 1;
- const GPUVAddr end = start + inner_size;
- const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
- const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
- const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
- return range == inner_size;
+ return system.Memory().GetPointer(*address);
}
-void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
- const std::size_t size) const {
+void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
std::size_t remaining_size{size};
std::size_t page_index{gpu_src_addr >> page_bits};
std::size_t page_offset{gpu_src_addr & page_mask};
- auto& memory = system.Memory();
-
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
- const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
- // Flush must happen on the rasterizer interface, such that memory is always synchronous
- // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
- rasterizer.FlushRegion(src_addr, copy_amount);
- memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
+ if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+ const auto src_addr{*page_addr + page_offset};
+
+ // Flush must happen on the rasterizer interface, such that memory is always synchronous
+ // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
+ rasterizer->FlushRegion(src_addr, copy_amount);
+ system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
+ }
page_index++;
page_offset = 0;
@@ -241,18 +238,17 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
std::size_t page_index{gpu_src_addr >> page_bits};
std::size_t page_offset{gpu_src_addr & page_mask};
- auto& memory = system.Memory();
-
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
- const u8* page_pointer = page_table.pointers[page_index];
- if (page_pointer) {
- const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
- memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
+
+ if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+ const auto src_addr{*page_addr + page_offset};
+ system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
} else {
std::memset(dest_buffer, 0, copy_amount);
}
+
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
@@ -260,23 +256,23 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
}
}
-void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
- const std::size_t size) {
+void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
std::size_t remaining_size{size};
std::size_t page_index{gpu_dest_addr >> page_bits};
std::size_t page_offset{gpu_dest_addr & page_mask};
- auto& memory = system.Memory();
-
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
- const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
- // Invalidate must happen on the rasterizer interface, such that memory is always
- // synchronous when it is written (even when in asynchronous GPU mode).
- rasterizer.InvalidateRegion(dest_addr, copy_amount);
- memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+ if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+ const auto dest_addr{*page_addr + page_offset};
+
+ // Invalidate must happen on the rasterizer interface, such that memory is always
+ // synchronous when it is written (even when in asynchronous GPU mode).
+ rasterizer->InvalidateRegion(dest_addr, copy_amount);
+ system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+ }
page_index++;
page_offset = 0;
@@ -286,21 +282,20 @@ void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
}
void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
- const std::size_t size) {
+ std::size_t size) {
std::size_t remaining_size{size};
std::size_t page_index{gpu_dest_addr >> page_bits};
std::size_t page_offset{gpu_dest_addr & page_mask};
- auto& memory = system.Memory();
-
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
- u8* page_pointer = page_table.pointers[page_index];
- if (page_pointer) {
- const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
- memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+
+ if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+ const auto dest_addr{*page_addr + page_offset};
+ system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
}
+
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
@@ -308,273 +303,26 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
}
}
-void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
- const std::size_t size) {
+void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
std::vector<u8> tmp_buffer(size);
ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
}
void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
- const std::size_t size) {
+ std::size_t size) {
std::vector<u8> tmp_buffer(size);
ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
}
-bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
- const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits];
- const std::size_t page = (addr & Core::Memory::PAGE_MASK) + size;
- return page <= Core::Memory::PAGE_SIZE;
-}
-
-void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
- VAddr backing_addr) {
- LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
- (base + size) * page_size);
-
- const VAddr end{base + size};
- ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
- base + page_table.pointers.size());
-
- if (memory == nullptr) {
- while (base != end) {
- page_table.pointers[base] = nullptr;
- page_table.backing_addr[base] = 0;
-
- base += 1;
- }
- } else {
- while (base != end) {
- page_table.pointers[base] = memory;
- page_table.backing_addr[base] = backing_addr;
-
- base += 1;
- memory += page_size;
- backing_addr += page_size;
- }
- }
-}
-
-void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
- ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
- ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
- MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
-}
-
-void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
- ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
- ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
- MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
-}
-
-bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
- ASSERT(base + size == next.base);
- if (type != next.type) {
- return {};
- }
- if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
- return {};
- }
- if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
- return {};
- }
- return true;
-}
-
-MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
- if (target >= address_space_end) {
- return vma_map.end();
- } else {
- return std::prev(vma_map.upper_bound(target));
- }
-}
-
-MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
- VirtualMemoryArea& vma{vma_handle->second};
-
- vma.type = VirtualMemoryArea::Type::Allocated;
- vma.backing_addr = 0;
- vma.backing_memory = {};
- UpdatePageTableForVMA(vma);
-
- return MergeAdjacent(vma_handle);
-}
-
-MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
- u64 size) {
-
- // This is the appropriately sized VMA that will turn into our allocation.
- VMAIter vma_handle{CarveVMA(target, size)};
- VirtualMemoryArea& vma{vma_handle->second};
-
- ASSERT(vma.size == size);
-
- vma.offset = offset;
-
- return Allocate(vma_handle);
-}
-
-MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
- VAddr backing_addr) {
- // This is the appropriately sized VMA that will turn into our allocation.
- VMAIter vma_handle{CarveVMA(target, size)};
- VirtualMemoryArea& vma{vma_handle->second};
-
- ASSERT(vma.size == size);
-
- vma.type = VirtualMemoryArea::Type::Mapped;
- vma.backing_memory = memory;
- vma.backing_addr = backing_addr;
- UpdatePageTableForVMA(vma);
-
- return MergeAdjacent(vma_handle);
-}
-
-void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
- VMAIter vma{CarveVMARange(target, size)};
- const VAddr target_end{target + size};
- const VMAIter end{vma_map.end()};
-
- // The comparison against the end of the range must be done using addresses since VMAs can be
- // merged during this process, causing invalidation of the iterators.
- while (vma != end && vma->second.base < target_end) {
- // Unmapped ranges return to allocated state and can be reused
- // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
- vma = std::next(Allocate(vma));
- }
-
- ASSERT(FindVMA(target)->second.size >= size);
-}
-
-MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
- // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
- // non-const access to its container.
- return vma_map.erase(iter, iter); // Erases an empty range of elements
-}
-
-MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
- ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
- ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);
-
- VMAIter vma_handle{StripIterConstness(FindVMA(base))};
- if (vma_handle == vma_map.end()) {
- // Target address is outside the managed range
- return {};
- }
-
- const VirtualMemoryArea& vma{vma_handle->second};
- if (vma.type == VirtualMemoryArea::Type::Mapped) {
- // Region is already allocated
- return vma_handle;
- }
-
- const VAddr start_in_vma{base - vma.base};
- const VAddr end_in_vma{start_in_vma + size};
-
- ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
- vma.size, end_in_vma);
-
- if (end_in_vma < vma.size) {
- // Split VMA at the end of the allocated region
- SplitVMA(vma_handle, end_in_vma);
- }
- if (start_in_vma != 0) {
- // Split VMA at the start of the allocated region
- vma_handle = SplitVMA(vma_handle, start_in_vma);
- }
-
- return vma_handle;
-}
-
-MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
- ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
- ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);
-
- const VAddr target_end{target + size};
- ASSERT(target_end >= target);
- ASSERT(size > 0);
-
- VMAIter begin_vma{StripIterConstness(FindVMA(target))};
- const VMAIter i_end{vma_map.lower_bound(target_end)};
- if (std::any_of(begin_vma, i_end, [](const auto& entry) {
- return entry.second.type == VirtualMemoryArea::Type::Unmapped;
- })) {
- return {};
- }
-
- if (target != begin_vma->second.base) {
- begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
- }
-
- VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
- if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
- end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
- }
-
- return begin_vma;
-}
-
-MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
- VirtualMemoryArea& old_vma{vma_handle->second};
- VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA
-
- // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably
- // a bug. This restriction might be removed later.
- ASSERT(offset_in_vma < old_vma.size);
- ASSERT(offset_in_vma > 0);
-
- old_vma.size = offset_in_vma;
- new_vma.base += offset_in_vma;
- new_vma.size -= offset_in_vma;
-
- switch (new_vma.type) {
- case VirtualMemoryArea::Type::Unmapped:
- break;
- case VirtualMemoryArea::Type::Allocated:
- new_vma.offset += offset_in_vma;
- break;
- case VirtualMemoryArea::Type::Mapped:
- new_vma.backing_memory += offset_in_vma;
- break;
- }
-
- ASSERT(old_vma.CanBeMergedWith(new_vma));
-
- return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
-}
-
-MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
- const VMAIter next_vma{std::next(iter)};
- if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
- iter->second.size += next_vma->second.size;
- vma_map.erase(next_vma);
- }
-
- if (iter != vma_map.begin()) {
- VMAIter prev_vma{std::prev(iter)};
- if (prev_vma->second.CanBeMergedWith(iter->second)) {
- prev_vma->second.size += iter->second.size;
- vma_map.erase(iter);
- iter = prev_vma;
- }
- }
-
- return iter;
-}
-
-void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
- switch (vma.type) {
- case VirtualMemoryArea::Type::Unmapped:
- UnmapRegion(vma.base, vma.size);
- break;
- case VirtualMemoryArea::Type::Allocated:
- MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
- break;
- case VirtualMemoryArea::Type::Mapped:
- MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
- break;
+bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
+ const auto cpu_addr{GpuToCpuAddress(gpu_addr)};
+ if (!cpu_addr) {
+ return false;
}
+ const std::size_t page{(*cpu_addr & Core::Memory::PAGE_MASK) + size};
+ return page <= Core::Memory::PAGE_SIZE;
}
} // namespace Tegra
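
ReadBlock, ReadBlockUnsafe, WriteBlock and WriteBlockUnsafe above all share
one page-walk skeleton and differ only in what they do per chunk; a hedged
reduction of that loop:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    constexpr std::uint64_t page_bits = 16;
    constexpr std::uint64_t page_size = 1ULL << page_bits;
    constexpr std::uint64_t page_mask = page_size - 1;

    // Calls visit(page_base_gpu_addr, page_offset, length) once per chunk,
    // never crossing a 64 KiB page boundary.
    template <typename Visit>
    void WalkBlock(std::uint64_t gpu_addr, std::size_t size, Visit&& visit) {
        std::size_t remaining = size;
        std::uint64_t page_index = gpu_addr >> page_bits;
        std::size_t page_offset = static_cast<std::size_t>(gpu_addr & page_mask);
        while (remaining > 0) {
            const std::size_t amount =
                std::min(static_cast<std::size_t>(page_size) - page_offset, remaining);
            visit(page_index << page_bits, page_offset, amount);
            ++page_index;
            page_offset = 0;
            remaining -= amount;
        }
    }
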
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 87658e87a..53c8d122a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -6,9 +6,9 @@
#include <map>
#include <optional>
+#include <vector>
#include "common/common_types.h"
-#include "common/page_table.h"
namespace VideoCore {
class RasterizerInterface;
@@ -20,58 +20,70 @@ class System;
namespace Tegra {
-/**
- * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
- * with homogeneous attributes across its extents. In this particular implementation each VMA is
- * also backed by a single host memory allocation.
- */
-struct VirtualMemoryArea {
- enum class Type : u8 {
- Unmapped,
- Allocated,
- Mapped,
+class PageEntry final {
+public:
+ enum class State : u32 {
+ Unmapped = static_cast<u32>(-1),
+ Allocated = static_cast<u32>(-2),
};
- /// Virtual base address of the region.
- GPUVAddr base{};
- /// Size of the region.
- u64 size{};
- /// Memory area mapping type.
- Type type{Type::Unmapped};
- /// CPU memory mapped address corresponding to this memory area.
- VAddr backing_addr{};
- /// Offset into the backing_memory the mapping starts from.
- std::size_t offset{};
- /// Pointer backing this VMA.
- u8* backing_memory{};
-
- /// Tests if this area can be merged to the right with `next`.
- bool CanBeMergedWith(const VirtualMemoryArea& next) const;
+ constexpr PageEntry() = default;
+ constexpr PageEntry(State state) : state{state} {}
+ constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {}
+
+ [[nodiscard]] constexpr bool IsUnmapped() const {
+ return state == State::Unmapped;
+ }
+
+ [[nodiscard]] constexpr bool IsAllocated() const {
+ return state == State::Allocated;
+ }
+
+ [[nodiscard]] constexpr bool IsValid() const {
+ return !IsUnmapped() && !IsAllocated();
+ }
+
+ [[nodiscard]] constexpr VAddr ToAddress() const {
+ if (!IsValid()) {
+ return {};
+ }
+
+ return static_cast<VAddr>(state) << ShiftBits;
+ }
+
+ [[nodiscard]] constexpr PageEntry operator+(u64 offset) const {
+ // If this is a reserved value, offsets do not apply
+ if (!IsValid()) {
+ return *this;
+ }
+ return PageEntry{(static_cast<VAddr>(state) << ShiftBits) + offset};
+ }
+
+private:
+ static constexpr std::size_t ShiftBits{12};
+
+ State state{State::Unmapped};
};
+static_assert(sizeof(PageEntry) == 4, "PageEntry is too large");
class MemoryManager final {
public:
- explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
+ explicit MemoryManager(Core::System& system);
~MemoryManager();
- GPUVAddr AllocateSpace(u64 size, u64 align);
- GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
- GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
- GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
- GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
- std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
+ /// Binds a renderer to the memory manager.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+
+ [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
template <typename T>
- T Read(GPUVAddr addr) const;
+ [[nodiscard]] T Read(GPUVAddr addr) const;
template <typename T>
void Write(GPUVAddr addr, T data);
- u8* GetPointer(GPUVAddr addr);
- const u8* GetPointer(GPUVAddr addr) const;
-
- /// Returns true if the block is continuous in host memory, false otherwise
- bool IsBlockContinuous(GPUVAddr start, std::size_t size) const;
+ [[nodiscard]] u8* GetPointer(GPUVAddr addr);
+ [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
/**
* ReadBlock and WriteBlock are full read and write operations over virtual
@@ -98,92 +110,43 @@ public:
void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
/**
- * IsGranularRange checks if a gpu region can be simply read with a pointer
+ * IsGranularRange checks if a gpu region can be simply read with a pointer.
*/
- bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
-
-private:
- using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
- using VMAHandle = VMAMap::const_iterator;
- using VMAIter = VMAMap::iterator;
-
- bool IsAddressValid(GPUVAddr addr) const;
- void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
- VAddr backing_addr = 0);
- void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
- void UnmapRegion(GPUVAddr base, u64 size);
+ [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
- /// Finds the VMA in which the given address is included in, or `vma_map.end()`.
- VMAHandle FindVMA(GPUVAddr target) const;
-
- VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
-
- /**
- * Maps an unmanaged host memory pointer at a given address.
- *
- * @param target The guest address to start the mapping at.
- * @param memory The memory to be mapped.
- * @param size Size of the mapping in bytes.
- * @param backing_addr The base address of the range to back this mapping.
- */
- VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
+ [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
+ [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
+ [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
+ [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
+ void Unmap(GPUVAddr gpu_addr, std::size_t size);
- /// Unmaps a range of addresses, splitting VMAs as necessary.
- void UnmapRange(GPUVAddr target, u64 size);
-
- /// Converts a VMAHandle to a mutable VMAIter.
- VMAIter StripIterConstness(const VMAHandle& iter);
-
- /// Marks as the specified VMA as allocated.
- VMAIter Allocate(VMAIter vma);
-
- /**
- * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
- * the appropriate error checking.
- */
- VMAIter CarveVMA(GPUVAddr base, u64 size);
-
- /**
- * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
- * end of the range.
- */
- VMAIter CarveVMARange(GPUVAddr base, u64 size);
-
- /**
- * Splits a VMA in two, at the specified offset.
- * @returns the right side of the split, with the original iterator becoming the left side.
- */
- VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
-
- /**
- * Checks for and merges the specified VMA with adjacent ones if possible.
- * @returns the merged VMA or the original if no merging was possible.
- */
- VMAIter MergeAdjacent(VMAIter vma);
+private:
+ [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
+ void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
+ GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
+ [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const;
- /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
- void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
+ void TryLockPage(PageEntry page_entry, std::size_t size);
+ void TryUnlockPage(PageEntry page_entry, std::size_t size);
- /// Finds a free (unmapped region) of the specified size starting at the specified address.
- GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const;
+ [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) {
+ return (gpu_addr >> page_bits) & page_table_mask;
+ }
-private:
+ static constexpr u64 address_space_size = 1ULL << 40;
+ static constexpr u64 address_space_start = 1ULL << 32;
static constexpr u64 page_bits{16};
static constexpr u64 page_size{1 << page_bits};
static constexpr u64 page_mask{page_size - 1};
+ static constexpr u64 page_table_bits{24};
+ static constexpr u64 page_table_size{1 << page_table_bits};
+ static constexpr u64 page_table_mask{page_table_size - 1};
- /// Address space in bits, according to Tegra X1 TRM
- static constexpr u32 address_space_width{40};
- /// Start address for mapping, this is fairly arbitrary but must be non-zero.
- static constexpr GPUVAddr address_space_base{0x100000};
- /// End of address space, based on address space in bits.
- static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
+ Core::System& system;
- Common::PageTable page_table;
- VMAMap vma_map;
- VideoCore::RasterizerInterface& rasterizer;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
- Core::System& system;
+ std::vector<PageEntry> page_table;
};
} // namespace Tegra
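
PageEntry above fits a page-aligned CPU address into 32 bits by storing
addr >> 12 and reserving the two top encodings for Unmapped and Allocated; a
round-trip sketch of just the packing:

    #include <cstdint>

    constexpr std::uint64_t ShiftBits = 12;

    constexpr std::uint32_t Pack(std::uint64_t cpu_addr) {
        return static_cast<std::uint32_t>(cpu_addr >> ShiftBits);
    }
    constexpr std::uint64_t Unpack(std::uint32_t state) {
        return static_cast<std::uint64_t>(state) << ShiftBits;
    }

    static_assert(Unpack(Pack(0x12345000ULL)) == 0x12345000ULL);
    // Addresses must be 4 KiB aligned; Pack drops the low 12 bits.
    static_assert(Unpack(Pack(0x12345678ULL)) == 0x12345000ULL);
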
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 836b25c1d..9da9fb4ff 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -41,146 +41,168 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
}
static constexpr ConversionArray morton_to_linear_fns = {
- MortonCopy<true, PixelFormat::ABGR8U>,
- MortonCopy<true, PixelFormat::ABGR8S>,
- MortonCopy<true, PixelFormat::ABGR8UI>,
- MortonCopy<true, PixelFormat::B5G6R5U>,
- MortonCopy<true, PixelFormat::A2B10G10R10U>,
- MortonCopy<true, PixelFormat::A1B5G5R5U>,
- MortonCopy<true, PixelFormat::R8U>,
- MortonCopy<true, PixelFormat::R8UI>,
- MortonCopy<true, PixelFormat::RGBA16F>,
- MortonCopy<true, PixelFormat::RGBA16U>,
- MortonCopy<true, PixelFormat::RGBA16S>,
- MortonCopy<true, PixelFormat::RGBA16UI>,
- MortonCopy<true, PixelFormat::R11FG11FB10F>,
- MortonCopy<true, PixelFormat::RGBA32UI>,
- MortonCopy<true, PixelFormat::DXT1>,
- MortonCopy<true, PixelFormat::DXT23>,
- MortonCopy<true, PixelFormat::DXT45>,
- MortonCopy<true, PixelFormat::DXN1>,
- MortonCopy<true, PixelFormat::DXN2UNORM>,
- MortonCopy<true, PixelFormat::DXN2SNORM>,
- MortonCopy<true, PixelFormat::BC7U>,
- MortonCopy<true, PixelFormat::BC6H_UF16>,
- MortonCopy<true, PixelFormat::BC6H_SF16>,
- MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
- MortonCopy<true, PixelFormat::BGRA8>,
- MortonCopy<true, PixelFormat::RGBA32F>,
- MortonCopy<true, PixelFormat::RG32F>,
- MortonCopy<true, PixelFormat::R32F>,
- MortonCopy<true, PixelFormat::R16F>,
- MortonCopy<true, PixelFormat::R16U>,
- MortonCopy<true, PixelFormat::R16S>,
- MortonCopy<true, PixelFormat::R16UI>,
- MortonCopy<true, PixelFormat::R16I>,
- MortonCopy<true, PixelFormat::RG16>,
- MortonCopy<true, PixelFormat::RG16F>,
- MortonCopy<true, PixelFormat::RG16UI>,
- MortonCopy<true, PixelFormat::RG16I>,
- MortonCopy<true, PixelFormat::RG16S>,
- MortonCopy<true, PixelFormat::RGB32F>,
- MortonCopy<true, PixelFormat::RGBA8_SRGB>,
- MortonCopy<true, PixelFormat::RG8U>,
- MortonCopy<true, PixelFormat::RG8S>,
- MortonCopy<true, PixelFormat::RG8UI>,
- MortonCopy<true, PixelFormat::RG32UI>,
- MortonCopy<true, PixelFormat::RGBX16F>,
- MortonCopy<true, PixelFormat::R32UI>,
- MortonCopy<true, PixelFormat::R32I>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
- MortonCopy<true, PixelFormat::BGRA8_SRGB>,
- MortonCopy<true, PixelFormat::DXT1_SRGB>,
- MortonCopy<true, PixelFormat::DXT23_SRGB>,
- MortonCopy<true, PixelFormat::DXT45_SRGB>,
- MortonCopy<true, PixelFormat::BC7U_SRGB>,
- MortonCopy<true, PixelFormat::R4G4B4A4U>,
+ MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
+ MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>,
+ MortonCopy<true, PixelFormat::A8B8G8R8_SINT>,
+ MortonCopy<true, PixelFormat::A8B8G8R8_UINT>,
+ MortonCopy<true, PixelFormat::R5G6B5_UNORM>,
+ MortonCopy<true, PixelFormat::B5G6R5_UNORM>,
+ MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>,
+ MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>,
+ MortonCopy<true, PixelFormat::A2B10G10R10_UINT>,
+ MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>,
+ MortonCopy<true, PixelFormat::R8_UNORM>,
+ MortonCopy<true, PixelFormat::R8_SNORM>,
+ MortonCopy<true, PixelFormat::R8_SINT>,
+ MortonCopy<true, PixelFormat::R8_UINT>,
+ MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>,
+ MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>,
+ MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>,
+ MortonCopy<true, PixelFormat::R16G16B16A16_SINT>,
+ MortonCopy<true, PixelFormat::R16G16B16A16_UINT>,
+ MortonCopy<true, PixelFormat::B10G11R11_FLOAT>,
+ MortonCopy<true, PixelFormat::R32G32B32A32_UINT>,
+ MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>,
+ MortonCopy<true, PixelFormat::BC2_UNORM>,
+ MortonCopy<true, PixelFormat::BC3_UNORM>,
+ MortonCopy<true, PixelFormat::BC4_UNORM>,
+ MortonCopy<true, PixelFormat::BC4_SNORM>,
+ MortonCopy<true, PixelFormat::BC5_UNORM>,
+ MortonCopy<true, PixelFormat::BC5_SNORM>,
+ MortonCopy<true, PixelFormat::BC7_UNORM>,
+ MortonCopy<true, PixelFormat::BC6H_UFLOAT>,
+ MortonCopy<true, PixelFormat::BC6H_SFLOAT>,
+ MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>,
+ MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>,
+ MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>,
+ MortonCopy<true, PixelFormat::R32G32B32A32_SINT>,
+ MortonCopy<true, PixelFormat::R32G32_FLOAT>,
+ MortonCopy<true, PixelFormat::R32G32_SINT>,
+ MortonCopy<true, PixelFormat::R32_FLOAT>,
+ MortonCopy<true, PixelFormat::R16_FLOAT>,
+ MortonCopy<true, PixelFormat::R16_UNORM>,
+ MortonCopy<true, PixelFormat::R16_SNORM>,
+ MortonCopy<true, PixelFormat::R16_UINT>,
+ MortonCopy<true, PixelFormat::R16_SINT>,
+ MortonCopy<true, PixelFormat::R16G16_UNORM>,
+ MortonCopy<true, PixelFormat::R16G16_FLOAT>,
+ MortonCopy<true, PixelFormat::R16G16_UINT>,
+ MortonCopy<true, PixelFormat::R16G16_SINT>,
+ MortonCopy<true, PixelFormat::R16G16_SNORM>,
+ MortonCopy<true, PixelFormat::R32G32B32_FLOAT>,
+ MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>,
+ MortonCopy<true, PixelFormat::R8G8_UNORM>,
+ MortonCopy<true, PixelFormat::R8G8_SNORM>,
+ MortonCopy<true, PixelFormat::R8G8_SINT>,
+ MortonCopy<true, PixelFormat::R8G8_UINT>,
+ MortonCopy<true, PixelFormat::R32G32_UINT>,
+ MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>,
+ MortonCopy<true, PixelFormat::R32_UINT>,
+ MortonCopy<true, PixelFormat::R32_SINT>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>,
+ MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>,
+ MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>,
+ MortonCopy<true, PixelFormat::BC2_SRGB>,
+ MortonCopy<true, PixelFormat::BC3_SRGB>,
+ MortonCopy<true, PixelFormat::BC7_SRGB>,
+ MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_6X6>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_10X10>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_12X12>,
+ MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X6>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_6X5>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
- MortonCopy<true, PixelFormat::E5B9G9R9F>,
- MortonCopy<true, PixelFormat::Z32F>,
- MortonCopy<true, PixelFormat::Z16>,
- MortonCopy<true, PixelFormat::Z24S8>,
- MortonCopy<true, PixelFormat::S8Z24>,
- MortonCopy<true, PixelFormat::Z32FS8>,
+ MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>,
+ MortonCopy<true, PixelFormat::D32_FLOAT>,
+ MortonCopy<true, PixelFormat::D16_UNORM>,
+ MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>,
+ MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>,
+ MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>,
};
static constexpr ConversionArray linear_to_morton_fns = {
- MortonCopy<false, PixelFormat::ABGR8U>,
- MortonCopy<false, PixelFormat::ABGR8S>,
- MortonCopy<false, PixelFormat::ABGR8UI>,
- MortonCopy<false, PixelFormat::B5G6R5U>,
- MortonCopy<false, PixelFormat::A2B10G10R10U>,
- MortonCopy<false, PixelFormat::A1B5G5R5U>,
- MortonCopy<false, PixelFormat::R8U>,
- MortonCopy<false, PixelFormat::R8UI>,
- MortonCopy<false, PixelFormat::RGBA16F>,
- MortonCopy<false, PixelFormat::RGBA16S>,
- MortonCopy<false, PixelFormat::RGBA16U>,
- MortonCopy<false, PixelFormat::RGBA16UI>,
- MortonCopy<false, PixelFormat::R11FG11FB10F>,
- MortonCopy<false, PixelFormat::RGBA32UI>,
- MortonCopy<false, PixelFormat::DXT1>,
- MortonCopy<false, PixelFormat::DXT23>,
- MortonCopy<false, PixelFormat::DXT45>,
- MortonCopy<false, PixelFormat::DXN1>,
- MortonCopy<false, PixelFormat::DXN2UNORM>,
- MortonCopy<false, PixelFormat::DXN2SNORM>,
- MortonCopy<false, PixelFormat::BC7U>,
- MortonCopy<false, PixelFormat::BC6H_UF16>,
- MortonCopy<false, PixelFormat::BC6H_SF16>,
+ MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>,
+ MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>,
+ MortonCopy<false, PixelFormat::A8B8G8R8_SINT>,
+ MortonCopy<false, PixelFormat::A8B8G8R8_UINT>,
+ MortonCopy<false, PixelFormat::R5G6B5_UNORM>,
+ MortonCopy<false, PixelFormat::B5G6R5_UNORM>,
+ MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>,
+ MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>,
+ MortonCopy<false, PixelFormat::A2B10G10R10_UINT>,
+ MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>,
+ MortonCopy<false, PixelFormat::R8_UNORM>,
+ MortonCopy<false, PixelFormat::R8_SNORM>,
+ MortonCopy<false, PixelFormat::R8_SINT>,
+ MortonCopy<false, PixelFormat::R8_UINT>,
+ MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>,
+ MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>,
+ MortonCopy<false, PixelFormat::R16G16B16A16_SINT>,
+ MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>,
+ MortonCopy<false, PixelFormat::R16G16B16A16_UINT>,
+ MortonCopy<false, PixelFormat::B10G11R11_FLOAT>,
+ MortonCopy<false, PixelFormat::R32G32B32A32_UINT>,
+ MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>,
+ MortonCopy<false, PixelFormat::BC2_UNORM>,
+ MortonCopy<false, PixelFormat::BC3_UNORM>,
+ MortonCopy<false, PixelFormat::BC4_UNORM>,
+ MortonCopy<false, PixelFormat::BC4_SNORM>,
+ MortonCopy<false, PixelFormat::BC5_UNORM>,
+ MortonCopy<false, PixelFormat::BC5_SNORM>,
+ MortonCopy<false, PixelFormat::BC7_UNORM>,
+ MortonCopy<false, PixelFormat::BC6H_UFLOAT>,
+ MortonCopy<false, PixelFormat::BC6H_SFLOAT>,
// TODO(Subv): Swizzling ASTC formats is not supported
nullptr,
- MortonCopy<false, PixelFormat::BGRA8>,
- MortonCopy<false, PixelFormat::RGBA32F>,
- MortonCopy<false, PixelFormat::RG32F>,
- MortonCopy<false, PixelFormat::R32F>,
- MortonCopy<false, PixelFormat::R16F>,
- MortonCopy<false, PixelFormat::R16U>,
- MortonCopy<false, PixelFormat::R16S>,
- MortonCopy<false, PixelFormat::R16UI>,
- MortonCopy<false, PixelFormat::R16I>,
- MortonCopy<false, PixelFormat::RG16>,
- MortonCopy<false, PixelFormat::RG16F>,
- MortonCopy<false, PixelFormat::RG16UI>,
- MortonCopy<false, PixelFormat::RG16I>,
- MortonCopy<false, PixelFormat::RG16S>,
- MortonCopy<false, PixelFormat::RGB32F>,
- MortonCopy<false, PixelFormat::RGBA8_SRGB>,
- MortonCopy<false, PixelFormat::RG8U>,
- MortonCopy<false, PixelFormat::RG8S>,
- MortonCopy<false, PixelFormat::RG8UI>,
- MortonCopy<false, PixelFormat::RG32UI>,
- MortonCopy<false, PixelFormat::RGBX16F>,
- MortonCopy<false, PixelFormat::R32UI>,
- MortonCopy<false, PixelFormat::R32I>,
+ MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>,
+ MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>,
+ MortonCopy<false, PixelFormat::R32G32B32A32_SINT>,
+ MortonCopy<false, PixelFormat::R32G32_FLOAT>,
+ MortonCopy<false, PixelFormat::R32G32_SINT>,
+ MortonCopy<false, PixelFormat::R32_FLOAT>,
+ MortonCopy<false, PixelFormat::R16_FLOAT>,
+ MortonCopy<false, PixelFormat::R16_UNORM>,
+ MortonCopy<false, PixelFormat::R16_SNORM>,
+ MortonCopy<false, PixelFormat::R16_UINT>,
+ MortonCopy<false, PixelFormat::R16_SINT>,
+ MortonCopy<false, PixelFormat::R16G16_UNORM>,
+ MortonCopy<false, PixelFormat::R16G16_FLOAT>,
+ MortonCopy<false, PixelFormat::R16G16_UINT>,
+ MortonCopy<false, PixelFormat::R16G16_SINT>,
+ MortonCopy<false, PixelFormat::R16G16_SNORM>,
+ MortonCopy<false, PixelFormat::R32G32B32_FLOAT>,
+ MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>,
+ MortonCopy<false, PixelFormat::R8G8_UNORM>,
+ MortonCopy<false, PixelFormat::R8G8_SNORM>,
+ MortonCopy<false, PixelFormat::R8G8_SINT>,
+ MortonCopy<false, PixelFormat::R8G8_UINT>,
+ MortonCopy<false, PixelFormat::R32G32_UINT>,
+ MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>,
+ MortonCopy<false, PixelFormat::R32_UINT>,
+ MortonCopy<false, PixelFormat::R32_SINT>,
nullptr,
nullptr,
nullptr,
- MortonCopy<false, PixelFormat::BGRA8_SRGB>,
- MortonCopy<false, PixelFormat::DXT1_SRGB>,
- MortonCopy<false, PixelFormat::DXT23_SRGB>,
- MortonCopy<false, PixelFormat::DXT45_SRGB>,
- MortonCopy<false, PixelFormat::BC7U_SRGB>,
- MortonCopy<false, PixelFormat::R4G4B4A4U>,
+ MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>,
+ MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>,
+ MortonCopy<false, PixelFormat::BC2_SRGB>,
+ MortonCopy<false, PixelFormat::BC3_SRGB>,
+ MortonCopy<false, PixelFormat::BC7_SRGB>,
+ MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>,
nullptr,
nullptr,
nullptr,
@@ -199,12 +221,12 @@ static constexpr ConversionArray linear_to_morton_fns = {
nullptr,
nullptr,
nullptr,
- MortonCopy<false, PixelFormat::E5B9G9R9F>,
- MortonCopy<false, PixelFormat::Z32F>,
- MortonCopy<false, PixelFormat::Z16>,
- MortonCopy<false, PixelFormat::Z24S8>,
- MortonCopy<false, PixelFormat::S8Z24>,
- MortonCopy<false, PixelFormat::Z32FS8>,
+ MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>,
+ MortonCopy<false, PixelFormat::D32_FLOAT>,
+ MortonCopy<false, PixelFormat::D16_UNORM>,
+ MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>,
+ MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>,
+ MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>,
};
static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
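For orientation, the tables above are plain function-pointer arrays indexed by PixelFormat, with nullptr marking directions that are not supported (such as linear-to-morton ASTC). A reduced sketch of the dispatch pattern, with hypothetical formats and copy functions:

```cpp
#include <array>
#include <cstddef>

enum class PixelFormat : std::size_t { A8B8G8R8_UNORM, ASTC_2D_4X4_UNORM, MaxPixelFormat };
using CopyFn = void (*)(const unsigned char* in, unsigned char* out);

void CopyABGR8(const unsigned char* /*in*/, unsigned char* /*out*/) { /* ... */ }

constexpr std::array<CopyFn, static_cast<std::size_t>(PixelFormat::MaxPixelFormat)> table{
    CopyABGR8, // A8B8G8R8_UNORM
    nullptr,   // ASTC swizzling unsupported in this direction
};

CopyFn GetFn(PixelFormat format) {
    return table[static_cast<std::size_t>(format)]; // callers must null-check
}
```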
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 0d3a88765..d13a66bb6 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -95,10 +95,12 @@ template <class QueryCache, class CachedQuery, class CounterStream, class HostCo
class QueryPool>
class QueryCacheBase {
public:
- explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
- : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
- static_cast<QueryCache&>(*this),
- VideoCore::QueryType::SamplesPassed}}} {}
+ explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_)
+ : rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+ VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
@@ -118,29 +120,27 @@ public:
*/
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex};
- auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr_opt);
- VAddr cpu_addr = *cpu_addr_opt;
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ ASSERT(cpu_addr);
- CachedQuery* query = TryGet(cpu_addr);
+ CachedQuery* query = TryGet(*cpu_addr);
if (!query) {
- ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+ ASSERT_OR_EXECUTE(cpu_addr, return;);
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
- query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
+ query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
query->BindCounter(Stream(type).Current(), timestamp);
if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- AsyncFlushQuery(cpu_addr);
+ AsyncFlushQuery(*cpu_addr);
}
}
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
void UpdateCounters() {
std::unique_lock lock{mutex};
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
}
@@ -270,8 +270,9 @@ private:
static constexpr std::uintptr_t PAGE_SIZE = 4096;
static constexpr unsigned PAGE_BITS = 12;
- Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::MemoryManager& gpu_memory;
std::recursive_mutex mutex;
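Note the pattern in Query() above: GpuToCpuAddress now returns std::optional&lt;VAddr&gt;, and the cache asserts and dereferences instead of carrying a separate `_opt` variable. A toy model of that flow; the translation rule here is invented purely for illustration:

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

using GPUVAddr = std::uint64_t;
using VAddr = std::uint64_t;

// Stand-in for Tegra::MemoryManager::GpuToCpuAddress: nullopt means unmapped.
std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) {
    if (gpu_addr < 0x1000) {
        return std::nullopt; // toy rule: low pages are unmapped
    }
    return static_cast<VAddr>(gpu_addr - 0x1000);
}

VAddr TranslateOrDie(GPUVAddr gpu_addr) {
    const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
    assert(cpu_addr.has_value()); // mirrors ASSERT(cpu_addr) in the cache
    return *cpu_addr;
}
```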
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 3cbdac8e7..b3e0919f8 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -106,11 +106,8 @@ public:
virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
/// Initialize disk cached resources for the game being emulated
- virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
- const DiskResourceLoadCallback& callback = {}) {}
-
- /// Initializes renderer dirty flags
- virtual void SetupDirtyFlags() {}
+ virtual void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+ const DiskResourceLoadCallback& callback) {}
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
GuestDriverProfile& AccessGuestDriverProfile() {
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index dfb06e87e..a93a1732c 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -9,7 +9,9 @@
namespace VideoCore {
-RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} {
+RendererBase::RendererBase(Core::Frontend::EmuWindow& window_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context_)
+ : render_window{window_}, context{std::move(context_)} {
RefreshBaseSettings();
}
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 1d85219b6..649074acd 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -15,7 +15,8 @@
namespace Core::Frontend {
class EmuWindow;
-}
+class GraphicsContext;
+} // namespace Core::Frontend
namespace VideoCore {
@@ -25,14 +26,15 @@ struct RendererSettings {
// Screenshot
std::atomic<bool> screenshot_requested{false};
- void* screenshot_bits;
+ void* screenshot_bits{};
std::function<void()> screenshot_complete_callback;
Layout::FramebufferLayout screenshot_framebuffer_layout;
};
class RendererBase : NonCopyable {
public:
- explicit RendererBase(Core::Frontend::EmuWindow& window);
+ explicit RendererBase(Core::Frontend::EmuWindow& window,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context);
virtual ~RendererBase();
/// Initialize the renderer
@@ -68,6 +70,14 @@ public:
return *rasterizer;
}
+ Core::Frontend::GraphicsContext& Context() {
+ return *context;
+ }
+
+ const Core::Frontend::GraphicsContext& Context() const {
+ return *context;
+ }
+
Core::Frontend::EmuWindow& GetRenderWindow() {
return render_window;
}
@@ -94,6 +104,7 @@ public:
protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<RasterizerInterface> rasterizer;
+ std::unique_ptr<Core::Frontend::GraphicsContext> context;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index eb5158407..b7e9ed2e9 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) {
return type;
}
-std::string GlobalMemoryName(const GlobalMemoryBase& base) {
- return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
-}
-
class ARBDecompiler final {
public:
explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
@@ -199,6 +195,8 @@ public:
}
private:
+ void DefineGlobalMemory();
+
void DeclareHeader();
void DeclareVertex();
void DeclareGeometry();
@@ -228,6 +226,7 @@ private:
std::pair<std::string, std::size_t> BuildCoords(Operation);
std::string BuildAoffi(Operation);
+ std::string GlobalMemoryPointer(const GmemNode& gmem);
void Exit();
std::string Assign(Operation);
@@ -378,10 +377,8 @@ private:
std::string address;
std::string_view opname;
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
- AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
- Visit(gmem->GetBaseAddress()));
- address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
- opname = "ATOMB";
+ address = GlobalMemoryPointer(*gmem);
+ opname = "ATOM";
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
opname = "ATOMS";
@@ -456,9 +453,13 @@ private:
shader_source += '\n';
}
- std::string AllocTemporary() {
- max_temporaries = std::max(max_temporaries, num_temporaries + 1);
- return fmt::format("T{}.x", num_temporaries++);
+ std::string AllocLongVectorTemporary() {
+ max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
+ return fmt::format("L{}", num_long_temporaries++);
+ }
+
+ std::string AllocLongTemporary() {
+ return fmt::format("{}.x", AllocLongVectorTemporary());
}
std::string AllocVectorTemporary() {
@@ -466,8 +467,13 @@ private:
return fmt::format("T{}", num_temporaries++);
}
+ std::string AllocTemporary() {
+ return fmt::format("{}.x", AllocVectorTemporary());
+ }
+
void ResetTemporaries() noexcept {
num_temporaries = 0;
+ num_long_temporaries = 0;
}
const Device& device;
@@ -478,6 +484,11 @@ private:
std::size_t num_temporaries = 0;
std::size_t max_temporaries = 0;
+ std::size_t num_long_temporaries = 0;
+ std::size_t max_long_temporaries = 0;
+
+ std::map<GlobalMemoryBase, u32> global_memory_names;
+
std::string shader_source;
static constexpr std::string_view ADD_F32 = "ADD.F32";
@@ -784,6 +795,8 @@ private:
ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
ShaderType stage, std::string_view identifier)
: device{device}, ir{ir}, registry{registry}, stage{stage} {
+ DefineGlobalMemory();
+
AddLine("TEMP RC;");
AddLine("TEMP FSWZA[4];");
AddLine("TEMP FSWZB[4];");
@@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) {
}
}
+void ARBDecompiler::DefineGlobalMemory() {
+ u32 binding = 0;
+ for (const auto& pair : ir.GetGlobalMemory()) {
+ const GlobalMemoryBase base = pair.first;
+ global_memory_names.emplace(base, binding);
+ ++binding;
+ }
+}
+
void ARBDecompiler::DeclareHeader() {
AddLine("!!NV{}5.0", HeaderStageName(stage));
// Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
AddLine("OPTION NV_internal;");
AddLine("OPTION NV_gpu_program_fp64;");
- AddLine("OPTION NV_shader_storage_buffer;");
AddLine("OPTION NV_shader_thread_group;");
if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
AddLine("OPTION NV_shader_thread_shuffle;");
@@ -892,11 +913,19 @@ void ARBDecompiler::DeclareCompute() {
const ComputeInfo& info = registry.GetComputeInfo();
AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
info.workgroup_size[2]);
- if (info.shared_memory_size_in_words > 0) {
- const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
- AddLine("SHARED_MEMORY {};", size_in_bytes);
- AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
+ if (info.shared_memory_size_in_words == 0) {
+ return;
+ }
+ const u32 limit = device.GetMaxComputeSharedMemorySize();
+ u32 size_in_bytes = info.shared_memory_size_in_words * 4;
+ if (size_in_bytes > limit) {
+ LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
+ size_in_bytes, limit);
+ size_in_bytes = limit;
}
+
+ AddLine("SHARED_MEMORY {};", size_in_bytes);
+ AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
}
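The shared-memory change above swaps an unconditional emit for clamp-and-log behavior: a guest request larger than GL_MAX_COMPUTE_SHARED_MEMORY_SIZE is reduced to the host limit instead of producing a GLASM program the driver would reject. A minimal sketch of that policy, with the function name illustrative:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

std::uint32_t ClampSharedMemory(std::uint32_t requested_bytes, std::uint32_t host_limit) {
    if (requested_bytes > host_limit) {
        // Mirrors the LOG_ERROR above: report, then proceed with the clamp.
        std::fprintf(stderr, "Shared memory size %u is clamped to host's limit %u\n",
                     requested_bytes, host_limit);
    }
    return std::min(requested_bytes, host_limit);
}
```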
void ARBDecompiler::DeclareInputAttributes() {
@@ -951,11 +980,10 @@ void ARBDecompiler::DeclareLocalMemory() {
}
void ARBDecompiler::DeclareGlobalMemory() {
- u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
- for (const auto& pair : ir.GetGlobalMemory()) {
- const auto& base = pair.first;
- AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding);
- ++binding;
+ const std::size_t num_entries = ir.GetGlobalMemory().size();
+ if (num_entries > 0) {
+ const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
+ AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
}
}
@@ -977,6 +1005,9 @@ void ARBDecompiler::DeclareTemporaries() {
for (std::size_t i = 0; i < max_temporaries; ++i) {
AddLine("TEMP T{};", i);
}
+ for (std::size_t i = 0; i < max_long_temporaries; ++i) {
+ AddLine("LONG TEMP L{};", i);
+ }
}
void ARBDecompiler::DeclarePredicates() {
@@ -1260,13 +1291,6 @@ std::string ARBDecompiler::Visit(const Node& node) {
return "{0, 0, 0, 0}.x";
}
- const auto buffer_index = [this, &abuf]() -> std::string {
- if (stage != ShaderType::Geometry) {
- return "";
- }
- return fmt::format("[{}]", Visit(abuf->GetBuffer()));
- };
-
const Attribute::Index index = abuf->GetIndex();
const u32 element = abuf->GetElement();
const char swizzle = Swizzle(element);
@@ -1339,10 +1363,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
std::string temporary = AllocTemporary();
- AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
- Visit(gmem->GetBaseAddress()));
- AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
- temporary);
+ AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
return temporary;
}
@@ -1375,7 +1396,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
return {};
}
- if (const auto cmt = std::get_if<CommentNode>(&*node)) {
+ if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
// Uncommenting this will generate invalid code. GLASM lacks comments.
// AddLine("// {}", cmt->GetText());
return {};
@@ -1419,6 +1440,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
return fmt::format(", offset({})", temporary);
}
+std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
+ const u32 binding = global_memory_names.at(gmem.GetDescriptor());
+ const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
+
+ const std::string pointer = AllocLongVectorTemporary();
+ std::string temporary = AllocTemporary();
+
+ const u32 local_index = binding / 2;
+ AddLine("PK64.U {}, c[{}];", pointer, local_index);
+ AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
+ Visit(gmem.GetBaseAddress()));
+ AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
+ AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
+ return fmt::format("{}.x", pointer);
+}
+
void ARBDecompiler::Exit() {
if (stage != ShaderType::Fragment) {
AddLine("RET;");
@@ -1515,11 +1552,7 @@ std::string ARBDecompiler::Assign(Operation operation) {
ResetTemporaries();
return {};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- const std::string temporary = AllocTemporary();
- AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
- Visit(gmem->GetBaseAddress()));
- AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
- temporary);
+ AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
ResetTemporaries();
return {};
} else {
@@ -1671,7 +1704,7 @@ std::string ARBDecompiler::HCastFloat(Operation operation) {
}
std::string ARBDecompiler::HUnpack(Operation operation) {
- const std::string operand = Visit(operation[0]);
+ std::string operand = Visit(operation[0]);
switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
case Tegra::Shader::HalfType::H0_H1:
return operand;
@@ -2021,7 +2054,7 @@ std::string ARBDecompiler::InvocationId(Operation) {
std::string ARBDecompiler::YNegate(Operation) {
LOG_WARNING(Render_OpenGL, "(STUBBED)");
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("MOV.F {}, 1;", temporary);
return temporary;
}
@@ -2044,10 +2077,6 @@ std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
}
std::string ARBDecompiler::Barrier(Operation) {
- if (!ir.IsDecompiled()) {
- LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled");
- return {};
- }
AddLine("BAR;");
return {};
}
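To summarize the decompiler changes in this file: global memory moves off NV_shader_storage_buffer and onto bindless addresses. GlobalMemoryPointer unpacks a 64-bit host base from a program-local uvec4 (PK64.U yields two 64-bit values per vector, so binding i reads component x or y of vector i/2), then adds the 32-bit offset of the access within the guest buffer. A C++ analogue of the arithmetic the emitted GLASM performs, not the emulator's code:

```cpp
#include <cstdint>

// host_base:   unpacked from c[binding / 2] by PK64.U (.x or .y by parity)
// real_addr:   guest address computed by the shader for this access
// buffer_base: guest address where the global-memory buffer starts
std::uint64_t GlobalMemoryAddress(std::uint64_t host_base, std::uint32_t real_addr,
                                  std::uint32_t buffer_base) {
    const std::uint32_t offset = real_addr - buffer_base; // SUB.U
    return host_base + offset; // CVT.U64.U32 then ADD.U64 in the GLASM
}
```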
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index e461e4c70..b1c4cd62f 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
: VideoCommon::BufferBlock{cpu_addr, size} {
gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
- if (device.HasVertexBufferUnifiedMemory()) {
+ if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
}
@@ -59,9 +59,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
}
-OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
+OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device_, std::size_t stream_size)
- : GenericBufferCache{rasterizer, system,
+ : GenericBufferCache{rasterizer, gpu_memory, cpu_memory,
std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
device{device_} {
if (!device.HasFastBufferSubData()) {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 88fdc0536..f75b32e31 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -52,7 +52,8 @@ private:
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache {
public:
- explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
+ explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, std::size_t stream_size);
~OGLBufferCache();
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index c1f20f0ab..e7d95149f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -212,6 +212,7 @@ Device::Device()
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
+ max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -233,6 +234,8 @@ Device::Device()
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
+ use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
+
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
@@ -248,6 +251,7 @@ Device::Device(std::nullptr_t) {
shader_storage_alignment = 4;
max_vertex_attributes = 16;
max_varyings = 15;
+ max_compute_shared_memory_size = 0x10000;
has_warp_intrinsics = true;
has_shader_ballot = true;
has_vertex_viewport_layer = true;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e1d811966..8a4b6b9fc 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -52,6 +52,10 @@ public:
return max_varyings;
}
+ u32 GetMaxComputeSharedMemorySize() const {
+ return max_compute_shared_memory_size;
+ }
+
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
@@ -104,6 +108,10 @@ public:
return use_assembly_shaders;
}
+ bool UseAsynchronousShaders() const {
+ return use_asynchronous_shaders;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -114,6 +122,7 @@ private:
std::size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
+ u32 max_compute_shared_memory_size{};
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
@@ -127,6 +136,7 @@ private:
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
bool use_assembly_shaders{};
+ bool use_asynchronous_shaders{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index ec5421afa..b532fdcc2 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -4,16 +4,17 @@
#include "common/assert.h"
+#include <glad/glad.h>
+
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
namespace OpenGL {
-GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
- : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
+GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {}
GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
- : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
+ : FenceBase(address, payload, is_stubbed) {}
GLInnerFence::~GLInnerFence() = default;
@@ -44,11 +45,10 @@ void GLInnerFence::Wait() {
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}
-FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
- VideoCore::RasterizerInterface& rasterizer,
+FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCacheOpenGL& texture_cache,
OGLBufferCache& buffer_cache, QueryCache& query_cache)
- : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
+ : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index c917b3343..da1dcdace 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -5,7 +5,6 @@
#pragma once
#include <memory>
-#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/fence_manager.h"
@@ -38,9 +37,9 @@ using GenericFenceManager =
class FenceManagerOpenGL final : public GenericFenceManager {
public:
- FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
- QueryCache& query_cache);
+ explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
+ QueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index d7ba57aca..2bb8ec2b8 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -30,12 +30,13 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
-QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory)
: VideoCommon::QueryCacheBase<
QueryCache, CachedQuery, CounterStream, HostCounter,
- std::vector<OGLQuery>>{system,
- static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
- gl_rasterizer{gl_rasterizer} {}
+ std::vector<OGLQuery>>{static_cast<VideoCore::RasterizerInterface&>(rasterizer),
+ maxwell3d, gpu_memory},
+ gl_rasterizer{rasterizer} {}
QueryCache::~QueryCache() = default;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index d8e7052a1..dd626b66b 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -29,7 +29,8 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
HostCounter, std::vector<OGLQuery>> {
public:
- explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
+ explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e960a0ef1..bbb2eb17c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -139,17 +139,33 @@ void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
+void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
+ if (num_entries == 0) {
+ return;
+ }
+ if (num_entries % 2 == 1) {
+ pointers[num_entries] = 0;
+ }
+ const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
+ glProgramLocalParametersI4uivNV(target, 0, num_vectors,
+ reinterpret_cast<const GLuint*>(pointers));
+}
+
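UpdateBindlessPointers packs the per-binding 64-bit buffer addresses into 128-bit program-local parameters: two pointers per uvec4, with an odd trailing entry zero-padded so the final vector upload reads initialized data. A host-side model of the same layout, standalone and without a GL dependency:

```cpp
#include <array>
#include <cstdint>
#include <vector>

// Binding i lands in vector i / 2; even bindings occupy .xy, odd ones .zw.
// This matches the .x / .y swizzle GlobalMemoryPointer selects after PK64.U.
std::vector<std::array<std::uint32_t, 4>> PackPointers(std::vector<std::uint64_t> ptrs) {
    if (ptrs.size() % 2 == 1) {
        ptrs.push_back(0); // zero-pad, like pointers[num_entries] = 0 above
    }
    std::vector<std::array<std::uint32_t, 4>> vectors(ptrs.size() / 2);
    for (std::size_t i = 0; i < ptrs.size(); ++i) {
        auto& vec = vectors[i / 2];
        const std::size_t base = (i % 2) * 2;
        vec[base + 0] = static_cast<std::uint32_t>(ptrs[i]);       // low dword
        vec[base + 1] = static_cast<std::uint32_t>(ptrs[i] >> 32); // high dword
    }
    return vectors;
}
```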
} // Anonymous namespace
-RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- const Device& device, ScreenInfo& info,
- ProgramManager& program_manager, StateTracker& state_tracker)
- : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
- state_tracker},
- shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
- buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
- fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
- screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
+RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
+ Core::Memory::Memory& cpu_memory, const Device& device_,
+ ScreenInfo& screen_info_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_)
+ : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
+ kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
+ screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
+ texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
+ shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device),
+ query_cache(*this, maxwell3d, gpu_memory),
+ buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE),
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
+ async_shaders(emu_window) {
CheckExtensions();
unified_uniform_buffer.Create();
@@ -162,6 +178,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
nullptr, 0);
}
}
+
+ if (device.UseAsynchronousShaders()) {
+ async_shaders.AllocateWorkers();
+ }
}
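The constructor rewrite above is the through-line of this diff: rather than reaching through Core::System to find the GPU on every call, the rasterizer receives Tegra::GPU once and caches references to Maxwell3D, KeplerCompute, and the memory manager for its whole lifetime. A simplified illustration of the shape, with types invented for the sketch:

```cpp
#include <cstdint>

struct EngineRegs { std::uint32_t vb_base_instance = 0; };
struct Engine3D { EngineRegs regs; };
struct Gpu {
    Engine3D& Maxwell() { return engine; }
    Engine3D engine;
};

class Rasterizer {
public:
    explicit Rasterizer(Gpu& gpu_) : gpu{gpu_}, maxwell3d{gpu.Maxwell()} {}

    std::uint32_t BaseInstance() const {
        return maxwell3d.regs.vb_base_instance; // no system.GPU().Maxwell3D() chain
    }

private:
    Gpu& gpu;
    Engine3D& maxwell3d; // bound once; valid for the rasterizer's lifetime
};
```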
RasterizerOpenGL::~RasterizerOpenGL() {
@@ -179,8 +199,7 @@ void RasterizerOpenGL::CheckExtensions() {
}
void RasterizerOpenGL::SetupVertexFormat() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
return;
}
@@ -200,7 +219,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
flags[Dirty::VertexFormat0 + index] = false;
- const auto attrib = gpu.regs.vertex_attrib_format[index];
+ const auto attrib = maxwell3d.regs.vertex_attrib_format[index];
const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes.
@@ -224,8 +243,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
void RasterizerOpenGL::SetupVertexBuffer() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexBuffers]) {
return;
}
@@ -236,7 +254,7 @@ void RasterizerOpenGL::SetupVertexBuffer() {
const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
// Upload all guest vertex arrays sequentially to our buffer
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
@@ -273,14 +291,13 @@ void RasterizerOpenGL::SetupVertexBuffer() {
}
void RasterizerOpenGL::SetupVertexInstances() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexInstances]) {
return;
}
flags[Dirty::VertexInstances] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexInstance0 + index]) {
continue;
@@ -296,7 +313,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
MICROPROFILE_SCOPE(OpenGL_Index);
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
const std::size_t size = CalculateIndexBufferSize();
const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
@@ -305,16 +322,14 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
- auto& gpu = system.GPU().Maxwell3D();
- std::size_t num_ssbos = 0;
u32 clip_distances = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto& shader_config = gpu.regs.shader_config[index];
+ const auto& shader_config = maxwell3d.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
switch (program) {
case Maxwell::ShaderProgram::Geometry:
program_manager.UseGeometryShader(0);
@@ -329,31 +344,15 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
// Currently these stages are not supported in the OpenGL backend.
- // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
- if (program == Maxwell::ShaderProgram::TesselationControl) {
- continue;
- } else if (program == Maxwell::ShaderProgram::TesselationEval) {
+ // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
+ if (program == Maxwell::ShaderProgram::TesselationControl ||
+ program == Maxwell::ShaderProgram::TesselationEval) {
continue;
}
- Shader* const shader = shader_cache.GetStageProgram(program);
-
- if (device.UseAssemblyShaders()) {
- // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
- // all stages share the same bindings.
- const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
- ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
- num_ssbos += num_stage_ssbos;
- }
-
- // Stage indices are 0 - 5
- const std::size_t stage = index == 0 ? 0 : index - 1;
- SetupDrawConstBuffers(stage, shader);
- SetupDrawGlobalMemory(stage, shader);
- SetupDrawTextures(stage, shader);
- SetupDrawImages(stage, shader);
+ Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
- const GLuint program_handle = shader->GetHandle();
+ const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
@@ -370,6 +369,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
shader_config.enable.Value(), shader_config.offset);
}
+ // Stage indices are 0 - 5
+ const std::size_t stage = index == 0 ? 0 : index - 1;
+ SetupDrawConstBuffers(stage, shader);
+ SetupDrawGlobalMemory(stage, shader);
+ SetupDrawTextures(stage, shader);
+ SetupDrawImages(stage, shader);
+
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -384,11 +390,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
SyncClipEnabled(clip_distances);
- gpu.dirty.flags[Dirty::Shaders] = false;
+ maxwell3d.dirty.flags[Dirty::Shaders] = false;
}
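SetupShaders now pairs with the async-shader support added elsewhere in this change: GetStageProgram may hand back a shader that is still compiling on a worker, and IsBuilt() gates whether its real handle or 0 gets bound. A toy model of that non-blocking lookup, using std::async in place of the emulator's worker pool:

```cpp
#include <chrono>
#include <future>
#include <optional>

struct Program {
    unsigned handle;
};

class AsyncShaderSlot {
public:
    void StartBuild() {
        building = std::async(std::launch::async, [] { return Program{42}; });
    }

    // Analogue of IsBuilt()/GetHandle(): never blocks the draw thread.
    unsigned HandleOrZero() {
        if (!result && building.valid() &&
            building.wait_for(std::chrono::seconds(0)) == std::future_status::ready) {
            result = building.get();
        }
        return result ? result->handle : 0;
    }

private:
    std::future<Program> building;
    std::optional<Program> result;
};
```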
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -406,34 +412,27 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
}
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- return static_cast<std::size_t>(regs.index_array.count) *
- static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+ return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
+ static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}
-void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
+void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
- shader_cache.LoadDiskCache(stop_loading, callback);
-}
-
-void RasterizerOpenGL::SetupDirtyFlags() {
- state_tracker.Initialize();
+ shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}
void RasterizerOpenGL::ConfigureFramebuffers() {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
- auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
+ if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
return;
}
- gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
texture_cache.GuardRenderTargets(true);
View depth_surface = texture_cache.GetDepthBufferSurface(true);
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// Bind the framebuffer surfaces
@@ -465,8 +464,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
}
void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
- auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
texture_cache.GuardRenderTargets(true);
View color_surface;
@@ -516,12 +514,11 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_de
}
void RasterizerOpenGL::Clear() {
- const auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.ShouldExecute()) {
+ if (!maxwell3d.ShouldExecute()) {
return;
}
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
@@ -586,7 +583,6 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
- auto& gpu = system.GPU().Maxwell3D();
query_cache.UpdateCounters();
@@ -634,7 +630,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
if (invalidated) {
// When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
- auto& dirty = gpu.dirty.flags;
+ auto& dirty = maxwell3d.dirty.flags;
dirty[Dirty::VertexBuffers] = true;
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
dirty[index] = true;
@@ -655,7 +651,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Setup emulation uniform buffer.
if (!device.UseAssemblyShaders()) {
MaxwellUniformData ubo;
- ubo.SetFromRegs(gpu);
+ ubo.SetFromRegs(maxwell3d);
const auto info =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
@@ -664,7 +660,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Setup shaders and their used resources.
texture_cache.GuardSamplers(true);
- const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology);
+ const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
SetupShaders(primitive_mode);
texture_cache.GuardSamplers(false);
@@ -681,14 +677,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
BeginTransformFeedback(primitive_mode);
- const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
+ const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
const GLsizei num_instances =
- static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
+ static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1);
if (is_indexed) {
- const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
- const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
+ const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
+ const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
- const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
+ const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
} else if (num_instances == 1 && base_instance == 0) {
@@ -707,8 +703,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
base_instance);
}
} else {
- const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
- const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
+ const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first);
+ const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count);
if (num_instances == 1 && base_instance == 0) {
glDrawArrays(primitive_mode, base_vertex, num_vertices);
} else if (base_instance == 0) {
@@ -723,7 +719,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
++num_queued_commands;
- system.GPU().TickWork();
+ gpu.TickWork();
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -731,6 +727,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
current_cbuf = 0;
auto kernel = shader_cache.GetComputeKernel(code_addr);
+ program_manager.BindCompute(kernel->GetHandle());
+
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
@@ -744,7 +742,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Unmap();
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
program_manager.BindCompute(kernel->GetHandle());
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
@@ -807,17 +805,14 @@ void RasterizerOpenGL::SyncGuestHost() {
}
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
- auto& memory_manager{gpu.MemoryManager()};
- memory_manager.Write<u32>(addr, value);
+ gpu_memory.Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;
@@ -826,7 +821,6 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
}
void RasterizerOpenGL::ReleaseFences() {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
return;
}
@@ -912,7 +906,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
+ const auto& stages = maxwell3d.state.shader_stages;
const auto& shader_stage = stages[stage_index];
const auto& entries = shader->GetEntries();
const bool use_unified = entries.use_unified_uniforms;
@@ -937,7 +931,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
const auto& entries = kernel->GetEntries();
const bool use_unified = entries.use_unified_uniforms;
@@ -1005,45 +999,66 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
}
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
- const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
+ static constexpr std::array TARGET_LUT = {
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+ };
+
+ const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
+ const auto& entries{shader->GetEntries().global_memory_entries};
- u32 binding =
- device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
- for (const auto& entry : shader->GetEntries().global_memory_entries) {
+ std::array<GLuint64EXT, 32> pointers;
+ ASSERT(entries.size() < pointers.size());
+
+ const bool assembly_shaders = device.UseAssemblyShaders();
+ u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
+ for (const auto& entry : entries) {
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
- const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
- const u32 size{memory_manager.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding++, entry, gpu_addr, size);
+ const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
+ const u32 size{gpu_memory.Read<u32>(addr + 8)};
+ SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+ ++binding;
+ }
+ if (assembly_shaders) {
+ UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
}
}
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
- const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
+ const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
+ const auto& entries{kernel->GetEntries().global_memory_entries};
+
+ std::array<GLuint64EXT, 32> pointers;
+ ASSERT(entries.size() < pointers.size());
u32 binding = 0;
- for (const auto& entry : kernel->GetEntries().global_memory_entries) {
- const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
- const auto gpu_addr{memory_manager.Read<u64>(addr)};
- const auto size{memory_manager.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding++, entry, gpu_addr, size);
+ for (const auto& entry : entries) {
+ const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
+ const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
+ const u32 size{gpu_memory.Read<u32>(addr + 8)};
+ SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+ ++binding;
+ }
+ if (device.UseAssemblyShaders()) {
+ UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
}
}
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
- GPUVAddr gpu_addr, std::size_t size) {
- const auto alignment{device.GetShaderStorageBufferAlignment()};
+ GPUVAddr gpu_addr, std::size_t size,
+ GLuint64EXT* pointer) {
+ const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
- static_cast<GLsizeiptr>(size));
+ if (device.UseAssemblyShaders()) {
+ *pointer = info.address + info.offset;
+ } else {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
+ static_cast<GLsizeiptr>(size));
+ }
}
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
MICROPROFILE_SCOPE(OpenGL_Texture);
- const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler;
for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index);
@@ -1056,11 +1071,10 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader
void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
- const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : kernel->GetEntries().samplers) {
for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
+ const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
SetupTexture(binding++, texture, entry);
}
}
@@ -1084,20 +1098,18 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
}
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
- const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).image;
for (const auto& entry : shader->GetEntries().images) {
- const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
+ const auto shader_type = static_cast<ShaderType>(stage_index);
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
SetupImage(binding++, tic, entry);
}
}
void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
- const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : shader->GetEntries().images) {
- const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
+ const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
SetupImage(binding++, tic, entry);
}
}
@@ -1117,9 +1129,8 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
}
void RasterizerOpenGL::SyncViewport() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
const bool dirty_viewport = flags[Dirty::Viewports];
const bool dirty_clip_control = flags[Dirty::ClipControl];
@@ -1191,25 +1202,23 @@ void RasterizerOpenGL::SyncViewport() {
}
void RasterizerOpenGL::SyncDepthClamp() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::DepthClampEnabled]) {
return;
}
flags[Dirty::DepthClampEnabled] = false;
- oglEnable(GL_DEPTH_CLAMP, gpu.regs.view_volume_clip_control.depth_clamp_disabled == 0);
+ oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0);
}
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;
- clip_mask &= gpu.regs.clip_distance_enabled;
+ clip_mask &= maxwell3d.regs.clip_distance_enabled;
if (clip_mask == last_clip_distance_mask) {
return;
}
@@ -1225,9 +1234,8 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
if (flags[Dirty::CullTest]) {
flags[Dirty::CullTest] = false;
@@ -1242,26 +1250,24 @@ void RasterizerOpenGL::SyncCullMode() {
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PrimitiveRestart]) {
return;
}
flags[Dirty::PrimitiveRestart] = false;
- if (gpu.regs.primitive_restart.enabled) {
+ if (maxwell3d.regs.primitive_restart.enabled) {
glEnable(GL_PRIMITIVE_RESTART);
- glPrimitiveRestartIndex(gpu.regs.primitive_restart.index);
+ glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index);
} else {
glDisable(GL_PRIMITIVE_RESTART);
}
}
void RasterizerOpenGL::SyncDepthTestState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
- const auto& regs = gpu.regs;
if (flags[Dirty::DepthMask]) {
flags[Dirty::DepthMask] = false;
glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
@@ -1279,14 +1285,13 @@ void RasterizerOpenGL::SyncDepthTestState() {
}
void RasterizerOpenGL::SyncStencilTestState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::StencilTest]) {
return;
}
flags[Dirty::StencilTest] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
@@ -1311,25 +1316,24 @@ void RasterizerOpenGL::SyncStencilTestState() {
}
void RasterizerOpenGL::SyncRasterizeEnable() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::RasterizeEnable]) {
return;
}
flags[Dirty::RasterizeEnable] = false;
- oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
+ oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
}
void RasterizerOpenGL::SyncPolygonModes() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PolygonModes]) {
return;
}
flags[Dirty::PolygonModes] = false;
- if (gpu.regs.fill_rectangle) {
+ const auto& regs = maxwell3d.regs;
+ if (regs.fill_rectangle) {
if (!GLAD_GL_NV_fill_rectangle) {
LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used but not supported");
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
@@ -1342,27 +1346,26 @@ void RasterizerOpenGL::SyncPolygonModes() {
return;
}
- if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
+ if (regs.polygon_mode_front == regs.polygon_mode_back) {
flags[Dirty::PolygonModeFront] = false;
flags[Dirty::PolygonModeBack] = false;
- glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
return;
}
if (flags[Dirty::PolygonModeFront]) {
flags[Dirty::PolygonModeFront] = false;
- glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
}
if (flags[Dirty::PolygonModeBack]) {
flags[Dirty::PolygonModeBack] = false;
- glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
+ glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back));
}
}
void RasterizerOpenGL::SyncColorMask() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::ColorMasks]) {
return;
}
@@ -1371,7 +1374,7 @@ void RasterizerOpenGL::SyncColorMask() {
const bool force = flags[Dirty::ColorMaskCommon];
flags[Dirty::ColorMaskCommon] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.color_mask_common) {
if (!force && !flags[Dirty::ColorMask0]) {
return;
@@ -1396,33 +1399,30 @@ void RasterizerOpenGL::SyncColorMask() {
}
void RasterizerOpenGL::SyncMultiSampleState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::MultisampleControl]) {
return;
}
flags[Dirty::MultisampleControl] = false;
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
}
void RasterizerOpenGL::SyncFragmentColorClampState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::FragmentClampColor]) {
return;
}
flags[Dirty::FragmentClampColor] = false;
- glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
+ glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
}
void RasterizerOpenGL::SyncBlendState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
if (flags[Dirty::BlendColor]) {
flags[Dirty::BlendColor] = false;
@@ -1479,14 +1479,13 @@ void RasterizerOpenGL::SyncBlendState() {
}
void RasterizerOpenGL::SyncLogicOpState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::LogicOp]) {
return;
}
flags[Dirty::LogicOp] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.logic_op.enable) {
glEnable(GL_COLOR_LOGIC_OP);
glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
@@ -1496,14 +1495,13 @@ void RasterizerOpenGL::SyncLogicOpState() {
}
void RasterizerOpenGL::SyncScissorTest() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::Scissors]) {
return;
}
flags[Dirty::Scissors] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
if (!flags[Dirty::Scissor0 + index]) {
continue;
@@ -1522,16 +1520,15 @@ void RasterizerOpenGL::SyncScissorTest() {
}
void RasterizerOpenGL::SyncPointState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PointSize]) {
return;
}
flags[Dirty::PointSize] = false;
- oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable);
+ oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
- if (gpu.regs.vp_point_size.enable) {
+ if (maxwell3d.regs.vp_point_size.enable) {
// glPointSize (GL_POINT_SIZE) only takes effect while GL_PROGRAM_POINT_SIZE is disabled.
glEnable(GL_PROGRAM_POINT_SIZE);
return;
@@ -1539,32 +1536,30 @@ void RasterizerOpenGL::SyncPointState() {
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
- glPointSize(std::max(1.0f, gpu.regs.point_size));
+ glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
glDisable(GL_PROGRAM_POINT_SIZE);
}
void RasterizerOpenGL::SyncLineState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::LineWidth]) {
return;
}
flags[Dirty::LineWidth] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
}
void RasterizerOpenGL::SyncPolygonOffset() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PolygonOffset]) {
return;
}
flags[Dirty::PolygonOffset] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
@@ -1578,14 +1573,13 @@ void RasterizerOpenGL::SyncPolygonOffset() {
}
void RasterizerOpenGL::SyncAlphaTest() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::AlphaTest]) {
return;
}
flags[Dirty::AlphaTest] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
}
@@ -1599,20 +1593,19 @@ void RasterizerOpenGL::SyncAlphaTest() {
}
void RasterizerOpenGL::SyncFramebufferSRGB() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::FramebufferSRGB]) {
return;
}
flags[Dirty::FramebufferSRGB] = false;
- oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
+ oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
}
void RasterizerOpenGL::SyncTransformFeedback() {
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
// when this is required.
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
static constexpr std::size_t STRIDE = 3;
std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
@@ -1664,7 +1657,7 @@ void RasterizerOpenGL::SyncTransformFeedback() {
}
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -1707,7 +1700,7 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
}
void RasterizerOpenGL::EndTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
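Every Sync* function above follows the same dirty-flag gating pattern, now reading through the cached maxwell3d reference instead of calling system.GPU().Maxwell3D() each time. A minimal sketch of the pattern, using a hypothetical Dirty::Example flag, capability, and register (oglEnable is the file's existing bool-to-glEnable/glDisable helper):
void RasterizerOpenGL::SyncExample() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::Example]) {
        return; // nothing changed since this state was last applied
    }
    flags[Dirty::Example] = false; // consume the flag before touching GL
    oglEnable(GL_EXAMPLE_CAP_NV, maxwell3d.regs.example_enable != 0); // hypothetical cap/register
}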
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4f082592f..f451404b2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -33,10 +33,11 @@
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
-namespace Core {
-class System;
+namespace Core::Memory {
+class Memory;
}
namespace Core::Frontend {
@@ -54,9 +55,10 @@ struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
- explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- const Device& device, ScreenInfo& info,
- ProgramManager& program_manager, StateTracker& state_tracker);
+ explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+ Core::Memory::Memory& cpu_memory, const Device& device,
+ ScreenInfo& screen_info, ProgramManager& program_manager,
+ StateTracker& state_tracker);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -82,15 +84,22 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void LoadDiskResources(const std::atomic_bool& stop_loading,
+ void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
- void SetupDirtyFlags() override;
/// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const {
return num_queued_commands > 0;
}
+ VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
+ return async_shaders;
+ }
+
+ const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
+ return async_shaders;
+ }
+
private:
/// Configures the color and depth framebuffer states.
void ConfigureFramebuffers();
@@ -115,9 +124,9 @@ private:
/// Configures the current global memory entries to use for the kernel invocation.
void SetupComputeGlobalMemory(Shader* kernel);
- /// Configures a constant buffer.
+ /// Configures a global memory buffer.
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
- std::size_t size);
+ std::size_t size, GLuint64EXT* pointer);
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
@@ -228,7 +237,15 @@ private:
void SetupShaders(GLenum primitive_mode);
+ Tegra::GPU& gpu;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+
const Device& device;
+ ScreenInfo& screen_info;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache;
@@ -238,10 +255,7 @@ private:
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
- Core::System& system;
- ScreenInfo& screen_info;
- ProgramManager& program_manager;
- StateTracker& state_tracker;
+ VideoCommon::Shader::AsyncShaders async_shaders;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
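With Core::System gone from the constructor, the caller now injects the GPU, its engines, and memory explicitly. A hypothetical construction site (all names assumed to be in scope) would read:
auto rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
                                                     screen_info, program_manager, state_tracker);
The maxwell3d, kepler_compute, and gpu_memory members are then bound once from gpu, which is what lets the rasterizer drop every system.GPU() lookup above.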
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index a787e27d2..0ebcec427 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
@@ -82,11 +83,13 @@ void OGLSampler::Release() {
handle = 0;
}
-void OGLShader::Create(const char* source, GLenum type) {
- if (handle != 0)
+void OGLShader::Create(std::string_view source, GLenum type) {
+ if (handle != 0) {
return;
- if (source == nullptr)
+ }
+ if (source.empty()) {
return;
+ }
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
handle = GLShader::LoadShader(source, type);
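The std::string_view overload replaces the nullptr convention with an explicit empty-source guard. A usage sketch (the GLSL source is a placeholder):
OGLShader shader;
shader.Create("void main() { gl_Position = vec4(0.0); }", GL_VERTEX_SHADER); // forwards to GLShader::LoadShader
OGLShader empty;
empty.Create(std::string_view{}, GL_FRAGMENT_SHADER); // empty view: early return, no GL call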
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index f8b322227..f48398669 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -4,6 +4,7 @@
#pragma once
+#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
@@ -127,7 +128,7 @@ public:
return *this;
}
- void Create(const char* source, GLenum type);
+ void Create(std::string_view source, GLenum type);
void Release();
@@ -177,6 +178,12 @@ public:
Release();
}
+ OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
+ return *this;
+ }
+
/// Deletes the internal OpenGL resource
void Release();
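The added move assignment is the release-then-steal idiom the other wrappers in this file use: free what we hold, take the source's handle with std::exchange, and zero the source so its destructor is a no-op. A self-contained sketch of the same idiom under those assumptions:
#include <utility>
struct GLHandle {
    unsigned handle = 0;
    GLHandle() = default;
    GLHandle(GLHandle&& o) noexcept : handle{std::exchange(o.handle, 0)} {}
    GLHandle& operator=(GLHandle&& o) noexcept {
        Release();                           // drop the object we currently own
        handle = std::exchange(o.handle, 0); // steal o's handle, leaving o empty
        return *this;
    }
    ~GLHandle() { Release(); }
    void Release() { /* the real class calls glDeleteProgramsARB here */ handle = 0; }
};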
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c6a3bf3a1..bd56bed0c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -22,6 +22,7 @@
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
@@ -31,6 +32,7 @@
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
+#include "video_core/shader_notify.h"
namespace OpenGL {
@@ -125,7 +127,7 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
entry.graphics_info, entry.compute_info};
- const auto registry = std::make_shared<Registry>(entry.type, info);
+ auto registry = std::make_shared<Registry>(entry.type, info);
for (const auto& [address, value] : entry.keys) {
const auto [buffer, offset] = address;
registry->InsertKey(buffer, offset, value);
@@ -140,9 +142,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
return registry;
}
+std::unordered_set<GLenum> GetSupportedFormats() {
+ GLint num_formats;
+ glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
+
+ std::vector<GLint> formats(num_formats);
+ glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
+
+ std::unordered_set<GLenum> supported_formats;
+ for (const GLint format : formats) {
+ supported_formats.insert(static_cast<GLenum>(format));
+ }
+ return supported_formats;
+}
+
+} // Anonymous namespace
+
ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
- const ShaderIR& ir, const Registry& registry,
- bool hint_retrievable = false) {
+ const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
LOG_INFO(Render_OpenGL, "{}", shader_id);
@@ -181,30 +198,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
return program;
}
-std::unordered_set<GLenum> GetSupportedFormats() {
- GLint num_formats;
- glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
-
- std::vector<GLint> formats(num_formats);
- glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
-
- std::unordered_set<GLenum> supported_formats;
- for (const GLint format : formats) {
- supported_formats.insert(static_cast<GLenum>(format));
- }
- return supported_formats;
-}
-
-} // Anonymous namespace
-
Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
- ProgramSharedPtr program_)
- : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
+ ProgramSharedPtr program_, bool is_built)
+ : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
+ is_built(is_built) {
handle = program->assembly_program.handle;
if (handle == 0) {
handle = program->source_program.handle;
}
- ASSERT(handle != 0);
+ if (is_built) {
+ ASSERT(handle != 0);
+ }
}
Shader::~Shader() = default;
@@ -214,43 +218,78 @@ GLuint Shader::GetHandle() const {
return handle;
}
-std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type,
- ProgramCode code, ProgramCode code_b) {
+bool Shader::IsBuilt() const {
+ return is_built;
+}
+
+void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
+ program->source_program = std::move(new_program);
+ handle = program->source_program.handle;
+ is_built = true;
+}
+
+void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
+ program->assembly_program = std::move(new_program);
+ handle = program->assembly_program.handle;
+ is_built = true;
+}
+
+std::unique_ptr<Shader> Shader::CreateStageFromMemory(
+ const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
+ ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
const auto shader_type = GetShaderType(program_type);
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
- auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- // TODO(Rodrigo): Handle VertexA shaders
- // std::optional<ShaderIR> ir_b;
- // if (!code_b.empty()) {
- // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
- // }
- auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
+ auto& gpu = params.gpu;
+ gpu.ShaderNotify().MarkSharderBuilding();
+
+ auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
+ if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
+ const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
+ // TODO(Rodrigo): Handle VertexA shaders
+ // std::optional<ShaderIR> ir_b;
+ // if (!code_b.empty()) {
+ // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
+ // }
+ auto program =
+ BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
+ ShaderDiskCacheEntry entry;
+ entry.type = shader_type;
+ entry.code = std::move(code);
+ entry.code_b = std::move(code_b);
+ entry.unique_identifier = params.unique_identifier;
+ entry.bound_buffer = registry->GetBoundBuffer();
+ entry.graphics_info = registry->GetGraphicsInfo();
+ entry.keys = registry->GetKeys();
+ entry.bound_samplers = registry->GetBoundSamplers();
+ entry.bindless_samplers = registry->GetBindlessSamplers();
+ params.disk_cache.SaveEntry(std::move(entry));
+
+ gpu.ShaderNotify().MarkShaderComplete();
+
+ return std::unique_ptr<Shader>(new Shader(std::move(registry),
+ MakeEntries(params.device, ir, shader_type),
+ std::move(program), true));
+ } else {
+ // Required for entries
+ const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
+ auto entries = MakeEntries(params.device, ir, shader_type);
- ShaderDiskCacheEntry entry;
- entry.type = shader_type;
- entry.code = std::move(code);
- entry.code_b = std::move(code_b);
- entry.unique_identifier = params.unique_identifier;
- entry.bound_buffer = registry->GetBoundBuffer();
- entry.graphics_info = registry->GetGraphicsInfo();
- entry.keys = registry->GetKeys();
- entry.bound_samplers = registry->GetBoundSamplers();
- entry.bindless_samplers = registry->GetBindlessSamplers();
- params.disk_cache.SaveEntry(std::move(entry));
+ async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
+ std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
+ COMPILER_SETTINGS, *registry, cpu_addr);
- return std::unique_ptr<Shader>(new Shader(
- std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
+ auto program = std::make_shared<ProgramHandle>();
+ return std::unique_ptr<Shader>(
+ new Shader(std::move(registry), std::move(entries), std::move(program), false));
+ }
}
std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
ProgramCode code) {
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
+ auto& gpu = params.gpu;
+ gpu.ShaderNotify().MarkSharderBuilding();
- auto& engine = params.system.GPU().KeplerCompute();
- auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
+ auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
const u64 uid = params.unique_identifier;
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
@@ -266,6 +305,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
+ gpu.ShaderNotify().MarkShaderComplete();
+
return std::unique_ptr<Shader>(new Shader(std::move(registry),
MakeEntries(params.device, ir, ShaderType::Compute),
std::move(program)));
@@ -277,15 +318,20 @@ std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
}
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
- Core::Frontend::EmuWindow& emu_window, const Device& device)
- : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
- emu_window{emu_window}, device{device}, disk_cache{system} {}
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer,
+ Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_)
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, emu_window{emu_window_}, gpu{gpu_},
+ gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, device{device_} {}
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
-void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
+void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
+ disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
@@ -361,7 +407,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
};
- const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
+ const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
const std::size_t bucket_size{transferable->size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
@@ -436,42 +482,77 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
return program;
}
-Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
- if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
- return last_shaders[static_cast<std::size_t>(program)];
+Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
+ VideoCommon::Shader::AsyncShaders& async_shaders) {
+ if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
+ auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
+ if (last_shader->IsBuilt()) {
+ return last_shader;
+ }
}
- auto& memory_manager{system.GPU().MemoryManager()};
- const GPUVAddr address{GetShaderAddress(system, program)};
+ const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
+
+ if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
+ auto completed_work = async_shaders.GetCompletedWork();
+ for (auto& work : completed_work) {
+ Shader* shader = TryGet(work.cpu_address);
+ gpu.ShaderNotify().MarkShaderComplete();
+ if (shader == nullptr) {
+ continue;
+ }
+ using namespace VideoCommon::Shader;
+ if (work.backend == AsyncShaders::Backend::OpenGL) {
+ shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
+ } else if (work.backend == AsyncShaders::Backend::GLASM) {
+ shader->AsyncGLASMBuilt(std::move(work.program.glasm));
+ }
+
+ auto& registry = shader->GetRegistry();
+
+ ShaderDiskCacheEntry entry;
+ entry.type = work.shader_type;
+ entry.code = std::move(work.code);
+ entry.code_b = std::move(work.code_b);
+ entry.unique_identifier = work.uid;
+ entry.bound_buffer = registry.GetBoundBuffer();
+ entry.graphics_info = registry.GetGraphicsInfo();
+ entry.keys = registry.GetKeys();
+ entry.bound_samplers = registry.GetBoundSamplers();
+ entry.bindless_samplers = registry.GetBindlessSamplers();
+ disk_cache.SaveEntry(std::move(entry));
+ }
+ }
// Look up shader in the cache based on address
- const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
+ const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
- const auto host_ptr{memory_manager.GetPointer(address)};
+ const u8* const host_ptr{gpu_memory.GetPointer(address)};
// No shader found - create a new one
- ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)};
+ ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
ProgramCode code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
- const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
- const u8* host_ptr_b = memory_manager.GetPointer(address_b);
- code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
+ const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
+ const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
+ code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
}
const std::size_t code_size = code.size() * sizeof(u64);
const u64 unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
- const ShaderParameters params{system, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+ const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
+ *cpu_addr, host_ptr, unique_identifier};
std::unique_ptr<Shader> shader;
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
- shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
+ shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
+ async_shaders, cpu_addr.value_or(0));
} else {
shader = Shader::CreateFromCache(params, found->second);
}
@@ -487,21 +568,20 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
}
Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
- auto& memory_manager{system.GPU().MemoryManager()};
- const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
+ const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
return kernel;
}
- const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one
- ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
+ const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
+ ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
const std::size_t code_size{code.size() * sizeof(u64)};
const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
- const ShaderParameters params{system, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+ const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
+ *cpu_addr, host_ptr, unique_identifier};
std::unique_ptr<Shader> kernel;
const auto found = runtime_cache.find(unique_identifier);
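The asynchronous path above is a producer/consumer handoff: CreateStageFromMemory queues the compile and returns a placeholder Shader with is_built == false, and a later GetStageProgram call drains the finished programs and patches them in via AsyncOpenGLBuilt/AsyncGLASMBuilt. A minimal, self-contained sketch of that handoff with generic types (this is not the AsyncShaders API):
#include <mutex>
#include <optional>
#include <queue>
struct CompletedWork {
    unsigned long long uid;  // stands in for the shader's unique identifier
    unsigned program_handle; // stands in for the built GL program
};
class BuildQueue {
public:
    void Push(CompletedWork work) { // called from worker threads
        std::scoped_lock lock{mutex};
        completed.push(work);
    }
    std::optional<CompletedWork> Pop() { // drained on the render thread
        std::scoped_lock lock{mutex};
        if (completed.empty()) {
            return std::nullopt;
        }
        CompletedWork work = completed.front();
        completed.pop();
        return work;
    }
private:
    std::mutex mutex;
    std::queue<CompletedWork> completed;
};
On the render thread this mirrors the HasCompletedWork()/GetCompletedWork() drain at the top of GetStageProgram.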
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 994aaeaf2..1708af06a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -25,14 +25,18 @@
#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
-namespace Core {
-class System;
+namespace Tegra {
+class MemoryManager;
}
namespace Core::Frontend {
class EmuWindow;
}
+namespace VideoCommon::Shader {
+class AsyncShaders;
+}
+
namespace OpenGL {
class Device;
@@ -53,14 +57,20 @@ struct PrecompiledShader {
};
struct ShaderParameters {
- Core::System& system;
+ Tegra::GPU& gpu;
+ Tegra::Engines::ConstBufferEngineInterface& engine;
ShaderDiskCacheOpenGL& disk_cache;
const Device& device;
VAddr cpu_addr;
- u8* host_ptr;
+ const u8* host_ptr;
u64 unique_identifier;
};
+ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
+ u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
+ const VideoCommon::Shader::Registry& registry,
+ bool hint_retrievable = false);
+
class Shader final {
public:
~Shader();
@@ -68,15 +78,28 @@ public:
/// Gets the GL program handle for the shader
GLuint GetHandle() const;
+ bool IsBuilt() const;
+
/// Gets the shader entries for the shader
const ShaderEntries& GetEntries() const {
return entries;
}
- static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type,
- ProgramCode program_code,
- ProgramCode program_code_b);
+ const VideoCommon::Shader::Registry& GetRegistry() const {
+ return *registry;
+ }
+
+    /// Marks an OpenGL shader as built
+ void AsyncOpenGLBuilt(OGLProgram new_program);
+
+    /// Marks a GLASM shader as built
+ void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
+
+ static std::unique_ptr<Shader> CreateStageFromMemory(
+ const ShaderParameters& params, Maxwell::ShaderProgram program_type,
+ ProgramCode program_code, ProgramCode program_code_b,
+ VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
+
static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
ProgramCode code);
@@ -85,26 +108,30 @@ public:
private:
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
- ProgramSharedPtr program);
+ ProgramSharedPtr program, bool is_built = true);
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
ProgramSharedPtr program;
GLuint handle = 0;
+ bool is_built{};
};
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public:
- explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
- Core::Frontend::EmuWindow& emu_window, const Device& device);
+ explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window,
+ Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, const Device& device);
~ShaderCacheOpenGL() override;
/// Loads disk cache for the current game
- void LoadDiskCache(const std::atomic_bool& stop_loading,
+ void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
/// Gets the current specified shader stage program
- Shader* GetStageProgram(Maxwell::ShaderProgram program);
+ Shader* GetStageProgram(Maxwell::ShaderProgram program,
+ VideoCommon::Shader::AsyncShaders& async_shaders);
/// Gets a compute kernel in the passed address
Shader* GetComputeKernel(GPUVAddr code_addr);
@@ -114,9 +141,13 @@ private:
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats);
- Core::System& system;
Core::Frontend::EmuWindow& emu_window;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device;
+
ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2c49aeaac..3f75fcd2b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -602,8 +602,15 @@ private:
return;
}
const auto& info = registry.GetComputeInfo();
- if (const u32 size = info.shared_memory_size_in_words; size > 0) {
- code.AddLine("shared uint smem[{}];", size);
+ if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
+ const u32 limit = device.GetMaxComputeSharedMemorySize();
+ if (size > limit) {
+ LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
+ size, limit);
+ size = limit;
+ }
+
+ code.AddLine("shared uint smem[{}];", size / 4);
code.AddNewLine();
}
code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
@@ -1912,7 +1919,7 @@ private:
Expression Comparison(Operation operation) {
static_assert(!unordered || type == Type::Float);
- const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
+ Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
// GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's
@@ -1952,10 +1959,6 @@ private:
return {fmt::format("({} != 0)", carry), Type::Bool};
}
- Expression LogicalFIsNan(Operation operation) {
- return GenerateUnary(operation, "isnan", Type::Bool, Type::Float);
- }
-
Expression LogicalAssign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
@@ -2771,15 +2774,6 @@ private:
return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
- bool IsRenderTargetEnabled(u32 render_target) const {
- for (u32 component = 0; component < 4; ++component) {
- if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- return true;
- }
- }
- return false;
- }
-
const Device& device;
const ShaderIR& ir;
const Registry& registry;
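Worked example of the shared-memory clamp above: the Maxwell register stores the size in 32-bit words, while the host limit from GetMaxComputeSharedMemorySize() is in bytes, so the code converts to bytes, clamps, and converts back when emitting the GLSL array length. Standalone arithmetic (the 48 KiB limit is an assumed host value):
#include <algorithm>
#include <cstdint>
#include <cstdio>
int main() {
    const std::uint32_t shared_memory_size_in_words = 24576; // 96 KiB requested by the guest
    const std::uint32_t host_limit = 49152;                  // 48 KiB, assumed host maximum
    std::uint32_t size = shared_memory_size_in_words * 4;    // words -> bytes
    size = std::min(size, host_limit);                       // the LOG_ERROR + clamp branch
    std::printf("shared uint smem[%u];\n", size / 4);        // bytes -> words: smem[12288]
}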
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 2dcc2b0eb..166ee34e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -73,7 +73,7 @@ ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
-bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
+bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
return false;
}
@@ -144,7 +144,7 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
return true;
}
-bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
+bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
if (file.WriteObject(static_cast<u32>(type)) != 1 ||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
@@ -206,28 +206,32 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
flat_bindless_samplers.size();
}
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
+void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
+ title_id = title_id_;
+}
+
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
- const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
+ const bool has_title_id = title_id != 0;
if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
- return {};
+ return std::nullopt;
}
- FileUtil::IOFile file(GetTransferablePath(), "rb");
+ Common::FS::IOFile file(GetTransferablePath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No transferable shader cache found");
is_usable = true;
- return {};
+ return std::nullopt;
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
- return {};
+ return std::nullopt;
}
if (version < NativeVersion) {
@@ -235,12 +239,12 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
file.Close();
InvalidateTransferable();
is_usable = true;
- return {};
+ return std::nullopt;
}
if (version > NativeVersion) {
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
"of the emulator, skipping");
- return {};
+ return std::nullopt;
}
// Version is valid, load the shaders
@@ -249,7 +253,7 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
ShaderDiskCacheEntry& entry = entries.emplace_back();
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
- return {};
+ return std::nullopt;
}
}
@@ -262,7 +266,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled()
return {};
}
- FileUtil::IOFile file(GetPrecompiledPath(), "rb");
+ Common::FS::IOFile file(GetPrecompiledPath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
return {};
@@ -279,7 +283,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled()
}
std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
- FileUtil::IOFile& file) {
+ Common::FS::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
file.ReadBytes(compressed.data(), compressed.size());
@@ -290,12 +294,12 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
ShaderCacheVersionHash file_hash{};
if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
precompiled_cache_virtual_file_offset = 0;
- return {};
+ return std::nullopt;
}
if (GetShaderCacheVersionHash() != file_hash) {
LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
precompiled_cache_virtual_file_offset = 0;
- return {};
+ return std::nullopt;
}
std::vector<ShaderDiskCachePrecompiled> entries;
@@ -305,19 +309,20 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
!LoadObjectFromPrecompiled(entry.binary_format) ||
!LoadObjectFromPrecompiled(binary_size)) {
- return {};
+ return std::nullopt;
}
entry.binary.resize(binary_size);
if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
- return {};
+ return std::nullopt;
}
}
- return entries;
+
+ return std::move(entries);
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
- if (!FileUtil::Delete(GetTransferablePath())) {
+ if (!Common::FS::Delete(GetTransferablePath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
GetTransferablePath());
}
@@ -328,7 +333,7 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
// Clear virtual precompiled cache file
precompiled_cache_virtual_file.Resize(0);
- if (!FileUtil::Delete(GetPrecompiledPath())) {
+ if (!Common::FS::Delete(GetPrecompiledPath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
}
}
@@ -344,7 +349,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
return;
}
- FileUtil::IOFile file = AppendTransferableFile();
+ Common::FS::IOFile file = AppendTransferableFile();
if (!file.IsOpen()) {
return;
}
@@ -386,15 +391,15 @@ void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint progra
}
}
-FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
+Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
if (!EnsureDirectories()) {
return {};
}
const auto transferable_path{GetTransferablePath()};
- const bool existed = FileUtil::Exists(transferable_path);
+ const bool existed = Common::FS::Exists(transferable_path);
- FileUtil::IOFile file(transferable_path, "ab");
+ Common::FS::IOFile file(transferable_path, "ab");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path);
return {};
@@ -426,7 +431,7 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
const auto precompiled_path{GetPrecompiledPath()};
- FileUtil::IOFile file(precompiled_path, "wb");
+ Common::FS::IOFile file(precompiled_path, "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
@@ -440,24 +445,24 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
const auto CreateDir = [](const std::string& dir) {
- if (!FileUtil::CreateDir(dir)) {
+ if (!Common::FS::CreateDir(dir)) {
LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
return false;
}
return true;
};
- return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
+ return CreateDir(Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)) &&
CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
CreateDir(GetPrecompiledDir());
}
std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
- return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+ return Common::FS::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
}
std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
- return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+ return Common::FS::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
}
std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
@@ -469,11 +474,11 @@ std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
}
std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
- return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
+ return Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir) + DIR_SEP "opengl";
}
std::string ShaderDiskCacheOpenGL::GetTitleID() const {
- return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
+ return fmt::format("{:016X}", title_id);
}
} // namespace OpenGL
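Because the disk cache no longer reaches into Core::System for the running process, a title ID has to be bound before any load or save. A hypothetical call sequence (the title ID value is made up):
ShaderDiskCacheOpenGL disk_cache;
disk_cache.BindTitleID(0x0100000000010000ULL); // example value; normally the running game's ID
if (const auto transferable = disk_cache.LoadTransferable()) {
    // feed *transferable to the shader build workers
}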
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index a79cef0e9..aef841c1d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -21,11 +21,7 @@
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
-namespace Core {
-class System;
-}
-
-namespace FileUtil {
+namespace Common::FS {
class IOFile;
}
@@ -38,9 +34,9 @@ struct ShaderDiskCacheEntry {
ShaderDiskCacheEntry();
~ShaderDiskCacheEntry();
- bool Load(FileUtil::IOFile& file);
+ bool Load(Common::FS::IOFile& file);
- bool Save(FileUtil::IOFile& file) const;
+ bool Save(Common::FS::IOFile& file) const;
bool HasProgramA() const {
return !code.empty() && !code_b.empty();
@@ -70,9 +66,12 @@ struct ShaderDiskCachePrecompiled {
class ShaderDiskCacheOpenGL {
public:
- explicit ShaderDiskCacheOpenGL(Core::System& system);
+ explicit ShaderDiskCacheOpenGL();
~ShaderDiskCacheOpenGL();
+ /// Binds a title ID for all future operations.
+ void BindTitleID(u64 title_id);
+
/// Loads the transferable cache. If the file has an old version or fails to load, it deletes the file.
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
@@ -97,10 +96,10 @@ public:
private:
/// Loads the transferable cache. Returns empty on failure.
std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
- FileUtil::IOFile& file);
+ Common::FS::IOFile& file);
/// Opens the current game's transferable file and writes its header if it doesn't exist
- FileUtil::IOFile AppendTransferableFile() const;
+ Common::FS::IOFile AppendTransferableFile() const;
/// Save precompiled header to precompiled_cache_in_memory
void SavePrecompiledHeaderToVirtualPrecompiledCache();
@@ -157,8 +156,6 @@ private:
return LoadArrayFromPrecompiled(&object, 1);
}
- Core::System& system;
-
// Stores the whole precompiled cache, which will be read from or saved to the precompiled
// cache file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
@@ -168,8 +165,11 @@ private:
// Stored transferable shaders
std::unordered_set<u64> stored_transferable;
+ /// Title ID to operate on
+ u64 title_id = 0;
+
// Whether the cache has been loaded at boot
- bool is_usable{};
+ bool is_usable = false;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 8e754fa90..691c6c79b 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -11,8 +11,30 @@
namespace OpenGL {
-ProgramManager::ProgramManager(const Device& device) {
- use_assembly_programs = device.UseAssemblyShaders();
+namespace {
+
+void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
+ if (current == old) {
+ return;
+ }
+ if (current == 0) {
+ if (enabled) {
+ enabled = false;
+ glDisable(stage);
+ }
+ return;
+ }
+ if (!enabled) {
+ enabled = true;
+ glEnable(stage);
+ }
+ glBindProgramARB(stage, current);
+}
+
+} // Anonymous namespace
+
+ProgramManager::ProgramManager(const Device& device)
+ : use_assembly_programs{device.UseAssemblyShaders()} {
if (use_assembly_programs) {
glEnable(GL_COMPUTE_PROGRAM_NV);
} else {
@@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) {
}
void ProgramManager::BindGraphicsPipeline() {
- if (use_assembly_programs) {
- UpdateAssemblyPrograms();
- } else {
+ if (!use_assembly_programs) {
UpdateSourcePrograms();
}
}
@@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() {
}
}
-void ProgramManager::UpdateAssemblyPrograms() {
- const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
- if (current == old) {
- return;
- }
- if (current == 0) {
- if (enabled) {
- enabled = false;
- glDisable(stage);
- }
- return;
- }
- if (!enabled) {
- enabled = true;
- glEnable(stage);
- }
- glBindProgramARB(stage, current);
- };
+void ProgramManager::UseVertexShader(GLuint program) {
+ if (use_assembly_programs) {
+ BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
+ }
+ current_state.vertex = program;
+}
- update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
- update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
- old_state.geometry);
- update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
- old_state.fragment);
+void ProgramManager::UseGeometryShader(GLuint program) {
+ if (use_assembly_programs) {
+        BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.geometry, geometry_enabled);
+ }
+ current_state.geometry = program;
+}
- old_state = current_state;
+void ProgramManager::UseFragmentShader(GLuint program) {
+ if (use_assembly_programs) {
+        BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.fragment, fragment_enabled);
+ }
+ current_state.fragment = program;
}
void ProgramManager::UpdateSourcePrograms() {
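After this refactor the Use* setters bind NV assembly programs eagerly, while GLSL programs are still flushed in one place. A per-draw usage sketch (handles assumed):
program_manager.UseVertexShader(vertex_handle);
program_manager.UseGeometryShader(0); // 0 disables GL_GEOMETRY_PROGRAM_NV on the assembly path
program_manager.UseFragmentShader(fragment_handle);
program_manager.BindGraphicsPipeline(); // no-op for assembly shaders, rebinds GLSL otherwise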
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 0f03b4f12..950e0dfcb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,17 +45,9 @@ public:
/// Rewinds BindHostPipeline state changes.
void RestoreGuestPipeline();
- void UseVertexShader(GLuint program) {
- current_state.vertex = program;
- }
-
- void UseGeometryShader(GLuint program) {
- current_state.geometry = program;
- }
-
- void UseFragmentShader(GLuint program) {
- current_state.fragment = program;
- }
+ void UseVertexShader(GLuint program);
+ void UseGeometryShader(GLuint program);
+ void UseFragmentShader(GLuint program);
private:
struct PipelineState {
@@ -64,9 +56,6 @@ private:
GLuint fragment = 0;
};
- /// Update NV_gpu_program5 programs.
- void UpdateAssemblyPrograms();
-
/// Update GLSL programs.
void UpdateSourcePrograms();
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 9e74eda0d..4bf0d6090 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
@@ -11,7 +12,8 @@
namespace OpenGL::GLShader {
namespace {
-const char* GetStageDebugName(GLenum type) {
+
+std::string_view StageDebugName(GLenum type) {
switch (type) {
case GL_VERTEX_SHADER:
return "vertex";
@@ -25,12 +27,17 @@ const char* GetStageDebugName(GLenum type) {
UNIMPLEMENTED();
return "unknown";
}
+
} // Anonymous namespace
-GLuint LoadShader(const char* source, GLenum type) {
- const char* debug_type = GetStageDebugName(type);
+GLuint LoadShader(std::string_view source, GLenum type) {
+ const std::string_view debug_type = StageDebugName(type);
const GLuint shader_id = glCreateShader(type);
- glShaderSource(shader_id, 1, &source, nullptr);
+
+ const GLchar* source_string = source.data();
+ const GLint source_length = static_cast<GLint>(source.size());
+
+ glShaderSource(shader_id, 1, &source_string, &source_length);
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
glCompileShader(shader_id);
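Passing the length array matters because a std::string_view is not guaranteed to be NUL-terminated, and glShaderSource only tolerates a missing terminator when given explicit lengths. Sketch of the pitfall this avoids (full_text, offset, and length are hypothetical):
const std::string_view source = full_text.substr(offset, length); // view into a larger buffer
const GLchar* source_string = source.data();                      // not NUL-terminated!
const GLint source_length = static_cast<GLint>(source.size());
glShaderSource(shader_id, 1, &source_string, &source_length);     // GL stops at source_length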
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 03b7548c2..1b770532e 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -38,7 +38,7 @@ void LogShaderSource(T... shaders) {
* @param source String of the GLSL shader program
* @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
*/
-GLuint LoadShader(const char* source, GLenum type);
+GLuint LoadShader(std::string_view source, GLenum type);
/**
* Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index d24fad3de..6bcf831f2 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -214,10 +214,8 @@ void SetupDirtyMisc(Tables& tables) {
} // Anonymous namespace
-StateTracker::StateTracker(Core::System& system) : system{system} {}
-
-void StateTracker::Initialize() {
- auto& dirty = system.GPU().Maxwell3D().dirty;
+StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
+ auto& dirty = gpu.Maxwell3D().dirty;
auto& tables = dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyColorMasks(tables);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 0f823288e..9d127548f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -13,8 +13,8 @@
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
-namespace Core {
-class System;
+namespace Tegra {
+class GPU;
}
namespace OpenGL {
@@ -90,9 +90,7 @@ static_assert(Last <= std::numeric_limits<u8>::max());
class StateTracker {
public:
- explicit StateTracker(Core::System& system);
-
- void Initialize();
+ explicit StateTracker(Tegra::GPU& gpu);
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
@@ -103,7 +101,6 @@ public:
}
void NotifyScreenDrawVertexArray() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
@@ -117,98 +114,81 @@ public:
}
void NotifyPolygonModes() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonModes] = true;
flags[OpenGL::Dirty::PolygonModeFront] = true;
flags[OpenGL::Dirty::PolygonModeBack] = true;
}
void NotifyViewport0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Viewports] = true;
flags[OpenGL::Dirty::Viewport0] = true;
}
void NotifyScissor0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Scissors] = true;
flags[OpenGL::Dirty::Scissor0] = true;
}
void NotifyColorMask0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ColorMasks] = true;
flags[OpenGL::Dirty::ColorMask0] = true;
}
void NotifyBlend0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::BlendStates] = true;
flags[OpenGL::Dirty::BlendState0] = true;
}
void NotifyFramebuffer() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[VideoCommon::Dirty::RenderTargets] = true;
}
void NotifyFrontFace() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FrontFace] = true;
}
void NotifyCullTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::CullTest] = true;
}
void NotifyDepthMask() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthMask] = true;
}
void NotifyDepthTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthTest] = true;
}
void NotifyStencilTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::StencilTest] = true;
}
void NotifyPolygonOffset() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonOffset] = true;
}
void NotifyRasterizeEnable() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::RasterizeEnable] = true;
}
void NotifyFramebufferSRGB() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FramebufferSRGB] = true;
}
void NotifyLogicOp() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::LogicOp] = true;
}
void NotifyClipControl() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ClipControl] = true;
}
void NotifyAlphaTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::AlphaTest] = true;
}
private:
- Core::System& system;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
GLuint index_buffer = 0;
};
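Note: after this change every Notify* member is a couple of writes into a cached bitset reference instead of a pointer chase through Core::System on each call. A hedged usage sketch (the function name is hypothetical; the Notify* members are the ones declared above):

    // Called by presentation code that touched GL state behind the
    // rasterizer's back; marks the affected categories dirty.
    void OnScreenDraw(OpenGL::StateTracker& state_tracker) {
        state_tracker.NotifyScreenDrawVertexArray();
        state_tracker.NotifyFramebuffer();
        state_tracker.NotifyViewport0();
    }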
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 3655ff629..887995cf4 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver
mapped_ptr = static_cast<u8*>(
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
- if (device.HasVertexBufferUnifiedMemory()) {
+ if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
}
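Note: the added UseAssemblyShaders() condition presumably covers NV assembly programs, which consume buffers by raw 64-bit GPU address rather than by binding index, so the stream buffer must be made resident even when vertex-buffer-unified-memory is not in play. Sketch of the address query using the NV_shader_buffer_load entry points seen above:

    // Make a buffer resident and fetch its GPU virtual address.
    GLuint64EXT QueryGpuAddress(GLuint buffer) {
        glMakeNamedBufferResidentNV(buffer, GL_READ_ONLY);
        GLuint64EXT gpu_address = 0;
        glGetNamedBufferParameterui64vNV(buffer, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
        return gpu_address;
    }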
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 61505879b..a863ef218 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -41,91 +41,103 @@ struct FormatTuple {
};
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U
- {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S
- {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U
- {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U
- {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI
- {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F
- {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U
- {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S
- {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI
- {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F
- {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI
- {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1
- {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23
- {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45
- {GL_COMPRESSED_RED_RGTC1}, // DXN1
- {GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM
- {GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM
- {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U
- {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16
- {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16
- {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4
- {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
- {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F
- {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F
- {GL_R32F, GL_RED, GL_FLOAT}, // R32F
- {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F
- {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U
- {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S
- {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI
- {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I
- {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16
- {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F
- {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI
- {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I
- {GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S
- {GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U
- {GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S
- {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG8UI
- {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI
- {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F
- {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI
- {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I
- {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8
- {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5
- {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4
- {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
+ {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
+ {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
+ {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
+ {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
+ {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
+ {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
+ {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
+ {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
+ {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
+ {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
+ {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
+ {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
+ {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
+ {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
+ {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
+ {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
+ {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
+ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
+ {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
+ {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
+ {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
+ {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
+ {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
+ {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
+ {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
+ {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
+ {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
+ {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
+ {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
+ {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
+ {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
+ {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
+ {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
+ {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
+ {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
+ {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
+ {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
+ {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
+ {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
+ {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
+ {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
+ {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
+ {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
// Compressed sRGB formats
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB
- {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
- {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5
+ {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8
+ {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6
+ {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10
+ {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
- {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12
+ {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
- {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6
+ {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5
+ {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
- {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
// Depth formats
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
// DepthStencil formats
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24
- {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+ {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
+ GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
}};
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
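Note: the tuple table is positional. It is indexed by the integer value of VideoCore::Surface::PixelFormat, which is why every row's comment was renamed in enum order rather than reordered. GetFormatTuple's body falls outside this hunk; a sketch of its likely shape:

    const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
        ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
        return tex_format_tuples[static_cast<std::size_t>(pixel_format)];
    }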
@@ -178,10 +190,10 @@ GLint GetSwizzleSource(SwizzleSource source) {
GLenum GetComponent(PixelFormat format, bool is_first) {
switch (format) {
- case PixelFormat::Z24S8:
- case PixelFormat::Z32FS8:
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
- case PixelFormat::S8Z24:
+ case PixelFormat::S8_UINT_D24_UNORM:
return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
default:
UNREACHABLE();
@@ -391,7 +403,7 @@ void CachedSurface::DecorateSurfaceName() {
LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
}
-void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
+void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) {
LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
}
@@ -482,9 +494,9 @@ GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_sou
std::array swizzle{x_source, y_source, z_source, w_source};
switch (const PixelFormat format = GetSurfaceParams().pixel_format) {
- case PixelFormat::Z24S8:
- case PixelFormat::Z32FS8:
- case PixelFormat::S8Z24:
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ case PixelFormat::S8_UINT_D24_UNORM:
UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
GetComponent(format, x_source == SwizzleSource::R));
@@ -520,10 +532,12 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
return texture_view;
}
-TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
- VideoCore::RasterizerInterface& rasterizer,
- const Device& device, StateTracker& state_tracker)
- : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} {
+TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory, const Device& device,
+ StateTracker& state_tracker_)
+ : TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{
+ state_tracker_} {
src_framebuffer.Create();
dst_framebuffer.Create();
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index bfc4ddf5d..7787134fc 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -90,7 +90,7 @@ public:
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source);
- void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
+ void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
void MarkAsModified(u64 tick) {
surface.MarkAsModified(true, tick);
@@ -129,8 +129,10 @@ private:
class TextureCacheOpenGL final : public TextureCacheBase {
public:
- explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const Device& device, StateTracker& state_tracker);
+ explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory, const Device& device,
+ StateTracker& state_tracker);
~TextureCacheOpenGL();
protected:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e66cdc083..a4c5b8f74 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -21,6 +21,8 @@
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
+#include "video_core/host_shaders/opengl_present_frag.h"
+#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -44,46 +46,6 @@ struct Frame {
bool is_srgb{}; /// Framebuffer is sRGB or RGB
};
-constexpr char VERTEX_SHADER[] = R"(
-#version 430 core
-
-out gl_PerVertex {
- vec4 gl_Position;
-};
-
-layout (location = 0) in vec2 vert_position;
-layout (location = 1) in vec2 vert_tex_coord;
-layout (location = 0) out vec2 frag_tex_coord;
-
-// This is a truncated 3x3 matrix for 2D transformations:
-// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
-// The third column performs translation.
-// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
-// implicitly be [0, 0, 1]
-layout (location = 0) uniform mat3x2 modelview_matrix;
-
-void main() {
- // Multiply input position by the rotscale part of the matrix and then manually translate by
- // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
- // to `vec3(vert_position.xy, 1.0)`
- gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
- frag_tex_coord = vert_tex_coord;
-}
-)";
-
-constexpr char FRAGMENT_SHADER[] = R"(
-#version 430 core
-
-layout (location = 0) in vec2 frag_tex_coord;
-layout (location = 0) out vec4 color;
-
-layout (binding = 0) uniform sampler2D color_texture;
-
-void main() {
- color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
-}
-)";
-
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
@@ -313,10 +275,13 @@ public:
}
};
-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
- Core::Frontend::GraphicsContext& context)
- : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
- program_manager{device}, has_debug_tool{HasDebugTool()} {}
+RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
+ Core::Frontend::EmuWindow& emu_window_,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context)
+ : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_},
+ emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device},
+ has_debug_tool{HasDebugTool()} {}
RendererOpenGL::~RendererOpenGL() = default;
@@ -384,7 +349,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (has_debug_tool) {
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
Present(0);
- context.SwapBuffers();
+ context->SwapBuffers();
}
}
@@ -423,7 +388,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
- u8* const host_ptr{system.Memory().GetPointer(framebuffer_addr)};
+ u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
// TODO(Rodrigo): Read this from HLE
@@ -460,10 +425,10 @@ void RendererOpenGL::InitOpenGLObjects() {
// Create shader programs
OGLShader vertex_shader;
- vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER);
+ vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
OGLShader fragment_shader;
- fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER);
+ fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
vertex_program.Create(true, false, vertex_shader.handle);
fragment_program.Create(true, false, fragment_shader.handle);
@@ -508,18 +473,18 @@ void RendererOpenGL::AddTelemetryFields() {
LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
- auto& telemetry_session = system.TelemetrySession();
- telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
- telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
- telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+ constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
+ telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor);
+ telemetry_session.AddField(user_system, "GPU_Model", gpu_model);
+ telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version);
}
void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
- rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
- program_manager, state_tracker);
+ rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
+ screen_info, program_manager, state_tracker);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@@ -535,12 +500,12 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
GLint internal_format;
switch (framebuffer.pixel_format) {
- case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+ case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
break;
- case Tegra::FramebufferConfig::PixelFormat::RGB565:
+ case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
internal_format = GL_RGB565;
texture.gl_format = GL_RGB;
texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
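Note: the deleted GLSL literals now ship as standalone .vert/.frag files that the build turns into headers; that is what the new opengl_present_frag.h/opengl_present_vert.h includes and the HostShaders::OPENGL_PRESENT_* constants refer to. A plausible shape for such a generated header, using the fragment source removed above (everything beyond the names used in this diff is an assumption):

    // video_core/host_shaders/opengl_present_frag.h (generated; sketch)
    #pragma once

    #include <string_view>

    namespace HostShaders {

    constexpr std::string_view OPENGL_PRESENT_FRAG = R"(#version 430 core

    layout (location = 0) in vec2 frag_tex_coord;
    layout (location = 0) out vec4 color;

    layout (binding = 0) uniform sampler2D color_texture;

    void main() {
        color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
    }
    )";

    } // namespace HostShaders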
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 8b18d32e6..5329577fb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -16,16 +16,25 @@
namespace Core {
class System;
-}
+class TelemetrySession;
+} // namespace Core
namespace Core::Frontend {
class EmuWindow;
}
+namespace Core::Memory {
+class Memory;
+}
+
namespace Layout {
struct FramebufferLayout;
}
+namespace Tegra {
+class GPU;
+}
+
namespace OpenGL {
/// Structure used for storing information about the textures for the Switch screen
@@ -56,8 +65,10 @@ class FrameMailbox;
class RendererOpenGL final : public VideoCore::RendererBase {
public:
- explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
- Core::Frontend::GraphicsContext& context);
+ explicit RendererOpenGL(Core::TelemetrySession& telemetry_session,
+ Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
+ Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context);
~RendererOpenGL() override;
bool Init() override;
@@ -93,12 +104,13 @@ private:
bool Present(int timeout_ms);
+ Core::TelemetrySession& telemetry_session;
Core::Frontend::EmuWindow& emu_window;
- Core::System& system;
- Core::Frontend::GraphicsContext& context;
- const Device device;
+ Core::Memory::Memory& cpu_memory;
+ Tegra::GPU& gpu;
- StateTracker state_tracker{system};
+ const Device device;
+ StateTracker state_tracker{gpu};
// OpenGL object IDs
OGLBuffer vertex_buffer;
@@ -120,7 +132,7 @@ private:
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
- Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
+ Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
/// Frame presentation mailbox
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index d1f0ea932..81a39a3b8 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -40,7 +40,6 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
} // Anonymous namespace
void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {
- const auto& clip = regs.view_volume_clip_control;
const std::array enabled_lut = {regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable};
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index d7f1ae89f..f8c77f4fa 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -117,90 +117,101 @@ struct FormatTuple {
VkFormat format; ///< Vulkan format
int usage = 0; ///< Describes image format usage
} constexpr tex_format_tuples[] = {
- {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // ABGR8U
- {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // ABGR8S
- {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // ABGR8UI
- {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5U
- {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U
- {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5U (flipped with swizzle)
- {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8U
- {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8UI
- {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // RGBA16F
- {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // RGBA16U
- {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // RGBA16S
- {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // RGBA16UI
- {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // R11FG11FB10F
- {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // RGBA32UI
- {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // DXT1
- {VK_FORMAT_BC2_UNORM_BLOCK}, // DXT23
- {VK_FORMAT_BC3_UNORM_BLOCK}, // DXT45
- {VK_FORMAT_BC4_UNORM_BLOCK}, // DXN1
- {VK_FORMAT_BC5_UNORM_BLOCK}, // DXN2UNORM
- {VK_FORMAT_BC5_SNORM_BLOCK}, // DXN2SNORM
- {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7U
- {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16
- {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16
- {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4
- {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8
- {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F
- {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F
- {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F
- {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
- {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
- {VK_FORMAT_UNDEFINED}, // R16S
- {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI
- {VK_FORMAT_UNDEFINED}, // R16I
- {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
- {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
- {VK_FORMAT_UNDEFINED}, // RG16UI
- {VK_FORMAT_UNDEFINED}, // RG16I
- {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // RG16S
- {VK_FORMAT_UNDEFINED}, // RGB32F
- {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB
- {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U
- {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S
- {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // RG8UI
- {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI
- {VK_FORMAT_UNDEFINED}, // RGBX16F
- {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI
- {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32I
- {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8
- {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5
- {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4
- {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB
- {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB
- {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB
- {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB
- {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7U_SRGB
- {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // R4G4B4A4U
- {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB
- {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB
- {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB
- {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB
- {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5
- {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB
- {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8
- {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB
- {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6
- {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB
- {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10
- {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB
- {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12
- {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB
- {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6
- {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB
- {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5
- {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
- {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9F
+ {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // A8B8G8R8_UNORM
+ {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // A8B8G8R8_SNORM
+ {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT
+ {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT
+ {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM
+ {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM
+ {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
+ {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
+ {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
+ {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle)
+ {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM
+ {VK_FORMAT_R8_SNORM, Attachable | Storage}, // R8_SNORM
+ {VK_FORMAT_R8_SINT, Attachable | Storage}, // R8_SINT
+ {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8_UINT
+ {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // R16G16B16A16_FLOAT
+ {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // R16G16B16A16_UNORM
+ {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // R16G16B16A16_SNORM
+ {VK_FORMAT_R16G16B16A16_SINT, Attachable | Storage}, // R16G16B16A16_SINT
+ {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // R16G16B16A16_UINT
+ {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // B10G11R11_FLOAT
+ {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // R32G32B32A32_UINT
+ {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // BC1_RGBA_UNORM
+ {VK_FORMAT_BC2_UNORM_BLOCK}, // BC2_UNORM
+ {VK_FORMAT_BC3_UNORM_BLOCK}, // BC3_UNORM
+ {VK_FORMAT_BC4_UNORM_BLOCK}, // BC4_UNORM
+ {VK_FORMAT_BC4_SNORM_BLOCK}, // BC4_SNORM
+ {VK_FORMAT_BC5_UNORM_BLOCK}, // BC5_UNORM
+ {VK_FORMAT_BC5_SNORM_BLOCK}, // BC5_SNORM
+ {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7_UNORM
+ {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UFLOAT
+ {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SFLOAT
+ {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4_UNORM
+ {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // B8G8R8A8_UNORM
+ {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // R32G32B32A32_FLOAT
+ {VK_FORMAT_R32G32B32A32_SINT, Attachable | Storage}, // R32G32B32A32_SINT
+ {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // R32G32_FLOAT
+ {VK_FORMAT_R32G32_SINT, Attachable | Storage}, // R32G32_SINT
+ {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT
+ {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT
+ {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM
+ {VK_FORMAT_UNDEFINED}, // R16_SNORM
+ {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT
+ {VK_FORMAT_UNDEFINED}, // R16_SINT
+ {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
+ {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
+ {VK_FORMAT_UNDEFINED}, // R16G16_UINT
+ {VK_FORMAT_UNDEFINED}, // R16G16_SINT
+ {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
+ {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
+ {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB
+ {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM
+ {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM
+ {VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT
+ {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // R8G8_UINT
+ {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // R32G32_UINT
+ {VK_FORMAT_UNDEFINED}, // R16G16B16X16_FLOAT
+ {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT
+ {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT
+ {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM
+ {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM
+ {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM
+ {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB
+ {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB
+ {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
+ {VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB
+ {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB
+ {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM
+ {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB
+ {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB
+ {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB
+ {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB
+ {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5_UNORM
+ {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB
+ {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8_UNORM
+ {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB
+ {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM
+ {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB
+ {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM
+ {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB
+ {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM
+ {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB
+ {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM
+ {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB
+ {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM
+ {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
+ {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT
// Depth formats
- {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F
- {VK_FORMAT_D16_UNORM, Attachable}, // Z16
+ {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
+ {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM
// DepthStencil formats
- {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // Z24S8
- {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8Z24 (emulated)
- {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8
+ {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT
+ {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated)
+ {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // D32_FLOAT_S8_UINT
};
static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat);
@@ -221,7 +232,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
}
- // Use ABGR8 on hardware that doesn't support ASTC natively
+ // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
? VK_FORMAT_A8B8G8R8_SRGB_PACK32
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
index 435c8c1b8..5b01020ec 100644
--- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
+++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
@@ -65,10 +65,10 @@ bool NsightAftermathTracker::Initialize() {
return false;
}
- dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash";
+ dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash";
- (void)FileUtil::DeleteDirRecursively(dump_dir);
- if (!FileUtil::CreateDir(dump_dir)) {
+ (void)Common::FS::DeleteDirRecursively(dump_dir);
+ if (!Common::FS::CreateDir(dump_dir)) {
LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
return false;
}
@@ -106,7 +106,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
return;
}
- FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
+ Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
return;
@@ -156,12 +156,12 @@ void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
}();
std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size);
- if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
+ if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
LOG_ERROR(Render_Vulkan, "Failed to write dump file");
return;
}
const std::string_view json_view(json.data(), json.size());
- if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
+ if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
LOG_ERROR(Render_Vulkan, "Failed to write JSON");
return;
}
@@ -180,7 +180,7 @@ void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_
const std::string path =
fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]);
- FileUtil::IOFile file(path, "wb");
+ Common::FS::IOFile file(path, "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Failed to create file {}", path);
return;
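Note: the FileUtil -> Common::FS change is a pure namespace rename; construction and IsOpen() are unchanged. If brevity matters at a call site during such a migration, a namespace alias keeps the code readable (illustrative sketch, not from this patch):

    #include <string>

    namespace FS = Common::FS;

    bool CanCreateDumpFile(const std::string& path) {
        FS::IOFile file(path, "wb"); // same API, new namespace
        return file.IsOpen();
    }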
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 2258479f5..0e4583986 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -78,7 +78,7 @@ Common::DynamicLibrary OpenVulkanLibrary() {
if (!libvulkan_env || !library.Open(libvulkan_env)) {
// Use the libvulkan.dylib from the application bundle.
const std::string filename =
- FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+ Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
library.Open(filename.c_str());
}
#else
@@ -86,7 +86,7 @@ Common::DynamicLibrary OpenVulkanLibrary() {
if (!library.Open(filename.c_str())) {
// Android devices may not have libvulkan.so.1, only libvulkan.so.
filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
- library.Open(filename.c_str());
+ (void)library.Open(filename.c_str());
}
#endif
return library;
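Note: the added (void) cast is the idiomatic way to state that a [[nodiscard]] result is ignored on purpose; the second Open is best-effort, and failure surfaces later when symbols are resolved. Self-contained illustration (TryOpen is a stand-in, not the DynamicLibrary API):

    #include <cstdio>

    [[nodiscard]] bool TryOpen(const char* name) {
        if (std::FILE* f = std::fopen(name, "rb")) {
            std::fclose(f);
            return true;
        }
        return false;
    }

    void Probe() {
        if (!TryOpen("libvulkan.so.1")) {
            (void)TryOpen("libvulkan.so"); // deliberately discarded [[nodiscard]] result
        }
    }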
@@ -237,8 +237,12 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
} // Anonymous namespace
-RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system)
- : RendererBase(window), system{system} {}
+RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
+ Core::Frontend::EmuWindow& emu_window,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context)
+ : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_},
+ cpu_memory{cpu_memory_}, gpu{gpu_} {}
RendererVulkan::~RendererVulkan() {
ShutDown();
@@ -302,15 +306,15 @@ bool RendererVulkan::Init() {
swapchain = std::make_unique<VKSwapchain>(*surface, *device);
swapchain->Create(framebuffer.width, framebuffer.height, false);
- state_tracker = std::make_unique<StateTracker>(system);
+ state_tracker = std::make_unique<StateTracker>(gpu);
scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker);
- rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
- *resource_manager, *memory_manager,
- *state_tracker, *scheduler);
+ rasterizer = std::make_unique<RasterizerVulkan>(
+ render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, *device,
+ *resource_manager, *memory_manager, *state_tracker, *scheduler);
- blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
+ blit_screen = std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
*resource_manager, *memory_manager, *swapchain,
*scheduler, screen_info);
@@ -438,8 +442,7 @@ void RendererVulkan::Report() const {
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
- auto& telemetry_session = system.TelemetrySession();
- constexpr auto field = Telemetry::FieldType::UserSystem;
+ static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
telemetry_session.AddField(field, "GPU_Model", model_name);
telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
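Note: with Core::System gone from the renderer, every dependency is injected at construction, which also makes the renderer constructible in isolation. A hedged sketch of a call site (the factory function is hypothetical; includes for the forward-declared types and <memory>/<utility> are assumed):

    std::unique_ptr<VideoCore::RendererBase> MakeVulkanRenderer(
        Core::TelemetrySession& telemetry_session, Core::Frontend::EmuWindow& emu_window,
        Core::Memory::Memory& cpu_memory, Tegra::GPU& gpu,
        std::unique_ptr<Core::Frontend::GraphicsContext> context) {
        return std::make_unique<Vulkan::RendererVulkan>(
            telemetry_session, emu_window, cpu_memory, gpu, std::move(context));
    }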
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 522b5bff8..ddff77942 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -14,7 +14,15 @@
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
-class System;
+class TelemetrySession;
+}
+
+namespace Core::Memory {
+class Memory;
+}
+
+namespace Tegra {
+class GPU;
}
namespace Vulkan {
@@ -38,7 +46,10 @@ struct VKScreenInfo {
class RendererVulkan final : public VideoCore::RendererBase {
public:
- explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
+ explicit RendererVulkan(Core::TelemetrySession& telemetry_session,
+ Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
+ Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context);
~RendererVulkan() override;
bool Init() override;
@@ -57,7 +68,9 @@ private:
void Report() const;
- Core::System& system;
+ Core::TelemetrySession& telemetry_session;
+ Core::Memory::Memory& cpu_memory;
+ Tegra::GPU& gpu;
Common::DynamicLibrary library;
vk::InstanceDispatch dld;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index fbd406f2b..2bea7b24d 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -141,24 +141,28 @@ struct ScreenRectVertex {
std::array<f32, 2> tex_coord;
static VkVertexInputBindingDescription GetDescription() {
- VkVertexInputBindingDescription description;
- description.binding = 0;
- description.stride = sizeof(ScreenRectVertex);
- description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
- return description;
+ return {
+ .binding = 0,
+ .stride = sizeof(ScreenRectVertex),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
+ };
}
static std::array<VkVertexInputAttributeDescription, 2> GetAttributes() {
- std::array<VkVertexInputAttributeDescription, 2> attributes;
- attributes[0].location = 0;
- attributes[0].binding = 0;
- attributes[0].format = VK_FORMAT_R32G32_SFLOAT;
- attributes[0].offset = offsetof(ScreenRectVertex, position);
- attributes[1].location = 1;
- attributes[1].binding = 0;
- attributes[1].format = VK_FORMAT_R32G32_SFLOAT;
- attributes[1].offset = offsetof(ScreenRectVertex, tex_coord);
- return attributes;
+ return {{
+ {
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(ScreenRectVertex, position),
+ },
+ {
+ .location = 1,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(ScreenRectVertex, tex_coord),
+ },
+ }};
}
};
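Note: these conversions are not only cosmetic. The old code default-constructed the Vulkan structs and assigned fields one by one, so a member added later would silently stay indeterminate; with C++20 designated initializers, any member left unnamed is value-initialized. Minimal illustration (Foo is a stand-in type):

    struct Foo {
        int a;
        int b;
    };

    void Example() {
        Foo f1; // f1.b stays indeterminate if the assignment below is forgotten
        f1.a = 1;

        const Foo f2{
            .a = 1, // .b is not named: value-initialized to 0
        };
    }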
@@ -183,9 +187,9 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
switch (framebuffer.pixel_format) {
- case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+ case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
- case Tegra::FramebufferConfig::PixelFormat::RGB565:
+ case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
return VK_FORMAT_R5G6B5_UNORM_PACK16;
default:
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
@@ -206,14 +210,16 @@ struct VKBlitScreen::BufferData {
// Unaligned image data goes here
};
-VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
- VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKSwapchain& swapchain, VKScheduler& scheduler,
- const VKScreenInfo& screen_info)
- : system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device},
- resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain},
- scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} {
+VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
+ Core::Frontend::EmuWindow& render_window_,
+ VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_,
+ VKResourceManager& resource_manager_, VKMemoryManager& memory_manager_,
+ VKSwapchain& swapchain_, VKScheduler& scheduler_,
+ const VKScreenInfo& screen_info_)
+ : cpu_memory{cpu_memory_}, render_window{render_window_},
+ rasterizer{rasterizer_}, device{device_}, resource_manager{resource_manager_},
+ memory_manager{memory_manager_}, swapchain{swapchain_}, scheduler{scheduler_},
+ image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
watches.resize(image_count);
std::generate(watches.begin(), watches.end(),
[]() { return std::make_unique<VKFenceWatch>(); });
@@ -255,7 +261,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
const auto pixel_format =
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
- const auto host_ptr = system.Memory().GetPointer(framebuffer_addr);
+ const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr);
rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
// TODO(Rodrigo): Read this from HLE
@@ -267,20 +273,25 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
- VkBufferImageCopy copy;
- copy.bufferOffset = image_offset;
- copy.bufferRowLength = 0;
- copy.bufferImageHeight = 0;
- copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- copy.imageSubresource.mipLevel = 0;
- copy.imageSubresource.baseArrayLayer = 0;
- copy.imageSubresource.layerCount = 1;
- copy.imageOffset.x = 0;
- copy.imageOffset.y = 0;
- copy.imageOffset.z = 0;
- copy.imageExtent.width = framebuffer.width;
- copy.imageExtent.height = framebuffer.height;
- copy.imageExtent.depth = 1;
+ const VkBufferImageCopy copy{
+ .bufferOffset = image_offset,
+ .bufferRowLength = 0,
+ .bufferImageHeight = 0,
+ .imageSubresource =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .mipLevel = 0,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ .imageOffset = {.x = 0, .y = 0, .z = 0},
+ .imageExtent =
+ {
+ .width = framebuffer.width,
+ .height = framebuffer.height,
+ .depth = 1,
+ },
+ };
scheduler.Record(
[buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
@@ -295,11 +306,9 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
descriptor_set = descriptor_sets[image_index], buffer = *buffer,
size = swapchain.GetSize(), pipeline = *pipeline,
layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
- VkClearValue clear_color;
- clear_color.color.float32[0] = 0.0f;
- clear_color.color.float32[1] = 0.0f;
- clear_color.color.float32[2] = 0.0f;
- clear_color.color.float32[3] = 0.0f;
+ const VkClearValue clear_color{
+ .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
+ };
VkRenderPassBeginInfo renderpass_bi;
renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
@@ -379,93 +388,109 @@ void VKBlitScreen::CreateSemaphores() {
}
void VKBlitScreen::CreateDescriptorPool() {
- std::array<VkDescriptorPoolSize, 2> pool_sizes;
- pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- pool_sizes[0].descriptorCount = static_cast<u32>(image_count);
- pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
- pool_sizes[1].descriptorCount = static_cast<u32>(image_count);
-
- VkDescriptorPoolCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
- ci.maxSets = static_cast<u32>(image_count);
- ci.poolSizeCount = static_cast<u32>(pool_sizes.size());
- ci.pPoolSizes = pool_sizes.data();
+ const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
+ {
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .descriptorCount = static_cast<u32>(image_count),
+ },
+ {
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = static_cast<u32>(image_count),
+ },
+ }};
+
+ const VkDescriptorPoolCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+ .maxSets = static_cast<u32>(image_count),
+ .poolSizeCount = static_cast<u32>(pool_sizes.size()),
+ .pPoolSizes = pool_sizes.data(),
+ };
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
}
void VKBlitScreen::CreateRenderPass() {
- VkAttachmentDescription color_attachment;
- color_attachment.flags = 0;
- color_attachment.format = swapchain.GetImageFormat();
- color_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
- color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
- color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
- color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
- color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
- color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
-
- VkAttachmentReference color_attachment_ref;
- color_attachment_ref.attachment = 0;
- color_attachment_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-
- VkSubpassDescription subpass_description;
- subpass_description.flags = 0;
- subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
- subpass_description.inputAttachmentCount = 0;
- subpass_description.pInputAttachments = nullptr;
- subpass_description.colorAttachmentCount = 1;
- subpass_description.pColorAttachments = &color_attachment_ref;
- subpass_description.pResolveAttachments = nullptr;
- subpass_description.pDepthStencilAttachment = nullptr;
- subpass_description.preserveAttachmentCount = 0;
- subpass_description.pPreserveAttachments = nullptr;
-
- VkSubpassDependency dependency;
- dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
- dependency.dstSubpass = 0;
- dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- dependency.srcAccessMask = 0;
- dependency.dstAccessMask =
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- dependency.dependencyFlags = 0;
-
- VkRenderPassCreateInfo renderpass_ci;
- renderpass_ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
- renderpass_ci.pNext = nullptr;
- renderpass_ci.flags = 0;
- renderpass_ci.attachmentCount = 1;
- renderpass_ci.pAttachments = &color_attachment;
- renderpass_ci.subpassCount = 1;
- renderpass_ci.pSubpasses = &subpass_description;
- renderpass_ci.dependencyCount = 1;
- renderpass_ci.pDependencies = &dependency;
+ const VkAttachmentDescription color_attachment{
+ .flags = 0,
+ .format = swapchain.GetImageFormat(),
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+ };
+
+ const VkAttachmentReference color_attachment_ref{
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ };
+
+ const VkSubpassDescription subpass_description{
+ .flags = 0,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .pInputAttachments = nullptr,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &color_attachment_ref,
+ .pResolveAttachments = nullptr,
+ .pDepthStencilAttachment = nullptr,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = nullptr,
+ };
+
+ const VkSubpassDependency dependency{
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+ .dependencyFlags = 0,
+ };
+
+ const VkRenderPassCreateInfo renderpass_ci{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .attachmentCount = 1,
+ .pAttachments = &color_attachment,
+ .subpassCount = 1,
+ .pSubpasses = &subpass_description,
+ .dependencyCount = 1,
+ .pDependencies = &dependency,
+ };
renderpass = device.GetLogical().CreateRenderPass(renderpass_ci);
}
void VKBlitScreen::CreateDescriptorSetLayout() {
- std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings;
- layout_bindings[0].binding = 0;
- layout_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- layout_bindings[0].descriptorCount = 1;
- layout_bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
- layout_bindings[0].pImmutableSamplers = nullptr;
- layout_bindings[1].binding = 1;
- layout_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
- layout_bindings[1].descriptorCount = 1;
- layout_bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
- layout_bindings[1].pImmutableSamplers = nullptr;
-
- VkDescriptorSetLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.bindingCount = static_cast<u32>(layout_bindings.size());
- ci.pBindings = layout_bindings.data();
+ const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ }};
+
+ const VkDescriptorSetLayoutCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = static_cast<u32>(layout_bindings.size()),
+ .pBindings = layout_bindings.data(),
+ };
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
}
@@ -473,175 +498,192 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
void VKBlitScreen::CreateDescriptorSets() {
const std::vector layouts(image_count, *descriptor_set_layout);
- VkDescriptorSetAllocateInfo ai;
- ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
- ai.pNext = nullptr;
- ai.descriptorPool = *descriptor_pool;
- ai.descriptorSetCount = static_cast<u32>(image_count);
- ai.pSetLayouts = layouts.data();
+ const VkDescriptorSetAllocateInfo ai{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .descriptorPool = *descriptor_pool,
+ .descriptorSetCount = static_cast<u32>(image_count),
+ .pSetLayouts = layouts.data(),
+ };
+
descriptor_sets = descriptor_pool.Allocate(ai);
}
void VKBlitScreen::CreatePipelineLayout() {
- VkPipelineLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.setLayoutCount = 1;
- ci.pSetLayouts = descriptor_set_layout.address();
- ci.pushConstantRangeCount = 0;
- ci.pPushConstantRanges = nullptr;
+ const VkPipelineLayoutCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = descriptor_set_layout.address(),
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = nullptr,
+ };
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
}
void VKBlitScreen::CreateGraphicsPipeline() {
- std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages;
- shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- shader_stages[0].pNext = nullptr;
- shader_stages[0].flags = 0;
- shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
- shader_stages[0].module = *vertex_shader;
- shader_stages[0].pName = "main";
- shader_stages[0].pSpecializationInfo = nullptr;
- shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- shader_stages[1].pNext = nullptr;
- shader_stages[1].flags = 0;
- shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
- shader_stages[1].module = *fragment_shader;
- shader_stages[1].pName = "main";
- shader_stages[1].pSpecializationInfo = nullptr;
+ const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = *vertex_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = *fragment_shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ }};
const auto vertex_binding_description = ScreenRectVertex::GetDescription();
const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
- VkPipelineVertexInputStateCreateInfo vertex_input_ci;
- vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
- vertex_input_ci.pNext = nullptr;
- vertex_input_ci.flags = 0;
- vertex_input_ci.vertexBindingDescriptionCount = 1;
- vertex_input_ci.pVertexBindingDescriptions = &vertex_binding_description;
- vertex_input_ci.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()};
- vertex_input_ci.pVertexAttributeDescriptions = vertex_attrs_description.data();
-
- VkPipelineInputAssemblyStateCreateInfo input_assembly_ci;
- input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
- input_assembly_ci.pNext = nullptr;
- input_assembly_ci.flags = 0;
- input_assembly_ci.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
- input_assembly_ci.primitiveRestartEnable = VK_FALSE;
-
- VkPipelineViewportStateCreateInfo viewport_state_ci;
- viewport_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
- viewport_state_ci.pNext = nullptr;
- viewport_state_ci.flags = 0;
- viewport_state_ci.viewportCount = 1;
- viewport_state_ci.pViewports = nullptr;
- viewport_state_ci.scissorCount = 1;
- viewport_state_ci.pScissors = nullptr;
-
- VkPipelineRasterizationStateCreateInfo rasterization_ci;
- rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
- rasterization_ci.pNext = nullptr;
- rasterization_ci.flags = 0;
- rasterization_ci.depthClampEnable = VK_FALSE;
- rasterization_ci.rasterizerDiscardEnable = VK_FALSE;
- rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
- rasterization_ci.cullMode = VK_CULL_MODE_NONE;
- rasterization_ci.frontFace = VK_FRONT_FACE_CLOCKWISE;
- rasterization_ci.depthBiasEnable = VK_FALSE;
- rasterization_ci.depthBiasConstantFactor = 0.0f;
- rasterization_ci.depthBiasClamp = 0.0f;
- rasterization_ci.depthBiasSlopeFactor = 0.0f;
- rasterization_ci.lineWidth = 1.0f;
-
- VkPipelineMultisampleStateCreateInfo multisampling_ci;
- multisampling_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
- multisampling_ci.pNext = nullptr;
- multisampling_ci.flags = 0;
- multisampling_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
- multisampling_ci.sampleShadingEnable = VK_FALSE;
- multisampling_ci.minSampleShading = 0.0f;
- multisampling_ci.pSampleMask = nullptr;
- multisampling_ci.alphaToCoverageEnable = VK_FALSE;
- multisampling_ci.alphaToOneEnable = VK_FALSE;
-
- VkPipelineColorBlendAttachmentState color_blend_attachment;
- color_blend_attachment.blendEnable = VK_FALSE;
- color_blend_attachment.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO;
- color_blend_attachment.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO;
- color_blend_attachment.colorBlendOp = VK_BLEND_OP_ADD;
- color_blend_attachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
- color_blend_attachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
- color_blend_attachment.alphaBlendOp = VK_BLEND_OP_ADD;
- color_blend_attachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
-
- VkPipelineColorBlendStateCreateInfo color_blend_ci;
- color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
- color_blend_ci.flags = 0;
- color_blend_ci.pNext = nullptr;
- color_blend_ci.logicOpEnable = VK_FALSE;
- color_blend_ci.logicOp = VK_LOGIC_OP_COPY;
- color_blend_ci.attachmentCount = 1;
- color_blend_ci.pAttachments = &color_blend_attachment;
- color_blend_ci.blendConstants[0] = 0.0f;
- color_blend_ci.blendConstants[1] = 0.0f;
- color_blend_ci.blendConstants[2] = 0.0f;
- color_blend_ci.blendConstants[3] = 0.0f;
-
- static constexpr std::array dynamic_states = {VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR};
- VkPipelineDynamicStateCreateInfo dynamic_state_ci;
- dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
- dynamic_state_ci.pNext = nullptr;
- dynamic_state_ci.flags = 0;
- dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size());
- dynamic_state_ci.pDynamicStates = dynamic_states.data();
-
- VkGraphicsPipelineCreateInfo pipeline_ci;
- pipeline_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
- pipeline_ci.pNext = nullptr;
- pipeline_ci.flags = 0;
- pipeline_ci.stageCount = static_cast<u32>(shader_stages.size());
- pipeline_ci.pStages = shader_stages.data();
- pipeline_ci.pVertexInputState = &vertex_input_ci;
- pipeline_ci.pInputAssemblyState = &input_assembly_ci;
- pipeline_ci.pTessellationState = nullptr;
- pipeline_ci.pViewportState = &viewport_state_ci;
- pipeline_ci.pRasterizationState = &rasterization_ci;
- pipeline_ci.pMultisampleState = &multisampling_ci;
- pipeline_ci.pDepthStencilState = nullptr;
- pipeline_ci.pColorBlendState = &color_blend_ci;
- pipeline_ci.pDynamicState = &dynamic_state_ci;
- pipeline_ci.layout = *pipeline_layout;
- pipeline_ci.renderPass = *renderpass;
- pipeline_ci.subpass = 0;
- pipeline_ci.basePipelineHandle = 0;
- pipeline_ci.basePipelineIndex = 0;
+ const VkPipelineVertexInputStateCreateInfo vertex_input_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = &vertex_binding_description,
+ .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()},
+ .pVertexAttributeDescriptions = vertex_attrs_description.data(),
+ };
+
+ const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = VK_FALSE,
+ };
+
+ const VkPipelineViewportStateCreateInfo viewport_state_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .viewportCount = 1,
+ .pViewports = nullptr,
+ .scissorCount = 1,
+ .pScissors = nullptr,
+ };
+
+ const VkPipelineRasterizationStateCreateInfo rasterization_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .depthClampEnable = VK_FALSE,
+ .rasterizerDiscardEnable = VK_FALSE,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_CLOCKWISE,
+ .depthBiasEnable = VK_FALSE,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f,
+ };
+
+ const VkPipelineMultisampleStateCreateInfo multisampling_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ .sampleShadingEnable = VK_FALSE,
+ .minSampleShading = 0.0f,
+ .pSampleMask = nullptr,
+ .alphaToCoverageEnable = VK_FALSE,
+ .alphaToOneEnable = VK_FALSE,
+ };
+
+ const VkPipelineColorBlendAttachmentState color_blend_attachment{
+ .blendEnable = VK_FALSE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ };
+
+ const VkPipelineColorBlendStateCreateInfo color_blend_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_COPY,
+ .attachmentCount = 1,
+ .pAttachments = &color_blend_attachment,
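+        // Array members take a single braced list, replacing the old element-wise stores.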
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
+ };
+
+ static constexpr std::array dynamic_states{
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ };
+ const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
+ .pDynamicStates = dynamic_states.data(),
+ };
+
+ const VkGraphicsPipelineCreateInfo pipeline_ci{
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(shader_stages.size()),
+ .pStages = shader_stages.data(),
+ .pVertexInputState = &vertex_input_ci,
+ .pInputAssemblyState = &input_assembly_ci,
+ .pTessellationState = nullptr,
+ .pViewportState = &viewport_state_ci,
+ .pRasterizationState = &rasterization_ci,
+ .pMultisampleState = &multisampling_ci,
+ .pDepthStencilState = nullptr,
+ .pColorBlendState = &color_blend_ci,
+ .pDynamicState = &dynamic_state_ci,
+ .layout = *pipeline_layout,
+ .renderPass = *renderpass,
+ .subpass = 0,
+ .basePipelineHandle = 0,
+ .basePipelineIndex = 0,
+ };
pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci);
}
void VKBlitScreen::CreateSampler() {
- VkSamplerCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.magFilter = VK_FILTER_LINEAR;
- ci.minFilter = VK_FILTER_NEAREST;
- ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
- ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
- ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
- ci.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
- ci.mipLodBias = 0.0f;
- ci.anisotropyEnable = VK_FALSE;
- ci.maxAnisotropy = 0.0f;
- ci.compareEnable = VK_FALSE;
- ci.compareOp = VK_COMPARE_OP_NEVER;
- ci.minLod = 0.0f;
- ci.maxLod = 0.0f;
- ci.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
- ci.unnormalizedCoordinates = VK_FALSE;
+ const VkSamplerCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .magFilter = VK_FILTER_LINEAR,
+ .minFilter = VK_FILTER_NEAREST,
+ .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .mipLodBias = 0.0f,
+ .anisotropyEnable = VK_FALSE,
+ .maxAnisotropy = 0.0f,
+ .compareEnable = VK_FALSE,
+ .compareOp = VK_COMPARE_OP_NEVER,
+ .minLod = 0.0f,
+ .maxLod = 0.0f,
+ .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
+ .unnormalizedCoordinates = VK_FALSE,
+ };
sampler = device.GetLogical().CreateSampler(ci);
}
@@ -650,15 +692,17 @@ void VKBlitScreen::CreateFramebuffers() {
const VkExtent2D size{swapchain.GetSize()};
framebuffers.resize(image_count);
- VkFramebufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.renderPass = *renderpass;
- ci.attachmentCount = 1;
- ci.width = size.width;
- ci.height = size.height;
- ci.layers = 1;
+ VkFramebufferCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .renderPass = *renderpass,
+ .attachmentCount = 1,
+ .pAttachments = nullptr,
+ .width = size.width,
+ .height = size.height,
+ .layers = 1,
+ };
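+    // pAttachments is pointed at each swapchain image view inside the loop below,
+    // which is why this create info is not const.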
for (std::size_t i = 0; i < image_count; ++i) {
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
@@ -678,16 +722,17 @@ void VKBlitScreen::ReleaseRawImages() {
}
void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
- VkBufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.size = CalculateBufferSize(framebuffer);
- ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
- VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
+ const VkBufferCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = CalculateBufferSize(framebuffer),
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ };
buffer = device.GetLogical().CreateBuffer(ci);
buffer_commit = memory_manager.Commit(buffer, true);
@@ -697,24 +742,28 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
raw_images.resize(image_count);
raw_buffer_commits.resize(image_count);
- VkImageCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.imageType = VK_IMAGE_TYPE_2D;
- ci.format = GetFormat(framebuffer);
- ci.extent.width = framebuffer.width;
- ci.extent.height = framebuffer.height;
- ci.extent.depth = 1;
- ci.mipLevels = 1;
- ci.arrayLayers = 1;
- ci.samples = VK_SAMPLE_COUNT_1_BIT;
- ci.tiling = VK_IMAGE_TILING_LINEAR;
- ci.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
- ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+ const VkImageCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = GetFormat(framebuffer),
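+        // Nested structs such as VkExtent3D can be designated-initialized in place.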
+ .extent =
+ {
+ .width = framebuffer.width,
+ .height = framebuffer.height,
+ .depth = 1,
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_LINEAR,
+ .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ };
for (std::size_t i = 0; i < image_count; ++i) {
raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT);
@@ -723,39 +772,43 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
}
void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
- VkDescriptorBufferInfo buffer_info;
- buffer_info.buffer = *buffer;
- buffer_info.offset = offsetof(BufferData, uniform);
- buffer_info.range = sizeof(BufferData::uniform);
-
- VkWriteDescriptorSet ubo_write;
- ubo_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
- ubo_write.pNext = nullptr;
- ubo_write.dstSet = descriptor_sets[image_index];
- ubo_write.dstBinding = 0;
- ubo_write.dstArrayElement = 0;
- ubo_write.descriptorCount = 1;
- ubo_write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- ubo_write.pImageInfo = nullptr;
- ubo_write.pBufferInfo = &buffer_info;
- ubo_write.pTexelBufferView = nullptr;
-
- VkDescriptorImageInfo image_info;
- image_info.sampler = *sampler;
- image_info.imageView = image_view;
- image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-
- VkWriteDescriptorSet sampler_write;
- sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
- sampler_write.pNext = nullptr;
- sampler_write.dstSet = descriptor_sets[image_index];
- sampler_write.dstBinding = 1;
- sampler_write.dstArrayElement = 0;
- sampler_write.descriptorCount = 1;
- sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
- sampler_write.pImageInfo = &image_info;
- sampler_write.pBufferInfo = nullptr;
- sampler_write.pTexelBufferView = nullptr;
+ const VkDescriptorBufferInfo buffer_info{
+ .buffer = *buffer,
+ .offset = offsetof(BufferData, uniform),
+ .range = sizeof(BufferData::uniform),
+ };
+
+ const VkWriteDescriptorSet ubo_write{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = descriptor_sets[image_index],
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .pImageInfo = nullptr,
+ .pBufferInfo = &buffer_info,
+ .pTexelBufferView = nullptr,
+ };
+
+ const VkDescriptorImageInfo image_info{
+ .sampler = *sampler,
+ .imageView = image_view,
+ .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ };
+
+ const VkWriteDescriptorSet sampler_write{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = descriptor_sets[image_index],
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = &image_info,
+ .pBufferInfo = nullptr,
+ .pTexelBufferView = nullptr,
+ };
device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 243640fab..838d38f69 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -15,6 +15,10 @@ namespace Core {
class System;
}
+namespace Core::Memory {
+class Memory;
+}
+
namespace Core::Frontend {
class EmuWindow;
}
@@ -39,7 +43,8 @@ class VKSwapchain;
class VKBlitScreen final {
public:
- explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
+ explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
+ Core::Frontend::EmuWindow& render_window,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
VKSwapchain& swapchain, VKScheduler& scheduler,
@@ -81,7 +86,7 @@ private:
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
- Core::System& system;
+ Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
VideoCore::RasterizerInterface& rasterizer;
const VKDevice& device;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 2be38d419..d9d3da9ea 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -39,16 +39,17 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
- : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
- VkBufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.size = static_cast<VkDeviceSize>(size);
- ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
+ : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
+ const VkBufferCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = static_cast<VkDeviceSize>(size),
+ .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ };
buffer.handle = device.GetLogical().CreateBuffer(ci);
buffer.commit = memory_manager.Commit(buffer.handle, false);
@@ -66,16 +67,17 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = handle;
- barrier.offset = offset;
- barrier.size = size;
+ const VkBufferMemoryBarrier barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = handle,
+ .offset = offset,
+ .size = size,
+ };
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
barrier, {});
});
@@ -87,16 +89,17 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
const VkBuffer handle = Handle();
scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = handle;
- barrier.offset = offset;
- barrier.size = size;
+ const VkBufferMemoryBarrier barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = handle,
+ .offset = offset,
+ .size = size,
+ };
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
@@ -142,14 +145,15 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
});
}
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
- : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
- CreateStreamBuffer(device,
- scheduler)},
- device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
- staging_pool} {}
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
+ const VKDevice& device_, VKMemoryManager& memory_manager_,
+ VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_)
+ : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, gpu_memory, cpu_memory,
+ CreateStreamBuffer(device_,
+ scheduler_)},
+ device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
+ staging_pool_} {}
VKBufferCache::~VKBufferCache() = default;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 991ee451c..7fb5ceedf 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -13,10 +13,6 @@
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/wrapper.h"
-namespace Core {
-class System;
-}
-
namespace Vulkan {
class VKDevice;
@@ -53,7 +49,8 @@ private:
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
public:
- explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+ explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index da71e710c..182461ed9 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -115,32 +115,32 @@ constexpr u8 quad_array[] = {
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
- VkDescriptorSetLayoutBinding binding;
- binding.binding = 0;
- binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- binding.descriptorCount = 1;
- binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- binding.pImmutableSamplers = nullptr;
- return binding;
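+    // Returning a braced list aggregate-initializes the declared return type
+    // (VkDescriptorSetLayoutBinding) directly, without a named temporary.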
+ return {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ };
}
VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
- VkDescriptorUpdateTemplateEntryKHR entry;
- entry.dstBinding = 0;
- entry.dstArrayElement = 0;
- entry.descriptorCount = 1;
- entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- entry.offset = 0;
- entry.stride = sizeof(DescriptorUpdateEntry);
- return entry;
+ return {
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .offset = 0,
+ .stride = sizeof(DescriptorUpdateEntry),
+ };
}
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
- VkPushConstantRange range;
- range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- range.offset = 0;
- range.size = static_cast<u32>(size);
- return range;
+ return {
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .offset = 0,
+ .size = static_cast<u32>(size),
+ };
}
// Uint8 SPIR-V module. Generated from the "shaders/" directory.
@@ -344,29 +344,33 @@ constexpr u8 QUAD_INDEXED_SPV[] = {
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
- std::array<VkDescriptorSetLayoutBinding, 2> bindings;
- bindings[0].binding = 0;
- bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- bindings[0].descriptorCount = 1;
- bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- bindings[0].pImmutableSamplers = nullptr;
- bindings[1].binding = 1;
- bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- bindings[1].descriptorCount = 1;
- bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- bindings[1].pImmutableSamplers = nullptr;
- return bindings;
+ return {{
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ }};
}
VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
- VkDescriptorUpdateTemplateEntryKHR entry;
- entry.dstBinding = 0;
- entry.dstArrayElement = 0;
- entry.descriptorCount = 2;
- entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- entry.offset = 0;
- entry.stride = sizeof(DescriptorUpdateEntry);
- return entry;
+ return {
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 2,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .offset = 0,
+ .stride = sizeof(DescriptorUpdateEntry),
+ };
}
} // Anonymous namespace
@@ -376,37 +380,37 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
const u8* code) {
- VkDescriptorSetLayoutCreateInfo descriptor_layout_ci;
- descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- descriptor_layout_ci.pNext = nullptr;
- descriptor_layout_ci.flags = 0;
- descriptor_layout_ci.bindingCount = bindings.size();
- descriptor_layout_ci.pBindings = bindings.data();
- descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci);
-
- VkPipelineLayoutCreateInfo pipeline_layout_ci;
- pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
- pipeline_layout_ci.pNext = nullptr;
- pipeline_layout_ci.flags = 0;
- pipeline_layout_ci.setLayoutCount = 1;
- pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address();
- pipeline_layout_ci.pushConstantRangeCount = push_constants.size();
- pipeline_layout_ci.pPushConstantRanges = push_constants.data();
- layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci);
+ descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = bindings.size(),
+ .pBindings = bindings.data(),
+ });
+
+ layout = device.GetLogical().CreatePipelineLayout({
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = descriptor_set_layout.address(),
+ .pushConstantRangeCount = push_constants.size(),
+ .pPushConstantRanges = push_constants.data(),
+ });
if (!templates.empty()) {
- VkDescriptorUpdateTemplateCreateInfoKHR template_ci;
- template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
- template_ci.pNext = nullptr;
- template_ci.flags = 0;
- template_ci.descriptorUpdateEntryCount = templates.size();
- template_ci.pDescriptorUpdateEntries = templates.data();
- template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
- template_ci.descriptorSetLayout = *descriptor_set_layout;
- template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
- template_ci.pipelineLayout = *layout;
- template_ci.set = 0;
- descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci);
+ descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .descriptorUpdateEntryCount = templates.size(),
+ .pDescriptorUpdateEntries = templates.data(),
+ .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+ .descriptorSetLayout = *descriptor_set_layout,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .pipelineLayout = *layout,
+ .set = 0,
+ });
descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
}
@@ -414,32 +418,32 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
std::memcpy(code_copy.get(), code, code_size);
- VkShaderModuleCreateInfo module_ci;
- module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
- module_ci.pNext = nullptr;
- module_ci.flags = 0;
- module_ci.codeSize = code_size;
- module_ci.pCode = code_copy.get();
- module = device.GetLogical().CreateShaderModule(module_ci);
-
- VkComputePipelineCreateInfo pipeline_ci;
- pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
- pipeline_ci.pNext = nullptr;
- pipeline_ci.flags = 0;
- pipeline_ci.layout = *layout;
- pipeline_ci.basePipelineHandle = nullptr;
- pipeline_ci.basePipelineIndex = 0;
-
- VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage;
- stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- stage_ci.pNext = nullptr;
- stage_ci.flags = 0;
- stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
- stage_ci.module = *module;
- stage_ci.pName = "main";
- stage_ci.pSpecializationInfo = nullptr;
-
- pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci);
+ module = device.GetLogical().CreateShaderModule({
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .codeSize = code_size,
+ .pCode = code_copy.get(),
+ });
+
+ pipeline = device.GetLogical().CreateComputePipeline({
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage =
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = *module,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ .layout = *layout,
+ .basePipelineHandle = nullptr,
+ .basePipelineIndex = 0,
+ });
}
VKComputePass::~VKComputePass() = default;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 281bf9ac3..ed9d2991c 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -43,12 +43,13 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
// TODO(Rodrigo): Maybe make individual bindings here?
for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
- VkDescriptorSetLayoutBinding& entry = bindings.emplace_back();
- entry.binding = binding++;
- entry.descriptorType = descriptor_type;
- entry.descriptorCount = 1;
- entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
- entry.pImmutableSamplers = nullptr;
+ bindings.push_back({
+ .binding = binding++,
+ .descriptorType = descriptor_type,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ });
}
};
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
@@ -58,25 +59,25 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
- VkDescriptorSetLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.bindingCount = static_cast<u32>(bindings.size());
- ci.pBindings = bindings.data();
- return device.GetLogical().CreateDescriptorSetLayout(ci);
+ return device.GetLogical().CreateDescriptorSetLayout({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = static_cast<u32>(bindings.size()),
+ .pBindings = bindings.data(),
+ });
}
vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
- VkPipelineLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.setLayoutCount = 1;
- ci.pSetLayouts = descriptor_set_layout.address();
- ci.pushConstantRangeCount = 0;
- ci.pPushConstantRanges = nullptr;
- return device.GetLogical().CreatePipelineLayout(ci);
+ return device.GetLogical().CreatePipelineLayout({
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = descriptor_set_layout.address(),
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = nullptr,
+ });
}
vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
@@ -89,59 +90,63 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat
return {};
}
- VkDescriptorUpdateTemplateCreateInfoKHR ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size());
- ci.pDescriptorUpdateEntries = template_entries.data();
- ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
- ci.descriptorSetLayout = *descriptor_set_layout;
- ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
- ci.pipelineLayout = *layout;
- ci.set = DESCRIPTOR_SET;
- return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
+ return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
+ .pDescriptorUpdateEntries = template_entries.data(),
+ .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+ .descriptorSetLayout = *descriptor_set_layout,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .pipelineLayout = *layout,
+ .set = DESCRIPTOR_SET,
+ });
}
vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
device.SaveShader(code);
- VkShaderModuleCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.codeSize = code.size() * sizeof(u32);
- ci.pCode = code.data();
- return device.GetLogical().CreateShaderModule(ci);
+ return device.GetLogical().CreateShaderModule({
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .codeSize = code.size() * sizeof(u32),
+ .pCode = code.data(),
+ });
}
vk::Pipeline VKComputePipeline::CreatePipeline() const {
- VkComputePipelineCreateInfo ci;
- VkPipelineShaderStageCreateInfo& stage_ci = ci.stage;
- stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- stage_ci.pNext = nullptr;
- stage_ci.flags = 0;
- stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
- stage_ci.module = *shader_module;
- stage_ci.pName = "main";
- stage_ci.pSpecializationInfo = nullptr;
-
- VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
- subgroup_size_ci.sType =
- VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT;
- subgroup_size_ci.pNext = nullptr;
- subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
+ VkComputePipelineCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage =
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = *shader_module,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ .layout = *layout,
+ .basePipelineHandle = nullptr,
+ .basePipelineIndex = 0,
+ };
+
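+    // ci is left non-const: subgroup_size_ci is chained into ci.stage.pNext below
+    // only when the device supports the required subgroup size.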
+ const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .requiredSubgroupSize = GuestWarpSize,
+ };
if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) {
- stage_ci.pNext = &subgroup_size_ci;
+ ci.stage.pNext = &subgroup_size_ci;
}
- ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.layout = *layout;
- ci.basePipelineHandle = nullptr;
- ci.basePipelineIndex = 0;
return device.GetLogical().CreateComputePipeline(ci);
}
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index 9259b618d..ac4a0884e 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -43,27 +43,30 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
- {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
-
- VkDescriptorPoolCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
- ci.maxSets = num_sets;
- ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes));
- ci.pPoolSizes = std::data(pool_sizes);
+ {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40},
+ };
+
+ const VkDescriptorPoolCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+ .maxSets = num_sets,
+ .poolSizeCount = static_cast<u32>(std::size(pool_sizes)),
+ .pPoolSizes = std::data(pool_sizes),
+ };
return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci));
}
vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout,
std::size_t count) {
const std::vector layout_copies(count, layout);
- VkDescriptorSetAllocateInfo ai;
- ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
- ai.pNext = nullptr;
- ai.descriptorPool = **active_pool;
- ai.descriptorSetCount = static_cast<u32>(count);
- ai.pSetLayouts = layout_copies.data();
+ VkDescriptorSetAllocateInfo ai{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .descriptorPool = **active_pool,
+ .descriptorSetCount = static_cast<u32>(count),
+ .pSetLayouts = layout_copies.data(),
+ };
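+    // Deliberately non-const: the allocate info may be retried against a fresh pool
+    // when the active one is exhausted.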
vk::DescriptorSets sets = active_pool->Allocate(ai);
if (!sets.IsOutOfPoolMemory()) {
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index fdaea4210..4205bd573 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -22,14 +22,21 @@ namespace {
namespace Alternatives {
-constexpr std::array Depth24UnormS8_UINT = {VK_FORMAT_D32_SFLOAT_S8_UINT,
- VK_FORMAT_D16_UNORM_S8_UINT, VkFormat{}};
-constexpr std::array Depth16UnormS8_UINT = {VK_FORMAT_D24_UNORM_S8_UINT,
- VK_FORMAT_D32_SFLOAT_S8_UINT, VkFormat{}};
+constexpr std::array Depth24UnormS8_UINT{
+ VK_FORMAT_D32_SFLOAT_S8_UINT,
+ VK_FORMAT_D16_UNORM_S8_UINT,
+ VkFormat{},
+};
+
+constexpr std::array Depth16UnormS8_UINT{
+ VK_FORMAT_D24_UNORM_S8_UINT,
+ VK_FORMAT_D32_SFLOAT_S8_UINT,
+ VkFormat{},
+};
} // namespace Alternatives
-constexpr std::array REQUIRED_EXTENSIONS = {
+constexpr std::array REQUIRED_EXTENSIONS{
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
@@ -77,14 +84,19 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_A8B8G8R8_UINT_PACK32,
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
+ VK_FORMAT_A8B8G8R8_SINT_PACK32,
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
VK_FORMAT_B5G6R5_UNORM_PACK16,
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
+ VK_FORMAT_A2B10G10R10_UINT_PACK32,
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
VK_FORMAT_R32G32B32A32_SFLOAT,
+ VK_FORMAT_R32G32B32A32_SINT,
VK_FORMAT_R32G32B32A32_UINT,
VK_FORMAT_R32G32_SFLOAT,
+ VK_FORMAT_R32G32_SINT,
VK_FORMAT_R32G32_UINT,
+ VK_FORMAT_R16G16B16A16_SINT,
VK_FORMAT_R16G16B16A16_UINT,
VK_FORMAT_R16G16B16A16_SNORM,
VK_FORMAT_R16G16B16A16_UNORM,
@@ -96,8 +108,11 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_R8G8B8A8_SRGB,
VK_FORMAT_R8G8_UNORM,
VK_FORMAT_R8G8_SNORM,
+ VK_FORMAT_R8G8_SINT,
VK_FORMAT_R8G8_UINT,
VK_FORMAT_R8_UNORM,
+ VK_FORMAT_R8_SNORM,
+ VK_FORMAT_R8_SINT,
VK_FORMAT_R8_UINT,
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
VK_FORMAT_R32_SFLOAT,
@@ -117,6 +132,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_BC2_UNORM_BLOCK,
VK_FORMAT_BC3_UNORM_BLOCK,
VK_FORMAT_BC4_UNORM_BLOCK,
+ VK_FORMAT_BC4_SNORM_BLOCK,
VK_FORMAT_BC5_UNORM_BLOCK,
VK_FORMAT_BC5_SNORM_BLOCK,
VK_FORMAT_BC7_UNORM_BLOCK,
@@ -169,97 +185,104 @@ bool VKDevice::Create() {
const auto queue_cis = GetDeviceQueueCreateInfos();
const std::vector extensions = LoadExtensions();
- VkPhysicalDeviceFeatures2 features2;
- features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
- features2.pNext = nullptr;
+ VkPhysicalDeviceFeatures2 features2{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
+ .pNext = nullptr,
+ };
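+    // Only the sType/pNext header is set here; the feature table itself is assigned
+    // below, and the extension structs are chained in through 'next'.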
const void* first_next = &features2;
void** next = &features2.pNext;
- auto& features = features2.features;
- features.robustBufferAccess = false;
- features.fullDrawIndexUint32 = false;
- features.imageCubeArray = false;
- features.independentBlend = true;
- features.geometryShader = true;
- features.tessellationShader = true;
- features.sampleRateShading = false;
- features.dualSrcBlend = false;
- features.logicOp = false;
- features.multiDrawIndirect = false;
- features.drawIndirectFirstInstance = false;
- features.depthClamp = true;
- features.depthBiasClamp = true;
- features.fillModeNonSolid = false;
- features.depthBounds = false;
- features.wideLines = false;
- features.largePoints = true;
- features.alphaToOne = false;
- features.multiViewport = true;
- features.samplerAnisotropy = true;
- features.textureCompressionETC2 = false;
- features.textureCompressionASTC_LDR = is_optimal_astc_supported;
- features.textureCompressionBC = false;
- features.occlusionQueryPrecise = true;
- features.pipelineStatisticsQuery = false;
- features.vertexPipelineStoresAndAtomics = true;
- features.fragmentStoresAndAtomics = true;
- features.shaderTessellationAndGeometryPointSize = false;
- features.shaderImageGatherExtended = true;
- features.shaderStorageImageExtendedFormats = false;
- features.shaderStorageImageMultisample = false;
- features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
- features.shaderStorageImageWriteWithoutFormat = true;
- features.shaderUniformBufferArrayDynamicIndexing = false;
- features.shaderSampledImageArrayDynamicIndexing = false;
- features.shaderStorageBufferArrayDynamicIndexing = false;
- features.shaderStorageImageArrayDynamicIndexing = false;
- features.shaderClipDistance = false;
- features.shaderCullDistance = false;
- features.shaderFloat64 = false;
- features.shaderInt64 = false;
- features.shaderInt16 = false;
- features.shaderResourceResidency = false;
- features.shaderResourceMinLod = false;
- features.sparseBinding = false;
- features.sparseResidencyBuffer = false;
- features.sparseResidencyImage2D = false;
- features.sparseResidencyImage3D = false;
- features.sparseResidency2Samples = false;
- features.sparseResidency4Samples = false;
- features.sparseResidency8Samples = false;
- features.sparseResidency16Samples = false;
- features.sparseResidencyAliased = false;
- features.variableMultisampleRate = false;
- features.inheritedQueries = false;
-
- VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage;
- bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
- bit16_storage.pNext = nullptr;
- bit16_storage.storageBuffer16BitAccess = false;
- bit16_storage.uniformAndStorageBuffer16BitAccess = true;
- bit16_storage.storagePushConstant16 = false;
- bit16_storage.storageInputOutput16 = false;
+ features2.features = {
+ .robustBufferAccess = false,
+ .fullDrawIndexUint32 = false,
+ .imageCubeArray = false,
+ .independentBlend = true,
+ .geometryShader = true,
+ .tessellationShader = true,
+ .sampleRateShading = false,
+ .dualSrcBlend = false,
+ .logicOp = false,
+ .multiDrawIndirect = false,
+ .drawIndirectFirstInstance = false,
+ .depthClamp = true,
+ .depthBiasClamp = true,
+ .fillModeNonSolid = false,
+ .depthBounds = false,
+ .wideLines = false,
+ .largePoints = true,
+ .alphaToOne = false,
+ .multiViewport = true,
+ .samplerAnisotropy = true,
+ .textureCompressionETC2 = false,
+ .textureCompressionASTC_LDR = is_optimal_astc_supported,
+ .textureCompressionBC = false,
+ .occlusionQueryPrecise = true,
+ .pipelineStatisticsQuery = false,
+ .vertexPipelineStoresAndAtomics = true,
+ .fragmentStoresAndAtomics = true,
+ .shaderTessellationAndGeometryPointSize = false,
+ .shaderImageGatherExtended = true,
+ .shaderStorageImageExtendedFormats = false,
+ .shaderStorageImageMultisample = false,
+ .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
+ .shaderStorageImageWriteWithoutFormat = true,
+ .shaderUniformBufferArrayDynamicIndexing = false,
+ .shaderSampledImageArrayDynamicIndexing = false,
+ .shaderStorageBufferArrayDynamicIndexing = false,
+ .shaderStorageImageArrayDynamicIndexing = false,
+ .shaderClipDistance = false,
+ .shaderCullDistance = false,
+ .shaderFloat64 = false,
+ .shaderInt64 = false,
+ .shaderInt16 = false,
+ .shaderResourceResidency = false,
+ .shaderResourceMinLod = false,
+ .sparseBinding = false,
+ .sparseResidencyBuffer = false,
+ .sparseResidencyImage2D = false,
+ .sparseResidencyImage3D = false,
+ .sparseResidency2Samples = false,
+ .sparseResidency4Samples = false,
+ .sparseResidency8Samples = false,
+ .sparseResidency16Samples = false,
+ .sparseResidencyAliased = false,
+ .variableMultisampleRate = false,
+ .inheritedQueries = false,
+ };
+
+ VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
+ .pNext = nullptr,
+ .storageBuffer16BitAccess = false,
+ .uniformAndStorageBuffer16BitAccess = true,
+ .storagePushConstant16 = false,
+ .storageInputOutput16 = false,
+ };
SetNext(next, bit16_storage);
- VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage;
- bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR;
- bit8_storage.pNext = nullptr;
- bit8_storage.storageBuffer8BitAccess = false;
- bit8_storage.uniformAndStorageBuffer8BitAccess = true;
- bit8_storage.storagePushConstant8 = false;
+ VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR,
+ .pNext = nullptr,
+ .storageBuffer8BitAccess = false,
+ .uniformAndStorageBuffer8BitAccess = true,
+ .storagePushConstant8 = false,
+ };
SetNext(next, bit8_storage);
- VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
- host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT;
- host_query_reset.hostQueryReset = true;
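+    // pNext is intentionally omitted below: designated initialization
+    // value-initializes it to nullptr, fixing the previously unset pointer.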
+ VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT,
+ .hostQueryReset = true,
+ };
SetNext(next, host_query_reset);
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
if (is_float16_supported) {
- float16_int8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
- float16_int8.pNext = nullptr;
- float16_int8.shaderFloat16 = true;
- float16_int8.shaderInt8 = false;
+ float16_int8 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR,
+ .pNext = nullptr,
+ .shaderFloat16 = true,
+ .shaderInt8 = false,
+ };
SetNext(next, float16_int8);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
@@ -271,10 +294,11 @@ bool VKDevice::Create() {
VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
if (khr_uniform_buffer_standard_layout) {
- std430_layout.sType =
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR;
- std430_layout.pNext = nullptr;
- std430_layout.uniformBufferStandardLayout = true;
+ std430_layout = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR,
+ .pNext = nullptr,
+ .uniformBufferStandardLayout = true,
+ };
SetNext(next, std430_layout);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
@@ -282,9 +306,11 @@ bool VKDevice::Create() {
VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
if (ext_index_type_uint8) {
- index_type_uint8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT;
- index_type_uint8.pNext = nullptr;
- index_type_uint8.indexTypeUint8 = true;
+ index_type_uint8 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT,
+ .pNext = nullptr,
+ .indexTypeUint8 = true,
+ };
SetNext(next, index_type_uint8);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
@@ -292,11 +318,12 @@ bool VKDevice::Create() {
VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
if (ext_transform_feedback) {
- transform_feedback.sType =
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
- transform_feedback.pNext = nullptr;
- transform_feedback.transformFeedback = true;
- transform_feedback.geometryStreams = true;
+ transform_feedback = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT,
+ .pNext = nullptr,
+ .transformFeedback = true,
+ .geometryStreams = true,
+ };
SetNext(next, transform_feedback);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
@@ -304,10 +331,12 @@ bool VKDevice::Create() {
VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border;
if (ext_custom_border_color) {
- custom_border.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
- custom_border.pNext = nullptr;
- custom_border.customBorderColors = VK_TRUE;
- custom_border.customBorderColorWithoutFormat = VK_TRUE;
+ custom_border = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT,
+ .pNext = nullptr,
+ .customBorderColors = VK_TRUE,
+ .customBorderColorWithoutFormat = VK_TRUE,
+ };
SetNext(next, custom_border);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors");
@@ -315,9 +344,11 @@ bool VKDevice::Create() {
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
if (ext_extended_dynamic_state) {
- dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
- dynamic_state.pNext = nullptr;
- dynamic_state.extendedDynamicState = VK_TRUE;
+ dynamic_state = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT,
+ .pNext = nullptr,
+ .extendedDynamicState = VK_TRUE,
+ };
SetNext(next, dynamic_state);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
@@ -331,11 +362,13 @@ bool VKDevice::Create() {
if (nv_device_diagnostics_config) {
nsight_aftermath_tracker.Initialize();
- diagnostics_nv.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV;
- diagnostics_nv.pNext = &features2;
- diagnostics_nv.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV |
- VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV |
- VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV;
+ diagnostics_nv = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV,
+ .pNext = &features2,
+ .flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV |
+ VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV |
+ VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV,
+ };
first_next = &diagnostics_nv;
}
@@ -347,8 +380,18 @@ bool VKDevice::Create() {
CollectTelemetryParameters();
+ if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR) {
+ // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but the <stride> field
+ // seems to be bugged. Blacklisting it for now.
+ LOG_WARNING(Render_Vulkan,
+ "Blacklisting AMD proprietary from VK_EXT_extended_dynamic_state");
+ ext_extended_dynamic_state = false;
+ }
+
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
+
+ use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
return true;
}
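For readers following the hunks above: SetNext keeps a tail pointer into the growing pNext chain that is eventually handed to device creation. Its definition lives outside this hunk, so the following is a hypothetical sketch of a helper along these lines, not the verbatim source:

// Hypothetical sketch of the SetNext helper used in VKDevice::Create():
// 'next' is the address of the last pNext slot in the chain; each call
// links a new feature struct and advances the tail.
template <typename T>
static void SetNext(void**& next, T& data) {
    *next = &data;      // append the struct to the chain
    next = &data.pNext; // the struct's own pNext becomes the new tail
}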
@@ -704,13 +747,15 @@ void VKDevice::SetupFeatures() {
}
void VKDevice::CollectTelemetryParameters() {
- VkPhysicalDeviceDriverPropertiesKHR driver;
- driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR;
- driver.pNext = nullptr;
+ VkPhysicalDeviceDriverPropertiesKHR driver{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
+ .pNext = nullptr,
+ };
- VkPhysicalDeviceProperties2KHR properties;
- properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
- properties.pNext = &driver;
+ VkPhysicalDeviceProperties2KHR properties{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
+ .pNext = &driver,
+ };
physical.GetProperties2KHR(properties);
driver_id = driver.driverID;
@@ -719,23 +764,26 @@ void VKDevice::CollectTelemetryParameters() {
const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
reported_extensions.reserve(std::size(extensions));
for (const auto& extension : extensions) {
- reported_extensions.push_back(extension.extensionName);
+ reported_extensions.emplace_back(extension.extensionName);
}
}
std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
static constexpr float QUEUE_PRIORITY = 1.0f;
- std::unordered_set<u32> unique_queue_families = {graphics_family, present_family};
+ std::unordered_set<u32> unique_queue_families{graphics_family, present_family};
std::vector<VkDeviceQueueCreateInfo> queue_cis;
+ queue_cis.reserve(unique_queue_families.size());
for (const u32 queue_family : unique_queue_families) {
- VkDeviceQueueCreateInfo& ci = queue_cis.emplace_back();
- ci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.queueFamilyIndex = queue_family;
- ci.queueCount = 1;
+ auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .queueFamilyIndex = queue_family,
+ .queueCount = 1,
+ .pQueuePriorities = nullptr,
+ });
ci.pQueuePriorities = &QUEUE_PRIORITY;
}
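The conversions in this file all follow one rule: replace member-by-member assignment of Vulkan create-info structs with C++20 designated initializers. The payoff is that any member not named is value-initialized instead of left indeterminate. A minimal illustration of the idiom; the struct chosen here is arbitrary and not part of this diff:

#include <vulkan/vulkan.h>

VkBufferCreateInfo MakeBufferCreateInfo(VkDeviceSize size) {
    return VkBufferCreateInfo{
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .size = size,
        .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        // queueFamilyIndexCount and pQueueFamilyIndices are not named, so
        // they are value-initialized to 0 and nullptr rather than left as
        // whatever happened to be on the stack.
    };
}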
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index ae5c21baa..26a233db1 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,6 +122,11 @@ public:
return properties.limits.maxPushConstantsSize;
}
+ /// Returns the maximum size for compute shared memory.
+ u32 GetMaxComputeSharedMemorySize() const {
+ return properties.limits.maxComputeSharedMemorySize;
+ }
+
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -197,6 +202,11 @@ public:
return reported_extensions;
}
+ /// Returns true if the setting for async shader compilation is enabled.
+ bool UseAsynchronousShaders() const {
+ return use_asynchronous_shaders;
+ }
+
/// Checks if the physical device is suitable.
static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
@@ -247,6 +257,9 @@ private:
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
+ // Asynchronous Graphics Pipeline setting
+ bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
+
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index a02be5487..55a8348fc 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -29,7 +29,7 @@ void InnerFence::Queue() {
}
ASSERT(!event);
- event = device.GetLogical().CreateEvent();
+ event = device.GetLogical().CreateNewEvent();
ticks = scheduler.Ticks();
scheduler.RequestOutsideRenderPassOperationContext();
@@ -71,12 +71,12 @@ bool InnerFence::IsEventSignalled() const {
}
}
-VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
- VKQueryCache& query_cache)
- : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
- device{device}, scheduler{scheduler} {}
+VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache,
+ VKBufferCache& buffer_cache, VKQueryCache& query_cache,
+ const VKDevice& device_, VKScheduler& scheduler_)
+ : GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache),
+ device{device_}, scheduler{scheduler_} {}
Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
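The CreateEvent to CreateNewEvent rename is most likely a macro-collision fix rather than a semantic change: <windows.h> defines CreateEvent as a macro expanding to CreateEventA/CreateEventW, which silently mangles any method of that name. A sketch of the hazard; the motivation is an inference, the diff itself does not state it:

#include <windows.h> // defines: #define CreateEvent CreateEventW (or ...A)

struct Device {
    int CreateEvent();    // the preprocessor rewrites this to CreateEventW()
    int CreateNewEvent(); // no macro by this name, so it survives intact
};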
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 043fe7947..1547d6d30 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -55,10 +55,10 @@ using GenericFenceManager =
class VKFenceManager final : public GenericFenceManager {
public:
- explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
- VKQueryCache& query_cache);
+ explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache,
+ VKBufferCache& buffer_cache, VKQueryCache& query_cache,
+ const VKDevice& device, VKScheduler& scheduler);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 844445105..2e46c6278 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -28,15 +28,15 @@ namespace {
template <class StencilFace>
VkStencilOpState GetStencilFaceState(const StencilFace& face) {
- VkStencilOpState state;
- state.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail());
- state.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass());
- state.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail());
- state.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc());
- state.compareMask = 0;
- state.writeMask = 0;
- state.reference = 0;
- return state;
+ return {
+ .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()),
+ .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()),
+ .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()),
+ .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()),
+ .compareMask = 0,
+ .writeMask = 0,
+ .reference = 0,
+ };
}
bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
@@ -52,20 +52,21 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
}
VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
- union {
+ union Swizzle {
u32 raw;
BitField<0, 3, Maxwell::ViewportSwizzle> x;
BitField<4, 3, Maxwell::ViewportSwizzle> y;
BitField<8, 3, Maxwell::ViewportSwizzle> z;
BitField<12, 3, Maxwell::ViewportSwizzle> w;
- } const unpacked{swizzle};
-
- VkViewportSwizzleNV result;
- result.x = MaxwellToVK::ViewportSwizzle(unpacked.x);
- result.y = MaxwellToVK::ViewportSwizzle(unpacked.y);
- result.z = MaxwellToVK::ViewportSwizzle(unpacked.z);
- result.w = MaxwellToVK::ViewportSwizzle(unpacked.w);
- return result;
+ };
+ const Swizzle unpacked{swizzle};
+
+ return {
+ .x = MaxwellToVK::ViewportSwizzle(unpacked.x),
+ .y = MaxwellToVK::ViewportSwizzle(unpacked.y),
+ .z = MaxwellToVK::ViewportSwizzle(unpacked.z),
+ .w = MaxwellToVK::ViewportSwizzle(unpacked.w),
+ };
}
} // Anonymous namespace
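Naming the union (Swizzle) lets the unpacked value be declared const separately from the type, but the bit layout is unchanged. For reference, the same extraction written with plain shifts:

// Each component is a 3-bit field; x, y, z and w sit at bits 0, 4, 8 and 12.
constexpr u32 ExtractSwizzleComponent(u16 packed, u32 bit_offset) {
    return (packed >> bit_offset) & 0b111;
}
// ExtractSwizzleComponent(swizzle, 0) corresponds to unpacked.x, and so on.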
@@ -77,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
const GraphicsPipelineCacheKey& key,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
const SPIRVProgram& program)
- : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
+ : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
program)},
- renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
- key.renderpass_params,
- program)} {}
+ renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)},
+ pipeline{CreatePipeline(cache_key.renderpass_params, program)} {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
@@ -100,24 +100,26 @@ VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
vk::Span<VkDescriptorSetLayoutBinding> bindings) const {
- VkDescriptorSetLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.bindingCount = bindings.size();
- ci.pBindings = bindings.data();
+ const VkDescriptorSetLayoutCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = bindings.size(),
+ .pBindings = bindings.data(),
+ };
return device.GetLogical().CreateDescriptorSetLayout(ci);
}
vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
- VkPipelineLayoutCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.setLayoutCount = 1;
- ci.pSetLayouts = descriptor_set_layout.address();
- ci.pushConstantRangeCount = 0;
- ci.pPushConstantRanges = nullptr;
+ const VkPipelineLayoutCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = descriptor_set_layout.address(),
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = nullptr,
+ };
return device.GetLogical().CreatePipelineLayout(ci);
}
@@ -136,26 +138,28 @@ vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTempla
return {};
}
- VkDescriptorUpdateTemplateCreateInfoKHR ci;
- ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size());
- ci.pDescriptorUpdateEntries = template_entries.data();
- ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
- ci.descriptorSetLayout = *descriptor_set_layout;
- ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
- ci.pipelineLayout = *layout;
- ci.set = DESCRIPTOR_SET;
+ const VkDescriptorUpdateTemplateCreateInfoKHR ci{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
+ .pDescriptorUpdateEntries = template_entries.data(),
+ .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+ .descriptorSetLayout = *descriptor_set_layout,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .pipelineLayout = *layout,
+ .set = DESCRIPTOR_SET,
+ };
return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
}
std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
const SPIRVProgram& program) const {
- VkShaderModuleCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
+ VkShaderModuleCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ };
std::vector<vk::ShaderModule> modules;
modules.reserve(Maxwell::MaxShaderStage);
@@ -176,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const {
- const auto& state = fixed_state;
+ const auto& state = cache_key.fixed_state;
const auto& viewport_swizzles = state.viewport_swizzles;
FixedPipelineState::DynamicState dynamic;
@@ -204,15 +208,17 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
const bool instanced = state.binding_divisors[index] != 0;
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
- auto& vertex_binding = vertex_bindings.emplace_back();
- vertex_binding.binding = static_cast<u32>(index);
- vertex_binding.stride = binding.stride;
- vertex_binding.inputRate = rate;
+ vertex_bindings.push_back({
+ .binding = static_cast<u32>(index),
+ .stride = binding.stride,
+ .inputRate = rate,
+ });
if (instanced) {
- auto& binding_divisor = vertex_binding_divisors.emplace_back();
- binding_divisor.binding = static_cast<u32>(index);
- binding_divisor.divisor = state.binding_divisors[index];
+ vertex_binding_divisors.push_back({
+ .binding = static_cast<u32>(index),
+ .divisor = state.binding_divisors[index],
+ });
}
}
@@ -227,116 +233,132 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
// Skip attributes not used by the vertex shaders.
continue;
}
- auto& vertex_attribute = vertex_attributes.emplace_back();
- vertex_attribute.location = static_cast<u32>(index);
- vertex_attribute.binding = attribute.buffer;
- vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size());
- vertex_attribute.offset = attribute.offset;
+ vertex_attributes.push_back({
+ .location = static_cast<u32>(index),
+ .binding = attribute.buffer,
+ .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
+ .offset = attribute.offset,
+ });
}
- VkPipelineVertexInputStateCreateInfo vertex_input_ci;
- vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
- vertex_input_ci.pNext = nullptr;
- vertex_input_ci.flags = 0;
- vertex_input_ci.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size());
- vertex_input_ci.pVertexBindingDescriptions = vertex_bindings.data();
- vertex_input_ci.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size());
- vertex_input_ci.pVertexAttributeDescriptions = vertex_attributes.data();
-
- VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci;
- input_divisor_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT;
- input_divisor_ci.pNext = nullptr;
- input_divisor_ci.vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size());
- input_divisor_ci.pVertexBindingDivisors = vertex_binding_divisors.data();
+ VkPipelineVertexInputStateCreateInfo vertex_input_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
+ .pVertexBindingDescriptions = vertex_bindings.data(),
+ .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
+ .pVertexAttributeDescriptions = vertex_attributes.data(),
+ };
+
+ const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()),
+ .pVertexBindingDivisors = vertex_binding_divisors.data(),
+ };
if (!vertex_binding_divisors.empty()) {
vertex_input_ci.pNext = &input_divisor_ci;
}
- VkPipelineInputAssemblyStateCreateInfo input_assembly_ci;
- input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
- input_assembly_ci.pNext = nullptr;
- input_assembly_ci.flags = 0;
- input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology());
- input_assembly_ci.primitiveRestartEnable =
- state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology);
-
- VkPipelineTessellationStateCreateInfo tessellation_ci;
- tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
- tessellation_ci.pNext = nullptr;
- tessellation_ci.flags = 0;
- tessellation_ci.patchControlPoints = state.patch_control_points_minus_one.Value() + 1;
-
- VkPipelineViewportStateCreateInfo viewport_ci;
- viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
- viewport_ci.pNext = nullptr;
- viewport_ci.flags = 0;
- viewport_ci.viewportCount = Maxwell::NumViewports;
- viewport_ci.pViewports = nullptr;
- viewport_ci.scissorCount = Maxwell::NumViewports;
- viewport_ci.pScissors = nullptr;
+ const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology());
+ const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .topology = input_assembly_topology,
+ .primitiveRestartEnable = state.primitive_restart_enable != 0 &&
+ SupportsPrimitiveRestart(input_assembly_topology),
+ };
+
+ const VkPipelineTessellationStateCreateInfo tessellation_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
+ };
+
+ VkPipelineViewportStateCreateInfo viewport_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .viewportCount = Maxwell::NumViewports,
+ .pViewports = nullptr,
+ .scissorCount = Maxwell::NumViewports,
+ .pScissors = nullptr,
+ };
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(),
UnpackViewportSwizzle);
- VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci;
- swizzle_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV;
- swizzle_ci.pNext = nullptr;
- swizzle_ci.flags = 0;
- swizzle_ci.viewportCount = Maxwell::NumViewports;
- swizzle_ci.pViewportSwizzles = swizzles.data();
+ VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
+ .pNext = nullptr,
+ .flags = 0,
+ .viewportCount = Maxwell::NumViewports,
+ .pViewportSwizzles = swizzles.data(),
+ };
if (device.IsNvViewportSwizzleSupported()) {
viewport_ci.pNext = &swizzle_ci;
}
- VkPipelineRasterizationStateCreateInfo rasterization_ci;
- rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
- rasterization_ci.pNext = nullptr;
- rasterization_ci.flags = 0;
- rasterization_ci.depthClampEnable = state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE;
- rasterization_ci.rasterizerDiscardEnable = state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE;
- rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
- rasterization_ci.cullMode =
- dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE;
- rasterization_ci.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace());
- rasterization_ci.depthBiasEnable = state.depth_bias_enable;
- rasterization_ci.depthBiasConstantFactor = 0.0f;
- rasterization_ci.depthBiasClamp = 0.0f;
- rasterization_ci.depthBiasSlopeFactor = 0.0f;
- rasterization_ci.lineWidth = 1.0f;
-
- VkPipelineMultisampleStateCreateInfo multisample_ci;
- multisample_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
- multisample_ci.pNext = nullptr;
- multisample_ci.flags = 0;
- multisample_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
- multisample_ci.sampleShadingEnable = VK_FALSE;
- multisample_ci.minSampleShading = 0.0f;
- multisample_ci.pSampleMask = nullptr;
- multisample_ci.alphaToCoverageEnable = VK_FALSE;
- multisample_ci.alphaToOneEnable = VK_FALSE;
-
- VkPipelineDepthStencilStateCreateInfo depth_stencil_ci;
- depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
- depth_stencil_ci.pNext = nullptr;
- depth_stencil_ci.flags = 0;
- depth_stencil_ci.depthTestEnable = dynamic.depth_test_enable;
- depth_stencil_ci.depthWriteEnable = dynamic.depth_write_enable;
- depth_stencil_ci.depthCompareOp = dynamic.depth_test_enable
- ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
- : VK_COMPARE_OP_ALWAYS;
- depth_stencil_ci.depthBoundsTestEnable = dynamic.depth_bounds_enable;
- depth_stencil_ci.stencilTestEnable = dynamic.stencil_enable;
- depth_stencil_ci.front = GetStencilFaceState(dynamic.front);
- depth_stencil_ci.back = GetStencilFaceState(dynamic.back);
- depth_stencil_ci.minDepthBounds = 0.0f;
- depth_stencil_ci.maxDepthBounds = 0.0f;
+ const VkPipelineRasterizationStateCreateInfo rasterization_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .depthClampEnable =
+ static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
+ .rasterizerDiscardEnable =
+ static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode =
+ dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE,
+ .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
+ .depthBiasEnable = state.depth_bias_enable,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f,
+ };
+
+ const VkPipelineMultisampleStateCreateInfo multisample_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ .sampleShadingEnable = VK_FALSE,
+ .minSampleShading = 0.0f,
+ .pSampleMask = nullptr,
+ .alphaToCoverageEnable = VK_FALSE,
+ .alphaToOneEnable = VK_FALSE,
+ };
+
+ const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .depthTestEnable = dynamic.depth_test_enable,
+ .depthWriteEnable = dynamic.depth_write_enable,
+ .depthCompareOp = dynamic.depth_test_enable
+ ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
+ : VK_COMPARE_OP_ALWAYS,
+ .depthBoundsTestEnable = dynamic.depth_bounds_enable,
+ .stencilTestEnable = dynamic.stencil_enable,
+ .front = GetStencilFaceState(dynamic.front),
+ .back = GetStencilFaceState(dynamic.back),
+ .minDepthBounds = 0.0f,
+ .maxDepthBounds = 0.0f,
+ };
std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments);
for (std::size_t index = 0; index < num_attachments; ++index) {
- static constexpr std::array COMPONENT_TABLE = {
- VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT,
- VK_COLOR_COMPONENT_A_BIT};
+ static constexpr std::array COMPONENT_TABLE{
+ VK_COLOR_COMPONENT_R_BIT,
+ VK_COLOR_COMPONENT_G_BIT,
+ VK_COLOR_COMPONENT_B_BIT,
+ VK_COLOR_COMPONENT_A_BIT,
+ };
const auto& blend = state.attachments[index];
VkColorComponentFlags color_components = 0;
@@ -346,35 +368,36 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
}
}
- VkPipelineColorBlendAttachmentState& attachment = cb_attachments[index];
- attachment.blendEnable = blend.enable != 0;
- attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor());
- attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor());
- attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB());
- attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor());
- attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor());
- attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha());
- attachment.colorWriteMask = color_components;
+ cb_attachments[index] = {
+ .blendEnable = blend.enable != 0,
+ .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
+ .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
+ .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
+ .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
+ .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
+ .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
+ .colorWriteMask = color_components,
+ };
}
- VkPipelineColorBlendStateCreateInfo color_blend_ci;
- color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
- color_blend_ci.pNext = nullptr;
- color_blend_ci.flags = 0;
- color_blend_ci.logicOpEnable = VK_FALSE;
- color_blend_ci.logicOp = VK_LOGIC_OP_COPY;
- color_blend_ci.attachmentCount = static_cast<u32>(num_attachments);
- color_blend_ci.pAttachments = cb_attachments.data();
- std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants));
-
- std::vector dynamic_states = {
+ const VkPipelineColorBlendStateCreateInfo color_blend_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_COPY,
+ .attachmentCount = static_cast<u32>(num_attachments),
+ .pAttachments = cb_attachments.data(),
+ };
+
+ std::vector dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
};
if (device.IsExtExtendedDynamicStateSupported()) {
- static constexpr std::array extended = {
+ static constexpr std::array extended{
VK_DYNAMIC_STATE_CULL_MODE_EXT,
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT,
@@ -389,18 +412,19 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
}
- VkPipelineDynamicStateCreateInfo dynamic_state_ci;
- dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
- dynamic_state_ci.pNext = nullptr;
- dynamic_state_ci.flags = 0;
- dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size());
- dynamic_state_ci.pDynamicStates = dynamic_states.data();
+ const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
+ .pDynamicStates = dynamic_states.data(),
+ };
- VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
- subgroup_size_ci.sType =
- VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT;
- subgroup_size_ci.pNext = nullptr;
- subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
+ const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .requiredSubgroupSize = GuestWarpSize,
+ };
std::vector<VkPipelineShaderStageCreateInfo> shader_stages;
std::size_t module_index = 0;
@@ -408,6 +432,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
if (!program[stage]) {
continue;
}
+
VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back();
stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stage_ci.pNext = nullptr;
@@ -422,26 +447,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
}
}
- VkGraphicsPipelineCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.stageCount = static_cast<u32>(shader_stages.size());
- ci.pStages = shader_stages.data();
- ci.pVertexInputState = &vertex_input_ci;
- ci.pInputAssemblyState = &input_assembly_ci;
- ci.pTessellationState = &tessellation_ci;
- ci.pViewportState = &viewport_ci;
- ci.pRasterizationState = &rasterization_ci;
- ci.pMultisampleState = &multisample_ci;
- ci.pDepthStencilState = &depth_stencil_ci;
- ci.pColorBlendState = &color_blend_ci;
- ci.pDynamicState = &dynamic_state_ci;
- ci.layout = *layout;
- ci.renderPass = renderpass;
- ci.subpass = 0;
- ci.basePipelineHandle = nullptr;
- ci.basePipelineIndex = 0;
+ const VkGraphicsPipelineCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(shader_stages.size()),
+ .pStages = shader_stages.data(),
+ .pVertexInputState = &vertex_input_ci,
+ .pInputAssemblyState = &input_assembly_ci,
+ .pTessellationState = &tessellation_ci,
+ .pViewportState = &viewport_ci,
+ .pRasterizationState = &rasterization_ci,
+ .pMultisampleState = &multisample_ci,
+ .pDepthStencilState = &depth_stencil_ci,
+ .pColorBlendState = &color_blend_ci,
+ .pDynamicState = &dynamic_state_ci,
+ .layout = *layout,
+ .renderPass = renderpass,
+ .subpass = 0,
+ .basePipelineHandle = nullptr,
+ .basePipelineIndex = 0,
+ };
return device.GetLogical().CreateGraphicsPipeline(ci);
}
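One subtlety the initializer-style rewrite preserves: extension structs such as input_divisor_ci and swizzle_ci are spliced into a pNext chain conditionally, so they must outlive the CreateGraphicsPipeline call. Keeping them as locals of CreatePipeline satisfies that. The anti-pattern to avoid looks like this; illustrative only, not from the diff:

VkPipelineViewportStateCreateInfo viewport_ci{ /* ... */ };
if (device.IsNvViewportSwizzleSupported()) {
    // BUG: swizzle_ci dies at the end of this block, leaving viewport_ci.pNext
    // dangling by the time vkCreateGraphicsPipelines walks the chain.
    VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ /* ... */ };
    viewport_ci.pNext = &swizzle_ci;
}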
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index a1d699a6c..58aa35efd 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -19,7 +19,27 @@ namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-struct GraphicsPipelineCacheKey;
+struct GraphicsPipelineCacheKey {
+ RenderPassParams renderpass_params;
+ u32 padding;
+ std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+ FixedPipelineState fixed_state;
+
+ std::size_t Hash() const noexcept;
+
+ bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+ bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+
+ std::size_t Size() const noexcept {
+ return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
+ }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
class VKDescriptorPool;
class VKDevice;
@@ -54,6 +74,10 @@ public:
return renderpass;
}
+ GraphicsPipelineCacheKey GetCacheKey() const {
+ return cache_key;
+ }
+
private:
vk::DescriptorSetLayout CreateDescriptorSetLayout(
vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
@@ -70,7 +94,7 @@ private:
const VKDevice& device;
VKScheduler& scheduler;
- const FixedPipelineState fixed_state;
+ const GraphicsPipelineCacheKey cache_key;
const u64 hash;
vk::DescriptorSetLayout descriptor_set_layout;
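Moving GraphicsPipelineCacheKey into this header keeps its contract visible next to the pipeline that now stores it: the three static_asserts exist because Hash() and operator== treat the key as raw bytes, which is also why the padding member is explicit. Presumably they mirror ComputePipelineCacheKey in vk_pipeline_cache.cpp, roughly:

std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
    // Size() stops at the end of the bytes FixedPipelineState actually uses.
    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
    return static_cast<std::size_t>(hash);
}

bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
    return std::memcmp(&rhs, this, Size()) == 0;
}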
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp
index 9bceb3861..1c418ea17 100644
--- a/src/video_core/renderer_vulkan/vk_image.cpp
+++ b/src/video_core/renderer_vulkan/vk_image.cpp
@@ -102,21 +102,29 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num
void VKImage::CreatePresentView() {
// Image type has to be 2D to be presented.
- VkImageViewCreateInfo image_view_ci;
- image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
- image_view_ci.pNext = nullptr;
- image_view_ci.flags = 0;
- image_view_ci.image = *image;
- image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D;
- image_view_ci.format = format;
- image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
- VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
- image_view_ci.subresourceRange.aspectMask = aspect_mask;
- image_view_ci.subresourceRange.baseMipLevel = 0;
- image_view_ci.subresourceRange.levelCount = 1;
- image_view_ci.subresourceRange.baseArrayLayer = 0;
- image_view_ci.subresourceRange.layerCount = 1;
- present_view = device.GetLogical().CreateImageView(image_view_ci);
+ present_view = device.GetLogical().CreateImageView({
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = *image,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = format,
+ .components =
+ {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ },
+ .subresourceRange =
+ {
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ });
}
VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
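Worth noting in the rewrite above: nested aggregates (components, subresourceRange) take designated initializers in place, without repeating the member's type, so the whole create info stays a single expression. A trimmed-down equivalent for just the subresource range:

const VkImageSubresourceRange color_range{
    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
    .baseMipLevel = 0,
    .levelCount = 1,
    .baseArrayLayer = 0,
    .layerCount = 1,
};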
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index b4c650a63..24c8960ac 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -178,13 +178,12 @@ bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 t
}();
// Try to allocate found type.
- VkMemoryAllocateInfo memory_ai;
- memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
- memory_ai.pNext = nullptr;
- memory_ai.allocationSize = size;
- memory_ai.memoryTypeIndex = type;
-
- vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai);
+ vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .allocationSize = size,
+ .memoryTypeIndex = type,
+ });
if (!memory) {
LOG_CRITICAL(Render_Vulkan, "Device allocation failed!");
return false;
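TryAllocateMemory, as opposed to an allocator that would assert on failure, returns a null handle, which is what makes the if (!memory) path possible. A sketch of how a caller could degrade gracefully instead of logging a critical error; the shrink-and-retry policy is hypothetical:

VkMemoryAllocateInfo memory_ai{
    .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
    .pNext = nullptr,
    .allocationSize = size,
    .memoryTypeIndex = type,
};
vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai);
if (!memory) {
    // Hypothetical fallback: halve the request once before giving up.
    memory_ai.allocationSize /= 2;
    memory = device.GetLogical().TryAllocateMemory(memory_ai);
}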
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 3da835324..5c038f4bc 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -28,6 +28,7 @@
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader_cache.h"
+#include "video_core/shader_notify.h"
namespace Vulkan {
@@ -88,12 +89,13 @@ void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& bindi
// Combined image samplers can be arrayed.
count = container[i].size;
}
- VkDescriptorSetLayoutBinding& entry = bindings.emplace_back();
- entry.binding = binding++;
- entry.descriptorType = descriptor_type;
- entry.descriptorCount = count;
- entry.stageFlags = stage_flags;
- entry.pImmutableSamplers = nullptr;
+ bindings.push_back({
+ .binding = binding++,
+ .descriptorType = descriptor_type,
+ .descriptorCount = count,
+ .stageFlags = stage_flags,
+ .pImmutableSamplers = nullptr,
+ });
}
}
@@ -133,64 +135,56 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
return std::memcmp(&rhs, this, sizeof *this) == 0;
}
-Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
- VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
- : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
- registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
- compiler_settings, registry},
- entries{GenerateShaderEntries(shader_ir)} {}
+Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage,
+ GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_,
+ u32 main_offset)
+ : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine),
+ shader_ir(program_code, main_offset, compiler_settings, registry),
+ entries(GenerateShaderEntries(shader_ir)) {}
Shader::~Shader() = default;
-Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
- Tegra::Engines::ShaderType stage) {
- if (stage == ShaderType::Compute) {
- return system.GPU().KeplerCompute();
- } else {
- return system.GPU().Maxwell3D();
- }
-}
-
-VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- VKRenderPassCache& renderpass_cache)
- : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
- scheduler{scheduler}, descriptor_pool{descriptor_pool},
- update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
+VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
+ VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ VKRenderPassCache& renderpass_cache_)
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
+ scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+ update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {}
VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
- const auto& gpu = system.GPU().Maxwell3D();
-
std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
+
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue;
}
- auto& memory_manager{system.GPU().MemoryManager()};
- const GPUVAddr program_addr{GetShaderAddress(system, program)};
- const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+ const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
if (!result) {
- const auto host_ptr{memory_manager.GetPointer(program_addr)};
+ const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
// No shader found - create a new one
- constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
+ static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
- ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+ ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
- auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
- stage_offset);
+ auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
+ std::move(code), stage_offset);
result = shader.get();
if (cpu_addr) {
@@ -204,24 +198,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
return last_shaders = shaders;
}
-VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
+VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
+ const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) {
- return *last_graphics_pipeline;
+ return last_graphics_pipeline;
}
last_graphics_key = key;
+ if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
+ std::unique_lock lock{pipeline_cache};
+ const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
+ if (is_cache_miss) {
+ gpu.ShaderNotify().MarkSharderBuilding();
+ LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+ const auto [program, bindings] = DecompileShaders(key.fixed_state);
+ async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
+ update_descriptor_queue, renderpass_cache, bindings,
+ program, key);
+ }
+ last_graphics_pipeline = pair->second.get();
+ return last_graphics_pipeline;
+ }
+
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second;
if (is_cache_miss) {
+ gpu.ShaderNotify().MarkSharderBuilding();
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
- const auto [program, bindings] = DecompileShaders(key);
+ const auto [program, bindings] = DecompileShaders(key.fixed_state);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, key,
bindings, program);
+ gpu.ShaderNotify().MarkShaderComplete();
}
- return *(last_graphics_pipeline = entry.get());
+ last_graphics_pipeline = entry.get();
+ return last_graphics_pipeline;
}
VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
@@ -234,22 +247,21 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
}
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
- auto& memory_manager = system.GPU().MemoryManager();
- const auto program_addr = key.shader;
+ const GPUVAddr gpu_addr = key.shader;
- const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
if (!shader) {
// No shader found - create a new one
- const auto host_ptr = memory_manager.GetPointer(program_addr);
+ const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
- ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
+ ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
- auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
- std::move(code), KERNEL_MAIN_OFFSET);
+ auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
+ *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
shader = shader_info.get();
if (cpu_addr) {
@@ -259,10 +271,15 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
}
}
- Specialization specialization;
- specialization.workgroup_size = key.workgroup_size;
- specialization.shared_memory_size = key.shared_memory_size;
-
+ const Specialization specialization{
+ .base_binding = 0,
+ .workgroup_size = key.workgroup_size,
+ .shared_memory_size = key.shared_memory_size,
+ .point_size = std::nullopt,
+ .enabled_attributes = {},
+ .attribute_types = {},
+ .ndc_minus_one_to_one = false,
+ };
const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
shader->GetRegistry(), specialization),
shader->GetEntries()};
@@ -271,6 +288,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
return *entry;
}
+void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
+ gpu.ShaderNotify().MarkShaderComplete();
+ std::unique_lock lock{pipeline_cache};
+ graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
+}
+
void VKPipelineCache::OnShaderRemoval(Shader* shader) {
bool finished = false;
const auto Finish = [&] {
@@ -306,11 +329,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
}
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
-VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
- const auto& fixed_state = key.fixed_state;
- auto& memory_manager = system.GPU().MemoryManager();
- const auto& gpu = system.GPU().Maxwell3D();
-
+VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
Specialization specialization;
if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points ||
device.IsExtExtendedDynamicStateSupported()) {
@@ -333,12 +352,12 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue;
}
- const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
- const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+ const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
@@ -370,13 +389,14 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
for (u32 i = 0; i < count; ++i) {
const u32 num_samplers = container[i].size;
- VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
- entry.dstBinding = binding;
- entry.dstArrayElement = 0;
- entry.descriptorCount = num_samplers;
- entry.descriptorType = descriptor_type;
- entry.offset = offset;
- entry.stride = entry_size;
+ template_entries.push_back({
+ .dstBinding = binding,
+ .dstArrayElement = 0,
+ .descriptorCount = num_samplers,
+ .descriptorType = descriptor_type,
+ .offset = offset,
+ .stride = entry_size,
+ });
++binding;
offset += num_samplers * entry_size;
@@ -389,22 +409,24 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
// Nvidia has a bug where updating multiple texels at once causes the driver to crash.
// Note: Fixed in driver Windows 443.24, Linux 440.66.15
for (u32 i = 0; i < count; ++i) {
- VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
- entry.dstBinding = binding + i;
- entry.dstArrayElement = 0;
- entry.descriptorCount = 1;
- entry.descriptorType = descriptor_type;
- entry.offset = static_cast<std::size_t>(offset + i * entry_size);
- entry.stride = entry_size;
+ template_entries.push_back({
+ .dstBinding = binding + i,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = descriptor_type,
+ .offset = static_cast<std::size_t>(offset + i * entry_size),
+ .stride = entry_size,
+ });
}
} else if (count > 0) {
- VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
- entry.dstBinding = binding;
- entry.dstArrayElement = 0;
- entry.descriptorCount = count;
- entry.descriptorType = descriptor_type;
- entry.offset = offset;
- entry.stride = entry_size;
+ template_entries.push_back({
+ .dstBinding = binding,
+ .dstArrayElement = 0,
+ .descriptorCount = count,
+ .descriptorType = descriptor_type,
+ .offset = offset,
+ .stride = entry_size,
+ });
}
offset += count * entry_size;
binding += count;
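A recurring detail in this file: every emplace_back() followed by member assignments becomes push_back({...}). That is not just style. emplace_back forwards its arguments to a constructor, and C-style Vulkan structs have none, so a braced aggregate cannot be forwarded through it; constructing the temporary and pushing it is the idiomatic way to keep designated initializers. Minimal form:

// bindings is a std::vector<VkDescriptorSetLayoutBinding>.
bindings.push_back({
    .binding = 0,
    .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
    .descriptorCount = 1,
    .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
    .pImmutableSamplers = nullptr,
});
// bindings.emplace_back({...}) would not compile: braced-init-lists do not
// deduce through perfect forwarding.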
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0a3fe65fb..1a31fd9f6 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -22,6 +22,7 @@
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
@@ -43,28 +44,6 @@ class VKUpdateDescriptorQueue;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-struct GraphicsPipelineCacheKey {
- RenderPassParams renderpass_params;
- u32 padding;
- std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
- FixedPipelineState fixed_state;
-
- std::size_t Hash() const noexcept;
-
- bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
-
- bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
- return !operator==(rhs);
- }
-
- std::size_t Size() const noexcept {
- return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
- }
-};
-static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
-
struct ComputePipelineCacheKey {
GPUVAddr shader;
u32 shared_memory_size;
@@ -106,7 +85,8 @@ namespace Vulkan {
class Shader {
public:
- explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+ explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine,
+ Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr,
VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
~Shader();
@@ -118,22 +98,19 @@ public:
return shader_ir;
}
- const VideoCommon::Shader::Registry& GetRegistry() const {
- return registry;
- }
-
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
+ const VideoCommon::Shader::Registry& GetRegistry() const {
+ return registry;
+ }
+
const ShaderEntries& GetEntries() const {
return entries;
}
private:
- static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
- Tegra::Engines::ShaderType stage);
-
GPUVAddr gpu_addr{};
VideoCommon::Shader::ProgramCode program_code;
VideoCommon::Shader::Registry registry;
@@ -143,27 +120,36 @@ private:
class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
public:
- explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
+ explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, const VKDevice& device,
+ VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache);
~VKPipelineCache() override;
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
- VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
+ VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
+ VideoCommon::Shader::AsyncShaders& async_shaders);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
+ void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
+
protected:
void OnShaderRemoval(Shader* shader) final;
private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
- const GraphicsPipelineCacheKey& key);
+ const FixedPipelineState& fixed_state);
+
+ Tegra::GPU& gpu;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
- Core::System& system;
const VKDevice& device;
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
@@ -178,6 +164,7 @@ private:
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
+ std::mutex pipeline_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
graphics_cache;
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
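The new std::mutex pipeline_cache member (named after the map it guards) is what makes GetGraphicsPipeline and EmplacePipeline safe to call from the render thread and the async worker threads respectively. The intended interaction, sketched under that assumption:

// Render thread, inside GetGraphicsPipeline():
{
    std::unique_lock lock{pipeline_cache};
    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
    if (is_cache_miss) {
        async_shaders.QueueVulkanShader(/* ... */); // entry stays nullptr for now
    }
    // Returning nullptr tells the caller to skip the draw until the compile lands.
}

// Worker thread, once compilation finishes:
{
    std::unique_lock lock{pipeline_cache};
    graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
}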
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index bc91c48cc..5a97c959d 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -47,14 +47,14 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) {
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
usage.resize(end);
- VkQueryPoolCreateInfo query_pool_ci;
- query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
- query_pool_ci.pNext = nullptr;
- query_pool_ci.flags = 0;
- query_pool_ci.queryType = GetTarget(type);
- query_pool_ci.queryCount = static_cast<u32>(end - begin);
- query_pool_ci.pipelineStatistics = 0;
- pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci));
+ pools.push_back(device->GetLogical().CreateQueryPool({
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .queryType = GetTarget(type),
+ .queryCount = static_cast<u32>(end - begin),
+ .pipelineStatistics = 0,
+ }));
}
void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
@@ -68,10 +68,11 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
}
-VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
const VKDevice& device, VKScheduler& scheduler)
: VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
- QueryPool>{system, rasterizer},
+ QueryPool>{rasterizer, maxwell3d, gpu_memory},
device{device}, scheduler{scheduler} {
for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 40119e6d3..9be996e55 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -56,7 +56,8 @@ class VKQueryCache final
: public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
QueryPool> {
public:
- explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
const VKDevice& device, VKScheduler& scheduler);
~VKQueryCache();
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7625871c2..bafebe294 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -14,6 +14,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
+#include "common/scope_exit.h"
#include "core/core.h"
#include "core/settings.h"
#include "video_core/engines/kepler_compute.h"
@@ -64,20 +65,22 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si
const auto& src = regs.viewport_transform[index];
const float width = src.scale_x * 2.0f;
const float height = src.scale_y * 2.0f;
+ const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
- VkViewport viewport;
- viewport.x = src.translate_x - src.scale_x;
- viewport.y = src.translate_y - src.scale_y;
- viewport.width = width != 0.0f ? width : 1.0f;
- viewport.height = height != 0.0f ? height : 1.0f;
+ VkViewport viewport{
+ .x = src.translate_x - src.scale_x,
+ .y = src.translate_y - src.scale_y,
+ .width = width != 0.0f ? width : 1.0f,
+ .height = height != 0.0f ? height : 1.0f,
+ .minDepth = src.translate_z - src.scale_z * reduce_z,
+ .maxDepth = src.translate_z + src.scale_z,
+ };
- const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
- viewport.minDepth = src.translate_z - src.scale_z * reduce_z;
- viewport.maxDepth = src.translate_z + src.scale_z;
if (!device.IsExtDepthRangeUnrestrictedSupported()) {
viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
}
+
return viewport;
}
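The reduce_z term folds the guest depth mode into the viewport: with MinusOneToOne (GL-style NDC) the guest depth range [-1, 1] has to map onto Vulkan's [0, 1], so minDepth starts a full scale_z below translate_z; with zero-to-one depth it passes through. A worked example with translate_z = 0.5f and scale_z = 0.5f:

float MinDepth(float translate_z, float scale_z, bool minus_one_to_one) {
    const float reduce_z = minus_one_to_one ? 1.0f : 0.0f;
    return translate_z - scale_z * reduce_z;
}
// MinDepth(0.5f, 0.5f, true)  == 0.0f -> viewport depth covers [0.0, 1.0]
// MinDepth(0.5f, 0.5f, false) == 0.5f -> viewport depth covers [0.5, 1.0]
// maxDepth is translate_z + scale_z == 1.0f in both cases.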
@@ -378,28 +381,34 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
}
}
-RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
- VKScreenInfo& screen_info, const VKDevice& device,
- VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, StateTracker& state_tracker,
- VKScheduler& scheduler)
- : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
- screen_info{screen_info}, device{device}, resource_manager{resource_manager},
- memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
+RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
+ Tegra::MemoryManager& gpu_memory_,
+ Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_,
+ const VKDevice& device_, VKResourceManager& resource_manager_,
+ VKMemoryManager& memory_manager_, StateTracker& state_tracker_,
+ VKScheduler& scheduler_)
+ : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_),
+ maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_),
+ device(device_), resource_manager(resource_manager_), memory_manager(memory_manager_),
+ state_tracker(state_tracker_), scheduler(scheduler_),
staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
update_descriptor_queue(device, scheduler), renderpass_cache(device),
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
- staging_pool),
- pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
- renderpass_cache),
- buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
- sampler_cache(device),
- fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
- query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
+ texture_cache(*this, maxwell3d, gpu_memory, device, resource_manager, memory_manager,
+ scheduler, staging_pool),
+ pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
+ descriptor_pool, update_descriptor_queue, renderpass_cache),
+ buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool),
+ sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler),
+ fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,
+ scheduler),
+ wfi_event(device.GetLogical().CreateNewEvent()), async_shaders(emu_window) {
scheduler.SetQueryCache(query_cache);
+ if (device.UseAsynchronousShaders()) {
+ async_shaders.AllocateWorkers();
+ }
}
RasterizerVulkan::~RasterizerVulkan() = default;
@@ -407,13 +416,13 @@ RasterizerVulkan::~RasterizerVulkan() = default;
void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
+ SCOPE_EXIT({ gpu.TickWork(); });
FlushWork();
query_cache.UpdateCounters();
- const auto& gpu = system.GPU().Maxwell3D();
GraphicsPipelineCacheKey key;
- key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
+ key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
@@ -437,10 +446,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
key.renderpass_params = GetRenderPassParams(texceptions);
key.padding = 0;
- auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
- scheduler.BindGraphicsPipeline(pipeline.GetHandle());
+ auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+ if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
+ // Async graphics pipeline was not ready.
+ return;
+ }
+
+ scheduler.BindGraphicsPipeline(pipeline->GetHandle());
- const auto renderpass = pipeline.GetRenderPass();
+ const auto renderpass = pipeline->GetRenderPass();
const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
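
GetGraphicsPipeline can now return null (or a pipeline whose handle is still VK_NULL_HANDLE) while a worker thread compiles it; the draw is skipped rather than stalled, and the SCOPE_EXIT added above keeps gpu.TickWork() running even on that early return. A condensed sketch of the control flow, with invented stand-in types:

// Sketch of the async-compile early-out. Pipeline and Cache are stand-ins,
// not the real VKGraphicsPipeline/VKPipelineCache interfaces.
struct Pipeline {
    void* handle = nullptr; // stays VK_NULL_HANDLE until compilation finishes
};

struct Cache {
    Pipeline* Get(bool ready) { return ready ? &pipeline : nullptr; }
    Pipeline pipeline;
};

static bool TryDraw(Cache& cache, bool ready) {
    Pipeline* const pipeline = cache.Get(ready);
    if (pipeline == nullptr || pipeline->handle == nullptr) {
        // Compilation is still in flight; drop the draw rather than stall.
        return false;
    }
    // Bind the pipeline and record the draw here.
    return true;
}
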
@@ -450,8 +464,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
BeginTransformFeedback();
- const auto pipeline_layout = pipeline.GetLayout();
- const auto descriptor_set = pipeline.CommitDescriptorSet();
+ const auto pipeline_layout = pipeline->GetLayout();
+ const auto descriptor_set = pipeline->CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
if (descriptor_set) {
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
@@ -461,15 +475,12 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
});
EndTransformFeedback();
-
- system.GPU().TickWork();
}
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
- const auto& gpu = system.GPU().Maxwell3D();
- if (!system.GPU().Maxwell3D().ShouldExecute()) {
+ if (!maxwell3d.ShouldExecute()) {
return;
}
@@ -478,7 +489,7 @@ void RasterizerVulkan::Clear() {
query_cache.UpdateCounters();
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
const bool use_depth = regs.clear_buffers.Z;
@@ -508,10 +519,11 @@ void RasterizerVulkan::Clear() {
const u32 color_attachment = regs.clear_buffers.RT;
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
- VkClearAttachment attachment;
- attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- attachment.colorAttachment = color_attachment;
- attachment.clearValue = clear_value;
+ const VkClearAttachment attachment{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .colorAttachment = color_attachment,
+ .clearValue = clear_value,
+ };
cmdbuf.ClearAttachments(attachment, clear_rect);
});
}
@@ -529,10 +541,6 @@ void RasterizerVulkan::Clear() {
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
- VkClearValue clear_value;
- clear_value.depthStencil.depth = clear_depth;
- clear_value.depthStencil.stencil = clear_stencil;
-
VkClearAttachment attachment;
attachment.aspectMask = aspect_flags;
attachment.colorAttachment = 0;
@@ -550,14 +558,17 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
query_cache.UpdateCounters();
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
- ComputePipelineCacheKey key;
- key.shader = code_addr;
- key.shared_memory_size = launch_desc.shared_alloc;
- key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y,
- launch_desc.block_dim_z};
-
- auto& pipeline = pipeline_cache.GetComputePipeline(key);
+ const auto& launch_desc = kepler_compute.launch_description;
+ auto& pipeline = pipeline_cache.GetComputePipeline({
+ .shader = code_addr,
+ .shared_memory_size = launch_desc.shared_alloc,
+ .workgroup_size =
+ {
+ launch_desc.block_dim_x,
+ launch_desc.block_dim_y,
+ launch_desc.block_dim_z,
+ },
+ });
// Compute dispatches can't be executed inside a renderpass
scheduler.RequestOutsideRenderPassOperationContext();
@@ -643,16 +654,14 @@ void RasterizerVulkan::SyncGuestHost() {
}
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
- gpu.MemoryManager().Write<u32>(addr, value);
+ gpu_memory.Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;
@@ -661,7 +670,6 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
}
void RasterizerVulkan::ReleaseFences() {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
return;
}
@@ -739,10 +747,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerVulkan::SetupDirtyFlags() {
- state_tracker.Initialize();
-}
-
void RasterizerVulkan::FlushWork() {
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
@@ -766,10 +770,9 @@ void RasterizerVulkan::FlushWork() {
RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- auto& maxwell3d = system.GPU().Maxwell3D();
- auto& dirty = maxwell3d.dirty.flags;
- auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d.regs;
+ auto& dirty = maxwell3d.dirty.flags;
const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
dirty[VideoCommon::Dirty::RenderTargets] = false;
@@ -813,8 +816,13 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen
std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
VkRenderPass renderpass) {
- FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
- std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
+ FramebufferCacheKey key{
+ .renderpass = renderpass,
+ .width = std::numeric_limits<u32>::max(),
+ .height = std::numeric_limits<u32>::max(),
+ .layers = std::numeric_limits<u32>::max(),
+ .views = {},
+ };
const auto try_push = [&key](const View& view) {
if (!view) {
@@ -827,7 +835,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
return true;
};
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
for (std::size_t index = 0; index < num_attachments; ++index) {
if (try_push(color_attachments[index])) {
@@ -841,17 +849,17 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
auto& framebuffer = fbentry->second;
if (is_cache_miss) {
- VkFramebufferCreateInfo framebuffer_ci;
- framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
- framebuffer_ci.pNext = nullptr;
- framebuffer_ci.flags = 0;
- framebuffer_ci.renderPass = key.renderpass;
- framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size());
- framebuffer_ci.pAttachments = key.views.data();
- framebuffer_ci.width = key.width;
- framebuffer_ci.height = key.height;
- framebuffer_ci.layers = key.layers;
- framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci);
+ framebuffer = device.GetLogical().CreateFramebuffer({
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .renderPass = key.renderpass,
+ .attachmentCount = static_cast<u32>(key.views.size()),
+ .pAttachments = key.views.data(),
+ .width = key.width,
+ .height = key.height,
+ .layers = key.layers,
+ });
}
return {*framebuffer, VkExtent2D{key.width, key.height}};
@@ -863,13 +871,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
bool is_instanced) {
MICROPROFILE_SCOPE(Vulkan_Geometry);
- const auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
SetupVertexArrays(buffer_bindings);
const u32 base_instance = regs.vb_base_instance;
- const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1;
+ const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
@@ -930,7 +937,7 @@ void RasterizerVulkan::SetupImageTransitions(
}
void RasterizerVulkan::UpdateDynamicStates() {
- auto& regs = system.GPU().Maxwell3D().regs;
+ auto& regs = maxwell3d.regs;
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
@@ -951,7 +958,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
}
void RasterizerVulkan::BeginTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -983,7 +990,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
}
void RasterizerVulkan::EndTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -996,7 +1003,7 @@ void RasterizerVulkan::EndTransformFeedback() {
}
void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
const auto& vertex_array = regs.vertex_array[index];
@@ -1022,7 +1029,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
if (params.num_vertices == 0) {
return;
}
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
switch (regs.draw.topology) {
case Maxwell::PrimitiveTopology::Quads: {
if (!params.is_indexed) {
@@ -1070,8 +1077,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& gpu = system.GPU().Maxwell3D();
- const auto& shader_stage = gpu.state.shader_stages[stage];
+ const auto& shader_stage = maxwell3d.state.shader_stages[stage];
for (const auto& entry : entries.const_buffers) {
SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
}
@@ -1079,8 +1085,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s
void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- auto& gpu{system.GPU()};
- const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
+ const auto& cbufs{maxwell3d.state.shader_stages[stage]};
for (const auto& entry : entries.global_buffers) {
const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
@@ -1090,19 +1095,17 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.uniform_texels) {
- const auto image = GetTextureInfo(gpu, entry, stage).tic;
+ const auto image = GetTextureInfo(maxwell3d, entry, stage).tic;
SetupUniformTexels(image, entry);
}
}
void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.samplers) {
for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(gpu, entry, stage, i);
+ const auto texture = GetTextureInfo(maxwell3d, entry, stage, i);
SetupTexture(texture, entry);
}
}
@@ -1110,25 +1113,23 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.storage_texels) {
- const auto image = GetTextureInfo(gpu, entry, stage).tic;
+ const auto image = GetTextureInfo(maxwell3d, entry, stage).tic;
SetupStorageTexel(image, entry);
}
}
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images);
- const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.images) {
- const auto tic = GetTextureInfo(gpu, entry, stage).tic;
+ const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic;
SetupImage(tic, entry);
}
}
void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
for (const auto& entry : entries.const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
@@ -1142,7 +1143,7 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config};
+ const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
for (const auto& entry : entries.global_buffers) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
SetupGlobalBuffer(entry, addr);
@@ -1151,19 +1152,17 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.uniform_texels) {
- const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
SetupUniformTexels(image, entry);
}
}
void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.samplers) {
for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i);
+ const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i);
SetupTexture(texture, entry);
}
}
@@ -1171,18 +1170,16 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.storage_texels) {
- const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
SetupStorageTexel(image, entry);
}
}
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Images);
- const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.images) {
- const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
SetupImage(tic, entry);
}
}
@@ -1206,9 +1203,8 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
}
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
- auto& memory_manager{system.GPU().MemoryManager()};
- const auto actual_addr = memory_manager.Read<u64>(address);
- const auto size = memory_manager.Read<u32>(address + 8);
+ const u64 actual_addr = gpu_memory.Read<u64>(address);
+ const u32 size = gpu_memory.Read<u32>(address + 8);
if (size == 0) {
// Sometimes global memory pointers don't have a proper size. Upload a dummy entry
@@ -1426,10 +1422,10 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
}
void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) {
- if (!state_tracker.TouchPrimitiveTopology()) {
+ const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value();
+ if (!state_tracker.ChangePrimitiveTopology(primitive_topology)) {
return;
}
- const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value();
scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) {
cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology));
});
@@ -1491,7 +1487,7 @@ std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
}
std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -1506,9 +1502,8 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
}
std::size_t RasterizerVulkan::CalculateIndexBufferSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
- return static_cast<std::size_t>(regs.index_array.count) *
- static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+ return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
+ static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}
std::size_t RasterizerVulkan::CalculateConstBufferSize(
@@ -1523,7 +1518,7 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
}
RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
RenderPassParams params;
@@ -1553,17 +1548,17 @@ VkBuffer RasterizerVulkan::DefaultBuffer() {
return *default_buffer;
}
- VkBufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.size = DEFAULT_BUFFER_SIZE;
- ci.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
- default_buffer = device.GetLogical().CreateBuffer(ci);
+ default_buffer = device.GetLogical().CreateBuffer({
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = DEFAULT_BUFFER_SIZE,
+ .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
default_buffer_commit = memory_manager.Commit(default_buffer, false);
scheduler.RequestOutsideRenderPassOperationContext();
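
DefaultBuffer is one instance of the refactor applied throughout this patch: field-by-field assignment of Vulkan create-info structs becomes C++20 designated initialization, so omitted members are value-initialized instead of left indeterminate and the compiler enforces declaration order. The pattern on a toy struct:

// The refactor shown on a toy struct, not a real Vulkan type.
struct CreateInfo {
    int s_type;
    const void* p_next;
    unsigned flags;
    unsigned long long size;
};

// Before: every field assigned by hand; a forgotten field is indeterminate.
static CreateInfo MakeOld() {
    CreateInfo ci;
    ci.s_type = 1;
    ci.p_next = nullptr;
    ci.flags = 0;
    ci.size = 4096;
    return ci;
}

// After: one expression; members must appear in declaration order and
// anything omitted is value-initialized.
static CreateInfo MakeNew() {
    return {
        .s_type = 1,
        .p_next = nullptr,
        .flags = 0,
        .size = 4096,
    };
}
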
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 923178b0b..16251d0f6 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -32,6 +32,7 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
namespace Core {
class System;
@@ -105,7 +106,8 @@ struct ImageView {
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
- explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
+ explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
VKScreenInfo& screen_info, const VKDevice& device,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
StateTracker& state_tracker, VKScheduler& scheduler);
@@ -134,7 +136,14 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void SetupDirtyFlags() override;
+
+ VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
+ return async_shaders;
+ }
+
+ const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
+ return async_shaders;
+ }
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000;
@@ -270,8 +279,11 @@ private:
VkBuffer DefaultBuffer();
- Core::System& system;
- Core::Frontend::EmuWindow& render_window;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+
VKScreenInfo& screen_info;
const VKDevice& device;
VKResourceManager& resource_manager;
@@ -291,12 +303,13 @@ private:
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKSamplerCache sampler_cache;
- VKFenceManager fence_manager;
VKQueryCache query_cache;
+ VKFenceManager fence_manager;
vk::Buffer default_buffer;
VKMemoryCommit default_buffer_commit;
vk::Event wfi_event;
+ VideoCommon::Shader::AsyncShaders async_shaders;
std::array<View, Maxwell::NumRenderTargets> color_attachments;
View zeta_attachment;
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
index 3f71d005e..80284cf92 100644
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
@@ -39,10 +39,14 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
using namespace VideoCore::Surface;
+ const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
+
std::vector<VkAttachmentDescription> descriptors;
+ descriptors.reserve(num_attachments);
+
std::vector<VkAttachmentReference> color_references;
+ color_references.reserve(num_attachments);
- const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
for (std::size_t rt = 0; rt < num_attachments; ++rt) {
const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
@@ -54,20 +58,22 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- VkAttachmentDescription& descriptor = descriptors.emplace_back();
- descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT;
- descriptor.format = format.format;
- descriptor.samples = VK_SAMPLE_COUNT_1_BIT;
- descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
- descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
- descriptor.initialLayout = color_layout;
- descriptor.finalLayout = color_layout;
-
- VkAttachmentReference& reference = color_references.emplace_back();
- reference.attachment = static_cast<u32>(rt);
- reference.layout = color_layout;
+ descriptors.push_back({
+ .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+ .format = format.format,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+ .initialLayout = color_layout,
+ .finalLayout = color_layout,
+ });
+
+ color_references.push_back({
+ .attachment = static_cast<u32>(rt),
+ .layout = color_layout,
+ });
}
VkAttachmentReference zeta_attachment_ref;
@@ -82,32 +88,36 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
const VkImageLayout zeta_layout = params.zeta_texception != 0
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
- VkAttachmentDescription& descriptor = descriptors.emplace_back();
- descriptor.flags = 0;
- descriptor.format = format.format;
- descriptor.samples = VK_SAMPLE_COUNT_1_BIT;
- descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
- descriptor.initialLayout = zeta_layout;
- descriptor.finalLayout = zeta_layout;
-
- zeta_attachment_ref.attachment = static_cast<u32>(num_attachments);
- zeta_attachment_ref.layout = zeta_layout;
+ descriptors.push_back({
+ .flags = 0,
+ .format = format.format,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = zeta_layout,
+ .finalLayout = zeta_layout,
+ });
+
+ zeta_attachment_ref = {
+ .attachment = static_cast<u32>(num_attachments),
+ .layout = zeta_layout,
+ };
}
- VkSubpassDescription subpass_description;
- subpass_description.flags = 0;
- subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
- subpass_description.inputAttachmentCount = 0;
- subpass_description.pInputAttachments = nullptr;
- subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size());
- subpass_description.pColorAttachments = color_references.data();
- subpass_description.pResolveAttachments = nullptr;
- subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr;
- subpass_description.preserveAttachmentCount = 0;
- subpass_description.pPreserveAttachments = nullptr;
+ const VkSubpassDescription subpass_description{
+ .flags = 0,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .pInputAttachments = nullptr,
+ .colorAttachmentCount = static_cast<u32>(color_references.size()),
+ .pColorAttachments = color_references.data(),
+ .pResolveAttachments = nullptr,
+ .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = nullptr,
+ };
VkAccessFlags access = 0;
VkPipelineStageFlags stage = 0;
@@ -122,26 +132,27 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
}
- VkSubpassDependency subpass_dependency;
- subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
- subpass_dependency.dstSubpass = 0;
- subpass_dependency.srcStageMask = stage;
- subpass_dependency.dstStageMask = stage;
- subpass_dependency.srcAccessMask = 0;
- subpass_dependency.dstAccessMask = access;
- subpass_dependency.dependencyFlags = 0;
-
- VkRenderPassCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.attachmentCount = static_cast<u32>(descriptors.size());
- ci.pAttachments = descriptors.data();
- ci.subpassCount = 1;
- ci.pSubpasses = &subpass_description;
- ci.dependencyCount = 1;
- ci.pDependencies = &subpass_dependency;
- return device.GetLogical().CreateRenderPass(ci);
+ const VkSubpassDependency subpass_dependency{
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = stage,
+ .dstStageMask = stage,
+ .srcAccessMask = 0,
+ .dstAccessMask = access,
+ .dependencyFlags = 0,
+ };
+
+ return device.GetLogical().CreateRenderPass({
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .attachmentCount = static_cast<u32>(descriptors.size()),
+ .pAttachments = descriptors.data(),
+ .subpassCount = 1,
+ .pSubpasses = &subpass_description,
+ .dependencyCount = 1,
+ .pDependencies = &subpass_dependency,
+ });
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
index dc06f545a..f19330a36 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -18,33 +18,32 @@ namespace {
constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
constexpr std::size_t FENCES_GROW_STEP = 0x40;
-VkFenceCreateInfo BuildFenceCreateInfo() {
- VkFenceCreateInfo fence_ci;
- fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
- fence_ci.pNext = nullptr;
- fence_ci.flags = 0;
- return fence_ci;
+constexpr VkFenceCreateInfo BuildFenceCreateInfo() {
+ return {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ };
}
} // Anonymous namespace
class CommandBufferPool final : public VKFencedPool {
public:
- CommandBufferPool(const VKDevice& device)
+ explicit CommandBufferPool(const VKDevice& device)
: VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
void Allocate(std::size_t begin, std::size_t end) override {
// Command buffers are going to be committed, recorded, and executed every single usage cycle.
// They are also going to be reset when committed.
- VkCommandPoolCreateInfo command_pool_ci;
- command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
- command_pool_ci.pNext = nullptr;
- command_pool_ci.flags =
- VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
- command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily();
-
Pool& pool = pools.emplace_back();
- pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci);
+ pool.handle = device.GetLogical().CreateCommandPool({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
+ VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = device.GetGraphicsFamily(),
+ });
pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
}
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index 616eacc36..b068888f9 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -44,32 +44,36 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
const bool arbitrary_borders = device.IsExtCustomBorderColorSupported();
const std::array color = tsc.GetBorderColor();
- VkSamplerCustomBorderColorCreateInfoEXT border;
- border.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT;
- border.pNext = nullptr;
- border.format = VK_FORMAT_UNDEFINED;
+ VkSamplerCustomBorderColorCreateInfoEXT border{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .customBorderColor = {},
+ .format = VK_FORMAT_UNDEFINED,
+ };
std::memcpy(&border.customBorderColor, color.data(), sizeof(color));
- VkSamplerCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
- ci.pNext = arbitrary_borders ? &border : nullptr;
- ci.flags = 0;
- ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter);
- ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter);
- ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter);
- ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter);
- ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter);
- ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter);
- ci.mipLodBias = tsc.GetLodBias();
- ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE;
- ci.maxAnisotropy = tsc.GetMaxAnisotropy();
- ci.compareEnable = tsc.depth_compare_enabled;
- ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
- ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
- ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
- ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
- ci.unnormalizedCoordinates = VK_FALSE;
- return device.GetLogical().CreateSampler(ci);
+ return device.GetLogical().CreateSampler({
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = arbitrary_borders ? &border : nullptr,
+ .flags = 0,
+ .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
+ .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
+ .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
+ .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+ .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+ .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
+ .mipLodBias = tsc.GetLodBias(),
+ .anisotropyEnable =
+ static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE),
+ .maxAnisotropy = tsc.GetMaxAnisotropy(),
+ .compareEnable = tsc.depth_compare_enabled,
+ .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
+ .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(),
+ .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(),
+ .borderColor =
+ arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
+ .unnormalizedCoordinates = VK_FALSE,
+ });
}
VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const {
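
The static_cast<VkBool32> around anisotropyEnable above is not cosmetic: VK_TRUE and VK_FALSE expand to plain int literals, and brace initialization rejects the resulting non-constant int-to-uint32_t conversion as narrowing. A minimal reproduction, using local stand-ins for the Vulkan macros:

#include <cstdint>

using VkBool32 = std::uint32_t;
#define MY_TRUE 1  // mirrors how VK_TRUE expands to a plain int literal
#define MY_FALSE 0

struct SamplerInfo {
    VkBool32 anisotropy_enable;
};

static SamplerInfo Make(float max_anisotropy) {
    return {
        // Without the cast this is a narrowing int -> uint32_t conversion,
        // which brace initialization rejects for non-constant expressions.
        .anisotropy_enable =
            static_cast<VkBool32>(max_anisotropy > 1.0f ? MY_TRUE : MY_FALSE),
    };
}
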
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 56524e6f3..dbbd0961a 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -100,16 +100,19 @@ void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer frame
state.framebuffer = framebuffer;
state.render_area = render_area;
- VkRenderPassBeginInfo renderpass_bi;
- renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
- renderpass_bi.pNext = nullptr;
- renderpass_bi.renderPass = renderpass;
- renderpass_bi.framebuffer = framebuffer;
- renderpass_bi.renderArea.offset.x = 0;
- renderpass_bi.renderArea.offset.y = 0;
- renderpass_bi.renderArea.extent = render_area;
- renderpass_bi.clearValueCount = 0;
- renderpass_bi.pClearValues = nullptr;
+ const VkRenderPassBeginInfo renderpass_bi{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .pNext = nullptr,
+ .renderPass = renderpass,
+ .framebuffer = framebuffer,
+ .renderArea =
+ {
+ .offset = {.x = 0, .y = 0},
+ .extent = render_area,
+ },
+ .clearValueCount = 0,
+ .pClearValues = nullptr,
+ };
Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) {
if (end_renderpass) {
@@ -157,16 +160,17 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
current_cmdbuf.End();
- VkSubmitInfo submit_info;
- submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
- submit_info.pNext = nullptr;
- submit_info.waitSemaphoreCount = 0;
- submit_info.pWaitSemaphores = nullptr;
- submit_info.pWaitDstStageMask = nullptr;
- submit_info.commandBufferCount = 1;
- submit_info.pCommandBuffers = current_cmdbuf.address();
- submit_info.signalSemaphoreCount = semaphore ? 1 : 0;
- submit_info.pSignalSemaphores = &semaphore;
+ const VkSubmitInfo submit_info{
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = nullptr,
+ .waitSemaphoreCount = 0,
+ .pWaitSemaphores = nullptr,
+ .pWaitDstStageMask = nullptr,
+ .commandBufferCount = 1,
+ .pCommandBuffers = current_cmdbuf.address(),
+ .signalSemaphoreCount = semaphore ? 1U : 0U,
+ .pSignalSemaphores = &semaphore,
+ };
switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) {
case VK_SUCCESS:
break;
@@ -181,19 +185,18 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
void VKScheduler::AllocateNewContext() {
++ticks;
- VkCommandBufferBeginInfo cmdbuf_bi;
- cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
- cmdbuf_bi.pNext = nullptr;
- cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
- cmdbuf_bi.pInheritanceInfo = nullptr;
-
std::unique_lock lock{mutex};
current_fence = next_fence;
next_fence = &resource_manager.CommitFence();
current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence),
device.GetDispatchLoader());
- current_cmdbuf.Begin(cmdbuf_bi);
+ current_cmdbuf.Begin({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .pNext = nullptr,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ .pInheritanceInfo = nullptr,
+ });
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 97429cc59..cd7d7a4e4 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -685,13 +685,19 @@ private:
}
t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
- const u32 smem_size = specialization.shared_memory_size;
+ u32 smem_size = specialization.shared_memory_size * 4;
if (smem_size == 0) {
// Avoid declaring an empty array.
return;
}
- const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4);
- const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count));
+ const u32 limit = device.GetMaxComputeSharedMemorySize();
+ if (smem_size > limit) {
+ LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
+ smem_size, limit);
+ smem_size = limit;
+ }
+
+ const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
Name(type_pointer, "SharedMemory");
@@ -700,9 +706,9 @@ private:
}
void DeclareInternalFlags() {
- constexpr std::array names = {"zero", "sign", "carry", "overflow"};
+ static constexpr std::array names{"zero", "sign", "carry", "overflow"};
+
for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
- const auto flag_code = static_cast<InternalFlag>(flag);
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
}
@@ -2798,7 +2804,6 @@ private:
std::map<GlobalMemoryBase, Id> global_buffers;
std::map<u32, TexelBuffer> uniform_texels;
std::map<u32, SampledImage> sampled_images;
- std::map<u32, TexelBuffer> storage_texels;
std::map<u32, StorageImage> images;
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
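
Per the decompiler hunk above, specialization.shared_memory_size is now treated as a count of 32-bit words (hence the * 4 to bytes) and clamped against the device's reported compute shared-memory limit before the Workgroup array is declared. The same arithmetic in isolation, under that assumption:

#include <cstdint>

// Returns the element count for the shared-memory uint array, clamping
// the byte size to the host limit (e.g. maxComputeSharedMemorySize).
static std::uint32_t SharedMemoryElements(std::uint32_t size_in_words,
                                          std::uint32_t host_limit_bytes) {
    std::uint32_t bytes = size_in_words * 4;
    if (bytes > host_limit_bytes) {
        // Matches the LOG_ERROR path above: oversubscription is clamped,
        // not rejected.
        bytes = host_limit_bytes;
    }
    return bytes / 4; // one u32 per element of the Workgroup array
}
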
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index 112df9c71..c1a218d76 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -19,13 +19,13 @@ vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, cons
const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
std::memcpy(data.get(), code_data, code_size);
- VkShaderModuleCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.codeSize = code_size;
- ci.pCode = data.get();
- return device.GetLogical().CreateShaderModule(ci);
+ return device.GetLogical().CreateShaderModule({
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .codeSize = code_size,
+ .pCode = data.get(),
+ });
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 45c180221..5eca0ab91 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -71,20 +71,19 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_
VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
const u32 log2 = Common::Log2Ceil64(size);
- VkBufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.size = 1ULL << log2;
- ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
-
auto buffer = std::make_unique<VKBuffer>();
- buffer->handle = device.GetLogical().CreateBuffer(ci);
+ buffer->handle = device.GetLogical().CreateBuffer({
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = 1ULL << log2,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
buffer->commit = memory_manager.Commit(buffer->handle, host_visible);
auto& entries = GetCache(host_visible)[log2].entries;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index e5a583dd5..5d2c4a796 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -42,7 +42,6 @@ Flags MakeInvalidationFlags() {
flags[DepthWriteEnable] = true;
flags[DepthCompareOp] = true;
flags[FrontFace] = true;
- flags[PrimitiveTopology] = true;
flags[StencilOp] = true;
flags[StencilTestEnable] = true;
return flags;
@@ -112,10 +111,6 @@ void SetupDirtyFrontFace(Tables& tables) {
table[OFF(screen_y_control)] = FrontFace;
}
-void SetupDirtyPrimitiveTopology(Tables& tables) {
- tables[0][OFF(draw.topology)] = PrimitiveTopology;
-}
-
void SetupDirtyStencilOp(Tables& tables) {
auto& table = tables[0];
table[OFF(stencil_front_op_fail)] = StencilOp;
@@ -137,12 +132,9 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
} // Anonymous namespace
-StateTracker::StateTracker(Core::System& system)
- : system{system}, invalidation_flags{MakeInvalidationFlags()} {}
-
-void StateTracker::Initialize() {
- auto& dirty = system.GPU().Maxwell3D().dirty;
- auto& tables = dirty.tables;
+StateTracker::StateTracker(Tegra::GPU& gpu)
+ : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
+ auto& tables = gpu.Maxwell3D().dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
@@ -156,12 +148,8 @@ void StateTracker::Initialize() {
SetupDirtyDepthWriteEnable(tables);
SetupDirtyDepthCompareOp(tables);
SetupDirtyFrontFace(tables);
- SetupDirtyPrimitiveTopology(tables);
SetupDirtyStencilOp(tables);
-}
-
-void StateTracker::InvalidateCommandBufferState() {
- system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
+ SetupDirtyStencilTestEnable(tables);
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 54ca0d6c6..1de789e57 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -32,7 +32,6 @@ enum : u8 {
DepthWriteEnable,
DepthCompareOp,
FrontFace,
- PrimitiveTopology,
StencilOp,
StencilTestEnable,
@@ -43,12 +42,15 @@ static_assert(Last <= std::numeric_limits<u8>::max());
} // namespace Dirty
class StateTracker {
-public:
- explicit StateTracker(Core::System& system);
+ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
- void Initialize();
+public:
+ explicit StateTracker(Tegra::GPU& gpu);
- void InvalidateCommandBufferState();
+ void InvalidateCommandBufferState() {
+ flags |= invalidation_flags;
+ current_topology = INVALID_TOPOLOGY;
+ }
bool TouchViewports() {
return Exchange(Dirty::Viewports, false);
@@ -102,10 +104,6 @@ public:
return Exchange(Dirty::FrontFace, false);
}
- bool TouchPrimitiveTopology() {
- return Exchange(Dirty::PrimitiveTopology, false);
- }
-
bool TouchStencilOp() {
return Exchange(Dirty::StencilOp, false);
}
@@ -114,16 +112,24 @@ public:
return Exchange(Dirty::StencilTestEnable, false);
}
+ bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
+ const bool has_changed = current_topology != new_topology;
+ current_topology = new_topology;
+ return has_changed;
+ }
+
private:
+ static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
+
bool Exchange(std::size_t id, bool new_value) const noexcept {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
const bool is_dirty = flags[id];
flags[id] = new_value;
return is_dirty;
}
- Core::System& system;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
+ Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
};
} // namespace Vulkan
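
ChangePrimitiveTopology swaps dirty-flag polling (TouchPrimitiveTopology) for value tracking: the tracker records only when the topology actually differs from the last one it saw, and InvalidateCommandBufferState resets it to a sentinel so the first draw of every command buffer rebinds unconditionally. The same idea, condensed:

#include <cstdint>

enum class Topology : std::uint32_t { Points, Lines, Triangles };

class TopologyTracker {
public:
    // True when the caller must record SetPrimitiveTopologyEXT again.
    bool Change(Topology next) {
        const bool changed = current != next;
        current = next;
        return changed;
    }

    // Called when a new command buffer starts: force the next Change()
    // to report true regardless of the guest state.
    void Invalidate() {
        current = INVALID;
    }

private:
    static constexpr auto INVALID = static_cast<Topology>(~0u);
    Topology current = INVALID;
};
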
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 2d28a6c47..3c9171a5e 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -57,9 +57,9 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
} // Anonymous namespace
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_,
VkBufferUsageFlags usage)
- : device{device}, scheduler{scheduler} {
+ : device{device_}, scheduler{scheduler_} {
CreateBuffers(usage);
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
@@ -122,30 +122,27 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
// Subtract some bytes from the preferred heap size to avoid running out of memory.
const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024;
-
- VkBufferCreateInfo buffer_ci;
- buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- buffer_ci.pNext = nullptr;
- buffer_ci.flags = 0;
- buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size);
- buffer_ci.usage = usage;
- buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- buffer_ci.queueFamilyIndexCount = 0;
- buffer_ci.pQueueFamilyIndices = nullptr;
-
- buffer = device.GetLogical().CreateBuffer(buffer_ci);
+ buffer = device.GetLogical().CreateBuffer({
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
const u32 required_flags = requirements.memoryTypeBits;
stream_buffer_size = static_cast<u64>(requirements.size);
- VkMemoryAllocateInfo memory_ai;
- memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
- memory_ai.pNext = nullptr;
- memory_ai.allocationSize = requirements.size;
- memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags);
-
- memory = device.GetLogical().AllocateMemory(memory_ai);
+ memory = device.GetLogical().AllocateMemory({
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .allocationSize = requirements.size,
+ .memoryTypeIndex = GetMemoryType(memory_properties, required_flags),
+ });
buffer.BindMemory(*memory, 0);
}
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index bffd8f32a..6bfd2abae 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -95,15 +95,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) {
const auto present_queue{device.GetPresentQueue()};
bool recreated = false;
- VkPresentInfoKHR present_info;
- present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
- present_info.pNext = nullptr;
- present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U;
- present_info.pWaitSemaphores = semaphores.data();
- present_info.swapchainCount = 1;
- present_info.pSwapchains = swapchain.address();
- present_info.pImageIndices = &image_index;
- present_info.pResults = nullptr;
+ const VkPresentInfoKHR present_info{
+ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+ .pNext = nullptr,
+ .waitSemaphoreCount = render_semaphore ? 2U : 1U,
+ .pWaitSemaphores = semaphores.data(),
+ .swapchainCount = 1,
+ .pSwapchains = swapchain.address(),
+ .pImageIndices = &image_index,
+ .pResults = nullptr,
+ };
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
@@ -147,24 +148,26 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
requested_image_count = capabilities.maxImageCount;
}
- VkSwapchainCreateInfoKHR swapchain_ci;
- swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
- swapchain_ci.pNext = nullptr;
- swapchain_ci.flags = 0;
- swapchain_ci.surface = surface;
- swapchain_ci.minImageCount = requested_image_count;
- swapchain_ci.imageFormat = surface_format.format;
- swapchain_ci.imageColorSpace = surface_format.colorSpace;
- swapchain_ci.imageArrayLayers = 1;
- swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
- swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
- swapchain_ci.queueFamilyIndexCount = 0;
- swapchain_ci.pQueueFamilyIndices = nullptr;
- swapchain_ci.preTransform = capabilities.currentTransform;
- swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
- swapchain_ci.presentMode = present_mode;
- swapchain_ci.clipped = VK_FALSE;
- swapchain_ci.oldSwapchain = nullptr;
+ VkSwapchainCreateInfoKHR swapchain_ci{
+ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .surface = surface,
+ .minImageCount = requested_image_count,
+ .imageFormat = surface_format.format,
+ .imageColorSpace = surface_format.colorSpace,
+ .imageExtent = {},
+ .imageArrayLayers = 1,
+ .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ .preTransform = capabilities.currentTransform,
+ .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+ .presentMode = present_mode,
+ .clipped = VK_FALSE,
+ .oldSwapchain = nullptr,
+ };
const u32 graphics_family{device.GetGraphicsFamily()};
const u32 present_family{device.GetPresentFamily()};
@@ -173,8 +176,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
- } else {
- swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
}
// Request the size again to reduce the possibility of a TOCTOU race condition.
@@ -200,20 +201,29 @@ void VKSwapchain::CreateSemaphores() {
}
void VKSwapchain::CreateImageViews() {
- VkImageViewCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- // ci.image
- ci.viewType = VK_IMAGE_VIEW_TYPE_2D;
- ci.format = image_format;
- ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
- VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
- ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- ci.subresourceRange.baseMipLevel = 0;
- ci.subresourceRange.levelCount = 1;
- ci.subresourceRange.baseArrayLayer = 0;
- ci.subresourceRange.layerCount = 1;
+ VkImageViewCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = {},
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image_format,
+ .components =
+ {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ },
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
image_views.resize(image_count);
for (std::size_t i = 0; i < image_count; i++) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 430031665..06182d909 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -95,17 +95,18 @@ VkImageViewType GetImageViewType(SurfaceTarget target) {
vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
std::size_t host_memory_size) {
// TODO(Rodrigo): Move texture buffer creation to the buffer cache
- VkBufferCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.size = static_cast<VkDeviceSize>(host_memory_size);
- ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
- VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
- return device.GetLogical().CreateBuffer(ci);
+ return device.GetLogical().CreateBuffer({
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = static_cast<VkDeviceSize>(host_memory_size),
+ .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
}
VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
@@ -113,15 +114,16 @@ VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
std::size_t host_memory_size) {
ASSERT(params.IsBuffer());
- VkBufferViewCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.buffer = buffer;
- ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
- ci.offset = 0;
- ci.range = static_cast<VkDeviceSize>(host_memory_size);
- return ci;
+ return {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .buffer = buffer,
+ .format =
+ MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format,
+ .offset = 0,
+ .range = static_cast<VkDeviceSize>(host_memory_size),
+ };
}
VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
@@ -130,23 +132,24 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
const auto [format, attachable, storage] =
MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format);
- VkImageCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.imageType = SurfaceTargetToImage(params.target);
- ci.format = format;
- ci.mipLevels = params.num_levels;
- ci.arrayLayers = static_cast<u32>(params.GetNumLayers());
- ci.samples = VK_SAMPLE_COUNT_1_BIT;
- ci.tiling = VK_IMAGE_TILING_OPTIMAL;
- ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
- ci.queueFamilyIndexCount = 0;
- ci.pQueueFamilyIndices = nullptr;
- ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
-
- ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+ VkImageCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .imageType = SurfaceTargetToImage(params.target),
+ .format = format,
+ .extent = {},
+ .mipLevels = params.num_levels,
+ .arrayLayers = static_cast<u32>(params.GetNumLayers()),
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ };
if (attachable) {
ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
: VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
@@ -185,13 +188,13 @@ u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::Swizzl
} // Anonymous namespace
-CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
- GPUVAddr gpu_addr, const SurfaceParams& params)
- : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system},
- device{device}, resource_manager{resource_manager},
- memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
+CachedSurface::CachedSurface(const VKDevice& device, VKResourceManager& resource_manager,
+ VKMemoryManager& memory_manager, VKScheduler& scheduler,
+ VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr,
+ const SurfaceParams& params)
+ : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device},
+ resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
+ staging_pool{staging_pool} {
if (params.IsBuffer()) {
buffer = CreateBuffer(device, params, host_memory_size);
commit = memory_manager.Commit(buffer, false);
@@ -233,7 +236,7 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
UNIMPLEMENTED_IF(params.IsBuffer());
- if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+ if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) {
LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed");
}
@@ -281,12 +284,10 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
- barrier.dstAccessMask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
- barrier.srcQueueFamilyIndex = VK_ACCESS_TRANSFER_WRITE_BIT;
- barrier.dstQueueFamilyIndex = VK_ACCESS_SHADER_READ_BIT;
- barrier.srcQueueFamilyIndex = 0;
- barrier.dstQueueFamilyIndex = 0;
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+ barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // No queue family ownership transfer
+ barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = dst_buffer;
barrier.offset = 0;
barrier.size = size;
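For context on the fix above: the removed lines assigned pipeline-stage flags to the access masks and access flags to the queue family indices. The stage masks belong to the vkCmdPipelineBarrier call itself; the VkBufferMemoryBarrier carries the access masks and queue family indices, and VK_QUEUE_FAMILY_IGNORED opts out of any queue family ownership transfer. A sketch of the corrected pairing (cmdbuf, dst_buffer and size are stand-ins):

const VkBufferMemoryBarrier barrier{
    .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
    .pNext = nullptr,
    .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,  // what the copy wrote
    .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,     // what shaders will read
    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, // no ownership transfer
    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .buffer = dst_buffer,
    .offset = 0,
    .size = size,
};
// The pipeline stages are parameters of the command, not of the struct:
vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                     VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0, 0, nullptr, 1,
                     &barrier, 0, nullptr);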
@@ -323,22 +324,25 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
}
VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
- VkBufferImageCopy copy;
- copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted);
- copy.bufferRowLength = 0;
- copy.bufferImageHeight = 0;
- copy.imageSubresource.aspectMask = image->GetAspectMask();
- copy.imageSubresource.mipLevel = level;
- copy.imageSubresource.baseArrayLayer = 0;
- copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers());
- copy.imageOffset.x = 0;
- copy.imageOffset.y = 0;
- copy.imageOffset.z = 0;
- copy.imageExtent.width = params.GetMipWidth(level);
- copy.imageExtent.height = params.GetMipHeight(level);
- copy.imageExtent.depth =
- params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
- return copy;
+ return {
+ .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted),
+ .bufferRowLength = 0,
+ .bufferImageHeight = 0,
+ .imageSubresource =
+ {
+ .aspectMask = image->GetAspectMask(),
+ .mipLevel = level,
+ .baseArrayLayer = 0,
+ .layerCount = static_cast<u32>(params.GetNumLayers()),
+ },
+ .imageOffset = {.x = 0, .y = 0, .z = 0},
+ .imageExtent =
+ {
+ .width = params.GetMipWidth(level),
+ .height = params.GetMipHeight(level),
+ .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U,
+ },
+ };
}
VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
@@ -382,7 +386,7 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc
std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source),
MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
- if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+ if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) {
// A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
std::swap(swizzle[0], swizzle[2]);
}
@@ -394,11 +398,11 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc
UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
const bool is_first = x_source == SwizzleSource::R;
switch (params.pixel_format) {
- case VideoCore::Surface::PixelFormat::Z24S8:
- case VideoCore::Surface::PixelFormat::Z32FS8:
+ case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT:
+ case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT:
aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
break;
- case VideoCore::Surface::PixelFormat::S8Z24:
+ case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM:
aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
break;
default:
@@ -418,20 +422,29 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc
ASSERT(num_slices == params.depth);
}
- VkImageViewCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.image = surface.GetImageHandle();
- ci.viewType = image_view_type;
- ci.format = surface.GetImage().GetFormat();
- ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]};
- ci.subresourceRange.aspectMask = aspect;
- ci.subresourceRange.baseMipLevel = base_level;
- ci.subresourceRange.levelCount = num_levels;
- ci.subresourceRange.baseArrayLayer = base_layer;
- ci.subresourceRange.layerCount = num_layers;
- image_view = device.GetLogical().CreateImageView(ci);
+ image_view = device.GetLogical().CreateImageView({
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = surface.GetImageHandle(),
+ .viewType = image_view_type,
+ .format = surface.GetImage().GetFormat(),
+ .components =
+ {
+ .r = swizzle[0],
+ .g = swizzle[1],
+ .b = swizzle[2],
+ .a = swizzle[3],
+ },
+ .subresourceRange =
+ {
+ .aspectMask = aspect,
+ .baseMipLevel = base_level,
+ .levelCount = num_levels,
+ .baseArrayLayer = base_layer,
+ .layerCount = num_layers,
+ },
+ });
return last_image_view = *image_view;
}
@@ -441,17 +454,29 @@ VkImageView CachedSurfaceView::GetAttachment() {
return *render_target;
}
- VkImageViewCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.image = surface.GetImageHandle();
- ci.format = surface.GetImage().GetFormat();
- ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
- VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
- ci.subresourceRange.aspectMask = aspect_mask;
- ci.subresourceRange.baseMipLevel = base_level;
- ci.subresourceRange.levelCount = num_levels;
+ VkImageViewCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = surface.GetImageHandle(),
+ .viewType = VK_IMAGE_VIEW_TYPE_1D,
+ .format = surface.GetImage().GetFormat(),
+ .components =
+ {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ },
+ .subresourceRange =
+ {
+ .aspectMask = aspect_mask,
+ .baseMipLevel = base_level,
+ .levelCount = num_levels,
+ .baseArrayLayer = 0,
+ .layerCount = 0,
+ },
+ };
if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
ci.subresourceRange.baseArrayLayer = base_slice;
@@ -465,19 +490,21 @@ VkImageView CachedSurfaceView::GetAttachment() {
return *render_target;
}
-VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, VKScheduler& scheduler,
- VKStagingBufferPool& staging_pool)
- : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device},
- resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
- staging_pool{staging_pool} {}
+VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory, const VKDevice& device_,
+ VKResourceManager& resource_manager_,
+ VKMemoryManager& memory_manager_, VKScheduler& scheduler_,
+ VKStagingBufferPool& staging_pool_)
+ : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()),
+ device{device_}, resource_manager{resource_manager_},
+ memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} {}
VKTextureCache::~VKTextureCache() = default;
Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
- return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager,
- scheduler, staging_pool, gpu_addr, params);
+ return std::make_shared<CachedSurface>(device, resource_manager, memory_manager, scheduler,
+ staging_pool, gpu_addr, params);
}
void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
@@ -504,24 +531,40 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
- VkImageCopy copy;
- copy.srcSubresource.aspectMask = src_surface->GetAspectMask();
- copy.srcSubresource.mipLevel = copy_params.source_level;
- copy.srcSubresource.baseArrayLayer = copy_params.source_z;
- copy.srcSubresource.layerCount = num_layers;
- copy.srcOffset.x = copy_params.source_x;
- copy.srcOffset.y = copy_params.source_y;
- copy.srcOffset.z = 0;
- copy.dstSubresource.aspectMask = dst_surface->GetAspectMask();
- copy.dstSubresource.mipLevel = copy_params.dest_level;
- copy.dstSubresource.baseArrayLayer = dst_base_layer;
- copy.dstSubresource.layerCount = num_layers;
- copy.dstOffset.x = copy_params.dest_x;
- copy.dstOffset.y = copy_params.dest_y;
- copy.dstOffset.z = dst_offset_z;
- copy.extent.width = copy_params.width;
- copy.extent.height = copy_params.height;
- copy.extent.depth = extent_z;
+ const VkImageCopy copy{
+ .srcSubresource =
+ {
+ .aspectMask = src_surface->GetAspectMask(),
+ .mipLevel = copy_params.source_level,
+ .baseArrayLayer = copy_params.source_z,
+ .layerCount = num_layers,
+ },
+ .srcOffset =
+ {
+ .x = static_cast<s32>(copy_params.source_x),
+ .y = static_cast<s32>(copy_params.source_y),
+ .z = 0,
+ },
+ .dstSubresource =
+ {
+ .aspectMask = dst_surface->GetAspectMask(),
+ .mipLevel = copy_params.dest_level,
+ .baseArrayLayer = dst_base_layer,
+ .layerCount = num_layers,
+ },
+ .dstOffset =
+ {
+ .x = static_cast<s32>(copy_params.dest_x),
+ .y = static_cast<s32>(copy_params.dest_y),
+ .z = static_cast<s32>(dst_offset_z),
+ },
+ .extent =
+ {
+ .width = copy_params.width,
+ .height = copy_params.height,
+ .depth = extent_z,
+ },
+ };
const VkImage src_image = src_surface->GetImageHandle();
const VkImage dst_image = dst_surface->GetImageHandle();
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 807e26c8a..e47d02c41 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -15,10 +15,6 @@
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/texture_cache.h"
-namespace Core {
-class System;
-}
-
namespace VideoCore {
class RasterizerInterface;
}
@@ -45,10 +41,10 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> {
friend CachedSurfaceView;
public:
- explicit CachedSurface(Core::System& system, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
- GPUVAddr gpu_addr, const SurfaceParams& params);
+ explicit CachedSurface(const VKDevice& device, VKResourceManager& resource_manager,
+ VKMemoryManager& memory_manager, VKScheduler& scheduler,
+ VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr,
+ const SurfaceParams& params);
~CachedSurface();
void UploadTexture(const std::vector<u8>& staging_buffer) override;
@@ -101,7 +97,6 @@ private:
VkImageSubresourceRange GetImageSubresourceRange() const;
- Core::System& system;
const VKDevice& device;
VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
@@ -201,7 +196,8 @@ private:
class VKTextureCache final : public TextureCacheBase {
public:
- explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
const VKDevice& device, VKResourceManager& resource_manager,
VKMemoryManager& memory_manager, VKScheduler& scheduler,
VKStagingBufferPool& staging_pool);
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 051298cc8..013865aa4 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -377,24 +377,26 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe
Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions,
InstanceDispatch& dld) noexcept {
- VkApplicationInfo application_info;
- application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
- application_info.pNext = nullptr;
- application_info.pApplicationName = "yuzu Emulator";
- application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
- application_info.pEngineName = "yuzu Emulator";
- application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
- application_info.apiVersion = VK_API_VERSION_1_1;
-
- VkInstanceCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.pApplicationInfo = &application_info;
- ci.enabledLayerCount = layers.size();
- ci.ppEnabledLayerNames = layers.data();
- ci.enabledExtensionCount = extensions.size();
- ci.ppEnabledExtensionNames = extensions.data();
+ static constexpr VkApplicationInfo application_info{
+ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+ .pNext = nullptr,
+ .pApplicationName = "yuzu Emulator",
+ .applicationVersion = VK_MAKE_VERSION(0, 1, 0),
+ .pEngineName = "yuzu Emulator",
+ .engineVersion = VK_MAKE_VERSION(0, 1, 0),
+ .apiVersion = VK_API_VERSION_1_1,
+ };
+
+ const VkInstanceCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .pApplicationInfo = &application_info,
+ .enabledLayerCount = layers.size(),
+ .ppEnabledLayerNames = layers.data(),
+ .enabledExtensionCount = extensions.size(),
+ .ppEnabledExtensionNames = extensions.data(),
+ };
VkInstance instance;
if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) {
@@ -425,19 +427,20 @@ std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices(
DebugCallback Instance::TryCreateDebugCallback(
PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept {
- VkDebugUtilsMessengerCreateInfoEXT ci;
- ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
- ci.pNext = nullptr;
- ci.flags = 0;
- ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT;
- ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
- ci.pfnUserCallback = callback;
- ci.pUserData = nullptr;
+ const VkDebugUtilsMessengerCreateInfoEXT ci{
+ .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .flags = 0,
+ .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
+ .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
+ .pfnUserCallback = callback,
+ .pUserData = nullptr,
+ };
VkDebugUtilsMessengerEXT messenger;
if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) {
@@ -468,12 +471,13 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c
}
CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
- VkCommandBufferAllocateInfo ai;
- ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
- ai.pNext = nullptr;
- ai.commandPool = handle;
- ai.level = level;
- ai.commandBufferCount = static_cast<u32>(num_buffers);
+ const VkCommandBufferAllocateInfo ai{
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .commandPool = handle,
+ .level = level,
+ .commandBufferCount = static_cast<u32>(num_buffers),
+ };
std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers);
switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) {
@@ -497,17 +501,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {
Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
Span<const char*> enabled_extensions, const void* next,
DeviceDispatch& dld) noexcept {
- VkDeviceCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
- ci.pNext = next;
- ci.flags = 0;
- ci.queueCreateInfoCount = queues_ci.size();
- ci.pQueueCreateInfos = queues_ci.data();
- ci.enabledLayerCount = 0;
- ci.ppEnabledLayerNames = nullptr;
- ci.enabledExtensionCount = enabled_extensions.size();
- ci.ppEnabledExtensionNames = enabled_extensions.data();
- ci.pEnabledFeatures = nullptr;
+ const VkDeviceCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .pNext = next,
+ .flags = 0,
+ .queueCreateInfoCount = queues_ci.size(),
+ .pQueueCreateInfos = queues_ci.data(),
+ .enabledLayerCount = 0,
+ .ppEnabledLayerNames = nullptr,
+ .enabledExtensionCount = enabled_extensions.size(),
+ .ppEnabledExtensionNames = enabled_extensions.data(),
+ .pEnabledFeatures = nullptr,
+ };
VkDevice device;
if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) {
@@ -548,10 +553,11 @@ ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const {
}
Semaphore Device::CreateSemaphore() const {
- VkSemaphoreCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
+ static constexpr VkSemaphoreCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ };
VkSemaphore object;
Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object));
@@ -638,11 +644,13 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
return ShaderModule(object, handle, *dld);
}
-Event Device::CreateEvent() const {
- VkEventCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
- ci.pNext = nullptr;
- ci.flags = 0;
+Event Device::CreateNewEvent() const {
+ static constexpr VkEventCreateInfo ci{
+ .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ };
+
VkEvent object;
Check(dld->vkCreateEvent(handle, &ci, nullptr, &object));
return Event(object, handle, *dld);
@@ -778,7 +786,7 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp
VK_SUCCESS) {
return std::nullopt;
}
- return properties;
+ return std::move(properties);
}
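The return std::move(properties) change above is deliberate rather than a pessimization: the function returns std::optional<std::vector<...>> while the local is a plain std::vector, and under C++17 rules the implicit move on return does not apply across that conversion, so the vector would be copied into the optional (clang's -Wreturn-std-move warns on exactly this pattern). In miniature:

#include <optional>
#include <vector>

std::optional<std::vector<int>> Enumerate() {
    std::vector<int> properties(128);
    // ... fill properties ...
    // The return type differs from the local's type, so without the
    // explicit move the conversion to std::optional copies the vector.
    return std::move(properties);
}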
std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 71daac9d7..b9d3fedc1 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -721,7 +721,7 @@ public:
ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
- Event CreateEvent() const;
+ Event CreateNewEvent() const;
SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
@@ -756,8 +756,8 @@ public:
}
VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
- void* data, VkDeviceSize stride, VkQueryResultFlags flags) const
- noexcept {
+ void* data, VkDeviceSize stride,
+ VkQueryResultFlags flags) const noexcept {
return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
flags);
}
@@ -849,8 +849,8 @@ public:
dld->vkCmdBindPipeline(handle, bind_point, pipeline);
}
- void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) const
- noexcept {
+ void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset,
+ VkIndexType index_type) const noexcept {
dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type);
}
@@ -863,8 +863,8 @@ public:
BindVertexBuffers(binding, 1, &buffer, &offset);
}
- void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, u32 first_instance) const
- noexcept {
+ void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex,
+ u32 first_instance) const noexcept {
dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance);
}
@@ -874,15 +874,15 @@ public:
first_instance);
}
- void ClearAttachments(Span<VkClearAttachment> attachments, Span<VkClearRect> rects) const
- noexcept {
+ void ClearAttachments(Span<VkClearAttachment> attachments,
+ Span<VkClearRect> rects) const noexcept {
dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
rects.data());
}
void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
- VkImageLayout dst_layout, Span<VkImageBlit> regions, VkFilter filter) const
- noexcept {
+ VkImageLayout dst_layout, Span<VkImageBlit> regions,
+ VkFilter filter) const noexcept {
dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
regions.data(), filter);
}
@@ -907,8 +907,8 @@ public:
regions.data());
}
- void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, Span<VkBufferCopy> regions) const
- noexcept {
+ void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
+ Span<VkBufferCopy> regions) const noexcept {
dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data());
}
@@ -924,8 +924,8 @@ public:
regions.data());
}
- void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, u32 data) const
- noexcept {
+ void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size,
+ u32 data) const noexcept {
dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data);
}
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
new file mode 100644
index 000000000..aabd62c5c
--- /dev/null
+++ b/src/video_core/shader/async_shaders.cpp
@@ -0,0 +1,221 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <vector>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/shader/async_shaders.h"
+
+namespace VideoCommon::Shader {
+
+AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}
+
+AsyncShaders::~AsyncShaders() {
+ KillWorkers();
+}
+
+void AsyncShaders::AllocateWorkers() {
+ // Max worker threads we should allow
+ constexpr u32 MAX_THREADS = 4;
+ // Deduce how many threads we can use
+ const u32 threads_used = std::thread::hardware_concurrency() / 4;
+ // Always allow at least 1 thread regardless of our settings
+ const auto max_worker_count = std::max(1U, threads_used);
+ // Don't use more than MAX_THREADS
+ const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+
+ // If we already have the desired number of workers, there is nothing to do
+ if (num_workers == worker_threads.size()) {
+ return;
+ }
+
+ // If workers already exist, clear them
+ if (!worker_threads.empty()) {
+ FreeWorkers();
+ }
+
+ // Create workers
+ for (std::size_t i = 0; i < num_workers; i++) {
+ context_list.push_back(emu_window.CreateSharedContext());
+ worker_threads.push_back(
+ std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()));
+ }
+}
+
+void AsyncShaders::FreeWorkers() {
+ // Mark all threads to quit
+ is_thread_exiting.store(true);
+ cv.notify_all();
+ for (auto& thread : worker_threads) {
+ thread.join();
+ }
+ // Clear our shared contexts
+ context_list.clear();
+
+ // Clear our worker threads
+ worker_threads.clear();
+}
+
+void AsyncShaders::KillWorkers() {
+ is_thread_exiting.store(true);
+ for (auto& thread : worker_threads) {
+ thread.detach();
+ }
+ // Clear our shared contexts
+ context_list.clear();
+
+ // Clear our worker threads
+ worker_threads.clear();
+}
+
+bool AsyncShaders::HasWorkQueued() const {
+ return !pending_queue.empty();
+}
+
+bool AsyncShaders::HasCompletedWork() const {
+ std::shared_lock lock{completed_mutex};
+ return !finished_work.empty();
+}
+
+bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
+ const auto& regs = gpu.Maxwell3D().regs;
+
+ // If depth is in use, we can assume the game is not rendering a one-off pass, so the
+ // shader is safe to build asynchronously.
+ if (regs.zeta_enable) {
+ return true;
+ }
+
+ // If the game is using a small index count, we can assume these are full-screen quads.
+ // These shaders are usually only used once to build textures, so we assume they cannot
+ // be built asynchronously.
+ if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) {
+ return false;
+ }
+
+ return true;
+}
+
+std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
+ std::vector<Result> results;
+ {
+ std::unique_lock lock{completed_mutex};
+ results.assign(std::make_move_iterator(finished_work.begin()),
+ std::make_move_iterator(finished_work.end()));
+ finished_work.clear();
+ }
+ return results;
+}
+
+void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
+ Tegra::Engines::ShaderType shader_type, u64 uid,
+ std::vector<u64> code, std::vector<u64> code_b,
+ u32 main_offset,
+ VideoCommon::Shader::CompilerSettings compiler_settings,
+ const VideoCommon::Shader::Registry& registry,
+ VAddr cpu_addr) {
+ WorkerParams params{
+ .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
+ .device = &device,
+ .shader_type = shader_type,
+ .uid = uid,
+ .code = std::move(code),
+ .code_b = std::move(code_b),
+ .main_offset = main_offset,
+ .compiler_settings = compiler_settings,
+ .registry = registry,
+ .cpu_address = cpu_addr,
+ };
+ std::unique_lock lock(queue_mutex);
+ pending_queue.push(std::move(params));
+ cv.notify_one();
+}
+
+void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+ const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+ Vulkan::VKDescriptorPool& descriptor_pool,
+ Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+ Vulkan::VKRenderPassCache& renderpass_cache,
+ std::vector<VkDescriptorSetLayoutBinding> bindings,
+ Vulkan::SPIRVProgram program,
+ Vulkan::GraphicsPipelineCacheKey key) {
+ WorkerParams params{
+ .backend = Backend::Vulkan,
+ .pp_cache = pp_cache,
+ .vk_device = &device,
+ .scheduler = &scheduler,
+ .descriptor_pool = &descriptor_pool,
+ .update_descriptor_queue = &update_descriptor_queue,
+ .renderpass_cache = &renderpass_cache,
+ .bindings = bindings,
+ .program = program,
+ .key = key,
+ };
+
+ std::unique_lock lock(queue_mutex);
+ pending_queue.push(std::move(params));
+ cv.notify_one();
+}
+
+void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
+ while (!is_thread_exiting.load(std::memory_order_relaxed)) {
+ std::unique_lock lock{queue_mutex};
+ cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
+ if (is_thread_exiting) {
+ return;
+ }
+
+ // Spurious wakeup with no work queued; go back to waiting
+ if (!HasWorkQueued()) {
+ continue;
+ }
+ // Another thread consumed the work first; unlock and wait for the next push
+ if (pending_queue.empty()) {
+ continue;
+ }
+
+ // Pull work from queue
+ WorkerParams work = std::move(pending_queue.front());
+ pending_queue.pop();
+ lock.unlock();
+
+ if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
+ const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
+ const auto scope = context->Acquire();
+ auto program =
+ OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
+ Result result{};
+ result.backend = work.backend;
+ result.cpu_address = work.cpu_address;
+ result.uid = work.uid;
+ result.code = std::move(work.code);
+ result.code_b = std::move(work.code_b);
+ result.shader_type = work.shader_type;
+
+ if (work.backend == Backend::OpenGL) {
+ result.program.opengl = std::move(program->source_program);
+ } else if (work.backend == Backend::GLASM) {
+ result.program.glasm = std::move(program->assembly_program);
+ }
+
+ {
+ std::unique_lock complete_lock(completed_mutex);
+ finished_work.push_back(std::move(result));
+ }
+ } else if (work.backend == Backend::Vulkan) {
+ auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+ *work.vk_device, *work.scheduler, *work.descriptor_pool,
+ *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
+ work.program);
+
+ work.pp_cache->EmplacePipeline(std::move(pipeline));
+ }
+ }
+}
+
+} // namespace VideoCommon::Shader
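ShaderCompilerThread above is a conventional condition-variable work queue: workers sleep on the condition variable, wake when work is pushed or shutdown is flagged, pop under the lock, and compile with the lock released so producers are never blocked on compilation. The skeleton of the pattern, with placeholder Work/Compile names:

#include <atomic>
#include <condition_variable>
#include <mutex>
#include <queue>

struct Work {};
void Compile(Work&) {}

std::mutex queue_mutex;
std::condition_variable cv;
std::queue<Work> pending;
std::atomic<bool> exiting{false};

void WorkerLoop() {
    while (!exiting.load(std::memory_order_relaxed)) {
        std::unique_lock lock{queue_mutex};
        // The predicate guards against spurious wakeups
        cv.wait(lock, [] { return !pending.empty() || exiting; });
        if (exiting) {
            return;
        }
        Work work = std::move(pending.front());
        pending.pop();
        lock.unlock(); // compile outside the lock
        Compile(work);
    }
}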
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
new file mode 100644
index 000000000..7cf8d994c
--- /dev/null
+++ b/src/video_core/shader/async_shaders.h
@@ -0,0 +1,136 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <condition_variable>
+#include <memory>
+#include <shared_mutex>
+#include <thread>
+
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Core::Frontend {
+class EmuWindow;
+class GraphicsContext;
+} // namespace Core::Frontend
+
+namespace Tegra {
+class GPU;
+}
+
+namespace Vulkan {
+class VKPipelineCache;
+}
+
+namespace VideoCommon::Shader {
+
+class AsyncShaders {
+public:
+ enum class Backend {
+ OpenGL,
+ GLASM,
+ Vulkan,
+ };
+
+ struct ResultPrograms {
+ OpenGL::OGLProgram opengl;
+ OpenGL::OGLAssemblyProgram glasm;
+ };
+
+ struct Result {
+ u64 uid;
+ VAddr cpu_address;
+ Backend backend;
+ ResultPrograms program;
+ std::vector<u64> code;
+ std::vector<u64> code_b;
+ Tegra::Engines::ShaderType shader_type;
+ };
+
+ explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
+ ~AsyncShaders();
+
+ /// Start up shader worker threads
+ void AllocateWorkers();
+
+ /// Signal all worker threads to exit and join them
+ void FreeWorkers();
+
+ /// Forcibly end all worker threads without joining them
+ void KillWorkers();
+
+ /// Check to see if any shaders have actually been compiled
+ [[nodiscard]] bool HasCompletedWork() const;
+
+ /// Deduce if a shader can be built on another thread or MUST be built synchronously. We
+ /// cannot build every shader asynchronously, as some shaders are only built and executed
+ /// once. We try to "guess" which shaders will only be used once.
+ [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
+
+ /// Pulls completed compiled shaders
+ [[nodiscard]] std::vector<Result> GetCompletedWork();
+
+ void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
+ u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
+ CompilerSettings compiler_settings, const Registry& registry,
+ VAddr cpu_addr);
+
+ void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+ Vulkan::VKScheduler& scheduler,
+ Vulkan::VKDescriptorPool& descriptor_pool,
+ Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+ Vulkan::VKRenderPassCache& renderpass_cache,
+ std::vector<VkDescriptorSetLayoutBinding> bindings,
+ Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
+
+private:
+ void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
+
+ /// Check our worker queue to see if we have any work queued already
+ [[nodiscard]] bool HasWorkQueued() const;
+
+ struct WorkerParams {
+ Backend backend;
+ // For OGL
+ const OpenGL::Device* device;
+ Tegra::Engines::ShaderType shader_type;
+ u64 uid;
+ std::vector<u64> code;
+ std::vector<u64> code_b;
+ u32 main_offset;
+ CompilerSettings compiler_settings;
+ std::optional<Registry> registry;
+ VAddr cpu_address;
+
+ // For Vulkan
+ Vulkan::VKPipelineCache* pp_cache;
+ const Vulkan::VKDevice* vk_device;
+ Vulkan::VKScheduler* scheduler;
+ Vulkan::VKDescriptorPool* descriptor_pool;
+ Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
+ Vulkan::VKRenderPassCache* renderpass_cache;
+ std::vector<VkDescriptorSetLayoutBinding> bindings;
+ Vulkan::SPIRVProgram program;
+ Vulkan::GraphicsPipelineCacheKey key;
+ };
+
+ std::condition_variable cv;
+ mutable std::mutex queue_mutex;
+ mutable std::shared_mutex completed_mutex;
+ std::atomic<bool> is_thread_exiting{};
+ std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
+ std::vector<std::thread> worker_threads;
+ std::queue<WorkerParams> pending_queue;
+ std::vector<Result> finished_work;
+ Core::Frontend::EmuWindow& emu_window;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 8d86020f6..336397cdb 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -187,24 +187,26 @@ std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state,
std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
u64 ldc_tracked_register) {
- return TrackInstruction<u64>(state, pos,
- [ldc_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::SHL_IMM &&
- instr.gpr0.Value() == ldc_tracked_register;
- },
- [](auto instr, const auto&) { return instr.gpr8.Value(); });
+ return TrackInstruction<u64>(
+ state, pos,
+ [ldc_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::SHL_IMM &&
+ instr.gpr0.Value() == ldc_tracked_register;
+ },
+ [](auto instr, const auto&) { return instr.gpr8.Value(); });
}
std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
u64 shl_tracked_register) {
- return TrackInstruction<u32>(state, pos,
- [shl_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
- instr.gpr0.Value() == shl_tracked_register;
- },
- [](auto instr, const auto&) {
- return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
- });
+ return TrackInstruction<u32>(
+ state, pos,
+ [shl_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+ instr.gpr0.Value() == shl_tracked_register;
+ },
+ [](auto instr, const auto&) {
+ return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+ });
}
std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index a041519b7..73155966f 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -98,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
- const Node value = [&]() {
- const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
+ const Node value = [&] {
+ Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
}
- const Node shifted = [&]() {
+ const Node shifted = [&] {
switch (instr.iadd3.mode) {
case Tegra::Shader::IAdd3Mode::RightShift:
// TODO(tech4me): According to
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 07778dc3e..e75ca4fdb 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -31,11 +31,11 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
std::size_t component) {
const TextureFormat format{descriptor.format};
switch (format) {
- case TextureFormat::R16_G16_B16_A16:
- case TextureFormat::R32_G32_B32_A32:
- case TextureFormat::R32_G32_B32:
- case TextureFormat::R32_G32:
- case TextureFormat::R16_G16:
+ case TextureFormat::R16G16B16A16:
+ case TextureFormat::R32G32B32A32:
+ case TextureFormat::R32G32B32:
+ case TextureFormat::R32G32:
+ case TextureFormat::R16G16:
case TextureFormat::R32:
case TextureFormat::R16:
case TextureFormat::R8:
@@ -97,7 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
break;
case TextureFormat::B5G6R5:
case TextureFormat::B6G5R5:
- case TextureFormat::BF10GF11RF11:
+ case TextureFormat::B10G11R11:
if (component == 0) {
return descriptor.b_type;
}
@@ -108,9 +108,9 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
return descriptor.r_type;
}
break;
- case TextureFormat::G8R24:
- case TextureFormat::G24R8:
- case TextureFormat::G8R8:
+ case TextureFormat::R24G8:
+ case TextureFormat::R8G24:
+ case TextureFormat::R8G8:
case TextureFormat::G4R4:
if (component == 0) {
return descriptor.g_type;
@@ -137,15 +137,15 @@ bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
u32 GetComponentSize(TextureFormat format, std::size_t component) {
switch (format) {
- case TextureFormat::R32_G32_B32_A32:
+ case TextureFormat::R32G32B32A32:
return 32;
- case TextureFormat::R16_G16_B16_A16:
+ case TextureFormat::R16G16B16A16:
return 16;
- case TextureFormat::R32_G32_B32:
+ case TextureFormat::R32G32B32:
return component <= 2 ? 32 : 0;
- case TextureFormat::R32_G32:
+ case TextureFormat::R32G32:
return component <= 1 ? 32 : 0;
- case TextureFormat::R16_G16:
+ case TextureFormat::R16G16:
return component <= 1 ? 16 : 0;
case TextureFormat::R32:
return component == 0 ? 32 : 0;
@@ -192,7 +192,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 6;
}
return 0;
- case TextureFormat::BF10GF11RF11:
+ case TextureFormat::B10G11R11:
if (component == 1 || component == 2) {
return 11;
}
@@ -200,7 +200,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 10;
}
return 0;
- case TextureFormat::G8R24:
+ case TextureFormat::R24G8:
if (component == 0) {
return 8;
}
@@ -208,7 +208,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 24;
}
return 0;
- case TextureFormat::G24R8:
+ case TextureFormat::R8G24:
if (component == 0) {
return 8;
}
@@ -216,7 +216,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 24;
}
return 0;
- case TextureFormat::G8R8:
+ case TextureFormat::R8G8:
return (component == 0 || component == 1) ? 8 : 0;
case TextureFormat::G4R4:
return (component == 0 || component == 1) ? 4 : 0;
@@ -231,25 +231,25 @@ std::size_t GetImageComponentMask(TextureFormat format) {
constexpr u8 B = 0b0100;
constexpr u8 A = 0b1000;
switch (format) {
- case TextureFormat::R32_G32_B32_A32:
- case TextureFormat::R16_G16_B16_A16:
+ case TextureFormat::R32G32B32A32:
+ case TextureFormat::R16G16B16A16:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A4B4G4R4:
case TextureFormat::A5B5G5R1:
case TextureFormat::A1B5G5R5:
return std::size_t{R | G | B | A};
- case TextureFormat::R32_G32_B32:
+ case TextureFormat::R32G32B32:
case TextureFormat::R32_B24G8:
case TextureFormat::B5G6R5:
case TextureFormat::B6G5R5:
- case TextureFormat::BF10GF11RF11:
+ case TextureFormat::B10G11R11:
return std::size_t{R | G | B};
- case TextureFormat::R32_G32:
- case TextureFormat::R16_G16:
- case TextureFormat::G8R24:
- case TextureFormat::G24R8:
- case TextureFormat::G8R8:
+ case TextureFormat::R32G32:
+ case TextureFormat::R16G16:
+ case TextureFormat::R24G8:
+ case TextureFormat::R8G24:
+ case TextureFormat::R8G8:
case TextureFormat::G4R4:
return std::size_t{R | G};
case TextureFormat::R32:
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 63adbc4a3..e2bba88dd 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -386,7 +386,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::RED: {
- UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
+ UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
+ static_cast<int>(instr.red.type.Value()));
const auto [real_address, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true, true);
if (!real_address || !base_address) {
@@ -471,9 +472,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
const auto [base_address, index, offset] =
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT_OR_EXECUTE_MSG(base_address != nullptr,
- { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
- "Global memory tracking failed");
+ ASSERT_OR_EXECUTE_MSG(
+ base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
+ "Global memory tracking failed");
bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index c0a8f233f..29a7cfbfe 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -75,8 +75,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
const Node value = [this, instr] {
switch (instr.sys20) {
case SystemVariable::LaneId:
- LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
- return Immediate(0U);
+ return Operation(OperationCode::ThreadId);
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 64ba60ea2..1c0957277 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
return pc;
}
-Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
- Tegra::Shader::VideoType type, u64 byte_height) {
+Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
+ u64 byte_height) {
if (!is_chunk) {
return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
}
- const Node zero = Immediate(0);
switch (type) {
- case Tegra::Shader::VideoType::Size16_Low:
+ case VideoType::Size16_Low:
return BitfieldExtract(op, 0, 16);
- case Tegra::Shader::VideoType::Size16_High:
+ case VideoType::Size16_High:
return BitfieldExtract(op, 16, 16);
- case Tegra::Shader::VideoType::Size32:
+ case VideoType::Size32:
// TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
// (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
UNIMPLEMENTED();
- return zero;
- case Tegra::Shader::VideoType::Invalid:
+ return Immediate(0);
+ case VideoType::Invalid:
UNREACHABLE_MSG("Invalid instruction encoding");
- return zero;
+ return Immediate(0);
default:
UNREACHABLE();
- return zero;
+ return Immediate(0);
}
}
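For reference, the BitfieldExtract(op, offset, bits) node used throughout GetVideoOperand corresponds to the ordinary shift-and-mask extraction. In plain C++ (assuming bits < 32 so the mask shift is well defined):

#include <cstdint>

// Extract `bits` bits of `value` starting at bit `offset` (requires bits < 32).
constexpr std::uint32_t BitfieldExtract(std::uint32_t value, std::uint32_t offset,
                                        std::uint32_t bits) {
    return (value >> offset) & ((std::uint32_t{1} << bits) - 1u);
}
static_assert(BitfieldExtract(0xABCD1234u, 16, 16) == 0xABCDu);
static_assert(BitfieldExtract(0xABCD1234u, 0, 16) == 0x1234u);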
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index c83dc6615..233b8fa42 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -81,20 +81,21 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
SetTemporary(bb, 0, product);
product = GetTemporary(0);
- const Node original_c = op_c;
+ Node original_c = op_c;
const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
- op_c = [&]() {
+ op_c = [&] {
switch (set_mode) {
case Tegra::Shader::XmadMode::None:
return original_c;
case Tegra::Shader::XmadMode::CLo:
- return BitfieldExtract(original_c, 0, 16);
+ return BitfieldExtract(std::move(original_c), 0, 16);
case Tegra::Shader::XmadMode::CHi:
- return BitfieldExtract(original_c, 16, 16);
+ return BitfieldExtract(std::move(original_c), 16, 16);
case Tegra::Shader::XmadMode::CBcc: {
- const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
- original_b, Immediate(16));
- return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
+ Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
+ original_b, Immediate(16));
+ return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
+ std::move(shifted_b));
}
case Tegra::Shader::XmadMode::CSfu: {
const Node comp_a =
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 5071c83ca..e18ccba8e 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -16,11 +16,10 @@
namespace VideoCommon::Shader {
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
- const auto& gpu{system.GPU().Maxwell3D()};
- const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
- return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+ const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
+ return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
}
bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
index be90d24fd..4624d38e6 100644
--- a/src/video_core/shader/memory_util.h
+++ b/src/video_core/shader/memory_util.h
@@ -11,10 +11,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
-namespace Core {
-class System;
-}
-
namespace Tegra {
class MemoryManager;
}
@@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
/// Gets if the current instruction offset is a scheduler instruction
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index e322c3402..29d794b34 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -112,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
}
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
- const Node node = MakeNode<InternalFlagNode>(flag);
+ Node node = MakeNode<InternalFlagNode>(flag);
if (negated) {
- return Operation(OperationCode::LogicalNegate, node);
+ return Operation(OperationCode::LogicalNegate, std::move(node));
}
return node;
}
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index b7608fc7b..015a789d6 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -209,11 +209,11 @@ private:
}
// Remove them from the cache
- const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
+ const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) {
return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
removed_shaders.end();
};
- storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
+ std::erase_if(storage, is_removed);
}
/// @brief Creates a new entry in the lookup cache and returns its pointer
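The std::erase_if change above swaps the classic erase-remove idiom for its C++20 equivalent. Both remove every matching element, but the single call cannot be mis-paired; forgetting the trailing storage.end() argument to erase is a classic bug that silently erases only one element. Side by side:

#include <algorithm>
#include <vector>

// Pre-C++20 erase-remove idiom: shift the survivors down, then chop the tail.
void RemoveEvensOld(std::vector<int>& v) {
    v.erase(std::remove_if(v.begin(), v.end(), [](int x) { return x % 2 == 0; }),
            v.end());
}

// C++20: the same effect in one call.
void RemoveEvensNew(std::vector<int>& v) {
    std::erase_if(v, [](int x) { return x % 2 == 0; });
}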
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
new file mode 100644
index 000000000..c3c71657d
--- /dev/null
+++ b/src/video_core/shader_notify.cpp
@@ -0,0 +1,42 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/shader_notify.h"
+
+using namespace std::chrono_literals;
+
+namespace VideoCore {
+namespace {
+constexpr auto UPDATE_TICK = 32ms;
+}
+
+ShaderNotify::ShaderNotify() = default;
+ShaderNotify::~ShaderNotify() = default;
+
+std::size_t ShaderNotify::GetShadersBuilding() {
+ const auto now = std::chrono::high_resolution_clock::now();
+ const auto diff = now - last_update;
+ if (diff > UPDATE_TICK) {
+ std::shared_lock lock(mutex);
+ last_updated_count = accurate_count;
+ }
+ return last_updated_count;
+}
+
+std::size_t ShaderNotify::GetShadersBuildingAccurate() {
+ std::shared_lock lock{mutex};
+ return accurate_count;
+}
+
+void ShaderNotify::MarkShaderComplete() {
+ std::unique_lock lock{mutex};
+ accurate_count--;
+}
+
+void ShaderNotify::MarkShaderBuilding() {
+ std::unique_lock lock{mutex};
+ accurate_count++;
+}
+
+} // namespace VideoCore
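ShaderNotify pairs std::shared_mutex with both lock types: the frequent read paths take std::shared_lock so they can run concurrently, while the increment/decrement paths take std::unique_lock for exclusive access. The split in miniature:

#include <cstddef>
#include <mutex>
#include <shared_mutex>

class BuildCounter {
public:
    std::size_t Get() const {
        std::shared_lock lock{mutex}; // many readers may hold this at once
        return count;
    }
    void Increment() {
        std::unique_lock lock{mutex}; // writers are exclusive
        ++count;
    }

private:
    mutable std::shared_mutex mutex;
    std::size_t count{};
};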
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
new file mode 100644
index 000000000..a9c92d179
--- /dev/null
+++ b/src/video_core/shader_notify.h
@@ -0,0 +1,29 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+#include <shared_mutex>
+#include "common/common_types.h"
+
+namespace VideoCore {
+class ShaderNotify {
+public:
+ ShaderNotify();
+ ~ShaderNotify();
+
+ std::size_t GetShadersBuilding();
+ std::size_t GetShadersBuildingAccurate();
+
+ void MarkShaderComplete();
+ void MarkShaderBuilding();
+
+private:
+ std::size_t last_updated_count{};
+ std::size_t accurate_count{};
+ std::shared_mutex mutex;
+ std::chrono::high_resolution_clock::time_point last_update{};
+};
+} // namespace VideoCore
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index bbe93903c..1688267bb 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -74,117 +74,131 @@ bool SurfaceTargetIsArray(SurfaceTarget target) {
PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
- case Tegra::DepthFormat::S8_Z24_UNORM:
- return PixelFormat::S8Z24;
- case Tegra::DepthFormat::Z24_S8_UNORM:
- return PixelFormat::Z24S8;
- case Tegra::DepthFormat::Z32_FLOAT:
- return PixelFormat::Z32F;
- case Tegra::DepthFormat::Z16_UNORM:
- return PixelFormat::Z16;
- case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
- return PixelFormat::Z32FS8;
+ case Tegra::DepthFormat::S8_UINT_Z24_UNORM:
+ return PixelFormat::S8_UINT_D24_UNORM;
+ case Tegra::DepthFormat::D24S8_UNORM:
+ return PixelFormat::D24_UNORM_S8_UINT;
+ case Tegra::DepthFormat::D32_FLOAT:
+ return PixelFormat::D32_FLOAT;
+ case Tegra::DepthFormat::D16_UNORM:
+ return PixelFormat::D16_UNORM;
+ case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT:
+ return PixelFormat::D32_FLOAT_S8_UINT;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- return PixelFormat::S8Z24;
+ UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
+ return PixelFormat::S8_UINT_D24_UNORM;
}
}
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
- case Tegra::RenderTargetFormat::RGBA8_SRGB:
- return PixelFormat::RGBA8_SRGB;
- case Tegra::RenderTargetFormat::RGBA8_UNORM:
- return PixelFormat::ABGR8U;
- case Tegra::RenderTargetFormat::RGBA8_SNORM:
- return PixelFormat::ABGR8S;
- case Tegra::RenderTargetFormat::RGBA8_UINT:
- return PixelFormat::ABGR8UI;
- case Tegra::RenderTargetFormat::BGRA8_SRGB:
- return PixelFormat::BGRA8_SRGB;
- case Tegra::RenderTargetFormat::BGRA8_UNORM:
- return PixelFormat::BGRA8;
- case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
- return PixelFormat::A2B10G10R10U;
- case Tegra::RenderTargetFormat::RGBA16_FLOAT:
- return PixelFormat::RGBA16F;
- case Tegra::RenderTargetFormat::RGBA16_UNORM:
- return PixelFormat::RGBA16U;
- case Tegra::RenderTargetFormat::RGBA16_SNORM:
- return PixelFormat::RGBA16S;
- case Tegra::RenderTargetFormat::RGBA16_UINT:
- return PixelFormat::RGBA16UI;
- case Tegra::RenderTargetFormat::RGBA32_FLOAT:
- return PixelFormat::RGBA32F;
- case Tegra::RenderTargetFormat::RG32_FLOAT:
- return PixelFormat::RG32F;
- case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
- return PixelFormat::R11FG11FB10F;
- case Tegra::RenderTargetFormat::B5G6R5_UNORM:
- return PixelFormat::B5G6R5U;
- case Tegra::RenderTargetFormat::BGR5A1_UNORM:
- return PixelFormat::A1B5G5R5U;
- case Tegra::RenderTargetFormat::RGBA32_UINT:
- return PixelFormat::RGBA32UI;
- case Tegra::RenderTargetFormat::R8_UNORM:
- return PixelFormat::R8U;
- case Tegra::RenderTargetFormat::R8_UINT:
- return PixelFormat::R8UI;
- case Tegra::RenderTargetFormat::RG16_FLOAT:
- return PixelFormat::RG16F;
- case Tegra::RenderTargetFormat::RG16_UINT:
- return PixelFormat::RG16UI;
- case Tegra::RenderTargetFormat::RG16_SINT:
- return PixelFormat::RG16I;
- case Tegra::RenderTargetFormat::RG16_UNORM:
- return PixelFormat::RG16;
- case Tegra::RenderTargetFormat::RG16_SNORM:
- return PixelFormat::RG16S;
- case Tegra::RenderTargetFormat::RG8_UNORM:
- return PixelFormat::RG8U;
- case Tegra::RenderTargetFormat::RG8_SNORM:
- return PixelFormat::RG8S;
- case Tegra::RenderTargetFormat::RG8_UINT:
- return PixelFormat::RG8UI;
- case Tegra::RenderTargetFormat::R16_FLOAT:
- return PixelFormat::R16F;
+ case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT:
+ return PixelFormat::R32G32B32A32_FLOAT;
+ case Tegra::RenderTargetFormat::R32G32B32A32_SINT:
+ return PixelFormat::R32G32B32A32_SINT;
+ case Tegra::RenderTargetFormat::R32G32B32A32_UINT:
+ return PixelFormat::R32G32B32A32_UINT;
+ case Tegra::RenderTargetFormat::R16G16B16A16_UNORM:
+ return PixelFormat::R16G16B16A16_UNORM;
+ case Tegra::RenderTargetFormat::R16G16B16A16_SNORM:
+ return PixelFormat::R16G16B16A16_SNORM;
+ case Tegra::RenderTargetFormat::R16G16B16A16_SINT:
+ return PixelFormat::R16G16B16A16_SINT;
+ case Tegra::RenderTargetFormat::R16G16B16A16_UINT:
+ return PixelFormat::R16G16B16A16_UINT;
+ case Tegra::RenderTargetFormat::R16G16B16A16_FLOAT:
+ return PixelFormat::R16G16B16A16_FLOAT;
+ case Tegra::RenderTargetFormat::R32G32_FLOAT:
+ return PixelFormat::R32G32_FLOAT;
+ case Tegra::RenderTargetFormat::R32G32_SINT:
+ return PixelFormat::R32G32_SINT;
+ case Tegra::RenderTargetFormat::R32G32_UINT:
+ return PixelFormat::R32G32_UINT;
+ case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT:
+ return PixelFormat::R16G16B16X16_FLOAT;
+ case Tegra::RenderTargetFormat::B8G8R8A8_UNORM:
+ return PixelFormat::B8G8R8A8_UNORM;
+ case Tegra::RenderTargetFormat::B8G8R8A8_SRGB:
+ return PixelFormat::B8G8R8A8_SRGB;
+ case Tegra::RenderTargetFormat::A2B10G10R10_UNORM:
+ return PixelFormat::A2B10G10R10_UNORM;
+ case Tegra::RenderTargetFormat::A2B10G10R10_UINT:
+ return PixelFormat::A2B10G10R10_UINT;
+ case Tegra::RenderTargetFormat::A8B8G8R8_UNORM:
+ return PixelFormat::A8B8G8R8_UNORM;
+ case Tegra::RenderTargetFormat::A8B8G8R8_SRGB:
+ return PixelFormat::A8B8G8R8_SRGB;
+ case Tegra::RenderTargetFormat::A8B8G8R8_SNORM:
+ return PixelFormat::A8B8G8R8_SNORM;
+ case Tegra::RenderTargetFormat::A8B8G8R8_SINT:
+ return PixelFormat::A8B8G8R8_SINT;
+ case Tegra::RenderTargetFormat::A8B8G8R8_UINT:
+ return PixelFormat::A8B8G8R8_UINT;
+ case Tegra::RenderTargetFormat::R16G16_UNORM:
+ return PixelFormat::R16G16_UNORM;
+ case Tegra::RenderTargetFormat::R16G16_SNORM:
+ return PixelFormat::R16G16_SNORM;
+ case Tegra::RenderTargetFormat::R16G16_SINT:
+ return PixelFormat::R16G16_SINT;
+ case Tegra::RenderTargetFormat::R16G16_UINT:
+ return PixelFormat::R16G16_UINT;
+ case Tegra::RenderTargetFormat::R16G16_FLOAT:
+ return PixelFormat::R16G16_FLOAT;
+ case Tegra::RenderTargetFormat::B10G11R11_FLOAT:
+ return PixelFormat::B10G11R11_FLOAT;
+ case Tegra::RenderTargetFormat::R32_SINT:
+ return PixelFormat::R32_SINT;
+ case Tegra::RenderTargetFormat::R32_UINT:
+ return PixelFormat::R32_UINT;
+ case Tegra::RenderTargetFormat::R32_FLOAT:
+ return PixelFormat::R32_FLOAT;
+ case Tegra::RenderTargetFormat::R5G6B5_UNORM:
+ return PixelFormat::R5G6B5_UNORM;
+ case Tegra::RenderTargetFormat::A1R5G5B5_UNORM:
+ return PixelFormat::A1R5G5B5_UNORM;
+ case Tegra::RenderTargetFormat::R8G8_UNORM:
+ return PixelFormat::R8G8_UNORM;
+ case Tegra::RenderTargetFormat::R8G8_SNORM:
+ return PixelFormat::R8G8_SNORM;
+ case Tegra::RenderTargetFormat::R8G8_SINT:
+ return PixelFormat::R8G8_SINT;
+ case Tegra::RenderTargetFormat::R8G8_UINT:
+ return PixelFormat::R8G8_UINT;
case Tegra::RenderTargetFormat::R16_UNORM:
- return PixelFormat::R16U;
+ return PixelFormat::R16_UNORM;
case Tegra::RenderTargetFormat::R16_SNORM:
- return PixelFormat::R16S;
- case Tegra::RenderTargetFormat::R16_UINT:
- return PixelFormat::R16UI;
+ return PixelFormat::R16_SNORM;
case Tegra::RenderTargetFormat::R16_SINT:
- return PixelFormat::R16I;
- case Tegra::RenderTargetFormat::R32_FLOAT:
- return PixelFormat::R32F;
- case Tegra::RenderTargetFormat::R32_SINT:
- return PixelFormat::R32I;
- case Tegra::RenderTargetFormat::R32_UINT:
- return PixelFormat::R32UI;
- case Tegra::RenderTargetFormat::RG32_UINT:
- return PixelFormat::RG32UI;
- case Tegra::RenderTargetFormat::RGBX16_FLOAT:
- return PixelFormat::RGBX16F;
+ return PixelFormat::R16_SINT;
+ case Tegra::RenderTargetFormat::R16_UINT:
+ return PixelFormat::R16_UINT;
+ case Tegra::RenderTargetFormat::R16_FLOAT:
+ return PixelFormat::R16_FLOAT;
+ case Tegra::RenderTargetFormat::R8_UNORM:
+ return PixelFormat::R8_UNORM;
+ case Tegra::RenderTargetFormat::R8_SNORM:
+ return PixelFormat::R8_SNORM;
+ case Tegra::RenderTargetFormat::R8_SINT:
+ return PixelFormat::R8_SINT;
+ case Tegra::RenderTargetFormat::R8_UINT:
+ return PixelFormat::R8_UINT;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- return PixelFormat::RGBA8_SRGB;
+ UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<int>(format));
+ return PixelFormat::A8B8G8R8_UNORM;
}
}
PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
- case Tegra::FramebufferConfig::PixelFormat::ABGR8:
- return PixelFormat::ABGR8U;
- case Tegra::FramebufferConfig::PixelFormat::RGB565:
- return PixelFormat::B5G6R5U;
- case Tegra::FramebufferConfig::PixelFormat::BGRA8:
- return PixelFormat::BGRA8;
+ case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
+ return PixelFormat::A8B8G8R8_UNORM;
+ case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
+ return PixelFormat::R5G6B5_UNORM;
+ case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM:
+ return PixelFormat::B8G8R8A8_UNORM;
default:
UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
- return PixelFormat::ABGR8U;
+ return PixelFormat::A8B8G8R8_UNORM;
}
}
@@ -212,27 +226,27 @@ SurfaceType GetFormatType(PixelFormat pixel_format) {
bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
- case PixelFormat::ASTC_2D_4X4:
- case PixelFormat::ASTC_2D_5X4:
- case PixelFormat::ASTC_2D_5X5:
- case PixelFormat::ASTC_2D_8X8:
- case PixelFormat::ASTC_2D_8X5:
+ case PixelFormat::ASTC_2D_4X4_UNORM:
+ case PixelFormat::ASTC_2D_5X4_UNORM:
+ case PixelFormat::ASTC_2D_5X5_UNORM:
+ case PixelFormat::ASTC_2D_8X8_UNORM:
+ case PixelFormat::ASTC_2D_8X5_UNORM:
case PixelFormat::ASTC_2D_4X4_SRGB:
case PixelFormat::ASTC_2D_5X4_SRGB:
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_8X5_SRGB:
- case PixelFormat::ASTC_2D_10X8:
+ case PixelFormat::ASTC_2D_10X8_UNORM:
case PixelFormat::ASTC_2D_10X8_SRGB:
- case PixelFormat::ASTC_2D_6X6:
+ case PixelFormat::ASTC_2D_6X6_UNORM:
case PixelFormat::ASTC_2D_6X6_SRGB:
- case PixelFormat::ASTC_2D_10X10:
+ case PixelFormat::ASTC_2D_10X10_UNORM:
case PixelFormat::ASTC_2D_10X10_SRGB:
- case PixelFormat::ASTC_2D_12X12:
+ case PixelFormat::ASTC_2D_12X12_UNORM:
case PixelFormat::ASTC_2D_12X12_SRGB:
- case PixelFormat::ASTC_2D_8X6:
+ case PixelFormat::ASTC_2D_8X6_UNORM:
case PixelFormat::ASTC_2D_8X6_SRGB:
- case PixelFormat::ASTC_2D_6X5:
+ case PixelFormat::ASTC_2D_6X5_UNORM:
case PixelFormat::ASTC_2D_6X5_SRGB:
return true;
default:
@@ -242,12 +256,12 @@ bool IsPixelFormatASTC(PixelFormat format) {
bool IsPixelFormatSRGB(PixelFormat format) {
switch (format) {
- case PixelFormat::RGBA8_SRGB:
- case PixelFormat::BGRA8_SRGB:
- case PixelFormat::DXT1_SRGB:
- case PixelFormat::DXT23_SRGB:
- case PixelFormat::DXT45_SRGB:
- case PixelFormat::BC7U_SRGB:
+ case PixelFormat::A8B8G8R8_SRGB:
+ case PixelFormat::B8G8R8A8_SRGB:
+ case PixelFormat::BC1_RGBA_SRGB:
+ case PixelFormat::BC2_SRGB:
+ case PixelFormat::BC3_SRGB:
+ case PixelFormat::BC7_SRGB:
case PixelFormat::ASTC_2D_4X4_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_8X5_SRGB:
@@ -269,25 +283,4 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)};
}
-bool IsFormatBCn(PixelFormat format) {
- switch (format) {
- case PixelFormat::DXT1:
- case PixelFormat::DXT23:
- case PixelFormat::DXT45:
- case PixelFormat::DXN1:
- case PixelFormat::DXN2SNORM:
- case PixelFormat::DXN2UNORM:
- case PixelFormat::BC7U:
- case PixelFormat::BC6H_UF16:
- case PixelFormat::BC6H_SF16:
- case PixelFormat::DXT1_SRGB:
- case PixelFormat::DXT23_SRGB:
- case PixelFormat::DXT45_SRGB:
- case PixelFormat::BC7U_SRGB:
- return true;
- default:
- return false;
- }
-}
-
} // namespace VideoCore::Surface
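
With the rename, every translation helper in surface.cpp returns Vulkan-style identifiers, so a caller can read the component order and numeric type straight off the name. A usage sketch, assuming the headers touched in this patch are included:

    // Hypothetical caller; Tegra::DepthFormat and the helper come from the
    // headers changed in this diff.
    using namespace VideoCore::Surface;

    const PixelFormat fmt =
        PixelFormatFromDepthFormat(Tegra::DepthFormat::D24S8_UNORM);
    // fmt == PixelFormat::D24_UNORM_S8_UINT, matching Vulkan's
    // VK_FORMAT_D24_UNORM_S8_UINT naming convention.
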
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 6da6a1b97..cfd12fa61 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -15,94 +15,105 @@
namespace VideoCore::Surface {
enum class PixelFormat {
- ABGR8U = 0,
- ABGR8S = 1,
- ABGR8UI = 2,
- B5G6R5U = 3,
- A2B10G10R10U = 4,
- A1B5G5R5U = 5,
- R8U = 6,
- R8UI = 7,
- RGBA16F = 8,
- RGBA16U = 9,
- RGBA16S = 10,
- RGBA16UI = 11,
- R11FG11FB10F = 12,
- RGBA32UI = 13,
- DXT1 = 14,
- DXT23 = 15,
- DXT45 = 16,
- DXN1 = 17, // This is also known as BC4
- DXN2UNORM = 18,
- DXN2SNORM = 19,
- BC7U = 20,
- BC6H_UF16 = 21,
- BC6H_SF16 = 22,
- ASTC_2D_4X4 = 23,
- BGRA8 = 24,
- RGBA32F = 25,
- RG32F = 26,
- R32F = 27,
- R16F = 28,
- R16U = 29,
- R16S = 30,
- R16UI = 31,
- R16I = 32,
- RG16 = 33,
- RG16F = 34,
- RG16UI = 35,
- RG16I = 36,
- RG16S = 37,
- RGB32F = 38,
- RGBA8_SRGB = 39,
- RG8U = 40,
- RG8S = 41,
- RG8UI = 42,
- RG32UI = 43,
- RGBX16F = 44,
- R32UI = 45,
- R32I = 46,
- ASTC_2D_8X8 = 47,
- ASTC_2D_8X5 = 48,
- ASTC_2D_5X4 = 49,
- BGRA8_SRGB = 50,
- DXT1_SRGB = 51,
- DXT23_SRGB = 52,
- DXT45_SRGB = 53,
- BC7U_SRGB = 54,
- R4G4B4A4U = 55,
- ASTC_2D_4X4_SRGB = 56,
- ASTC_2D_8X8_SRGB = 57,
- ASTC_2D_8X5_SRGB = 58,
- ASTC_2D_5X4_SRGB = 59,
- ASTC_2D_5X5 = 60,
- ASTC_2D_5X5_SRGB = 61,
- ASTC_2D_10X8 = 62,
- ASTC_2D_10X8_SRGB = 63,
- ASTC_2D_6X6 = 64,
- ASTC_2D_6X6_SRGB = 65,
- ASTC_2D_10X10 = 66,
- ASTC_2D_10X10_SRGB = 67,
- ASTC_2D_12X12 = 68,
- ASTC_2D_12X12_SRGB = 69,
- ASTC_2D_8X6 = 70,
- ASTC_2D_8X6_SRGB = 71,
- ASTC_2D_6X5 = 72,
- ASTC_2D_6X5_SRGB = 73,
- E5B9G9R9F = 74,
+ A8B8G8R8_UNORM,
+ A8B8G8R8_SNORM,
+ A8B8G8R8_SINT,
+ A8B8G8R8_UINT,
+ R5G6B5_UNORM,
+ B5G6R5_UNORM,
+ A1R5G5B5_UNORM,
+ A2B10G10R10_UNORM,
+ A2B10G10R10_UINT,
+ A1B5G5R5_UNORM,
+ R8_UNORM,
+ R8_SNORM,
+ R8_SINT,
+ R8_UINT,
+ R16G16B16A16_FLOAT,
+ R16G16B16A16_UNORM,
+ R16G16B16A16_SNORM,
+ R16G16B16A16_SINT,
+ R16G16B16A16_UINT,
+ B10G11R11_FLOAT,
+ R32G32B32A32_UINT,
+ BC1_RGBA_UNORM,
+ BC2_UNORM,
+ BC3_UNORM,
+ BC4_UNORM,
+ BC4_SNORM,
+ BC5_UNORM,
+ BC5_SNORM,
+ BC7_UNORM,
+ BC6H_UFLOAT,
+ BC6H_SFLOAT,
+ ASTC_2D_4X4_UNORM,
+ B8G8R8A8_UNORM,
+ R32G32B32A32_FLOAT,
+ R32G32B32A32_SINT,
+ R32G32_FLOAT,
+ R32G32_SINT,
+ R32_FLOAT,
+ R16_FLOAT,
+ R16_UNORM,
+ R16_SNORM,
+ R16_UINT,
+ R16_SINT,
+ R16G16_UNORM,
+ R16G16_FLOAT,
+ R16G16_UINT,
+ R16G16_SINT,
+ R16G16_SNORM,
+ R32G32B32_FLOAT,
+ A8B8G8R8_SRGB,
+ R8G8_UNORM,
+ R8G8_SNORM,
+ R8G8_SINT,
+ R8G8_UINT,
+ R32G32_UINT,
+ R16G16B16X16_FLOAT,
+ R32_UINT,
+ R32_SINT,
+ ASTC_2D_8X8_UNORM,
+ ASTC_2D_8X5_UNORM,
+ ASTC_2D_5X4_UNORM,
+ B8G8R8A8_SRGB,
+ BC1_RGBA_SRGB,
+ BC2_SRGB,
+ BC3_SRGB,
+ BC7_SRGB,
+ A4B4G4R4_UNORM,
+ ASTC_2D_4X4_SRGB,
+ ASTC_2D_8X8_SRGB,
+ ASTC_2D_8X5_SRGB,
+ ASTC_2D_5X4_SRGB,
+ ASTC_2D_5X5_UNORM,
+ ASTC_2D_5X5_SRGB,
+ ASTC_2D_10X8_UNORM,
+ ASTC_2D_10X8_SRGB,
+ ASTC_2D_6X6_UNORM,
+ ASTC_2D_6X6_SRGB,
+ ASTC_2D_10X10_UNORM,
+ ASTC_2D_10X10_SRGB,
+ ASTC_2D_12X12_UNORM,
+ ASTC_2D_12X12_SRGB,
+ ASTC_2D_8X6_UNORM,
+ ASTC_2D_8X6_SRGB,
+ ASTC_2D_6X5_UNORM,
+ ASTC_2D_6X5_SRGB,
+ E5B9G9R9_FLOAT,
MaxColorFormat,
// Depth formats
- Z32F = 75,
- Z16 = 76,
+ D32_FLOAT = MaxColorFormat,
+ D16_UNORM,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 77,
- S8Z24 = 78,
- Z32FS8 = 79,
+ D24_UNORM_S8_UINT = MaxDepthFormat,
+ S8_UINT_D24_UNORM,
+ D32_FLOAT_S8_UINT,
MaxDepthStencilFormat,
@@ -130,86 +141,97 @@ enum class SurfaceTarget {
};
constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
- 0, // ABGR8U
- 0, // ABGR8S
- 0, // ABGR8UI
- 0, // B5G6R5U
- 0, // A2B10G10R10U
- 0, // A1B5G5R5U
- 0, // R8U
- 0, // R8UI
- 0, // RGBA16F
- 0, // RGBA16U
- 0, // RGBA16S
- 0, // RGBA16UI
- 0, // R11FG11FB10F
- 0, // RGBA32UI
- 2, // DXT1
- 2, // DXT23
- 2, // DXT45
- 2, // DXN1
- 2, // DXN2UNORM
- 2, // DXN2SNORM
- 2, // BC7U
- 2, // BC6H_UF16
- 2, // BC6H_SF16
- 2, // ASTC_2D_4X4
- 0, // BGRA8
- 0, // RGBA32F
- 0, // RG32F
- 0, // R32F
- 0, // R16F
- 0, // R16U
- 0, // R16S
- 0, // R16UI
- 0, // R16I
- 0, // RG16
- 0, // RG16F
- 0, // RG16UI
- 0, // RG16I
- 0, // RG16S
- 0, // RGB32F
- 0, // RGBA8_SRGB
- 0, // RG8U
- 0, // RG8S
- 0, // RG8UI
- 0, // RG32UI
- 0, // RGBX16F
- 0, // R32UI
- 0, // R32I
- 2, // ASTC_2D_8X8
- 2, // ASTC_2D_8X5
- 2, // ASTC_2D_5X4
- 0, // BGRA8_SRGB
- 2, // DXT1_SRGB
- 2, // DXT23_SRGB
- 2, // DXT45_SRGB
- 2, // BC7U_SRGB
- 0, // R4G4B4A4U
+ 0, // A8B8G8R8_UNORM
+ 0, // A8B8G8R8_SNORM
+ 0, // A8B8G8R8_SINT
+ 0, // A8B8G8R8_UINT
+ 0, // R5G6B5_UNORM
+ 0, // B5G6R5_UNORM
+ 0, // A1R5G5B5_UNORM
+ 0, // A2B10G10R10_UNORM
+ 0, // A2B10G10R10_UINT
+ 0, // A1B5G5R5_UNORM
+ 0, // R8_UNORM
+ 0, // R8_SNORM
+ 0, // R8_SINT
+ 0, // R8_UINT
+ 0, // R16G16B16A16_FLOAT
+ 0, // R16G16B16A16_UNORM
+ 0, // R16G16B16A16_SNORM
+ 0, // R16G16B16A16_SINT
+ 0, // R16G16B16A16_UINT
+ 0, // B10G11R11_FLOAT
+ 0, // R32G32B32A32_UINT
+ 2, // BC1_RGBA_UNORM
+ 2, // BC2_UNORM
+ 2, // BC3_UNORM
+ 2, // BC4_UNORM
+ 2, // BC4_SNORM
+ 2, // BC5_UNORM
+ 2, // BC5_SNORM
+ 2, // BC7_UNORM
+ 2, // BC6H_UFLOAT
+ 2, // BC6H_SFLOAT
+ 2, // ASTC_2D_4X4_UNORM
+ 0, // B8G8R8A8_UNORM
+ 0, // R32G32B32A32_FLOAT
+ 0, // R32G32B32A32_SINT
+ 0, // R32G32_FLOAT
+ 0, // R32G32_SINT
+ 0, // R32_FLOAT
+ 0, // R16_FLOAT
+ 0, // R16_UNORM
+ 0, // R16_SNORM
+ 0, // R16_UINT
+ 0, // R16_SINT
+ 0, // R16G16_UNORM
+ 0, // R16G16_FLOAT
+ 0, // R16G16_UINT
+ 0, // R16G16_SINT
+ 0, // R16G16_SNORM
+ 0, // R32G32B32_FLOAT
+ 0, // A8B8G8R8_SRGB
+ 0, // R8G8_UNORM
+ 0, // R8G8_SNORM
+ 0, // R8G8_SINT
+ 0, // R8G8_UINT
+ 0, // R32G32_UINT
+ 0, // R16G16B16X16_FLOAT
+ 0, // R32_UINT
+ 0, // R32_SINT
+ 2, // ASTC_2D_8X8_UNORM
+ 2, // ASTC_2D_8X5_UNORM
+ 2, // ASTC_2D_5X4_UNORM
+ 0, // B8G8R8A8_SRGB
+ 2, // BC1_RGBA_SRGB
+ 2, // BC2_SRGB
+ 2, // BC3_SRGB
+ 2, // BC7_SRGB
+ 0, // A4B4G4R4_UNORM
2, // ASTC_2D_4X4_SRGB
2, // ASTC_2D_8X8_SRGB
2, // ASTC_2D_8X5_SRGB
2, // ASTC_2D_5X4_SRGB
- 2, // ASTC_2D_5X5
+ 2, // ASTC_2D_5X5_UNORM
2, // ASTC_2D_5X5_SRGB
- 2, // ASTC_2D_10X8
+ 2, // ASTC_2D_10X8_UNORM
2, // ASTC_2D_10X8_SRGB
- 2, // ASTC_2D_6X6
+ 2, // ASTC_2D_6X6_UNORM
2, // ASTC_2D_6X6_SRGB
- 2, // ASTC_2D_10X10
+ 2, // ASTC_2D_10X10_UNORM
2, // ASTC_2D_10X10_SRGB
- 2, // ASTC_2D_12X12
+ 2, // ASTC_2D_12X12_UNORM
2, // ASTC_2D_12X12_SRGB
- 2, // ASTC_2D_8X6
+ 2, // ASTC_2D_8X6_UNORM
2, // ASTC_2D_8X6_SRGB
- 2, // ASTC_2D_6X5
+ 2, // ASTC_2D_6X5_UNORM
2, // ASTC_2D_6X5_SRGB
- 0, // E5B9G9R9F
- 0, // Z32F
- 0, // Z16
- 0, // Z24S8
- 0, // S8Z24
- 0, // Z32FS8
+ 0, // E5B9G9R9_FLOAT
+ 0, // D32_FLOAT
+ 0, // D16_UNORM
+ 0, // D24_UNORM_S8_UINT
+ 0, // S8_UINT_D24_UNORM
+ 0, // D32_FLOAT_S8_UINT
}};
/**
@@ -229,86 +251,97 @@ inline constexpr u32 GetCompressionFactor(PixelFormat format) {
}
constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16S
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // RGBA8_SRGB
- 1, // RG8U
- 1, // RG8S
- 1, // RG8UI
- 1, // RG32UI
- 1, // RGBX16F
- 1, // R32UI
- 1, // R32I
- 8, // ASTC_2D_8X8
- 8, // ASTC_2D_8X5
- 5, // ASTC_2D_5X4
- 1, // BGRA8_SRGB
- 4, // DXT1_SRGB
- 4, // DXT23_SRGB
- 4, // DXT45_SRGB
- 4, // BC7U_SRGB
- 1, // R4G4B4A4U
+ 1, // A8B8G8R8_UNORM
+ 1, // A8B8G8R8_SNORM
+ 1, // A8B8G8R8_SINT
+ 1, // A8B8G8R8_UINT
+ 1, // R5G6B5_UNORM
+ 1, // B5G6R5_UNORM
+ 1, // A1R5G5B5_UNORM
+ 1, // A2B10G10R10_UNORM
+ 1, // A2B10G10R10_UINT
+ 1, // A1B5G5R5_UNORM
+ 1, // R8_UNORM
+ 1, // R8_SNORM
+ 1, // R8_SINT
+ 1, // R8_UINT
+ 1, // R16G16B16A16_FLOAT
+ 1, // R16G16B16A16_UNORM
+ 1, // R16G16B16A16_SNORM
+ 1, // R16G16B16A16_SINT
+ 1, // R16G16B16A16_UINT
+ 1, // B10G11R11_FLOAT
+ 1, // R32G32B32A32_UINT
+ 4, // BC1_RGBA_UNORM
+ 4, // BC2_UNORM
+ 4, // BC3_UNORM
+ 4, // BC4_UNORM
+ 4, // BC4_SNORM
+ 4, // BC5_UNORM
+ 4, // BC5_SNORM
+ 4, // BC7_UNORM
+ 4, // BC6H_UFLOAT
+ 4, // BC6H_SFLOAT
+ 4, // ASTC_2D_4X4_UNORM
+ 1, // B8G8R8A8_UNORM
+ 1, // R32G32B32A32_FLOAT
+ 1, // R32G32B32A32_SINT
+ 1, // R32G32_FLOAT
+ 1, // R32G32_SINT
+ 1, // R32_FLOAT
+ 1, // R16_FLOAT
+ 1, // R16_UNORM
+ 1, // R16_SNORM
+ 1, // R16_UINT
+ 1, // R16_SINT
+ 1, // R16G16_UNORM
+ 1, // R16G16_FLOAT
+ 1, // R16G16_UINT
+ 1, // R16G16_SINT
+ 1, // R16G16_SNORM
+ 1, // R32G32B32_FLOAT
+ 1, // A8B8G8R8_SRGB
+ 1, // R8G8_UNORM
+ 1, // R8G8_SNORM
+ 1, // R8G8_SINT
+ 1, // R8G8_UINT
+ 1, // R32G32_UINT
+ 1, // R16G16B16X16_FLOAT
+ 1, // R32_UINT
+ 1, // R32_SINT
+ 8, // ASTC_2D_8X8_UNORM
+ 8, // ASTC_2D_8X5_UNORM
+ 5, // ASTC_2D_5X4_UNORM
+ 1, // B8G8R8A8_SRGB
+ 4, // BC1_RGBA_SRGB
+ 4, // BC2_SRGB
+ 4, // BC3_SRGB
+ 4, // BC7_SRGB
+ 1, // A4B4G4R4_UNORM
4, // ASTC_2D_4X4_SRGB
8, // ASTC_2D_8X8_SRGB
8, // ASTC_2D_8X5_SRGB
5, // ASTC_2D_5X4_SRGB
- 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_UNORM
5, // ASTC_2D_5X5_SRGB
- 10, // ASTC_2D_10X8
+ 10, // ASTC_2D_10X8_UNORM
10, // ASTC_2D_10X8_SRGB
- 6, // ASTC_2D_6X6
+ 6, // ASTC_2D_6X6_UNORM
6, // ASTC_2D_6X6_SRGB
- 10, // ASTC_2D_10X10
+ 10, // ASTC_2D_10X10_UNORM
10, // ASTC_2D_10X10_SRGB
- 12, // ASTC_2D_12X12
+ 12, // ASTC_2D_12X12_UNORM
12, // ASTC_2D_12X12_SRGB
- 8, // ASTC_2D_8X6
+ 8, // ASTC_2D_8X6_UNORM
8, // ASTC_2D_8X6_SRGB
- 6, // ASTC_2D_6X5
+ 6, // ASTC_2D_6X5_UNORM
6, // ASTC_2D_6X5_SRGB
- 1, // E5B9G9R9F
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
+ 1, // E5B9G9R9_FLOAT
+ 1, // D32_FLOAT
+ 1, // D16_UNORM
+ 1, // D24_UNORM_S8_UINT
+ 1, // S8_UINT_D24_UNORM
+ 1, // D32_FLOAT_S8_UINT
}};
static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
@@ -320,86 +353,97 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
}
constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16S
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // RGBA8_SRGB
- 1, // RG8U
- 1, // RG8S
- 1, // RG8UI
- 1, // RG32UI
- 1, // RGBX16F
- 1, // R32UI
- 1, // R32I
- 8, // ASTC_2D_8X8
- 5, // ASTC_2D_8X5
- 4, // ASTC_2D_5X4
- 1, // BGRA8_SRGB
- 4, // DXT1_SRGB
- 4, // DXT23_SRGB
- 4, // DXT45_SRGB
- 4, // BC7U_SRGB
- 1, // R4G4B4A4U
+ 1, // A8B8G8R8_UNORM
+ 1, // A8B8G8R8_SNORM
+ 1, // A8B8G8R8_SINT
+ 1, // A8B8G8R8_UINT
+ 1, // R5G6B5_UNORM
+ 1, // B5G6R5_UNORM
+ 1, // A1R5G5B5_UNORM
+ 1, // A2B10G10R10_UNORM
+ 1, // A2B10G10R10_UINT
+ 1, // A1B5G5R5_UNORM
+ 1, // R8_UNORM
+ 1, // R8_SNORM
+ 1, // R8_SINT
+ 1, // R8_UINT
+ 1, // R16G16B16A16_FLOAT
+ 1, // R16G16B16A16_UNORM
+ 1, // R16G16B16A16_SNORM
+ 1, // R16G16B16A16_SINT
+ 1, // R16G16B16A16_UINT
+ 1, // B10G11R11_FLOAT
+ 1, // R32G32B32A32_UINT
+ 4, // BC1_RGBA_UNORM
+ 4, // BC2_UNORM
+ 4, // BC3_UNORM
+ 4, // BC4_UNORM
+ 4, // BC4_SNORM
+ 4, // BC5_UNORM
+ 4, // BC5_SNORM
+ 4, // BC7_UNORM
+ 4, // BC6H_UFLOAT
+ 4, // BC6H_SFLOAT
+ 4, // ASTC_2D_4X4_UNORM
+ 1, // B8G8R8A8_UNORM
+ 1, // R32G32B32A32_FLOAT
+ 1, // R32G32B32A32_SINT
+ 1, // R32G32_FLOAT
+ 1, // R32G32_SINT
+ 1, // R32_FLOAT
+ 1, // R16_FLOAT
+ 1, // R16_UNORM
+ 1, // R16_SNORM
+ 1, // R16_UINT
+ 1, // R16_SINT
+ 1, // R16G16_UNORM
+ 1, // R16G16_FLOAT
+ 1, // R16G16_UINT
+ 1, // R16G16_SINT
+ 1, // R16G16_SNORM
+ 1, // R32G32B32_FLOAT
+ 1, // A8B8G8R8_SRGB
+ 1, // R8G8_UNORM
+ 1, // R8G8_SNORM
+ 1, // R8G8_SINT
+ 1, // R8G8_UINT
+ 1, // R32G32_UINT
+ 1, // R16G16B16X16_FLOAT
+ 1, // R32_UINT
+ 1, // R32_SINT
+ 8, // ASTC_2D_8X8_UNORM
+ 5, // ASTC_2D_8X5_UNORM
+ 4, // ASTC_2D_5X4_UNORM
+ 1, // B8G8R8A8_SRGB
+ 4, // BC1_RGBA_SRGB
+ 4, // BC2_SRGB
+ 4, // BC3_SRGB
+ 4, // BC7_SRGB
+ 1, // A4B4G4R4_UNORM
4, // ASTC_2D_4X4_SRGB
8, // ASTC_2D_8X8_SRGB
5, // ASTC_2D_8X5_SRGB
4, // ASTC_2D_5X4_SRGB
- 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_UNORM
5, // ASTC_2D_5X5_SRGB
- 8, // ASTC_2D_10X8
+ 8, // ASTC_2D_10X8_UNORM
8, // ASTC_2D_10X8_SRGB
- 6, // ASTC_2D_6X6
+ 6, // ASTC_2D_6X6_UNORM
6, // ASTC_2D_6X6_SRGB
- 10, // ASTC_2D_10X10
+ 10, // ASTC_2D_10X10_UNORM
10, // ASTC_2D_10X10_SRGB
- 12, // ASTC_2D_12X12
+ 12, // ASTC_2D_12X12_UNORM
12, // ASTC_2D_12X12_SRGB
- 6, // ASTC_2D_8X6
+ 6, // ASTC_2D_8X6_UNORM
6, // ASTC_2D_8X6_SRGB
- 5, // ASTC_2D_6X5
+ 5, // ASTC_2D_6X5_UNORM
5, // ASTC_2D_6X5_SRGB
- 1, // E5B9G9R9F
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
+ 1, // E5B9G9R9_FLOAT
+ 1, // D32_FLOAT
+ 1, // D16_UNORM
+ 1, // D24_UNORM_S8_UINT
+ 1, // S8_UINT_D24_UNORM
+ 1, // D32_FLOAT_S8_UINT
}};
static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
@@ -411,86 +455,97 @@ static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
}
constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
- 32, // ABGR8U
- 32, // ABGR8S
- 32, // ABGR8UI
- 16, // B5G6R5U
- 32, // A2B10G10R10U
- 16, // A1B5G5R5U
- 8, // R8U
- 8, // R8UI
- 64, // RGBA16F
- 64, // RGBA16U
- 64, // RGBA16S
- 64, // RGBA16UI
- 32, // R11FG11FB10F
- 128, // RGBA32UI
- 64, // DXT1
- 128, // DXT23
- 128, // DXT45
- 64, // DXN1
- 128, // DXN2UNORM
- 128, // DXN2SNORM
- 128, // BC7U
- 128, // BC6H_UF16
- 128, // BC6H_SF16
- 128, // ASTC_2D_4X4
- 32, // BGRA8
- 128, // RGBA32F
- 64, // RG32F
- 32, // R32F
- 16, // R16F
- 16, // R16U
- 16, // R16S
- 16, // R16UI
- 16, // R16I
- 32, // RG16
- 32, // RG16F
- 32, // RG16UI
- 32, // RG16I
- 32, // RG16S
- 96, // RGB32F
- 32, // RGBA8_SRGB
- 16, // RG8U
- 16, // RG8S
- 16, // RG8UI
- 64, // RG32UI
- 64, // RGBX16F
- 32, // R32UI
- 32, // R32I
- 128, // ASTC_2D_8X8
- 128, // ASTC_2D_8X5
- 128, // ASTC_2D_5X4
- 32, // BGRA8_SRGB
- 64, // DXT1_SRGB
- 128, // DXT23_SRGB
- 128, // DXT45_SRGB
- 128, // BC7U
- 16, // R4G4B4A4U
+ 32, // A8B8G8R8_UNORM
+ 32, // A8B8G8R8_SNORM
+ 32, // A8B8G8R8_SINT
+ 32, // A8B8G8R8_UINT
+ 16, // R5G6B5_UNORM
+ 16, // B5G6R5_UNORM
+ 16, // A1R5G5B5_UNORM
+ 32, // A2B10G10R10_UNORM
+ 32, // A2B10G10R10_UINT
+ 16, // A1B5G5R5_UNORM
+ 8, // R8_UNORM
+ 8, // R8_SNORM
+ 8, // R8_SINT
+ 8, // R8_UINT
+ 64, // R16G16B16A16_FLOAT
+ 64, // R16G16B16A16_UNORM
+ 64, // R16G16B16A16_SNORM
+ 64, // R16G16B16A16_SINT
+ 64, // R16G16B16A16_UINT
+ 32, // B10G11R11_FLOAT
+ 128, // R32G32B32A32_UINT
+ 64, // BC1_RGBA_UNORM
+ 128, // BC2_UNORM
+ 128, // BC3_UNORM
+ 64, // BC4_UNORM
+ 64, // BC4_SNORM
+ 128, // BC5_UNORM
+ 128, // BC5_SNORM
+ 128, // BC7_UNORM
+ 128, // BC6H_UFLOAT
+ 128, // BC6H_SFLOAT
+ 128, // ASTC_2D_4X4_UNORM
+ 32, // B8G8R8A8_UNORM
+ 128, // R32G32B32A32_FLOAT
+ 128, // R32G32B32A32_SINT
+ 64, // R32G32_FLOAT
+ 64, // R32G32_SINT
+ 32, // R32_FLOAT
+ 16, // R16_FLOAT
+ 16, // R16_UNORM
+ 16, // R16_SNORM
+ 16, // R16_UINT
+ 16, // R16_SINT
+ 32, // R16G16_UNORM
+ 32, // R16G16_FLOAT
+ 32, // R16G16_UINT
+ 32, // R16G16_SINT
+ 32, // R16G16_SNORM
+ 96, // R32G32B32_FLOAT
+ 32, // A8B8G8R8_SRGB
+ 16, // R8G8_UNORM
+ 16, // R8G8_SNORM
+ 16, // R8G8_SINT
+ 16, // R8G8_UINT
+ 64, // R32G32_UINT
+ 64, // R16G16B16X16_FLOAT
+ 32, // R32_UINT
+ 32, // R32_SINT
+ 128, // ASTC_2D_8X8_UNORM
+ 128, // ASTC_2D_8X5_UNORM
+ 128, // ASTC_2D_5X4_UNORM
+ 32, // B8G8R8A8_SRGB
+ 64, // BC1_RGBA_SRGB
+ 128, // BC2_SRGB
+ 128, // BC3_SRGB
+ 128, // BC7_SRGB
+ 16, // A4B4G4R4_UNORM
128, // ASTC_2D_4X4_SRGB
128, // ASTC_2D_8X8_SRGB
128, // ASTC_2D_8X5_SRGB
128, // ASTC_2D_5X4_SRGB
- 128, // ASTC_2D_5X5
+ 128, // ASTC_2D_5X5_UNORM
128, // ASTC_2D_5X5_SRGB
- 128, // ASTC_2D_10X8
+ 128, // ASTC_2D_10X8_UNORM
128, // ASTC_2D_10X8_SRGB
- 128, // ASTC_2D_6X6
+ 128, // ASTC_2D_6X6_UNORM
128, // ASTC_2D_6X6_SRGB
- 128, // ASTC_2D_10X10
+ 128, // ASTC_2D_10X10_UNORM
128, // ASTC_2D_10X10_SRGB
- 128, // ASTC_2D_12X12
+ 128, // ASTC_2D_12X12_UNORM
128, // ASTC_2D_12X12_SRGB
- 128, // ASTC_2D_8X6
+ 128, // ASTC_2D_8X6_UNORM
128, // ASTC_2D_8X6_SRGB
- 128, // ASTC_2D_6X5
+ 128, // ASTC_2D_6X5_UNORM
128, // ASTC_2D_6X5_SRGB
- 32, // E5B9G9R9F
- 32, // Z32F
- 16, // Z16
- 32, // Z24S8
- 32, // S8Z24
- 64, // Z32FS8
+ 32, // E5B9G9R9_FLOAT
+ 32, // D32_FLOAT
+ 16, // D16_UNORM
+ 32, // D24_UNORM_S8_UINT
+ 32, // S8_UINT_D24_UNORM
+ 64, // D32_FLOAT_S8_UINT
}};
static constexpr u32 GetFormatBpp(PixelFormat format) {
@@ -529,7 +584,4 @@ bool IsPixelFormatSRGB(PixelFormat format);
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
-/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
-bool IsFormatBCn(PixelFormat format);
-
} // namespace VideoCore::Surface
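
Since the enumerators above no longer carry explicit values, a format's integer value is simply its position, and the constexpr tables must keep exactly the same order. A sketch of the positional lookup, assuming it is compiled inside surface.h where block_width_table is visible (the helper name is an illustration; the header's own accessor is GetDefaultBlockWidth):

    // Positional lookup; reordering PixelFormat without applying the same
    // reordering to the table silently corrupts every answer.
    constexpr u32 DefaultBlockWidth(PixelFormat format) {
        return block_width_table[static_cast<std::size_t>(format)];
    }

    // ASTC 8x5 blocks are 8 texels wide, per the table above.
    static_assert(DefaultBlockWidth(PixelFormat::ASTC_2D_8X5_UNORM) == 8);
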
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index f476f03b0..7d5a75648 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -19,8 +19,6 @@ constexpr auto SNORM = ComponentType::SNORM;
constexpr auto UNORM = ComponentType::UNORM;
constexpr auto SINT = ComponentType::SINT;
constexpr auto UINT = ComponentType::UINT;
-constexpr auto SNORM_FORCE_FP16 = ComponentType::SNORM_FORCE_FP16;
-constexpr auto UNORM_FORCE_FP16 = ComponentType::UNORM_FORCE_FP16;
constexpr auto FLOAT = ComponentType::FLOAT;
constexpr bool C = false; // Normal color
constexpr bool S = true; // Srgb
@@ -41,119 +39,126 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
-constexpr std::array<Table, 78> DefinitionTable = {{
- {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
- {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
- {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
- {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA8_SRGB},
+constexpr std::array<Table, 86> DefinitionTable = {{
+ {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
+ {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
+ {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
+ {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
+ {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB},
- {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5U},
+ {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM},
- {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10U},
+ {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM},
+ {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT},
- {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5U},
+ {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM},
- {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R4G4B4A4U},
+ {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM},
- {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8U},
- {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8UI},
+ {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM},
+ {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM},
+ {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT},
+ {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT},
- {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
- {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
- {TextureFormat::G8R8, C, UINT, UINT, UINT, UINT, PixelFormat::RG8UI},
+ {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM},
+ {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM},
+ {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT},
+ {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT},
- {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
- {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
- {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
- {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},
+ {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM},
+ {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM},
+ {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT},
+ {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT},
+ {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT},
- {TextureFormat::R16_G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG16F},
- {TextureFormat::R16_G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG16},
- {TextureFormat::R16_G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG16S},
- {TextureFormat::R16_G16, C, UINT, UINT, UINT, UINT, PixelFormat::RG16UI},
- {TextureFormat::R16_G16, C, SINT, SINT, SINT, SINT, PixelFormat::RG16I},
+ {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT},
+ {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM},
+ {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM},
+ {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT},
+ {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT},
- {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16F},
- {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16U},
- {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16S},
- {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16UI},
- {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16I},
+ {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT},
+ {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM},
+ {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM},
+ {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT},
+ {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT},
- {TextureFormat::BF10GF11RF11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R11FG11FB10F},
+ {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT},
- {TextureFormat::R32_G32_B32_A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA32F},
- {TextureFormat::R32_G32_B32_A32, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA32UI},
+ {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT},
+ {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT},
+ {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT},
- {TextureFormat::R32_G32_B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGB32F},
+ {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT},
- {TextureFormat::R32_G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG32F},
- {TextureFormat::R32_G32, C, UINT, UINT, UINT, UINT, PixelFormat::RG32UI},
+ {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT},
+ {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT},
+ {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT},
- {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F},
- {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI},
- {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I},
+ {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT},
+ {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT},
+ {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT},
- {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F},
+ {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT},
- {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
- {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
- {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
- {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
- {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
+ {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT},
+ {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM},
+ {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
+ {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
+ {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT},
- {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
- {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB},
+ {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM},
+ {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB},
- {TextureFormat::DXT23, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23},
- {TextureFormat::DXT23, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23_SRGB},
+ {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM},
+ {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB},
- {TextureFormat::DXT45, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45},
- {TextureFormat::DXT45, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45_SRGB},
+ {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM},
+ {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB},
- // TODO: Use a different pixel format for SNORM
- {TextureFormat::DXN1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN1},
- {TextureFormat::DXN1, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN1},
+ {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM},
+ {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM},
- {TextureFormat::DXN2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN2UNORM},
- {TextureFormat::DXN2, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN2SNORM},
+ {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM},
+ {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM},
- {TextureFormat::BC7U, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U},
- {TextureFormat::BC7U, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U_SRGB},
+ {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM},
+ {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB},
- {TextureFormat::BC6H_SF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SF16},
- {TextureFormat::BC6H_UF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UF16},
+ {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT},
+ {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT},
- {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4},
+ {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM},
{TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
- {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4},
+ {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM},
{TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
- {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5},
+ {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM},
{TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
- {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8},
+ {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM},
{TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
- {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5},
+ {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM},
{TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
- {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8},
+ {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM},
{TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
- {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6},
+ {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM},
{TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
- {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10},
+ {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM},
{TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
- {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12},
+ {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM},
{TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
- {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6},
+ {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM},
{TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
- {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5},
+ {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM},
{TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB},
}};
@@ -184,7 +189,7 @@ PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb
static_cast<int>(format), is_srgb, static_cast<int>(red_component),
static_cast<int>(green_component), static_cast<int>(blue_component),
static_cast<int>(alpha_component));
- return PixelFormat::ABGR8U;
+ return PixelFormat::A8B8G8R8_UNORM;
}
void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
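
The definition table grows from 78 to 86 rows because the rename adds the previously missing SINT/SNORM variants, and a lookup must match on the texture format, the sRGB flag, and all four component types at once. A sketch of that predicate, with field names taken from the visible tail of the Table struct and otherwise assumed; the shipped GetPixelFormat presumably performs an equivalent comparison:

    // Hypothetical matcher over one DefinitionTable row.
    bool Matches(const Table& row, TextureFormat format, bool is_srgb,
                 ComponentType r, ComponentType g, ComponentType b,
                 ComponentType a) {
        return row.texture_format == format && row.is_srgb == is_srgb &&
               row.red_component == r && row.green_component == g &&
               row.blue_component == b && row.alpha_component == a;
    }
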
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 0caf3b4f0..dfcf36e0b 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -228,7 +228,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
}
}
- if (!is_converted && params.pixel_format != PixelFormat::S8Z24) {
+ if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
return;
}
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 0b2b2b8c4..e8515321b 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -74,21 +74,21 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
SurfaceParams params;
params.is_tiled = tic.IsTiled();
params.srgb_conversion = tic.IsSrgbConversionEnabled();
- params.block_width = params.is_tiled ? tic.BlockWidth() : 0,
- params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
- params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
+ params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
+ params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
+ params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
params.pixel_format = lookup_table.GetPixelFormat(
tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
params.type = GetFormatType(params.pixel_format);
if (entry.is_shadow && params.type == SurfaceType::ColorTexture) {
switch (params.pixel_format) {
- case PixelFormat::R16U:
- case PixelFormat::R16F:
- params.pixel_format = PixelFormat::Z16;
+ case PixelFormat::R16_UNORM:
+ case PixelFormat::R16_FLOAT:
+ params.pixel_format = PixelFormat::D16_UNORM;
break;
- case PixelFormat::R32F:
- params.pixel_format = PixelFormat::Z32F;
+ case PixelFormat::R32_FLOAT:
+ params.pixel_format = PixelFormat::D32_FLOAT;
break;
default:
UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
@@ -96,7 +96,6 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
}
params.type = GetFormatType(params.pixel_format);
}
- params.type = GetFormatType(params.pixel_format);
// TODO: on 1DBuffer we should use the tic info.
if (tic.IsBuffer()) {
params.target = SurfaceTarget::TextureBuffer;
@@ -130,14 +129,13 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
SurfaceParams params;
params.is_tiled = tic.IsTiled();
params.srgb_conversion = tic.IsSrgbConversionEnabled();
- params.block_width = params.is_tiled ? tic.BlockWidth() : 0,
- params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
- params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
+ params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
+ params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
+ params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
params.pixel_format = lookup_table.GetPixelFormat(
tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
params.type = GetFormatType(params.pixel_format);
- params.type = GetFormatType(params.pixel_format);
params.target = ImageTypeToSurfaceTarget(entry.type);
// TODO: on 1DBuffer we should use the tic info.
if (tic.IsBuffer()) {
@@ -165,38 +163,40 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
return params;
}
-SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
- const auto& regs = system.GPU().Maxwell3D().regs;
- SurfaceParams params;
- params.is_tiled = regs.zeta.memory_layout.type ==
- Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
- params.srgb_conversion = false;
- params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U);
- params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U);
- params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
- params.tile_width_spacing = 1;
- params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
- params.type = GetFormatType(params.pixel_format);
- params.width = regs.zeta_width;
- params.height = regs.zeta_height;
- params.pitch = 0;
- params.num_levels = 1;
- params.emulated_levels = 1;
-
- const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0;
- params.is_layered = is_layered;
- params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
- params.depth = is_layered ? regs.zeta_layers.Value() : 1U;
- return params;
+SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
+ const auto& regs = maxwell3d.regs;
+ const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
+ const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
+ const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
+ return {
+ .is_tiled = regs.zeta.memory_layout.type ==
+ Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
+ .srgb_conversion = false,
+ .is_layered = is_layered,
+ .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U),
+ .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U),
+ .block_depth = block_depth,
+ .tile_width_spacing = 1,
+ .width = regs.zeta_width,
+ .height = regs.zeta_height,
+ .depth = is_layered ? regs.zeta_layers.Value() : 1U,
+ .pitch = 0,
+ .num_levels = 1,
+ .emulated_levels = 1,
+ .pixel_format = pixel_format,
+ .type = GetFormatType(pixel_format),
+ .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D,
+ };
}
-SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
- const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
+ std::size_t index) {
+ const auto& config{maxwell3d.regs.rt[index]};
SurfaceParams params;
params.is_tiled =
config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
- params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
- config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
+ params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
+ config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB;
params.block_width = config.memory_layout.block_width;
params.block_height = config.memory_layout.block_height;
params.block_depth = config.memory_layout.block_depth;
@@ -233,24 +233,29 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
SurfaceParams SurfaceParams::CreateForFermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& config) {
- SurfaceParams params{};
- params.is_tiled = !config.linear;
- params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
- config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
- params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0,
- params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0,
- params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0,
- params.tile_width_spacing = 1;
- params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
- params.type = GetFormatType(params.pixel_format);
- params.width = config.width;
- params.height = config.height;
- params.pitch = config.pitch;
- // TODO(Rodrigo): Try to guess texture arrays from parameters
- params.target = SurfaceTarget::Texture2D;
- params.depth = 1;
- params.num_levels = 1;
- params.emulated_levels = 1;
+ const bool is_tiled = !config.linear;
+ const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+
+ SurfaceParams params{
+ .is_tiled = is_tiled,
+ .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
+ config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
+ .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
+ .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
+ .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,
+ .tile_width_spacing = 1,
+ .width = config.width,
+ .height = config.height,
+ .depth = 1,
+ .pitch = config.pitch,
+ .num_levels = 1,
+ .emulated_levels = 1,
+ .pixel_format = pixel_format,
+ .type = GetFormatType(pixel_format),
+ // TODO(Rodrigo): Try to guess texture arrays from parameters
+ .target = SurfaceTarget::Texture2D,
+ };
+
params.is_layered = params.IsLayered();
return params;
}
@@ -343,8 +348,7 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
}
if (is_tiled && is_layered) {
- return Common::AlignBits(size,
- Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
+ return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
}
return size;
}
@@ -418,7 +422,7 @@ std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
const u32 block_size = GetBlockSize();
const u32 block_index = offset / block_size;
const u32 gob_offset = offset % block_size;
- const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize());
+ const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE);
const u32 x_gob_pixels = 64U / GetBytesPerPixel();
const u32 x_block_pixels = x_gob_pixels << block_width;
const u32 y_block_pixels = 8U << block_height;
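
Besides the renames, this file trades member-by-member assignment for C++20 designated initializers, which also removes the old trailing-comma typos (`params.block_width = ...,` parsed as the comma operator instead of ending the statement). Designated initializers must name members in declaration order, as in this self-contained illustration:

    #include <cstdint>

    struct Extent {
        std::uint32_t width;
        std::uint32_t height;
    };

    // .width must be named before .height, exactly as the SurfaceParams
    // initializers above list fields in declaration order.
    constexpr Extent extent{
        .width = 64,
        .height = 32,
    };
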
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 24957df8d..4466c3c34 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -33,10 +33,11 @@ public:
const VideoCommon::Shader::Image& entry);
/// Creates SurfaceCachedParams for a depth buffer configuration.
- static SurfaceParams CreateForDepthBuffer(Core::System& system);
+ static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
/// Creates SurfaceCachedParams from a framebuffer configuration.
- static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
+ static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
+ std::size_t index);
/// Creates SurfaceCachedParams from a Fermi2D surface configuration.
static SurfaceParams CreateForFermiCopySurface(
@@ -204,7 +205,7 @@ public:
static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
const u32 block_depth) {
return Common::AlignBits(out_size,
- Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
+ Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
}
/// Converts a width from a type of surface into another. This helps represent the
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index cdcddb225..ea835c59f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -135,8 +135,7 @@ public:
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
@@ -160,8 +159,7 @@ public:
if (!gpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
@@ -183,11 +181,11 @@ public:
TView GetDepthBufferSurface(bool preserve_contents) {
std::lock_guard lock{mutex};
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
+ auto& dirty = maxwell3d.dirty;
+ if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
return depth_buffer.view;
}
- maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
+ dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
const auto& regs{maxwell3d.regs};
const auto gpu_addr{regs.zeta.Address()};
@@ -195,13 +193,12 @@ public:
SetEmptyDepthBuffer();
return {};
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
SetEmptyDepthBuffer();
return {};
}
- const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
+ const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
@@ -215,7 +212,6 @@ public:
TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
- auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
return render_targets[index].view;
}
@@ -235,15 +231,14 @@ public:
return {};
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
SetEmptyColorBuffer(index);
return {};
}
auto surface_view =
- GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
+ GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index),
preserve_contents, true);
if (render_targets[index].target) {
auto& surface = render_targets[index].target;
@@ -300,9 +295,8 @@ public:
const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
- const auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
- const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
+ const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
+ const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
ImageBlit(src_surface, dst_surface.second, copy_config);
@@ -358,9 +352,11 @@ public:
}
protected:
- explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- bool is_astc_supported)
- : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} {
+ explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
+ bool is_astc_supported_)
+ : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ gpu_memory{gpu_memory_} {
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
SetEmptyColorBuffer(i);
}
@@ -373,9 +369,9 @@ protected:
siblings_table[static_cast<std::size_t>(b)] = a;
};
std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
- make_siblings(PixelFormat::Z16, PixelFormat::R16U);
- make_siblings(PixelFormat::Z32F, PixelFormat::R32F);
- make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F);
+ make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
+ make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
+ make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);
sampled_textures.reserve(64);
}
@@ -395,7 +391,7 @@ protected:
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
void ManageRenderTargetUnregister(TSurface& surface) {
- auto& dirty = system.GPU().Maxwell3D().dirty;
+ auto& dirty = maxwell3d.dirty;
const u32 index = surface->GetRenderTarget();
if (index == DEPTH_RT) {
dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
@@ -408,8 +404,7 @@ protected:
void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr();
const std::size_t size = surface->GetSizeInBytes();
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
gpu_addr);
@@ -459,7 +454,6 @@ protected:
return new_surface;
}
- Core::System& system;
const bool is_astc_supported;
private:
@@ -954,8 +948,7 @@ private:
* @param params The parameters on the candidate surface.
**/
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
Deduction result{};
@@ -1031,7 +1024,7 @@ private:
params.pitch = 4;
params.num_levels = 1;
params.emulated_levels = 1;
- params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
+ params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM;
params.type = VideoCore::Surface::SurfaceType::ColorTexture;
auto surface = CreateSurface(0ULL, params);
invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
@@ -1112,7 +1105,7 @@ private:
void LoadSurface(const TSurface& surface) {
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
- surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
+ surface->LoadBuffer(gpu_memory, staging_cache);
surface->UploadTexture(staging_cache.GetBuffer(0));
surface->MarkAsModified(false, Tick());
}
@@ -1123,7 +1116,7 @@ private:
}
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->DownloadTexture(staging_cache.GetBuffer(0));
- surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
+ surface->FlushBuffer(gpu_memory, staging_cache);
surface->MarkAsModified(false, Tick());
}
@@ -1253,6 +1246,8 @@ private:
}
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::MemoryManager& gpu_memory;
FormatLookupTable format_lookup_table;
FormatCompatibility format_compatibility;
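Editor's note: with Core::System removed from the base class, each backend now passes its dependencies explicitly. A hypothetical backend cache would forward them like this (Surface, View and the class name are illustrative placeholders, not the patch's actual OpenGL/Vulkan types):

using Base = VideoCommon::TextureCache<Surface, View>; // Surface/View: backend-specific types

class BackendTextureCache final : public Base {
public:
    explicit BackendTextureCache(VideoCore::RasterizerInterface& rasterizer,
                                 Tegra::Engines::Maxwell3D& maxwell3d,
                                 Tegra::MemoryManager& gpu_memory, bool is_astc_supported)
        // The base class now receives the 3D engine and the GPU memory manager
        // directly instead of reaching them through Core::System at each call site.
        : Base{rasterizer, maxwell3d, gpu_memory, is_astc_supported} {}
};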
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
index f3efa7eb0..962921483 100644
--- a/src/video_core/textures/convert.cpp
+++ b/src/video_core/textures/convert.cpp
@@ -35,7 +35,7 @@ void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
S8Z24 s8z24_pixel{};
Z24S8 z24s8_pixel{};
constexpr auto bpp{
- VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
+ VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)};
for (std::size_t y = 0; y < height; ++y) {
for (std::size_t x = 0; x < width; ++x) {
const std::size_t offset{bpp * (y * width + x)};
@@ -73,7 +73,7 @@ void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format,
in_data, width, height, depth, block_width, block_height);
std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
- } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+ } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
}
}
@@ -85,7 +85,7 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h
static_cast<u32>(pixel_format));
UNREACHABLE();
- } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+ } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
}
}
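Editor's note: both conversion paths only reorder the two components of each 32-bit texel. A standalone sketch of the forward swap, assuming S8Z24 keeps stencil in the top 8 bits and Z24S8 keeps it in the bottom 8 (the real code goes through BitField-backed S8Z24/Z24S8 structs):

#include <cstdint>

// S8Z24: [31:24] stencil, [23:0] depth  ->  Z24S8: [31:8] depth, [7:0] stencil.
constexpr std::uint32_t S8Z24ToZ24S8(std::uint32_t texel) {
    const std::uint32_t depth = texel & 0x00FF'FFFF;
    const std::uint32_t stencil = texel >> 24;
    return (depth << 8) | stencil;
}

static_assert(S8Z24ToZ24S8(0xAB12'3456) == 0x1234'56AB);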
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 548e4c3fe..16d46a018 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,6 +6,7 @@
#include <cstring>
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/bit_util.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
@@ -37,20 +38,10 @@ struct alignas(64) SwizzleTable {
std::array<std::array<u16, M>, N> values{};
};
-constexpr u32 gob_size_x_shift = 6;
-constexpr u32 gob_size_y_shift = 3;
-constexpr u32 gob_size_z_shift = 0;
-constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift;
+constexpr u32 FAST_SWIZZLE_ALIGN = 16;
-constexpr u32 gob_size_x = 1U << gob_size_x_shift;
-constexpr u32 gob_size_y = 1U << gob_size_y_shift;
-constexpr u32 gob_size_z = 1U << gob_size_z_shift;
-constexpr u32 gob_size = 1U << gob_size_shift;
-
-constexpr u32 fast_swizzle_align = 16;
-
-constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>();
-constexpr auto fast_swizzle_table = SwizzleTable<gob_size_y, 4, fast_swizzle_align>();
+constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, GOB_SIZE_X, GOB_SIZE_Z>();
+constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>();
/**
* This function manages ALL the GOBs (Groups of Bytes) inside a single block.
@@ -69,17 +60,17 @@ void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, con
u32 y_address = z_address;
u32 pixel_base = layer_z * z + y_start * stride_x;
for (u32 y = y_start; y < y_end; y++) {
- const auto& table = legacy_swizzle_table[y % gob_size_y];
+ const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
for (u32 x = x_start; x < x_end; x++) {
- const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % gob_size_x]};
+ const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]};
const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
}
pixel_base += stride_x;
- if ((y + 1) % gob_size_y == 0)
- y_address += gob_size;
+ if ((y + 1) % GOB_SIZE_Y == 0)
+ y_address += GOB_SIZE;
}
z_address += xy_block_size;
}
@@ -104,18 +95,18 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
u32 y_address = z_address;
u32 pixel_base = layer_z * z + y_start * stride_x;
for (u32 y = y_start; y < y_end; y++) {
- const auto& table = fast_swizzle_table[y % gob_size_y];
- for (u32 xb = x_startb; xb < x_endb; xb += fast_swizzle_align) {
- const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
+ const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y];
+ for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) {
+ const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]};
const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
const u32 pixel_index{out_x + pixel_base};
data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
- std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
+ std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN);
}
pixel_base += stride_x;
- if ((y + 1) % gob_size_y == 0)
- y_address += gob_size;
+ if ((y + 1) % GOB_SIZE_Y == 0)
+ y_address += GOB_SIZE;
}
z_address += xy_block_size;
}
@@ -138,9 +129,9 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 stride_x = width * out_bytes_per_pixel;
const u32 layer_z = height * stride_x;
- const u32 gob_elements_x = gob_size_x / bytes_per_pixel;
- constexpr u32 gob_elements_y = gob_size_y;
- constexpr u32 gob_elements_z = gob_size_z;
+ const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel;
+ constexpr u32 gob_elements_y = GOB_SIZE_Y;
+ constexpr u32 gob_elements_z = GOB_SIZE_Z;
const u32 block_x_elements = gob_elements_x;
const u32 block_y_elements = gob_elements_y * block_height;
const u32 block_z_elements = gob_elements_z * block_depth;
@@ -148,7 +139,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements);
const u32 blocks_on_y = div_ceil(height, block_y_elements);
const u32 blocks_on_z = div_ceil(depth, block_z_elements);
- const u32 xy_block_size = gob_size * block_height;
+ const u32 xy_block_size = GOB_SIZE * block_height;
const u32 block_size = xy_block_size * block_depth;
u32 tile_offset = 0;
for (u32 zb = 0; zb < blocks_on_z; zb++) {
@@ -182,7 +173,7 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
const u32 block_height_size{1U << block_height};
const u32 block_depth_size{1U << block_depth};
- if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) {
+ if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) {
SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height_size,
block_depth_size, width_spacing);
@@ -193,53 +184,6 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
}
}
-u32 BytesPerPixel(TextureFormat format) {
- switch (format) {
- case TextureFormat::DXT1:
- case TextureFormat::DXN1:
- // In this case a 'pixel' actually refers to a 4x4 tile.
- return 8;
- case TextureFormat::DXT23:
- case TextureFormat::DXT45:
- case TextureFormat::DXN2:
- case TextureFormat::BC7U:
- case TextureFormat::BC6H_UF16:
- case TextureFormat::BC6H_SF16:
- // In this case a 'pixel' actually refers to a 4x4 tile.
- return 16;
- case TextureFormat::R32_G32_B32:
- return 12;
- case TextureFormat::ASTC_2D_4X4:
- case TextureFormat::ASTC_2D_5X4:
- case TextureFormat::ASTC_2D_8X8:
- case TextureFormat::ASTC_2D_8X5:
- case TextureFormat::ASTC_2D_10X8:
- case TextureFormat::ASTC_2D_5X5:
- case TextureFormat::A8R8G8B8:
- case TextureFormat::A2B10G10R10:
- case TextureFormat::BF10GF11RF11:
- case TextureFormat::R32:
- case TextureFormat::R16_G16:
- return 4;
- case TextureFormat::A1B5G5R5:
- case TextureFormat::B5G6R5:
- case TextureFormat::G8R8:
- case TextureFormat::R16:
- return 2;
- case TextureFormat::R8:
- return 1;
- case TextureFormat::R16_G16_B16_A16:
- return 8;
- case TextureFormat::R32_G32_B32_A32:
- return 16;
- case TextureFormat::R32_G32:
- return 8;
- default:
- UNIMPLEMENTED_MSG("Format not implemented");
- return 1;
- }
-}
-
void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
u32 block_depth, u32 width_spacing) {
@@ -259,47 +203,82 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
}
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
+ u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
u32 block_height_bit, u32 offset_x, u32 offset_y) {
const u32 block_height = 1U << block_height_bit;
- const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
- gob_size_x};
+ const u32 image_width_in_gobs =
+ (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
for (u32 line = 0; line < subrect_height; ++line) {
const u32 dst_y = line + offset_y;
const u32 gob_address_y =
- (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
- ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
- const auto& table = legacy_swizzle_table[dst_y % gob_size_y];
+ (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
+ ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
+ const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
for (u32 x = 0; x < subrect_width; ++x) {
const u32 dst_x = x + offset_x;
const u32 gob_address =
- gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
- const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x];
- u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
- u8* dest_addr = swizzled_data + swizzled_offset;
+ gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height;
+ const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X];
+ const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel;
+ const u8* const source_line = unswizzled_data + unswizzled_offset;
+ u8* const dest_addr = swizzled_data + swizzled_offset;
std::memcpy(dest_addr, source_line, bytes_per_pixel);
}
}
}
-void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
- u32 block_height_bit, u32 offset_x, u32 offset_y) {
- const u32 block_height = 1U << block_height_bit;
- for (u32 line = 0; line < subrect_height; ++line) {
- const u32 y2 = line + offset_y;
- const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
- ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size;
- const auto& table = legacy_swizzle_table[y2 % gob_size_y];
- for (u32 x = 0; x < subrect_width; ++x) {
- const u32 x2 = (x + offset_x) * bytes_per_pixel;
- const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
- const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
- u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
- u8* source_addr = swizzled_data + swizzled_offset;
- std::memcpy(dest_line, source_addr, bytes_per_pixel);
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
+ u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
+ const u32 stride = width * bytes_per_pixel;
+ const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
+ const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
+
+ const u32 block_height_mask = (1U << block_height) - 1;
+ const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height;
+
+ for (u32 line = 0; line < line_count; ++line) {
+ const u32 src_y = line + origin_y;
+ const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
+
+ const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
+ const u32 src_offset_y = (block_y >> block_height) * block_size +
+ ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
+ for (u32 column = 0; column < line_length_in; ++column) {
+ const u32 src_x = (column + origin_x) * bytes_per_pixel;
+ const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
+
+ const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
+ const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel;
+ std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel);
+ }
+ }
+}
+
+void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
+ u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
+ u32 origin_y, u8* output, const u8* input) {
+ UNIMPLEMENTED_IF(origin_x > 0);
+ UNIMPLEMENTED_IF(origin_y > 0);
+
+ const u32 stride = width * bytes_per_pixel;
+ const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
+ const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
+
+ const u32 block_height_mask = (1U << block_height) - 1;
+ const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
+
+ for (u32 line = 0; line < line_count; ++line) {
+ const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];
+ const u32 block_y = line / GOB_SIZE_Y;
+ const u32 dst_offset_y =
+ (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
+ for (u32 x = 0; x < line_length_in; ++x) {
+ const u32 dst_offset =
+ ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X];
+ const u32 src_offset = x * bytes_per_pixel + line * pitch;
+ std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel);
}
}
}
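Editor's note: a caller-side sketch of the reworked interface (the values and the tiled_data pointer are illustrative, and u8 is yuzu's common_types alias): copying a 16x16 RGBA8 window out of a 256-pixel-wide tiled image whose block height shift is 4.

// line_count * pitch bytes of linear output: 16 lines of 16 RGBA8 pixels.
std::vector<u8> linear(16 * 16 * 4);
UnswizzleSubrect(/*line_length_in=*/16, /*line_count=*/16, /*pitch=*/16 * 4,
                 /*width=*/256, /*bytes_per_pixel=*/4, /*block_height=*/4,
                 /*origin_x=*/0, /*origin_y=*/0, linear.data(), tiled_data);

SwizzleSliceToVoxel takes the same kind of linear input but writes one depth slice of a block-linear 3D texture, which is why its x_shift also folds in block_depth.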
@@ -308,17 +287,17 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
u8* swizzle_data) {
const u32 block_height = 1U << block_height_bit;
- const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
+ const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X};
std::size_t count = 0;
for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
const std::size_t gob_address_y =
- (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
- ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
- const auto& table = legacy_swizzle_table[y % gob_size_y];
+ (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
+ ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
+ const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
const std::size_t gob_address =
- gob_address_y + (x / gob_size_x) * gob_size * block_height;
- const std::size_t swizzled_offset = gob_address + table[x % gob_size_x];
+ gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
+ const std::size_t swizzled_offset = gob_address + table[x % GOB_SIZE_X];
const u8* source_line = source_data + count;
u8* dest_addr = swizzle_data + swizzled_offset;
count++;
@@ -328,54 +307,12 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
}
}
-std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
- u32 height) {
- std::vector<u8> rgba_data;
-
- // TODO(Subv): Implement.
- switch (format) {
- case TextureFormat::DXT1:
- case TextureFormat::DXT23:
- case TextureFormat::DXT45:
- case TextureFormat::DXN1:
- case TextureFormat::DXN2:
- case TextureFormat::BC7U:
- case TextureFormat::BC6H_UF16:
- case TextureFormat::BC6H_SF16:
- case TextureFormat::ASTC_2D_4X4:
- case TextureFormat::ASTC_2D_8X8:
- case TextureFormat::ASTC_2D_5X5:
- case TextureFormat::ASTC_2D_10X8:
- case TextureFormat::A8R8G8B8:
- case TextureFormat::A2B10G10R10:
- case TextureFormat::A1B5G5R5:
- case TextureFormat::B5G6R5:
- case TextureFormat::R8:
- case TextureFormat::G8R8:
- case TextureFormat::BF10GF11RF11:
- case TextureFormat::R32_G32_B32_A32:
- case TextureFormat::R32_G32:
- case TextureFormat::R32:
- case TextureFormat::R16:
- case TextureFormat::R16_G16:
- case TextureFormat::R32_G32_B32:
- // TODO(Subv): For the time being just forward the same data without any decoding.
- rgba_data = texture_data;
- break;
- default:
- UNIMPLEMENTED_MSG("Format not implemented");
- break;
- }
-
- return rgba_data;
-}
-
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) {
if (tiled) {
- const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift);
- const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height);
- const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth);
+ const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, GOB_SIZE_X_SHIFT);
+ const u32 aligned_height = Common::AlignBits(height, GOB_SIZE_Y_SHIFT + block_height);
+ const u32 aligned_depth = Common::AlignBits(depth, GOB_SIZE_Z_SHIFT + block_depth);
return aligned_width * aligned_height * aligned_depth;
} else {
return width * height * depth * bytes_per_pixel;
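Editor's note, a worked example for the tiled branch (AlignBits rounds up to a multiple of 1 << shift): a 100x100x1 RGBA8 texture with block_height = 4 and block_depth = 0 yields aligned_width = AlignBits(400, 6) = 448, aligned_height = AlignBits(100, 3 + 4) = 128 and aligned_depth = 1, so the function returns 448 * 128 * 1 = 57344 bytes.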
@@ -386,14 +323,14 @@ u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
u32 bytes_per_pixel) {
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 gobs_in_block = 1 << block_height;
- const u32 y_blocks = gob_size_y << block_height;
- const u32 x_per_gob = gob_size_x / bytes_per_pixel;
+ const u32 y_blocks = GOB_SIZE_Y << block_height;
+ const u32 x_per_gob = GOB_SIZE_X / bytes_per_pixel;
const u32 x_blocks = div_ceil(width, x_per_gob);
- const u32 block_size = gob_size * gobs_in_block;
+ const u32 block_size = GOB_SIZE * gobs_in_block;
const u32 stride = block_size * x_blocks;
const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size;
const u32 relative_y = dst_y % y_blocks;
- return base + (relative_y / gob_size_y) * gob_size;
+ return base + (relative_y / GOB_SIZE_Y) * GOB_SIZE;
}
} // namespace Tegra::Texture
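Editor's note, tracing GetGOBOffset with illustrative values width = 128, bytes_per_pixel = 4, block_height = 4, dst_x = 40, dst_y = 50: x_per_gob = 16, x_blocks = 8, y_blocks = 128, block_size = 8192 and stride = 65536; base = (50 / 128) * 65536 + (40 / 16) * 8192 = 16384 and relative_y = 50, so the result is 16384 + (50 / 8) * 512 = 19456.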
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 06f3ebf87..01e156bc8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -10,15 +10,15 @@
namespace Tegra::Texture {
-// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
-// an small rect of (64/bytes_per_pixel)X8.
-inline std::size_t GetGOBSize() {
- return 512;
-}
+constexpr u32 GOB_SIZE_X = 64;
+constexpr u32 GOB_SIZE_Y = 8;
+constexpr u32 GOB_SIZE_Z = 1;
+constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
-inline std::size_t GetGOBSizeShift() {
- return 9;
-}
+constexpr std::size_t GOB_SIZE_X_SHIFT = 6;
+constexpr std::size_t GOB_SIZE_Y_SHIFT = 3;
+constexpr std::size_t GOB_SIZE_Z_SHIFT = 0;
+constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
/// Unswizzles a swizzled texture without changing its format.
void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
@@ -38,23 +38,36 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
-/// Decodes an unswizzled texture into a A8R8G8B8 texture.
-std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
- u32 height);
-
/// This function calculates the correct size of a texture depending if it's tiled or not.
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth);
/// Copies an untiled subrectangle into a tiled surface.
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
- u32 offset_x, u32 offset_y);
+ u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
+ u32 block_height_bit, u32 offset_x, u32 offset_y);
/// Copies a tiled subrectangle into a linear surface.
-void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
- u32 offset_x, u32 offset_y);
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
+ u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input);
+
+/// @brief Swizzles a 2D array of pixels into a 3D texture
+/// @param line_length_in Number of pixels per line
+/// @param line_count Number of lines
+/// @param pitch Number of bytes per line
+/// @param width Width of the swizzled texture
+/// @param height Height of the swizzled texture
+/// @param bytes_per_pixel Number of bytes used per pixel
+/// @param block_height Block height shift
+/// @param block_depth Block depth shift
+/// @param origin_x Column offset in pixels of the swizzled texture
+/// @param origin_y Row offset in pixels of the swizzled texture
+/// @param output Pointer to the pixels of the swizzled texture
+/// @param input Pointer to the 2D array of pixels used as input
+/// @pre input and output point to arrays large enough to hold the number of bytes used
+void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
+ u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
+ u32 origin_y, u8* output, const u8* input);
void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
std::size_t copy_size, const u8* source_data, u8* swizzle_data);
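Editor's note: because the byte sizes and the shifts are now declared independently, a small consistency check could sit next to these constants; a sketch, not part of the patch:

static_assert(GOB_SIZE_X == 1U << GOB_SIZE_X_SHIFT);
static_assert(GOB_SIZE_Y == 1U << GOB_SIZE_Y_SHIFT);
static_assert(GOB_SIZE_Z == 1U << GOB_SIZE_Z_SHIFT);
static_assert(GOB_SIZE == 512 && GOB_SIZE == 1U << GOB_SIZE_SHIFT);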
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index eba05aced..0574fef12 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -12,10 +12,10 @@
namespace Tegra::Texture {
enum class TextureFormat : u32 {
- R32_G32_B32_A32 = 0x01,
- R32_G32_B32 = 0x02,
- R16_G16_B16_A16 = 0x03,
- R32_G32 = 0x04,
+ R32G32B32A32 = 0x01,
+ R32G32B32 = 0x02,
+ R16G16B16A16 = 0x03,
+ R32G32 = 0x04,
R32_B24G8 = 0x05,
ETC2_RGB = 0x06,
X8B8G8R8 = 0x07,
@@ -23,19 +23,19 @@ enum class TextureFormat : u32 {
A2B10G10R10 = 0x09,
ETC2_RGB_PTA = 0x0a,
ETC2_RGBA = 0x0b,
- R16_G16 = 0x0c,
- G8R24 = 0x0d,
- G24R8 = 0x0e,
+ R16G16 = 0x0c,
+ R24G8 = 0x0d,
+ R8G24 = 0x0e,
R32 = 0x0f,
- BC6H_SF16 = 0x10,
- BC6H_UF16 = 0x11,
+ BC6H_SFLOAT = 0x10,
+ BC6H_UFLOAT = 0x11,
A4B4G4R4 = 0x12,
A5B5G5R1 = 0x13,
A1B5G5R5 = 0x14,
B5G6R5 = 0x15,
B6G5R5 = 0x16,
- BC7U = 0x17,
- G8R8 = 0x18,
+ BC7 = 0x17,
+ R8G8 = 0x18,
EAC = 0x19,
EACX2 = 0x1a,
R16 = 0x1b,
@@ -43,23 +43,23 @@ enum class TextureFormat : u32 {
R8 = 0x1d,
G4R4 = 0x1e,
R1 = 0x1f,
- E5B9G9R9_SHAREDEXP = 0x20,
- BF10GF11RF11 = 0x21,
+ E5B9G9R9 = 0x20,
+ B10G11R11 = 0x21,
G8B8G8R8 = 0x22,
B8G8R8G8 = 0x23,
- DXT1 = 0x24,
- DXT23 = 0x25,
- DXT45 = 0x26,
- DXN1 = 0x27,
- DXN2 = 0x28,
- S8Z24 = 0x29,
+ BC1_RGBA = 0x24,
+ BC2 = 0x25,
+ BC3 = 0x26,
+ BC4 = 0x27,
+ BC5 = 0x28,
+ S8D24 = 0x29,
X8Z24 = 0x2a,
- Z24S8 = 0x2b,
+ D24S8 = 0x2b,
X4V4Z24__COV4R4V = 0x2c,
X4V4Z24__COV8R8V = 0x2d,
V8Z24__COV4R12V = 0x2e,
- ZF32 = 0x2f,
- ZF32_X24S8 = 0x30,
+ D32 = 0x2f,
+ D32S8 = 0x30,
X8Z24_X20V4S8__COV4R4V = 0x31,
X8Z24_X20V4S8__COV8R8V = 0x32,
ZF32_X20V4X8__COV4R4V = 0x33,
@@ -69,7 +69,7 @@ enum class TextureFormat : u32 {
X8Z24_X16V8S8__COV4R12V = 0x37,
ZF32_X16V8X8__COV4R12V = 0x38,
ZF32_X16V8S8__COV4R12V = 0x39,
- Z16 = 0x3a,
+ D16 = 0x3a,
V8Z24__COV8R24V = 0x3b,
X8Z24_X16V8S8__COV8R24V = 0x3c,
ZF32_X16V8X8__COV8R24V = 0x3d,
@@ -375,7 +375,4 @@ struct FullTextureInfo {
TSCEntry tsc;
};
-/// Returns the number of bytes per pixel of the input texture format.
-u32 BytesPerPixel(TextureFormat format);
-
} // namespace Tegra::Texture
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 45f360bdd..a14df06a3 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <memory>
+
#include "common/logging/log.h"
#include "core/core.h"
#include "core/settings.h"
@@ -16,37 +17,49 @@
#include "video_core/video_core.h"
namespace {
-std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
- Core::System& system,
- Core::Frontend::GraphicsContext& context) {
+
+std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
+ Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context) {
+ auto& telemetry_session = system.TelemetrySession();
+ auto& cpu_memory = system.Memory();
+
switch (Settings::values.renderer_backend.GetValue()) {
case Settings::RendererBackend::OpenGL:
- return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context);
+ return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory,
+ gpu, std::move(context));
#ifdef HAS_VULKAN
case Settings::RendererBackend::Vulkan:
- return std::make_unique<Vulkan::RendererVulkan>(emu_window, system);
+ return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory,
+ gpu, std::move(context));
#endif
default:
return nullptr;
}
}
+
} // Anonymous namespace
namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+ std::unique_ptr<Tegra::GPU> gpu;
+ if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+ gpu = std::make_unique<VideoCommon::GPUAsynch>(system);
+ } else {
+ gpu = std::make_unique<VideoCommon::GPUSynch>(system);
+ }
+
auto context = emu_window.CreateSharedContext();
const auto scope = context->Acquire();
- auto renderer = CreateRenderer(emu_window, system, *context);
+
+ auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
if (!renderer->Init()) {
return nullptr;
}
- if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer),
- std::move(context));
- }
- return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context));
+ gpu->BindRenderer(std::move(renderer));
+ return gpu;
}
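Editor's note: the new construction order breaks the GPU/renderer dependency cycle. The GPU exists first, the renderer borrows it by reference, and BindRenderer (assumed here to simply store the renderer inside the GPU) closes the loop:

// 1. gpu      <- GPUAsynch / GPUSynch(system)                 (no renderer yet)
// 2. context  <- emu_window.CreateSharedContext(), then Acquire()
// 3. renderer <- RendererOpenGL / RendererVulkan(..., *gpu, std::move(context))
// 4. gpu->BindRenderer(std::move(renderer));                  // GPU now owns the renderer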
u16 GetResolutionScaleFactor(const RendererBase& renderer) {