summaryrefslogtreecommitdiffstats
path: root/src/video_core/texture_cache
diff options
context:
space:
mode:
authorbunnei <bunneidev@gmail.com>2020-12-30 08:20:09 +0100
committerGitHub <noreply@github.com>2020-12-30 08:20:09 +0100
commitd5fe722a30947c5c808717ad8940077e95a3a81c (patch)
treef6f5d6d6379b0404147969e7d1f548ed3d49ca01 /src/video_core/texture_cache
parentMerge pull request #5247 from comex/xx-concepts (diff)
parentvideo_core: Rewrite the texture cache (diff)
downloadyuzu-d5fe722a30947c5c808717ad8940077e95a3a81c.tar
yuzu-d5fe722a30947c5c808717ad8940077e95a3a81c.tar.gz
yuzu-d5fe722a30947c5c808717ad8940077e95a3a81c.tar.bz2
yuzu-d5fe722a30947c5c808717ad8940077e95a3a81c.tar.lz
yuzu-d5fe722a30947c5c808717ad8940077e95a3a81c.tar.xz
yuzu-d5fe722a30947c5c808717ad8940077e95a3a81c.tar.zst
yuzu-d5fe722a30947c5c808717ad8940077e95a3a81c.zip
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r--src/video_core/texture_cache/accelerated_swizzle.cpp70
-rw-r--r--src/video_core/texture_cache/accelerated_swizzle.h45
-rw-r--r--src/video_core/texture_cache/copy_params.h36
-rw-r--r--src/video_core/texture_cache/decode_bc4.cpp97
-rw-r--r--src/video_core/texture_cache/decode_bc4.h16
-rw-r--r--src/video_core/texture_cache/descriptor_table.h82
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp380
-rw-r--r--src/video_core/texture_cache/format_lookup_table.h42
-rw-r--r--src/video_core/texture_cache/formatter.cpp95
-rw-r--r--src/video_core/texture_cache/formatter.h263
-rw-r--r--src/video_core/texture_cache/image_base.cpp216
-rw-r--r--src/video_core/texture_cache/image_base.h83
-rw-r--r--src/video_core/texture_cache/image_info.cpp189
-rw-r--r--src/video_core/texture_cache/image_info.h38
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp41
-rw-r--r--src/video_core/texture_cache/image_view_base.h47
-rw-r--r--src/video_core/texture_cache/image_view_info.cpp88
-rw-r--r--src/video_core/texture_cache/image_view_info.h50
-rw-r--r--src/video_core/texture_cache/render_targets.h51
-rw-r--r--src/video_core/texture_cache/samples_helper.h55
-rw-r--r--src/video_core/texture_cache/slot_vector.h156
-rw-r--r--src/video_core/texture_cache/surface_base.cpp299
-rw-r--r--src/video_core/texture_cache/surface_base.h333
-rw-r--r--src/video_core/texture_cache/surface_params.cpp445
-rw-r--r--src/video_core/texture_cache/surface_params.h294
-rw-r--r--src/video_core/texture_cache/surface_view.cpp27
-rw-r--r--src/video_core/texture_cache/surface_view.h68
-rw-r--r--src/video_core/texture_cache/texture_cache.h2397
-rw-r--r--src/video_core/texture_cache/types.h140
-rw-r--r--src/video_core/texture_cache/util.cpp1232
-rw-r--r--src/video_core/texture_cache/util.h107
31 files changed, 4633 insertions, 2849 deletions
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp
new file mode 100644
index 000000000..a4fc1184b
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.cpp
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/accelerated_swizzle.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/decoders.h"
+
+namespace VideoCommon::Accelerated {
+
+using Tegra::Texture::GOB_SIZE_SHIFT;
+using Tegra::Texture::GOB_SIZE_X;
+using Tegra::Texture::GOB_SIZE_X_SHIFT;
+using Tegra::Texture::GOB_SIZE_Y_SHIFT;
+using VideoCore::Surface::BytesPerBlock;
+
+BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle,
+ const ImageInfo& info) {
+ const Extent3D block = swizzle.block;
+ const Extent3D num_tiles = swizzle.num_tiles;
+ const u32 bytes_per_block = BytesPerBlock(info.format);
+ const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
+ const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
+ const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
+ return BlockLinearSwizzle2DParams{
+ .origin{0, 0, 0},
+ .destination{0, 0, 0},
+ .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
+ .layer_stride = info.layer_stride,
+ .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth),
+ .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
+ .block_height = block.height,
+ .block_height_mask = (1U << block.height) - 1,
+ };
+}
+
+BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle,
+ const ImageInfo& info) {
+ const Extent3D block = swizzle.block;
+ const Extent3D num_tiles = swizzle.num_tiles;
+ const u32 bytes_per_block = BytesPerBlock(info.format);
+ const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
+ const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
+
+ const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
+ const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
+ const u32 slice_size =
+ Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
+ return BlockLinearSwizzle3DParams{
+ .origin{0, 0, 0},
+ .destination{0, 0, 0},
+ .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
+ .slice_size = slice_size,
+ .block_size = block_size,
+ .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
+ .block_height = block.height,
+ .block_height_mask = (1U << block.height) - 1,
+ .block_depth = block.depth,
+ .block_depth_mask = (1U << block.depth) - 1,
+ };
+}
+
+} // namespace VideoCommon::Accelerated \ No newline at end of file
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h
new file mode 100644
index 000000000..6ec5c78c4
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.h
@@ -0,0 +1,45 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+
+#include "common/common_types.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon::Accelerated {
+
+struct BlockLinearSwizzle2DParams {
+ std::array<u32, 3> origin;
+ std::array<s32, 3> destination;
+ u32 bytes_per_block_log2;
+ u32 layer_stride;
+ u32 block_size;
+ u32 x_shift;
+ u32 block_height;
+ u32 block_height_mask;
+};
+
+struct BlockLinearSwizzle3DParams {
+ std::array<u32, 3> origin;
+ std::array<s32, 3> destination;
+ u32 bytes_per_block_log2;
+ u32 slice_size;
+ u32 block_size;
+ u32 x_shift;
+ u32 block_height;
+ u32 block_height_mask;
+ u32 block_depth;
+ u32 block_depth_mask;
+};
+
+[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(
+ const SwizzleParameters& swizzle, const ImageInfo& info);
+
+[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(
+ const SwizzleParameters& swizzle, const ImageInfo& info);
+
+} // namespace VideoCommon::Accelerated
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h
deleted file mode 100644
index 5b475fe06..000000000
--- a/src/video_core/texture_cache/copy_params.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace VideoCommon {
-
-struct CopyParams {
- constexpr CopyParams(u32 source_x_, u32 source_y_, u32 source_z_, u32 dest_x_, u32 dest_y_,
- u32 dest_z_, u32 source_level_, u32 dest_level_, u32 width_, u32 height_,
- u32 depth_)
- : source_x{source_x_}, source_y{source_y_}, source_z{source_z_}, dest_x{dest_x_},
- dest_y{dest_y_}, dest_z{dest_z_}, source_level{source_level_},
- dest_level{dest_level_}, width{width_}, height{height_}, depth{depth_} {}
-
- constexpr CopyParams(u32 width_, u32 height_, u32 depth_, u32 level_)
- : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level_},
- dest_level{level_}, width{width_}, height{height_}, depth{depth_} {}
-
- u32 source_x;
- u32 source_y;
- u32 source_z;
- u32 dest_x;
- u32 dest_y;
- u32 dest_z;
- u32 source_level;
- u32 dest_level;
- u32 width;
- u32 height;
- u32 depth;
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
new file mode 100644
index 000000000..017327975
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.cpp
@@ -0,0 +1,97 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <span>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/texture_cache/decode_bc4.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
+[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
+ const u32 code_offset = 16 + 3 * (4 * y + x);
+ const u32 code = (bits >> code_offset) & 7;
+ const u32 red0 = (bits >> 0) & 0xff;
+ const u32 red1 = (bits >> 8) & 0xff;
+ if (red0 > red1) {
+ switch (code) {
+ case 0:
+ return red0;
+ case 1:
+ return red1;
+ case 2:
+ return (6 * red0 + 1 * red1) / 7;
+ case 3:
+ return (5 * red0 + 2 * red1) / 7;
+ case 4:
+ return (4 * red0 + 3 * red1) / 7;
+ case 5:
+ return (3 * red0 + 4 * red1) / 7;
+ case 6:
+ return (2 * red0 + 5 * red1) / 7;
+ case 7:
+ return (1 * red0 + 6 * red1) / 7;
+ }
+ } else {
+ switch (code) {
+ case 0:
+ return red0;
+ case 1:
+ return red1;
+ case 2:
+ return (4 * red0 + 1 * red1) / 5;
+ case 3:
+ return (3 * red0 + 2 * red1) / 5;
+ case 4:
+ return (2 * red0 + 3 * red1) / 5;
+ case 5:
+ return (1 * red0 + 4 * red1) / 5;
+ case 6:
+ return 0;
+ case 7:
+ return 0xff;
+ }
+ }
+ return 0;
+}
+
+void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
+ UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
+ UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
+ static constexpr u32 BLOCK_SIZE = 4;
+ size_t input_offset = 0;
+ for (u32 slice = 0; slice < extent.depth; ++slice) {
+ for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) {
+ for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) {
+ u64 bits;
+ std::memcpy(&bits, &input[input_offset], sizeof(bits));
+ input_offset += sizeof(bits);
+
+ for (u32 y = 0; y < BLOCK_SIZE; ++y) {
+ for (u32 x = 0; x < BLOCK_SIZE; ++x) {
+ const u32 linear_z = slice;
+ const u32 linear_y = block_y * BLOCK_SIZE + y;
+ const u32 linear_x = block_x * BLOCK_SIZE + x;
+ const u32 offset_z = linear_z * extent.width * extent.height;
+ const u32 offset_y = linear_y * extent.width;
+ const u32 offset_x = linear_x;
+ const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL;
+ const u32 color = DecompressBlock(bits, x, y);
+ output[output_offset + 0] = static_cast<u8>(color);
+ output[output_offset + 1] = 0;
+ output[output_offset + 2] = 0;
+ output[output_offset + 3] = 0xff;
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h
new file mode 100644
index 000000000..63fb23508
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.h
@@ -0,0 +1,16 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include "common/common_types.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output);
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
new file mode 100644
index 000000000..3a03b786f
--- /dev/null
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -0,0 +1,82 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "common/logging/log.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <typename Descriptor>
+class DescriptorTable {
+public:
+ explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
+
+ [[nodiscard]] bool Synchornize(GPUVAddr gpu_addr, u32 limit) {
+ [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) {
+ return false;
+ }
+ Refresh(gpu_addr, limit);
+ return true;
+ }
+
+ void Invalidate() noexcept {
+ std::ranges::fill(read_descriptors, 0);
+ }
+
+ [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
+ DEBUG_ASSERT(index <= current_limit);
+ const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
+ std::pair<Descriptor, bool> result;
+ gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
+ if (IsDescriptorRead(index)) {
+ result.second = result.first != descriptors[index];
+ } else {
+ MarkDescriptorAsRead(index);
+ result.second = true;
+ }
+ if (result.second) {
+ descriptors[index] = result.first;
+ }
+ return result;
+ }
+
+ [[nodiscard]] u32 Limit() const noexcept {
+ return current_limit;
+ }
+
+private:
+ void Refresh(GPUVAddr gpu_addr, u32 limit) {
+ current_gpu_addr = gpu_addr;
+ current_limit = limit;
+
+ const size_t num_descriptors = static_cast<size_t>(limit) + 1;
+ read_descriptors.clear();
+ read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
+ descriptors.resize(num_descriptors);
+ }
+
+ void MarkDescriptorAsRead(u32 index) noexcept {
+ read_descriptors[index / 64] |= 1ULL << (index % 64);
+ }
+
+ [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
+ return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
+ }
+
+ Tegra::MemoryManager& gpu_memory;
+ GPUVAddr current_gpu_addr{};
+ u32 current_limit{};
+ std::vector<u64> read_descriptors;
+ std::vector<Descriptor> descriptors;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7938d71eb..ddfb726fe 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <array>
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/texture_cache/format_lookup_table.h"
@@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM;
constexpr auto SINT = ComponentType::SINT;
constexpr auto UINT = ComponentType::UINT;
constexpr auto FLOAT = ComponentType::FLOAT;
-constexpr bool C = false; // Normal color
-constexpr bool S = true; // Srgb
-
-struct Table {
- constexpr Table(TextureFormat texture_format_, bool is_srgb_, ComponentType red_component_,
- ComponentType green_component_, ComponentType blue_component_,
- ComponentType alpha_component_, PixelFormat pixel_format_)
- : texture_format{texture_format_}, pixel_format{pixel_format_},
- red_component{red_component_}, green_component{green_component_},
- blue_component{blue_component_}, alpha_component{alpha_component_}, is_srgb{is_srgb_} {}
-
- TextureFormat texture_format;
- PixelFormat pixel_format;
- ComponentType red_component;
- ComponentType green_component;
- ComponentType blue_component;
- ComponentType alpha_component;
- bool is_srgb;
-};
-constexpr std::array<Table, 86> DefinitionTable = {{
- {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
- {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
- {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
- {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
- {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB},
-
- {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM},
-
- {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM},
- {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT},
-
- {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM},
-
- {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM},
-
- {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM},
- {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM},
- {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT},
- {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT},
-
- {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM},
- {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM},
- {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT},
- {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT},
-
- {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM},
- {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM},
- {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT},
- {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT},
- {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT},
-
- {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT},
- {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM},
- {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM},
- {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT},
- {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT},
-
- {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT},
- {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM},
- {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM},
- {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT},
- {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT},
-
- {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT},
-
- {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT},
- {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT},
- {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT},
-
- {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT},
-
- {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT},
- {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT},
- {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT},
-
- {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT},
- {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT},
- {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT},
-
- {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT},
-
- {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT},
- {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM},
- {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
- {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
- {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT},
-
- {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM},
- {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB},
-
- {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM},
- {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB},
-
- {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM},
- {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB},
-
- {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM},
- {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM},
-
- {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM},
- {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM},
-
- {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM},
- {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB},
-
- {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT},
- {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT},
-
- {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM},
- {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
-
- {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM},
- {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
-
- {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM},
- {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
-
- {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM},
- {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
-
- {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM},
- {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
-
- {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM},
- {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
-
- {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM},
- {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
-
- {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM},
- {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
-
- {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM},
- {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
-
- {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM},
- {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
+constexpr bool LINEAR = false;
+constexpr bool SRGB = true;
+
+constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
+ ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
+ u32 hash = is_srgb ? 1 : 0;
+ hash |= static_cast<u32>(red_component) << 1;
+ hash |= static_cast<u32>(green_component) << 4;
+ hash |= static_cast<u32>(blue_component) << 7;
+ hash |= static_cast<u32>(alpha_component) << 10;
+ hash |= static_cast<u32>(format) << 13;
+ return hash;
+}
- {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM},
- {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB},
-}};
+constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) {
+ return Hash(format, component, component, component, component, is_srgb);
+}
} // Anonymous namespace
-FormatLookupTable::FormatLookupTable() {
- table.fill(static_cast<u8>(PixelFormat::Invalid));
-
- for (const auto& entry : DefinitionTable) {
- table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component,
- entry.green_component, entry.blue_component, entry.alpha_component)] =
- static_cast<u8>(entry.pixel_format);
- }
-}
-
-PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb,
- ComponentType red_component,
- ComponentType green_component,
- ComponentType blue_component,
- ComponentType alpha_component) const noexcept {
- const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex(
- format, is_srgb, red_component, green_component, blue_component, alpha_component)]);
- // [[likely]]
- if (pixel_format != PixelFormat::Invalid) {
- return pixel_format;
+PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
+ ComponentType blue, ComponentType alpha,
+ bool is_srgb) noexcept {
+ switch (Hash(format, red, green, blue, alpha, is_srgb)) {
+ case Hash(TextureFormat::A8R8G8B8, UNORM):
+ return PixelFormat::A8B8G8R8_UNORM;
+ case Hash(TextureFormat::A8R8G8B8, SNORM):
+ return PixelFormat::A8B8G8R8_SNORM;
+ case Hash(TextureFormat::A8R8G8B8, UINT):
+ return PixelFormat::A8B8G8R8_UINT;
+ case Hash(TextureFormat::A8R8G8B8, SINT):
+ return PixelFormat::A8B8G8R8_SINT;
+ case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB):
+ return PixelFormat::A8B8G8R8_SRGB;
+ case Hash(TextureFormat::B5G6R5, UNORM):
+ return PixelFormat::B5G6R5_UNORM;
+ case Hash(TextureFormat::A2B10G10R10, UNORM):
+ return PixelFormat::A2B10G10R10_UNORM;
+ case Hash(TextureFormat::A2B10G10R10, UINT):
+ return PixelFormat::A2B10G10R10_UINT;
+ case Hash(TextureFormat::A1B5G5R5, UNORM):
+ return PixelFormat::A1B5G5R5_UNORM;
+ case Hash(TextureFormat::A4B4G4R4, UNORM):
+ return PixelFormat::A4B4G4R4_UNORM;
+ case Hash(TextureFormat::R8, UNORM):
+ return PixelFormat::R8_UNORM;
+ case Hash(TextureFormat::R8, SNORM):
+ return PixelFormat::R8_SNORM;
+ case Hash(TextureFormat::R8, UINT):
+ return PixelFormat::R8_UINT;
+ case Hash(TextureFormat::R8, SINT):
+ return PixelFormat::R8_SINT;
+ case Hash(TextureFormat::R8G8, UNORM):
+ return PixelFormat::R8G8_UNORM;
+ case Hash(TextureFormat::R8G8, SNORM):
+ return PixelFormat::R8G8_SNORM;
+ case Hash(TextureFormat::R8G8, UINT):
+ return PixelFormat::R8G8_UINT;
+ case Hash(TextureFormat::R8G8, SINT):
+ return PixelFormat::R8G8_SINT;
+ case Hash(TextureFormat::R16G16B16A16, FLOAT):
+ return PixelFormat::R16G16B16A16_FLOAT;
+ case Hash(TextureFormat::R16G16B16A16, UNORM):
+ return PixelFormat::R16G16B16A16_UNORM;
+ case Hash(TextureFormat::R16G16B16A16, SNORM):
+ return PixelFormat::R16G16B16A16_SNORM;
+ case Hash(TextureFormat::R16G16B16A16, UINT):
+ return PixelFormat::R16G16B16A16_UINT;
+ case Hash(TextureFormat::R16G16B16A16, SINT):
+ return PixelFormat::R16G16B16A16_SINT;
+ case Hash(TextureFormat::R16G16, FLOAT):
+ return PixelFormat::R16G16_FLOAT;
+ case Hash(TextureFormat::R16G16, UNORM):
+ return PixelFormat::R16G16_UNORM;
+ case Hash(TextureFormat::R16G16, SNORM):
+ return PixelFormat::R16G16_SNORM;
+ case Hash(TextureFormat::R16G16, UINT):
+ return PixelFormat::R16G16_UINT;
+ case Hash(TextureFormat::R16G16, SINT):
+ return PixelFormat::R16G16_SINT;
+ case Hash(TextureFormat::R16, FLOAT):
+ return PixelFormat::R16_FLOAT;
+ case Hash(TextureFormat::R16, UNORM):
+ return PixelFormat::R16_UNORM;
+ case Hash(TextureFormat::R16, SNORM):
+ return PixelFormat::R16_SNORM;
+ case Hash(TextureFormat::R16, UINT):
+ return PixelFormat::R16_UINT;
+ case Hash(TextureFormat::R16, SINT):
+ return PixelFormat::R16_SINT;
+ case Hash(TextureFormat::B10G11R11, FLOAT):
+ return PixelFormat::B10G11R11_FLOAT;
+ case Hash(TextureFormat::R32G32B32A32, FLOAT):
+ return PixelFormat::R32G32B32A32_FLOAT;
+ case Hash(TextureFormat::R32G32B32A32, UINT):
+ return PixelFormat::R32G32B32A32_UINT;
+ case Hash(TextureFormat::R32G32B32A32, SINT):
+ return PixelFormat::R32G32B32A32_SINT;
+ case Hash(TextureFormat::R32G32B32, FLOAT):
+ return PixelFormat::R32G32B32_FLOAT;
+ case Hash(TextureFormat::R32G32, FLOAT):
+ return PixelFormat::R32G32_FLOAT;
+ case Hash(TextureFormat::R32G32, UINT):
+ return PixelFormat::R32G32_UINT;
+ case Hash(TextureFormat::R32G32, SINT):
+ return PixelFormat::R32G32_SINT;
+ case Hash(TextureFormat::R32, FLOAT):
+ return PixelFormat::R32_FLOAT;
+ case Hash(TextureFormat::R32, UINT):
+ return PixelFormat::R32_UINT;
+ case Hash(TextureFormat::R32, SINT):
+ return PixelFormat::R32_SINT;
+ case Hash(TextureFormat::E5B9G9R9, FLOAT):
+ return PixelFormat::E5B9G9R9_FLOAT;
+ case Hash(TextureFormat::D32, FLOAT):
+ return PixelFormat::D32_FLOAT;
+ case Hash(TextureFormat::D16, UNORM):
+ return PixelFormat::D16_UNORM;
+ case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
+ return PixelFormat::S8_UINT_D24_UNORM;
+ case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
+ return PixelFormat::S8_UINT_D24_UNORM;
+ case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+ return PixelFormat::D32_FLOAT_S8_UINT;
+ case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
+ return PixelFormat::BC1_RGBA_UNORM;
+ case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
+ return PixelFormat::BC1_RGBA_SRGB;
+ case Hash(TextureFormat::BC2, UNORM, LINEAR):
+ return PixelFormat::BC2_UNORM;
+ case Hash(TextureFormat::BC2, UNORM, SRGB):
+ return PixelFormat::BC2_SRGB;
+ case Hash(TextureFormat::BC3, UNORM, LINEAR):
+ return PixelFormat::BC3_UNORM;
+ case Hash(TextureFormat::BC3, UNORM, SRGB):
+ return PixelFormat::BC3_SRGB;
+ case Hash(TextureFormat::BC4, UNORM):
+ return PixelFormat::BC4_UNORM;
+ case Hash(TextureFormat::BC4, SNORM):
+ return PixelFormat::BC4_SNORM;
+ case Hash(TextureFormat::BC5, UNORM):
+ return PixelFormat::BC5_UNORM;
+ case Hash(TextureFormat::BC5, SNORM):
+ return PixelFormat::BC5_SNORM;
+ case Hash(TextureFormat::BC7, UNORM, LINEAR):
+ return PixelFormat::BC7_UNORM;
+ case Hash(TextureFormat::BC7, UNORM, SRGB):
+ return PixelFormat::BC7_SRGB;
+ case Hash(TextureFormat::BC6H_SFLOAT, FLOAT):
+ return PixelFormat::BC6H_SFLOAT;
+ case Hash(TextureFormat::BC6H_UFLOAT, FLOAT):
+ return PixelFormat::BC6H_UFLOAT;
+ case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_4X4_UNORM;
+ case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_4X4_SRGB;
+ case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_5X4_UNORM;
+ case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_5X4_SRGB;
+ case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_5X5_UNORM;
+ case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_5X5_SRGB;
+ case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_8X8_UNORM;
+ case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_8X8_SRGB;
+ case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_8X5_UNORM;
+ case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_8X5_SRGB;
+ case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_10X8_UNORM;
+ case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_10X8_SRGB;
+ case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_6X6_UNORM;
+ case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_6X6_SRGB;
+ case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_10X10_UNORM;
+ case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_10X10_SRGB;
+ case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_12X12_UNORM;
+ case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_12X12_SRGB;
+ case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_8X6_UNORM;
+ case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_8X6_SRGB;
+ case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_6X5_UNORM;
+ case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_6X5_SRGB;
}
UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
- static_cast<int>(format), is_srgb, static_cast<int>(red_component),
- static_cast<int>(green_component), static_cast<int>(blue_component),
- static_cast<int>(alpha_component));
+ static_cast<int>(format), is_srgb, static_cast<int>(red),
+ static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
return PixelFormat::A8B8G8R8_UNORM;
}
-void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
- ComponentType green_component, ComponentType blue_component,
- ComponentType alpha_component, PixelFormat pixel_format) {}
-
-std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb,
- ComponentType red_component,
- ComponentType green_component,
- ComponentType blue_component,
- ComponentType alpha_component) noexcept {
- const auto format_index = static_cast<std::size_t>(format);
- const auto red_index = static_cast<std::size_t>(red_component);
- const auto green_index = static_cast<std::size_t>(green_component);
- const auto blue_index = static_cast<std::size_t>(blue_component);
- const auto alpha_index = static_cast<std::size_t>(alpha_component);
- const std::size_t srgb_index = is_srgb ? 1 : 0;
-
- return format_index * PerFormat +
- srgb_index * PerComponent * PerComponent * PerComponent * PerComponent +
- alpha_index * PerComponent * PerComponent * PerComponent +
- blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index;
-}
-
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h
index aa77e0a5a..729533999 100644
--- a/src/video_core/texture_cache/format_lookup_table.h
+++ b/src/video_core/texture_cache/format_lookup_table.h
@@ -4,48 +4,14 @@
#pragma once
-#include <array>
-#include <limits>
#include "video_core/surface.h"
#include "video_core/textures/texture.h"
namespace VideoCommon {
-class FormatLookupTable {
-public:
- explicit FormatLookupTable();
-
- VideoCore::Surface::PixelFormat GetPixelFormat(
- Tegra::Texture::TextureFormat format, bool is_srgb,
- Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component,
- Tegra::Texture::ComponentType blue_component,
- Tegra::Texture::ComponentType alpha_component) const noexcept;
-
-private:
- static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max());
-
- static constexpr std::size_t NumTextureFormats = 128;
-
- static constexpr std::size_t PerComponent = 8;
- static constexpr std::size_t PerComponents2 = PerComponent * PerComponent;
- static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent;
- static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent;
- static constexpr std::size_t PerFormat = PerComponents4 * 2;
-
- static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb,
- Tegra::Texture::ComponentType red_component,
- Tegra::Texture::ComponentType green_component,
- Tegra::Texture::ComponentType blue_component,
- Tegra::Texture::ComponentType alpha_component) noexcept;
-
- void Set(Tegra::Texture::TextureFormat format, bool is_srgb,
- Tegra::Texture::ComponentType red_component,
- Tegra::Texture::ComponentType green_component,
- Tegra::Texture::ComponentType blue_component,
- Tegra::Texture::ComponentType alpha_component,
- VideoCore::Surface::PixelFormat pixel_format);
-
- std::array<u8, NumTextureFormats * PerFormat> table;
-};
+VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo(
+ Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component,
+ Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component,
+ Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept;
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
new file mode 100644
index 000000000..d10ba4ccd
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -0,0 +1,95 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <string>
+
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/image_base.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_base.h"
+#include "video_core/texture_cache/render_targets.h"
+
+namespace VideoCommon {
+
+std::string Name(const ImageBase& image) {
+ const GPUVAddr gpu_addr = image.gpu_addr;
+ const ImageInfo& info = image.info;
+ const u32 width = info.size.width;
+ const u32 height = info.size.height;
+ const u32 depth = info.size.depth;
+ const u32 num_layers = image.info.resources.layers;
+ const u32 num_levels = image.info.resources.levels;
+ std::string resource;
+ if (num_layers > 1) {
+ resource += fmt::format(":L{}", num_layers);
+ }
+ if (num_levels > 1) {
+ resource += fmt::format(":M{}", num_levels);
+ }
+ switch (image.info.type) {
+ case ImageType::e1D:
+ return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource);
+ case ImageType::e2D:
+ return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource);
+ case ImageType::e3D:
+ return fmt::format("Image 2D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource);
+ case ImageType::Linear:
+ return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height);
+ case ImageType::Buffer:
+ return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width);
+ }
+ return "Invalid";
+}
+
+std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
+ const u32 width = image_view.size.width;
+ const u32 height = image_view.size.height;
+ const u32 depth = image_view.size.depth;
+ const u32 num_levels = image_view.range.extent.levels;
+ const u32 num_layers = image_view.range.extent.layers;
+
+ const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
+ switch (type.value_or(image_view.type)) {
+ case ImageViewType::e1D:
+ return fmt::format("ImageView 1D {}{}", width, level);
+ case ImageViewType::e2D:
+ return fmt::format("ImageView 2D {}x{}{}", width, height, level);
+ case ImageViewType::Cube:
+ return fmt::format("ImageView Cube {}x{}{}", width, height, level);
+ case ImageViewType::e3D:
+ return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level);
+ case ImageViewType::e1DArray:
+ return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers);
+ case ImageViewType::e2DArray:
+ return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers);
+ case ImageViewType::CubeArray:
+ return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers);
+ case ImageViewType::Rect:
+ return fmt::format("ImageView Rect {}x{}{}", width, height, level);
+ case ImageViewType::Buffer:
+ return fmt::format("BufferView {}", width);
+ }
+ return "Invalid";
+}
+
+std::string Name(const RenderTargets& render_targets) {
+ std::string_view debug_prefix;
+ const auto num_color = std::ranges::count_if(
+ render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); });
+ if (render_targets.depth_buffer_id) {
+ debug_prefix = num_color > 0 ? "R" : "Z";
+ } else {
+ debug_prefix = num_color > 0 ? "C" : "X";
+ }
+ const Extent2D size = render_targets.size;
+ if (num_color > 0) {
+ return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width,
+ size.height);
+ } else {
+ return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height);
+ }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
new file mode 100644
index 000000000..a48413983
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.h
@@ -0,0 +1,263 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+
+template <>
+struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> {
+ template <typename FormatContext>
+ auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) {
+ using VideoCore::Surface::PixelFormat;
+ const string_view name = [format] {
+ switch (format) {
+ case PixelFormat::A8B8G8R8_UNORM:
+ return "A8B8G8R8_UNORM";
+ case PixelFormat::A8B8G8R8_SNORM:
+ return "A8B8G8R8_SNORM";
+ case PixelFormat::A8B8G8R8_SINT:
+ return "A8B8G8R8_SINT";
+ case PixelFormat::A8B8G8R8_UINT:
+ return "A8B8G8R8_UINT";
+ case PixelFormat::R5G6B5_UNORM:
+ return "R5G6B5_UNORM";
+ case PixelFormat::B5G6R5_UNORM:
+ return "B5G6R5_UNORM";
+ case PixelFormat::A1R5G5B5_UNORM:
+ return "A1R5G5B5_UNORM";
+ case PixelFormat::A2B10G10R10_UNORM:
+ return "A2B10G10R10_UNORM";
+ case PixelFormat::A2B10G10R10_UINT:
+ return "A2B10G10R10_UINT";
+ case PixelFormat::A1B5G5R5_UNORM:
+ return "A1B5G5R5_UNORM";
+ case PixelFormat::R8_UNORM:
+ return "R8_UNORM";
+ case PixelFormat::R8_SNORM:
+ return "R8_SNORM";
+ case PixelFormat::R8_SINT:
+ return "R8_SINT";
+ case PixelFormat::R8_UINT:
+ return "R8_UINT";
+ case PixelFormat::R16G16B16A16_FLOAT:
+ return "R16G16B16A16_FLOAT";
+ case PixelFormat::R16G16B16A16_UNORM:
+ return "R16G16B16A16_UNORM";
+ case PixelFormat::R16G16B16A16_SNORM:
+ return "R16G16B16A16_SNORM";
+ case PixelFormat::R16G16B16A16_SINT:
+ return "R16G16B16A16_SINT";
+ case PixelFormat::R16G16B16A16_UINT:
+ return "R16G16B16A16_UINT";
+ case PixelFormat::B10G11R11_FLOAT:
+ return "B10G11R11_FLOAT";
+ case PixelFormat::R32G32B32A32_UINT:
+ return "R32G32B32A32_UINT";
+ case PixelFormat::BC1_RGBA_UNORM:
+ return "BC1_RGBA_UNORM";
+ case PixelFormat::BC2_UNORM:
+ return "BC2_UNORM";
+ case PixelFormat::BC3_UNORM:
+ return "BC3_UNORM";
+ case PixelFormat::BC4_UNORM:
+ return "BC4_UNORM";
+ case PixelFormat::BC4_SNORM:
+ return "BC4_SNORM";
+ case PixelFormat::BC5_UNORM:
+ return "BC5_UNORM";
+ case PixelFormat::BC5_SNORM:
+ return "BC5_SNORM";
+ case PixelFormat::BC7_UNORM:
+ return "BC7_UNORM";
+ case PixelFormat::BC6H_UFLOAT:
+ return "BC6H_UFLOAT";
+ case PixelFormat::BC6H_SFLOAT:
+ return "BC6H_SFLOAT";
+ case PixelFormat::ASTC_2D_4X4_UNORM:
+ return "ASTC_2D_4X4_UNORM";
+ case PixelFormat::B8G8R8A8_UNORM:
+ return "B8G8R8A8_UNORM";
+ case PixelFormat::R32G32B32A32_FLOAT:
+ return "R32G32B32A32_FLOAT";
+ case PixelFormat::R32G32B32A32_SINT:
+ return "R32G32B32A32_SINT";
+ case PixelFormat::R32G32_FLOAT:
+ return "R32G32_FLOAT";
+ case PixelFormat::R32G32_SINT:
+ return "R32G32_SINT";
+ case PixelFormat::R32_FLOAT:
+ return "R32_FLOAT";
+ case PixelFormat::R16_FLOAT:
+ return "R16_FLOAT";
+ case PixelFormat::R16_UNORM:
+ return "R16_UNORM";
+ case PixelFormat::R16_SNORM:
+ return "R16_SNORM";
+ case PixelFormat::R16_UINT:
+ return "R16_UINT";
+ case PixelFormat::R16_SINT:
+ return "R16_SINT";
+ case PixelFormat::R16G16_UNORM:
+ return "R16G16_UNORM";
+ case PixelFormat::R16G16_FLOAT:
+ return "R16G16_FLOAT";
+ case PixelFormat::R16G16_UINT:
+ return "R16G16_UINT";
+ case PixelFormat::R16G16_SINT:
+ return "R16G16_SINT";
+ case PixelFormat::R16G16_SNORM:
+ return "R16G16_SNORM";
+ case PixelFormat::R32G32B32_FLOAT:
+ return "R32G32B32_FLOAT";
+ case PixelFormat::A8B8G8R8_SRGB:
+ return "A8B8G8R8_SRGB";
+ case PixelFormat::R8G8_UNORM:
+ return "R8G8_UNORM";
+ case PixelFormat::R8G8_SNORM:
+ return "R8G8_SNORM";
+ case PixelFormat::R8G8_SINT:
+ return "R8G8_SINT";
+ case PixelFormat::R8G8_UINT:
+ return "R8G8_UINT";
+ case PixelFormat::R32G32_UINT:
+ return "R32G32_UINT";
+ case PixelFormat::R16G16B16X16_FLOAT:
+ return "R16G16B16X16_FLOAT";
+ case PixelFormat::R32_UINT:
+ return "R32_UINT";
+ case PixelFormat::R32_SINT:
+ return "R32_SINT";
+ case PixelFormat::ASTC_2D_8X8_UNORM:
+ return "ASTC_2D_8X8_UNORM";
+ case PixelFormat::ASTC_2D_8X5_UNORM:
+ return "ASTC_2D_8X5_UNORM";
+ case PixelFormat::ASTC_2D_5X4_UNORM:
+ return "ASTC_2D_5X4_UNORM";
+ case PixelFormat::B8G8R8A8_SRGB:
+ return "B8G8R8A8_SRGB";
+ case PixelFormat::BC1_RGBA_SRGB:
+ return "BC1_RGBA_SRGB";
+ case PixelFormat::BC2_SRGB:
+ return "BC2_SRGB";
+ case PixelFormat::BC3_SRGB:
+ return "BC3_SRGB";
+ case PixelFormat::BC7_SRGB:
+ return "BC7_SRGB";
+ case PixelFormat::A4B4G4R4_UNORM:
+ return "A4B4G4R4_UNORM";
+ case PixelFormat::ASTC_2D_4X4_SRGB:
+ return "ASTC_2D_4X4_SRGB";
+ case PixelFormat::ASTC_2D_8X8_SRGB:
+ return "ASTC_2D_8X8_SRGB";
+ case PixelFormat::ASTC_2D_8X5_SRGB:
+ return "ASTC_2D_8X5_SRGB";
+ case PixelFormat::ASTC_2D_5X4_SRGB:
+ return "ASTC_2D_5X4_SRGB";
+ case PixelFormat::ASTC_2D_5X5_UNORM:
+ return "ASTC_2D_5X5_UNORM";
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ return "ASTC_2D_5X5_SRGB";
+ case PixelFormat::ASTC_2D_10X8_UNORM:
+ return "ASTC_2D_10X8_UNORM";
+ case PixelFormat::ASTC_2D_10X8_SRGB:
+ return "ASTC_2D_10X8_SRGB";
+ case PixelFormat::ASTC_2D_6X6_UNORM:
+ return "ASTC_2D_6X6_UNORM";
+ case PixelFormat::ASTC_2D_6X6_SRGB:
+ return "ASTC_2D_6X6_SRGB";
+ case PixelFormat::ASTC_2D_10X10_UNORM:
+ return "ASTC_2D_10X10_UNORM";
+ case PixelFormat::ASTC_2D_10X10_SRGB:
+ return "ASTC_2D_10X10_SRGB";
+ case PixelFormat::ASTC_2D_12X12_UNORM:
+ return "ASTC_2D_12X12_UNORM";
+ case PixelFormat::ASTC_2D_12X12_SRGB:
+ return "ASTC_2D_12X12_SRGB";
+ case PixelFormat::ASTC_2D_8X6_UNORM:
+ return "ASTC_2D_8X6_UNORM";
+ case PixelFormat::ASTC_2D_8X6_SRGB:
+ return "ASTC_2D_8X6_SRGB";
+ case PixelFormat::ASTC_2D_6X5_UNORM:
+ return "ASTC_2D_6X5_UNORM";
+ case PixelFormat::ASTC_2D_6X5_SRGB:
+ return "ASTC_2D_6X5_SRGB";
+ case PixelFormat::E5B9G9R9_FLOAT:
+ return "E5B9G9R9_FLOAT";
+ case PixelFormat::D32_FLOAT:
+ return "D32_FLOAT";
+ case PixelFormat::D16_UNORM:
+ return "D16_UNORM";
+ case PixelFormat::D24_UNORM_S8_UINT:
+ return "D24_UNORM_S8_UINT";
+ case PixelFormat::S8_UINT_D24_UNORM:
+ return "S8_UINT_D24_UNORM";
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ return "D32_FLOAT_S8_UINT";
+ case PixelFormat::MaxDepthStencilFormat:
+ case PixelFormat::Invalid:
+ return "Invalid";
+ }
+ return "Invalid";
+ }();
+ return formatter<string_view>::format(name, ctx);
+ }
+};
+
+template <>
+struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> {
+ template <typename FormatContext>
+ auto format(VideoCommon::ImageType type, FormatContext& ctx) {
+ const string_view name = [type] {
+ using VideoCommon::ImageType;
+ switch (type) {
+ case ImageType::e1D:
+ return "1D";
+ case ImageType::e2D:
+ return "2D";
+ case ImageType::e3D:
+ return "3D";
+ case ImageType::Linear:
+ return "Linear";
+ case ImageType::Buffer:
+ return "Buffer";
+ }
+ return "Invalid";
+ }();
+ return formatter<string_view>::format(name, ctx);
+ }
+};
+
+template <>
+struct fmt::formatter<VideoCommon::Extent3D> {
+ constexpr auto parse(fmt::format_parse_context& ctx) {
+ return ctx.begin();
+ }
+
+ template <typename FormatContext>
+ auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height,
+ extent.depth);
+ }
+};
+
+namespace VideoCommon {
+
+struct ImageBase;
+struct ImageViewBase;
+struct RenderTargets;
+
+[[nodiscard]] std::string Name(const ImageBase& image);
+
+[[nodiscard]] std::string Name(const ImageViewBase& image_view,
+ std::optional<ImageViewType> type = std::nullopt);
+
+[[nodiscard]] std::string Name(const RenderTargets& render_targets);
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
new file mode 100644
index 000000000..448a05fcc
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -0,0 +1,216 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <optional>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/image_base.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/util.h"
+
+namespace VideoCommon {
+
+using VideoCore::Surface::DefaultBlockHeight;
+using VideoCore::Surface::DefaultBlockWidth;
+
+namespace {
+/// Returns the base layer and mip level offset
+[[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) {
+ if (layer_stride == 0) {
+ return {0, diff};
+ } else {
+ return {diff / layer_stride, diff % layer_stride};
+ }
+}
+
+[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) {
+ return layers.base_level < info.resources.levels &&
+ layers.base_layer + layers.num_layers <= info.resources.layers;
+}
+
+[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) {
+ const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level);
+ const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level);
+ if (!ValidateLayers(copy.src_subresource, src)) {
+ return false;
+ }
+ if (!ValidateLayers(copy.dst_subresource, dst)) {
+ return false;
+ }
+ if (copy.src_offset.x + copy.extent.width > src_size.width ||
+ copy.src_offset.y + copy.extent.height > src_size.height ||
+ copy.src_offset.z + copy.extent.depth > src_size.depth) {
+ return false;
+ }
+ if (copy.dst_offset.x + copy.extent.width > dst_size.width ||
+ copy.dst_offset.y + copy.extent.height > dst_size.height ||
+ copy.dst_offset.z + copy.extent.depth > dst_size.depth) {
+ return false;
+ }
+ return true;
+}
+} // Anonymous namespace
+
+ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
+ : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
+ unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
+ converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
+ cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
+ mip_level_offsets{CalculateMipLevelOffsets(info)} {
+ if (info.type == ImageType::e3D) {
+ slice_offsets = CalculateSliceOffsets(info);
+ slice_subresources = CalculateSliceSubresources(info);
+ }
+}
+
+std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
+ if (other_addr < gpu_addr) {
+ // Subresource address can't be lower than the base
+ return std::nullopt;
+ }
+ const u32 diff = static_cast<u32>(other_addr - gpu_addr);
+ if (diff > guest_size_bytes) {
+ // This can happen when two CPU addresses are used for different GPU addresses
+ return std::nullopt;
+ }
+ if (info.type != ImageType::e3D) {
+ const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
+ const auto end = mip_level_offsets.begin() + info.resources.levels;
+ const auto it = std::find(mip_level_offsets.begin(), end, mip_offset);
+ if (layer > info.resources.layers || it == end) {
+ return std::nullopt;
+ }
+ return SubresourceBase{
+ .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)),
+ .layer = layer,
+ };
+ } else {
+ // TODO: Consider using binary_search after a threshold
+ const auto it = std::ranges::find(slice_offsets, diff);
+ if (it == slice_offsets.cend()) {
+ return std::nullopt;
+ }
+ return slice_subresources[std::distance(slice_offsets.begin(), it)];
+ }
+}
+
+ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept {
+ const auto it = std::ranges::find(image_view_infos, view_info);
+ if (it == image_view_infos.end()) {
+ return ImageViewId{};
+ }
+ return image_view_ids[std::distance(image_view_infos.begin(), it)];
+}
+
+void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) {
+ image_view_infos.push_back(view_info);
+ image_view_ids.push_back(image_view_id);
+}
+
+void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
+ static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
+ ASSERT(lhs.info.type == rhs.info.type);
+ std::optional<SubresourceBase> base;
+ if (lhs.info.type == ImageType::Linear) {
+ base = SubresourceBase{.level = 0, .layer = 0};
+ } else {
+ base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS);
+ }
+ if (!base) {
+ LOG_ERROR(HW_GPU, "Image alias should have been flipped");
+ return;
+ }
+ const PixelFormat lhs_format = lhs.info.format;
+ const PixelFormat rhs_format = rhs.info.format;
+ const Extent2D lhs_block{
+ .width = DefaultBlockWidth(lhs_format),
+ .height = DefaultBlockHeight(lhs_format),
+ };
+ const Extent2D rhs_block{
+ .width = DefaultBlockWidth(rhs_format),
+ .height = DefaultBlockHeight(rhs_format),
+ };
+ const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1;
+ const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1;
+ if (is_lhs_compressed && is_rhs_compressed) {
+ LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented");
+ return;
+ }
+ const s32 lhs_mips = lhs.info.resources.levels;
+ const s32 rhs_mips = rhs.info.resources.levels;
+ const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips);
+ AliasedImage lhs_alias;
+ AliasedImage rhs_alias;
+ lhs_alias.id = rhs_id;
+ rhs_alias.id = lhs_id;
+ lhs_alias.copies.reserve(num_mips);
+ rhs_alias.copies.reserve(num_mips);
+ for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) {
+ Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level);
+ Extent3D rhs_size = MipSize(rhs.info.size, mip_level);
+ if (is_lhs_compressed) {
+ lhs_size.width /= lhs_block.width;
+ lhs_size.height /= lhs_block.height;
+ }
+ if (is_rhs_compressed) {
+ rhs_size.width /= rhs_block.width;
+ rhs_size.height /= rhs_block.height;
+ }
+ const Extent3D copy_size{
+ .width = std::min(lhs_size.width, rhs_size.width),
+ .height = std::min(lhs_size.height, rhs_size.height),
+ .depth = std::min(lhs_size.depth, rhs_size.depth),
+ };
+ if (copy_size.width == 0 || copy_size.height == 0) {
+ LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased.");
+ continue;
+ }
+ const bool is_lhs_3d = lhs.info.type == ImageType::e3D;
+ const bool is_rhs_3d = rhs.info.type == ImageType::e3D;
+ const Offset3D lhs_offset{0, 0, 0};
+ const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0};
+ const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer;
+ const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers;
+ const s32 num_layers = std::min(lhs_layers, rhs_layers);
+ const SubresourceLayers lhs_subresource{
+ .base_level = mip_level,
+ .base_layer = 0,
+ .num_layers = num_layers,
+ };
+ const SubresourceLayers rhs_subresource{
+ .base_level = base->level + mip_level,
+ .base_layer = is_rhs_3d ? 0 : base->layer,
+ .num_layers = num_layers,
+ };
+ [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{
+ .src_subresource = lhs_subresource,
+ .dst_subresource = rhs_subresource,
+ .src_offset = lhs_offset,
+ .dst_offset = rhs_offset,
+ .extent = copy_size,
+ });
+ [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{
+ .src_subresource = rhs_subresource,
+ .dst_subresource = lhs_subresource,
+ .src_offset = rhs_offset,
+ .dst_offset = lhs_offset,
+ .extent = copy_size,
+ });
+ ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy");
+ ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy");
+ }
+ ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
+ if (lhs_alias.copies.empty()) {
+ return;
+ }
+ lhs.aliased_images.push_back(std::move(lhs_alias));
+ rhs.aliased_images.push_back(std::move(rhs_alias));
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
new file mode 100644
index 000000000..b7f3b7e43
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.h
@@ -0,0 +1,83 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <optional>
+#include <vector>
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+enum class ImageFlagBits : u32 {
+ AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU
+ Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted
+ CpuModified = 1 << 2, ///< Contents have been modified from the CPU
+ GpuModified = 1 << 3, ///< Contents have been modified from the GPU
+ Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT
+ Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
+ Registered = 1 << 6, ///< True when the image is registered
+ Picked = 1 << 7, ///< Temporary flag to mark the image as picked
+};
+DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
+
+struct ImageViewInfo;
+
+struct AliasedImage {
+ std::vector<ImageCopy> copies;
+ ImageId id;
+};
+
+struct ImageBase {
+ explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
+
+ [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
+
+ [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
+
+ void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
+
+ [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
+ const VAddr overlap_end = overlap_cpu_addr + overlap_size;
+ return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
+ }
+
+ ImageInfo info;
+
+ u32 guest_size_bytes = 0;
+ u32 unswizzled_size_bytes = 0;
+ u32 converted_size_bytes = 0;
+ ImageFlagBits flags = ImageFlagBits::CpuModified;
+
+ GPUVAddr gpu_addr = 0;
+ VAddr cpu_addr = 0;
+ VAddr cpu_addr_end = 0;
+
+ u64 modification_tick = 0;
+ u64 frame_tick = 0;
+
+ std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
+
+ std::vector<ImageViewInfo> image_view_infos;
+ std::vector<ImageViewId> image_view_ids;
+
+ std::vector<u32> slice_offsets;
+ std::vector<SubresourceBase> slice_subresources;
+
+ std::vector<AliasedImage> aliased_images;
+};
+
+struct ImageAllocBase {
+ std::vector<ImageId> images;
+};
+
+void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
new file mode 100644
index 000000000..64fd7010a
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -0,0 +1,189 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/samples_helper.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using VideoCore::Surface::PixelFormat;
+
+ImageInfo::ImageInfo(const TICEntry& config) noexcept {
+ format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
+ config.a_type, config.srgb_conversion);
+ num_samples = NumSamples(config.msaa_mode);
+ resources.levels = config.max_mip_level + 1;
+ if (config.IsPitchLinear()) {
+ pitch = config.Pitch();
+ } else if (config.IsBlockLinear()) {
+ block = Extent3D{
+ .width = config.block_width,
+ .height = config.block_height,
+ .depth = config.block_depth,
+ };
+ }
+ tile_width_spacing = config.tile_width_spacing;
+ if (config.texture_type != TextureType::Texture2D &&
+ config.texture_type != TextureType::Texture2DNoMipmap) {
+ ASSERT(!config.IsPitchLinear());
+ }
+ switch (config.texture_type) {
+ case TextureType::Texture1D:
+ ASSERT(config.BaseLayer() == 0);
+ type = ImageType::e1D;
+ size.width = config.Width();
+ break;
+ case TextureType::Texture1DArray:
+ UNIMPLEMENTED_IF(config.BaseLayer() != 0);
+ type = ImageType::e1D;
+ size.width = config.Width();
+ resources.layers = config.Depth();
+ break;
+ case TextureType::Texture2D:
+ case TextureType::Texture2DNoMipmap:
+ ASSERT(config.Depth() == 1);
+ type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
+ size.width = config.Width();
+ size.height = config.Height();
+ resources.layers = config.BaseLayer() + 1;
+ break;
+ case TextureType::Texture2DArray:
+ type = ImageType::e2D;
+ size.width = config.Width();
+ size.height = config.Height();
+ resources.layers = config.BaseLayer() + config.Depth();
+ break;
+ case TextureType::TextureCubemap:
+ ASSERT(config.Depth() == 1);
+ type = ImageType::e2D;
+ size.width = config.Width();
+ size.height = config.Height();
+ resources.layers = config.BaseLayer() + 6;
+ break;
+ case TextureType::TextureCubeArray:
+ UNIMPLEMENTED_IF(config.load_store_hint != 0);
+ type = ImageType::e2D;
+ size.width = config.Width();
+ size.height = config.Height();
+ resources.layers = config.BaseLayer() + config.Depth() * 6;
+ break;
+ case TextureType::Texture3D:
+ ASSERT(config.BaseLayer() == 0);
+ type = ImageType::e3D;
+ size.width = config.Width();
+ size.height = config.Height();
+ size.depth = config.Depth();
+ break;
+ case TextureType::Texture1DBuffer:
+ type = ImageType::Buffer;
+ size.width = config.Width();
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
+ break;
+ }
+ if (type != ImageType::Linear) {
+ // FIXME: Call this without passing *this
+ layer_stride = CalculateLayerStride(*this);
+ maybe_unaligned_layer_stride = CalculateLayerSize(*this);
+ }
+}
+
+ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
+ const auto& rt = regs.rt[index];
+ format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
+ if (rt.tile_mode.is_pitch_linear) {
+ ASSERT(rt.tile_mode.is_3d == 0);
+ type = ImageType::Linear;
+ pitch = rt.width;
+ size = Extent3D{
+ .width = pitch / BytesPerBlock(format),
+ .height = rt.height,
+ .depth = 1,
+ };
+ return;
+ }
+ size.width = rt.width;
+ size.height = rt.height;
+ layer_stride = rt.layer_stride * 4;
+ maybe_unaligned_layer_stride = layer_stride;
+ num_samples = NumSamples(regs.multisample_mode);
+ block = Extent3D{
+ .width = rt.tile_mode.block_width,
+ .height = rt.tile_mode.block_height,
+ .depth = rt.tile_mode.block_depth,
+ };
+ if (rt.tile_mode.is_3d) {
+ type = ImageType::e3D;
+ size.depth = rt.depth;
+ } else {
+ type = ImageType::e2D;
+ resources.layers = rt.depth;
+ }
+}
+
+ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
+ format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
+ size.width = regs.zeta_width;
+ size.height = regs.zeta_height;
+ resources.levels = 1;
+ layer_stride = regs.zeta.layer_stride * 4;
+ maybe_unaligned_layer_stride = layer_stride;
+ num_samples = NumSamples(regs.multisample_mode);
+ block = Extent3D{
+ .width = regs.zeta.tile_mode.block_width,
+ .height = regs.zeta.tile_mode.block_height,
+ .depth = regs.zeta.tile_mode.block_depth,
+ };
+ if (regs.zeta.tile_mode.is_pitch_linear) {
+ ASSERT(regs.zeta.tile_mode.is_3d == 0);
+ type = ImageType::Linear;
+ pitch = size.width * BytesPerBlock(format);
+ } else if (regs.zeta.tile_mode.is_3d) {
+ ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0);
+ type = ImageType::e3D;
+ size.depth = regs.zeta_depth;
+ } else {
+ type = ImageType::e2D;
+ resources.layers = regs.zeta_depth;
+ }
+}
+
+ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
+ UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
+ format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
+ if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
+ type = ImageType::Linear;
+ size = Extent3D{
+ .width = config.pitch / VideoCore::Surface::BytesPerBlock(format),
+ .height = config.height,
+ .depth = 1,
+ };
+ pitch = config.pitch;
+ } else {
+ type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
+ block = Extent3D{
+ .width = config.block_width,
+ .height = config.block_height,
+ .depth = config.block_depth,
+ };
+ // 3D blits with more than once slice are not implemented for now
+ // Render to individual slices
+ size = Extent3D{
+ .width = config.width,
+ .height = config.height,
+ .depth = 1,
+ };
+ }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
new file mode 100644
index 000000000..5049fc36e
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.h
@@ -0,0 +1,38 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::TICEntry;
+using VideoCore::Surface::PixelFormat;
+
+/// Static properties of a guest image: format, dimensions, tiling and layout.
+struct ImageInfo {
+    explicit ImageInfo() = default;
+    explicit ImageInfo(const TICEntry& config) noexcept;
+    explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
+    explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
+    explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept;
+
+    PixelFormat format = PixelFormat::Invalid;
+    ImageType type = ImageType::e1D;
+    SubresourceExtent resources; // Number of mip levels and array layers
+    Extent3D size{1, 1, 1};      // Extent of the base mip level in texels
+    // Block-linear images use 'block' (block dimensions); pitch-linear images
+    // use 'pitch' (row stride in bytes). The active member is implied by 'type'.
+    union {
+        Extent3D block{0, 0, 0};
+        u32 pitch;
+    };
+    u32 layer_stride = 0;                 // Aligned stride between array layers in bytes
+    u32 maybe_unaligned_layer_stride = 0; // Layer stride before block alignment
+    u32 num_samples = 1;
+    u32 tile_width_spacing = 0;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
new file mode 100644
index 000000000..076a4bcfd
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -0,0 +1,41 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+
+#include "common/assert.h"
+#include "core/settings.h"
+#include "video_core/compatible_formats.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_base.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+// Computes the view's mip-adjusted size and validates that the view format is
+// compatible with the underlying image's format.
+ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
+                             ImageId image_id_)
+    : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range},
+      size{
+          // Each mip level halves each dimension; clamp at one texel.
+          .width = std::max(image_info.size.width >> range.base.level, 1u),
+          .height = std::max(image_info.size.height >> range.base.level, 1u),
+          .depth = std::max(image_info.size.depth >> range.base.level, 1u),
+      } {
+    ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format),
+               "Image view format {} is incompatible with image format {}", info.format,
+               image_info.format);
+    const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
+    if (image_info.type == ImageType::Linear && is_async) {
+        // Linear (pitch) images are commonly read back by the guest CPU; mark
+        // them for preemptive download when async GPU emulation is on.
+        flags |= ImageViewFlagBits::PreemtiveDownload;
+    }
+    if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) {
+        // A non-3D view of a 3D image addresses a single depth slice.
+        flags |= ImageViewFlagBits::Slice;
+    }
+}
+
+// Constructs a placeholder "null" image view with all members defaulted.
+ImageViewBase::ImageViewBase(const NullImageParams&) {}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
new file mode 100644
index 000000000..73954167e
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -0,0 +1,47 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+using VideoCore::Surface::PixelFormat;
+
+struct ImageViewInfo;
+struct ImageInfo;
+
+/// Tag type used to construct a null (placeholder) image view
+struct NullImageParams {};
+
+enum class ImageViewFlagBits : u16 {
+    // NOTE(review): "PreemtiveDownload" is a typo of "PreemptiveDownload".
+    // Renaming requires updating every user in the same change, so it is only
+    // flagged here.
+    PreemtiveDownload = 1 << 0, // Eagerly download the image contents to guest memory
+    Strong = 1 << 1,
+    Slice = 1 << 2,             // View addresses a single slice of a 3D image
+};
+DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
+
+/// Backend-agnostic state shared by all image view implementations
+struct ImageViewBase {
+    explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
+                           ImageId image_id);
+    explicit ImageViewBase(const NullImageParams&);
+
+    /// Returns true when this view addresses a texture buffer instead of an image
+    [[nodiscard]] bool IsBuffer() const noexcept {
+        return type == ImageViewType::Buffer;
+    }
+
+    ImageId image_id{};     // Image this view was created from
+    PixelFormat format{};
+    ImageViewType type{};
+    SubresourceRange range; // Mip levels and array layers covered by the view
+    Extent3D size{0, 0, 0}; // Extent of the view's base mip level
+    ImageViewFlagBits flags{};
+
+    u64 invalidation_tick = 0;
+    u64 modification_tick = 0;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
new file mode 100644
index 000000000..faf5b151f
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -0,0 +1,88 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <limits>
+
+#include "common/assert.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+namespace {
+
+// Sentinel swizzle value marking views built for render targets (no TIC swizzle)
+constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();
+
+// Narrows a SwizzleSource to u8 for compact storage, asserting nothing is lost.
+[[nodiscard]] u8 CastSwizzle(SwizzleSource source) {
+    const u8 casted = static_cast<u8>(source);
+    ASSERT(static_cast<SwizzleSource>(casted) == source);
+    return casted;
+}
+
+} // Anonymous namespace
+
+// Extracts view properties (format, swizzle, subresource range and view type)
+// from a guest texture descriptor (TIC entry).
+ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept
+    : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)},
+      y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)},
+      w_source{CastSwizzle(config.w_source)} {
+    range.base = SubresourceBase{
+        .level = static_cast<s32>(config.res_min_mip_level),
+        .layer = base_layer,
+    };
+    // Mip range is inclusive on both ends in the descriptor.
+    range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1;
+
+    // Translate the guest texture type into a view type; for layered types the
+    // descriptor's depth field holds the layer count.
+    switch (config.texture_type) {
+    case TextureType::Texture1D:
+        ASSERT(config.Height() == 1);
+        ASSERT(config.Depth() == 1);
+        type = ImageViewType::e1D;
+        break;
+    case TextureType::Texture2D:
+    case TextureType::Texture2DNoMipmap:
+        ASSERT(config.Depth() == 1);
+        // Unnormalized coordinates map to rectangle textures.
+        type = config.normalized_coords ? ImageViewType::e2D : ImageViewType::Rect;
+        break;
+    case TextureType::Texture3D:
+        type = ImageViewType::e3D;
+        break;
+    case TextureType::TextureCubemap:
+        ASSERT(config.Depth() == 1);
+        type = ImageViewType::Cube;
+        range.extent.layers = 6;
+        break;
+    case TextureType::Texture1DArray:
+        type = ImageViewType::e1DArray;
+        range.extent.layers = config.Depth();
+        break;
+    case TextureType::Texture2DArray:
+        type = ImageViewType::e2DArray;
+        range.extent.layers = config.Depth();
+        break;
+    case TextureType::Texture1DBuffer:
+        type = ImageViewType::Buffer;
+        break;
+    case TextureType::TextureCubeArray:
+        type = ImageViewType::CubeArray;
+        // Depth() counts whole cubes; each cube contributes six faces.
+        range.extent.layers = config.Depth() * 6;
+        break;
+    default:
+        UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
+        break;
+    }
+}
+
+// Render-target constructor: fills every swizzle source with the sentinel so
+// IsRenderTarget() can distinguish these views from TIC-built ones.
+ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_,
+                             SubresourceRange range_) noexcept
+    : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE},
+      y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE},
+      w_source{RENDER_TARGET_SWIZZLE} {}
+
+// True when all four swizzle sources hold the render-target sentinel.
+bool ImageViewInfo::IsRenderTarget() const noexcept {
+    return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE &&
+           z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h
new file mode 100644
index 000000000..0c1f99117
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.h
@@ -0,0 +1,50 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TICEntry;
+using VideoCore::Surface::PixelFormat;
+
+/// Properties used to determine an image view
+struct ImageViewInfo {
+    explicit ImageViewInfo() noexcept = default;
+    explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept;
+    explicit ImageViewInfo(ImageViewType type, PixelFormat format,
+                           SubresourceRange range = {}) noexcept;
+
+    auto operator<=>(const ImageViewInfo&) const noexcept = default;
+
+    /// Returns true when this view was built for a render target (sentinel swizzle)
+    [[nodiscard]] bool IsRenderTarget() const noexcept;
+
+    /// Reconstructs the component swizzle from the compact u8 storage
+    [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept {
+        return std::array{
+            static_cast<SwizzleSource>(x_source),
+            static_cast<SwizzleSource>(y_source),
+            static_cast<SwizzleSource>(z_source),
+            static_cast<SwizzleSource>(w_source),
+        };
+    }
+
+    ImageViewType type{};
+    PixelFormat format{};
+    SubresourceRange range;
+    // Swizzle sources stored as u8 to keep the struct compact and padding-free.
+    u8 x_source = static_cast<u8>(SwizzleSource::R);
+    u8 y_source = static_cast<u8>(SwizzleSource::G);
+    u8 z_source = static_cast<u8>(SwizzleSource::B);
+    u8 w_source = static_cast<u8>(SwizzleSource::A);
+};
+// No padding bits allowed: presumably so instances can be compared/hashed as
+// raw bytes -- verify against the cache's lookup code.
+static_assert(std::has_unique_object_representations_v<ImageViewInfo>);
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
new file mode 100644
index 000000000..9b9544b07
--- /dev/null
+++ b/src/video_core/texture_cache/render_targets.h
@@ -0,0 +1,51 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <span>
+#include <utility>
+
+#include "common/bit_cast.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+/// Framebuffer properties used to lookup a framebuffer
+struct RenderTargets {
+    constexpr auto operator<=>(const RenderTargets&) const noexcept = default;
+
+    /// Returns true when any of the given image views is referenced by this
+    /// framebuffer, either as a color attachment or as the depth attachment
+    constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept {
+        const auto contains = [elements](ImageViewId item) {
+            return std::ranges::find(elements, item) != elements.end();
+        };
+        return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id);
+    }
+
+    std::array<ImageViewId, NUM_RT> color_buffer_ids;
+    ImageViewId depth_buffer_id;
+    std::array<u8, NUM_RT> draw_buffers{}; // Draw buffer (attachment) mapping per RT
+    Extent2D size;                         // Render area dimensions
+};
+
+} // namespace VideoCommon
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::RenderTargets> {
+    size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept {
+        using VideoCommon::ImageViewId;
+        // XOR-combine all attachment id hashes; commutative (order-insensitive),
+        // which is acceptable for a framebuffer lookup table.
+        size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id);
+        for (const ImageViewId color_buffer_id : rt.color_buffer_ids) {
+            value ^= std::hash<ImageViewId>{}(color_buffer_id);
+        }
+        // NOTE(review): BitCast<u64> requires the source to be exactly 8 bytes;
+        // this assumes NUM_RT == 8 and Extent2D being two u32s -- confirm.
+        value ^= Common::BitCast<u64>(rt.draw_buffers);
+        value ^= Common::BitCast<u64>(rt.size);
+        return value;
+    }
+};
+
+} // namespace std
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
new file mode 100644
index 000000000..04539a43c
--- /dev/null
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -0,0 +1,55 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <utility>
+
+#include "common/assert.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+// Splits a total sample count into per-axis log2 factors, presumably as
+// {samples_x_log2, samples_y_log2} -- e.g. 8 samples -> 4x2 -> {2, 1}.
+[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
+    switch (num_samples) {
+    case 1:
+        return {0, 0};
+    case 2:
+        return {1, 0};
+    case 4:
+        return {1, 1};
+    case 8:
+        return {2, 1};
+    case 16:
+        return {2, 2};
+    }
+    UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
+    // Fall back to 2x2 after the unreachable assertion fires in release builds.
+    return {1, 1};
+}
+
+// Translates a guest MSAA mode into its total number of samples (XxY product).
+[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
+    using Tegra::Texture::MsaaMode;
+    switch (msaa_mode) {
+    case MsaaMode::Msaa1x1:
+        return 1;
+    case MsaaMode::Msaa2x1:
+    case MsaaMode::Msaa2x1_D3D:
+        return 2;
+    case MsaaMode::Msaa2x2:
+    case MsaaMode::Msaa2x2_VC4:
+    case MsaaMode::Msaa2x2_VC12:
+        return 4;
+    case MsaaMode::Msaa4x2:
+    case MsaaMode::Msaa4x2_D3D:
+    case MsaaMode::Msaa4x2_VC8:
+    case MsaaMode::Msaa4x2_VC24:
+        return 8;
+    case MsaaMode::Msaa4x4:
+        return 16;
+    }
+    UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode));
+    // Degrade to single-sampled after the unreachable assertion in release builds.
+    return 1;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
new file mode 100644
index 000000000..eae3be6ea
--- /dev/null
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -0,0 +1,156 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <concepts>
+#include <numeric>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+/// Strongly-typed index into a SlotVector; contextually convertible to bool
+/// to test validity
+struct SlotId {
+    static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
+
+    constexpr auto operator<=>(const SlotId&) const noexcept = default;
+
+    /// Returns true when the id refers to a (potentially) valid slot
+    constexpr explicit operator bool() const noexcept {
+        return index != INVALID_INDEX;
+    }
+
+    u32 index = INVALID_INDEX;
+};
+
+/// Object pool handing out stable SlotId handles. Storage is a manually
+/// managed array of unions; a bitset tracks which slots hold a live T so only
+/// those are destroyed. Erased indices are recycled through a free list.
+template <class T>
+requires std::is_nothrow_move_assignable_v<T>&&
+    std::is_nothrow_move_constructible_v<T> class SlotVector {
+public:
+    ~SlotVector() noexcept {
+        // Destroy only the slots whose storage bit is set; empty slots were
+        // never constructed as T.
+        size_t index = 0;
+        for (u64 bits : stored_bitset) {
+            for (size_t bit = 0; bits; ++bit, bits >>= 1) {
+                if ((bits & 1) != 0) {
+                    values[index + bit].object.~T();
+                }
+            }
+            index += 64;
+        }
+        delete[] values;
+    }
+
+    [[nodiscard]] T& operator[](SlotId id) noexcept {
+        ValidateIndex(id);
+        return values[id.index].object;
+    }
+
+    [[nodiscard]] const T& operator[](SlotId id) const noexcept {
+        ValidateIndex(id);
+        return values[id.index].object;
+    }
+
+    /// Constructs a new T in a free slot and returns its id.
+    /// NOTE(review): declared noexcept although Reserve() allocates; an
+    /// allocation failure would call std::terminate -- presumably intentional.
+    template <typename... Args>
+    [[nodiscard]] SlotId insert(Args&&... args) noexcept {
+        const u32 index = FreeValueIndex();
+        new (&values[index].object) T(std::forward<Args>(args)...);
+        SetStorageBit(index);
+
+        return SlotId{index};
+    }
+
+    /// Destroys the object at 'id' and recycles its slot
+    void erase(SlotId id) noexcept {
+        values[id.index].object.~T();
+        free_list.push_back(id.index);
+        ResetStorageBit(id.index);
+    }
+
+private:
+    // Non-trivial default constructor so Entry's 'dummy' member can be chosen
+    // without value-initializing a T.
+    struct NonTrivialDummy {
+        NonTrivialDummy() noexcept {}
+    };
+
+    // Raw storage for one slot; construction/destruction of 'object' is manual.
+    union Entry {
+        Entry() noexcept : dummy{} {}
+        ~Entry() noexcept {}
+
+        NonTrivialDummy dummy;
+        T object;
+    };
+
+    void SetStorageBit(u32 index) noexcept {
+        stored_bitset[index / 64] |= u64(1) << (index % 64);
+    }
+
+    void ResetStorageBit(u32 index) noexcept {
+        stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
+    }
+
+    // NOTE(review): could be const; currently unused by the visible code.
+    bool ReadStorageBit(u32 index) noexcept {
+        return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
+    }
+
+    // Debug-only sanity check that 'id' refers to a live slot.
+    void ValidateIndex(SlotId id) const noexcept {
+        DEBUG_ASSERT(id);
+        DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
+        DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
+    }
+
+    // Pops a free slot index, growing the storage (doubling) when exhausted.
+    [[nodiscard]] u32 FreeValueIndex() noexcept {
+        if (free_list.empty()) {
+            Reserve(values_capacity ? (values_capacity << 1) : 1);
+        }
+        const u32 free_index = free_list.back();
+        free_list.pop_back();
+        return free_index;
+    }
+
+    // Reallocates storage, moving live objects slot-by-slot (same indices, so
+    // outstanding SlotIds stay valid) and appending the new slots to the free list.
+    void Reserve(size_t new_capacity) noexcept {
+        Entry* const new_values = new Entry[new_capacity];
+        size_t index = 0;
+        for (u64 bits : stored_bitset) {
+            for (size_t bit = 0; bits; ++bit, bits >>= 1) {
+                const size_t i = index + bit;
+                if ((bits & 1) == 0) {
+                    continue;
+                }
+                T& old_value = values[i].object;
+                new (&new_values[i].object) T(std::move(old_value));
+                old_value.~T();
+            }
+            index += 64;
+        }
+
+        stored_bitset.resize((new_capacity + 63) / 64);
+
+        const size_t old_free_size = free_list.size();
+        free_list.resize(old_free_size + (new_capacity - values_capacity));
+        std::iota(free_list.begin() + old_free_size, free_list.end(),
+                  static_cast<u32>(values_capacity));
+
+        delete[] values;
+        values = new_values;
+        values_capacity = new_capacity;
+    }
+
+    Entry* values = nullptr;
+    size_t values_capacity = 0;
+    // NOTE(review): values_size is never updated or read -- appears to be dead.
+    size_t values_size = 0;
+
+    std::vector<u64> stored_bitset; // One bit per slot: set when the slot holds a live T
+    std::vector<u32> free_list;     // Indices of unoccupied slots
+};
+
+} // namespace VideoCommon
+
+// Hash SlotId by its underlying index so it can key unordered containers.
+template <>
+struct std::hash<VideoCommon::SlotId> {
+    size_t operator()(const VideoCommon::SlotId& id) const noexcept {
+        return std::hash<u32>{}(id.index);
+    }
+};
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
deleted file mode 100644
index efbcf6723..000000000
--- a/src/video_core/texture_cache/surface_base.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/algorithm.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/microprofile.h"
-#include "video_core/memory_manager.h"
-#include "video_core/texture_cache/surface_base.h"
-#include "video_core/texture_cache/surface_params.h"
-#include "video_core/textures/convert.h"
-
-namespace VideoCommon {
-
-MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128));
-
-using Tegra::Texture::ConvertFromGuestToHost;
-using VideoCore::MortonSwizzleMode;
-using VideoCore::Surface::IsPixelFormatASTC;
-using VideoCore::Surface::PixelFormat;
-
-StagingCache::StagingCache() = default;
-
-StagingCache::~StagingCache() = default;
-
-SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_,
- bool is_astc_supported_)
- : params{params_}, gpu_addr{gpu_addr_}, mipmap_sizes(params_.num_levels),
- mipmap_offsets(params.num_levels) {
- is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported_;
- host_memory_size = params.GetHostSizeInBytes(is_converted);
-
- std::size_t offset = 0;
- for (u32 level = 0; level < params.num_levels; ++level) {
- const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
- mipmap_sizes[level] = mipmap_size;
- mipmap_offsets[level] = offset;
- offset += mipmap_size;
- }
- layer_size = offset;
- if (params.is_layered) {
- if (params.is_tiled) {
- layer_size =
- SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
- }
- guest_memory_size = layer_size * params.depth;
- } else {
- guest_memory_size = layer_size;
- }
-}
-
-MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
- const u32 src_bpp{params.GetBytesPerPixel()};
- const u32 dst_bpp{rhs.GetBytesPerPixel()};
- const bool ib1 = params.IsBuffer();
- const bool ib2 = rhs.IsBuffer();
- if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) {
- const bool cb1 = params.IsCompressed();
- const bool cb2 = rhs.IsCompressed();
- if (cb1 == cb2) {
- return MatchTopologyResult::FullMatch;
- }
- return MatchTopologyResult::CompressUnmatch;
- }
- return MatchTopologyResult::None;
-}
-
-MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
- // Buffer surface Check
- if (params.IsBuffer()) {
- const std::size_t wd1 = params.width * params.GetBytesPerPixel();
- const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
- if (wd1 == wd2) {
- return MatchStructureResult::FullMatch;
- }
- return MatchStructureResult::None;
- }
-
- // Linear Surface check
- if (!params.is_tiled) {
- if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
- if (params.width == rhs.width) {
- return MatchStructureResult::FullMatch;
- } else {
- return MatchStructureResult::SemiMatch;
- }
- }
- return MatchStructureResult::None;
- }
-
- // Tiled Surface check
- if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
- params.tile_width_spacing, params.num_levels) ==
- std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
- rhs.tile_width_spacing, rhs.num_levels)) {
- if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
- return MatchStructureResult::FullMatch;
- }
- const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
- rhs.pixel_format);
- const u32 hs =
- SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
- const u32 w1 = params.GetBlockAlignedWidth();
- if (std::tie(w1, params.height) == std::tie(ws, hs)) {
- return MatchStructureResult::SemiMatch;
- }
- }
- return MatchStructureResult::None;
-}
-
-std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
- const GPUVAddr candidate_gpu_addr) const {
- if (gpu_addr == candidate_gpu_addr) {
- return {{0, 0}};
- }
-
- if (candidate_gpu_addr < gpu_addr) {
- return std::nullopt;
- }
-
- const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
- const auto layer{static_cast<u32>(relative_address / layer_size)};
- if (layer >= params.depth) {
- return std::nullopt;
- }
-
- const GPUVAddr mipmap_address = relative_address - layer_size * layer;
- const auto mipmap_it =
- Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
- if (mipmap_it == mipmap_offsets.end()) {
- return std::nullopt;
- }
-
- const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
- return std::make_pair(layer, level);
-}
-
-std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
- const u32 layers{params.depth};
- const u32 mipmaps{params.num_levels};
- std::vector<CopyParams> result;
- result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));
-
- for (u32 layer = 0; layer < layers; layer++) {
- for (u32 level = 0; level < mipmaps; level++) {
- const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
- const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
- result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1);
- }
- }
- return result;
-}
-
-std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const {
- const u32 mipmaps{params.num_levels};
- std::vector<CopyParams> result;
- result.reserve(mipmaps);
-
- for (u32 level = 0; level < mipmaps; level++) {
- const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
- const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
- const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))};
- result.emplace_back(width, height, depth, level);
- }
- return result;
-}
-
-void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory,
- const SurfaceParams& surface_params, u8* buffer, u32 level) {
- const u32 width{surface_params.GetMipWidth(level)};
- const u32 height{surface_params.GetMipHeight(level)};
- const u32 block_height{surface_params.GetMipBlockHeight(level)};
- const u32 block_depth{surface_params.GetMipBlockDepth(level)};
-
- std::size_t guest_offset{mipmap_offsets[level]};
- if (surface_params.is_layered) {
- std::size_t host_offset = 0;
- const std::size_t guest_stride = layer_size;
- const std::size_t host_stride = surface_params.GetHostLayerSize(level);
- for (u32 layer = 0; layer < surface_params.depth; ++layer) {
- MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height,
- block_depth, 1, surface_params.tile_width_spacing, buffer + host_offset,
- memory + guest_offset);
- guest_offset += guest_stride;
- host_offset += host_stride;
- }
- } else {
- MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, block_depth,
- surface_params.GetMipDepth(level), surface_params.tile_width_spacing, buffer,
- memory + guest_offset);
- }
-}
-
-void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
- StagingCache& staging_cache) {
- MICROPROFILE_SCOPE(GPU_Load_Texture);
- auto& staging_buffer = staging_cache.GetBuffer(0);
- u8* host_ptr;
- // Use an extra temporal buffer
- auto& tmp_buffer = staging_cache.GetBuffer(1);
- tmp_buffer.resize(guest_memory_size);
- host_ptr = tmp_buffer.data();
- memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
-
- if (params.is_tiled) {
- ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
- params.block_width, static_cast<u32>(params.target));
- for (u32 level = 0; level < params.num_levels; ++level) {
- const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
- SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
- staging_buffer.data() + host_offset, level);
- }
- } else {
- ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
- const u32 bpp{params.GetBytesPerPixel()};
- const u32 block_width{params.GetDefaultBlockWidth()};
- const u32 block_height{params.GetDefaultBlockHeight()};
- const u32 width{(params.width + block_width - 1) / block_width};
- const u32 height{(params.height + block_height - 1) / block_height};
- const u32 copy_size{width * bpp};
- if (params.pitch == copy_size) {
- std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
- } else {
- const u8* start{host_ptr};
- u8* write_to{staging_buffer.data()};
- for (u32 h = height; h > 0; --h) {
- std::memcpy(write_to, start, copy_size);
- start += params.pitch;
- write_to += copy_size;
- }
- }
- }
-
- if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
- return;
- }
-
- for (u32 level = params.num_levels; level--;) {
- const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
- const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
- u8* const in_buffer = staging_buffer.data() + in_host_offset;
- u8* const out_buffer = staging_buffer.data() + out_host_offset;
- ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
- params.GetMipWidth(level), params.GetMipHeight(level),
- params.GetMipDepth(level), true, true);
- }
-}
-
-void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
- StagingCache& staging_cache) {
- MICROPROFILE_SCOPE(GPU_Flush_Texture);
- auto& staging_buffer = staging_cache.GetBuffer(0);
- u8* host_ptr;
-
- // Use an extra temporal buffer
- auto& tmp_buffer = staging_cache.GetBuffer(1);
- tmp_buffer.resize(guest_memory_size);
- host_ptr = tmp_buffer.data();
-
- if (params.target == SurfaceTarget::Texture3D) {
- // Special case for 3D texture segments
- memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
- }
-
- if (params.is_tiled) {
- ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
- for (u32 level = 0; level < params.num_levels; ++level) {
- const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
- SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
- staging_buffer.data() + host_offset, level);
- }
- } else if (params.IsBuffer()) {
- // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest
- // memory.
- std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
- } else {
- ASSERT(params.target == SurfaceTarget::Texture2D);
- ASSERT(params.num_levels == 1);
-
- const u32 bpp{params.GetBytesPerPixel()};
- const u32 copy_size{params.width * bpp};
- if (params.pitch == copy_size) {
- std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
- } else {
- u8* start{host_ptr};
- const u8* read_to{staging_buffer.data()};
- for (u32 h = params.height; h > 0; --h) {
- std::memcpy(start, read_to, copy_size);
- start += params.pitch;
- read_to += copy_size;
- }
- }
- }
- memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
deleted file mode 100644
index b57135fe4..000000000
--- a/src/video_core/texture_cache/surface_base.h
+++ /dev/null
@@ -1,333 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <optional>
-#include <tuple>
-#include <unordered_map>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/gpu.h"
-#include "video_core/morton.h"
-#include "video_core/texture_cache/copy_params.h"
-#include "video_core/texture_cache/surface_params.h"
-#include "video_core/texture_cache/surface_view.h"
-
-namespace Tegra {
-class MemoryManager;
-}
-
-namespace VideoCommon {
-
-using VideoCore::MortonSwizzleMode;
-using VideoCore::Surface::SurfaceTarget;
-
-enum class MatchStructureResult : u32 {
- FullMatch = 0,
- SemiMatch = 1,
- None = 2,
-};
-
-enum class MatchTopologyResult : u32 {
- FullMatch = 0,
- CompressUnmatch = 1,
- None = 2,
-};
-
-class StagingCache {
-public:
- explicit StagingCache();
- ~StagingCache();
-
- std::vector<u8>& GetBuffer(std::size_t index) {
- return staging_buffer[index];
- }
-
- const std::vector<u8>& GetBuffer(std::size_t index) const {
- return staging_buffer[index];
- }
-
- void SetSize(std::size_t size) {
- staging_buffer.resize(size);
- }
-
-private:
- std::vector<std::vector<u8>> staging_buffer;
-};
-
-class SurfaceBaseImpl {
-public:
- void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
-
- void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
-
- GPUVAddr GetGpuAddr() const {
- return gpu_addr;
- }
-
- bool Overlaps(const VAddr start, const VAddr end) const {
- return (cpu_addr < end) && (cpu_addr_end > start);
- }
-
- bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const {
- const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
- return gpu_addr <= other_start && other_end <= gpu_addr_end;
- }
-
- // Use only when recycling a surface
- void SetGpuAddr(const GPUVAddr new_addr) {
- gpu_addr = new_addr;
- }
-
- VAddr GetCpuAddr() const {
- return cpu_addr;
- }
-
- VAddr GetCpuAddrEnd() const {
- return cpu_addr_end;
- }
-
- void SetCpuAddr(const VAddr new_addr) {
- cpu_addr = new_addr;
- cpu_addr_end = new_addr + guest_memory_size;
- }
-
- const SurfaceParams& GetSurfaceParams() const {
- return params;
- }
-
- std::size_t GetSizeInBytes() const {
- return guest_memory_size;
- }
-
- std::size_t GetHostSizeInBytes() const {
- return host_memory_size;
- }
-
- std::size_t GetMipmapSize(const u32 level) const {
- return mipmap_sizes[level];
- }
-
- bool IsLinear() const {
- return !params.is_tiled;
- }
-
- bool IsConverted() const {
- return is_converted;
- }
-
- bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
- return params.pixel_format == pixel_format;
- }
-
- VideoCore::Surface::PixelFormat GetFormat() const {
- return params.pixel_format;
- }
-
- bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
- return params.target == target;
- }
-
- MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;
-
- MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;
-
- bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
- return std::tie(gpu_addr, params.target, params.num_levels) ==
- std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
- params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
- }
-
- std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;
-
- std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
- return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
- }
-
-protected:
- explicit SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_,
- bool is_astc_supported_);
- ~SurfaceBaseImpl() = default;
-
- virtual void DecorateSurfaceName() = 0;
-
- const SurfaceParams params;
- std::size_t layer_size;
- std::size_t guest_memory_size;
- std::size_t host_memory_size;
- GPUVAddr gpu_addr{};
- VAddr cpu_addr{};
- VAddr cpu_addr_end{};
- bool is_converted{};
-
- std::vector<std::size_t> mipmap_sizes;
- std::vector<std::size_t> mipmap_offsets;
-
-private:
- void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& surface_params,
- u8* buffer, u32 level);
-
- std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;
-
- std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
-};
-
-template <typename TView>
-class SurfaceBase : public SurfaceBaseImpl {
-public:
- virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;
-
- virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;
-
- void MarkAsModified(bool is_modified_, u64 tick) {
- is_modified = is_modified_ || is_target;
- modification_tick = tick;
- }
-
- void MarkAsRenderTarget(bool is_target_, u32 index_) {
- is_target = is_target_;
- index = index_;
- }
-
- void SetMemoryMarked(bool is_memory_marked_) {
- is_memory_marked = is_memory_marked_;
- }
-
- bool IsMemoryMarked() const {
- return is_memory_marked;
- }
-
- void SetSyncPending(bool is_sync_pending_) {
- is_sync_pending = is_sync_pending_;
- }
-
- bool IsSyncPending() const {
- return is_sync_pending;
- }
-
- void MarkAsPicked(bool is_picked_) {
- is_picked = is_picked_;
- }
-
- bool IsModified() const {
- return is_modified;
- }
-
- bool IsProtected() const {
- // Only 3D slices are to be protected
- return is_target && params.target == SurfaceTarget::Texture3D;
- }
-
- bool IsRenderTarget() const {
- return is_target;
- }
-
- u32 GetRenderTarget() const {
- return index;
- }
-
- bool IsRegistered() const {
- return is_registered;
- }
-
- bool IsPicked() const {
- return is_picked;
- }
-
- void MarkAsRegistered(bool is_reg) {
- is_registered = is_reg;
- }
-
- u64 GetModificationTick() const {
- return modification_tick;
- }
-
- TView EmplaceOverview(const SurfaceParams& overview_params) {
- const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
- return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
- }
-
- TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
- return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
- base_level, num_levels));
- }
-
- std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
- const GPUVAddr view_addr,
- const std::size_t candidate_size, const u32 mipmap,
- const u32 layer) {
- const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
- if (!layer_mipmap) {
- return {};
- }
- const auto [end_layer, end_mipmap] = *layer_mipmap;
- if (layer != end_layer) {
- if (mipmap == 0 && end_mipmap == 0) {
- return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1));
- }
- return {};
- } else {
- return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap));
- }
- }
-
- std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
- const std::size_t candidate_size) {
- if (params.target == SurfaceTarget::Texture3D ||
- view_params.target == SurfaceTarget::Texture3D ||
- (params.num_levels == 1 && !params.is_layered)) {
- return {};
- }
- const auto layer_mipmap{GetLayerMipmap(view_addr)};
- if (!layer_mipmap) {
- return {};
- }
- const auto [layer, mipmap] = *layer_mipmap;
- if (GetMipmapSize(mipmap) != candidate_size) {
- return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
- }
- return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
- }
-
- TView GetMainView() const {
- return main_view;
- }
-
-protected:
- explicit SurfaceBase(const GPUVAddr gpu_addr_, const SurfaceParams& params_,
- bool is_astc_supported_)
- : SurfaceBaseImpl{gpu_addr_, params_, is_astc_supported_} {}
-
- ~SurfaceBase() = default;
-
- virtual TView CreateView(const ViewParams& view_key) = 0;
-
- TView main_view;
- std::unordered_map<ViewParams, TView> views;
-
-private:
- TView GetView(const ViewParams& key) {
- const auto [entry, is_cache_miss] = views.try_emplace(key);
- auto& view{entry->second};
- if (is_cache_miss) {
- view = CreateView(key);
- }
- return view;
- }
-
- static constexpr u32 NO_RT = 0xFFFFFFFF;
-
- bool is_modified{};
- bool is_target{};
- bool is_registered{};
- bool is_picked{};
- bool is_memory_marked{};
- bool is_sync_pending{};
- u32 index{NO_RT};
- u64 modification_tick{};
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
deleted file mode 100644
index 96f93246d..000000000
--- a/src/video_core/texture_cache/surface_params.cpp
+++ /dev/null
@@ -1,445 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <string>
-#include <tuple>
-
-#include "common/alignment.h"
-#include "common/bit_util.h"
-#include "core/core.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/surface.h"
-#include "video_core/texture_cache/format_lookup_table.h"
-#include "video_core/texture_cache/surface_params.h"
-
-namespace VideoCommon {
-
-using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::PixelFormatFromDepthFormat;
-using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
-using VideoCore::Surface::SurfaceTarget;
-using VideoCore::Surface::SurfaceTargetFromTextureType;
-using VideoCore::Surface::SurfaceType;
-
-namespace {
-
-SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
- switch (type) {
- case Tegra::Shader::TextureType::Texture1D:
- return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D;
- case Tegra::Shader::TextureType::Texture2D:
- return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
- case Tegra::Shader::TextureType::Texture3D:
- ASSERT(!is_array);
- return SurfaceTarget::Texture3D;
- case Tegra::Shader::TextureType::TextureCube:
- return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap;
- default:
- UNREACHABLE();
- return SurfaceTarget::Texture2D;
- }
-}
-
-SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) {
- switch (type) {
- case Tegra::Shader::ImageType::Texture1D:
- return SurfaceTarget::Texture1D;
- case Tegra::Shader::ImageType::TextureBuffer:
- return SurfaceTarget::TextureBuffer;
- case Tegra::Shader::ImageType::Texture1DArray:
- return SurfaceTarget::Texture1DArray;
- case Tegra::Shader::ImageType::Texture2D:
- return SurfaceTarget::Texture2D;
- case Tegra::Shader::ImageType::Texture2DArray:
- return SurfaceTarget::Texture2DArray;
- case Tegra::Shader::ImageType::Texture3D:
- return SurfaceTarget::Texture3D;
- default:
- UNREACHABLE();
- return SurfaceTarget::Texture2D;
- }
-}
-
-constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
- return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
-}
-
-} // Anonymous namespace
-
-SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table,
- const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Sampler& entry) {
- SurfaceParams params;
- params.is_tiled = tic.IsTiled();
- params.srgb_conversion = tic.IsSrgbConversionEnabled();
- params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
- params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
- params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
- params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
- params.pixel_format = lookup_table.GetPixelFormat(
- tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
- params.type = GetFormatType(params.pixel_format);
- if (entry.is_shadow && params.type == SurfaceType::ColorTexture) {
- switch (params.pixel_format) {
- case PixelFormat::R16_UNORM:
- case PixelFormat::R16_FLOAT:
- params.pixel_format = PixelFormat::D16_UNORM;
- break;
- case PixelFormat::R32_FLOAT:
- params.pixel_format = PixelFormat::D32_FLOAT;
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
- static_cast<u32>(params.pixel_format));
- }
- params.type = GetFormatType(params.pixel_format);
- }
- // TODO: on 1DBuffer we should use the tic info.
- if (tic.IsBuffer()) {
- params.target = SurfaceTarget::TextureBuffer;
- params.width = tic.Width();
- params.pitch = params.width * params.GetBytesPerPixel();
- params.height = 1;
- params.depth = 1;
- params.num_levels = 1;
- params.emulated_levels = 1;
- params.is_layered = false;
- } else {
- params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array);
- params.width = tic.Width();
- params.height = tic.Height();
- params.depth = tic.Depth();
- params.pitch = params.is_tiled ? 0 : tic.Pitch();
- if (params.target == SurfaceTarget::TextureCubemap ||
- params.target == SurfaceTarget::TextureCubeArray) {
- params.depth *= 6;
- }
- params.num_levels = tic.max_mip_level + 1;
- params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
- params.is_layered = params.IsLayered();
- }
- return params;
-}
-
-SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table,
- const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Image& entry) {
- SurfaceParams params;
- params.is_tiled = tic.IsTiled();
- params.srgb_conversion = tic.IsSrgbConversionEnabled();
- params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
- params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
- params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
- params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
- params.pixel_format = lookup_table.GetPixelFormat(
- tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
- params.type = GetFormatType(params.pixel_format);
- params.target = ImageTypeToSurfaceTarget(entry.type);
- // TODO: on 1DBuffer we should use the tic info.
- if (tic.IsBuffer()) {
- params.target = SurfaceTarget::TextureBuffer;
- params.width = tic.Width();
- params.pitch = params.width * params.GetBytesPerPixel();
- params.height = 1;
- params.depth = 1;
- params.num_levels = 1;
- params.emulated_levels = 1;
- params.is_layered = false;
- } else {
- params.width = tic.Width();
- params.height = tic.Height();
- params.depth = tic.Depth();
- params.pitch = params.is_tiled ? 0 : tic.Pitch();
- if (params.target == SurfaceTarget::TextureCubemap ||
- params.target == SurfaceTarget::TextureCubeArray) {
- params.depth *= 6;
- }
- params.num_levels = tic.max_mip_level + 1;
- params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
- params.is_layered = params.IsLayered();
- }
- return params;
-}
-
-SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
- const auto& regs = maxwell3d.regs;
- const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
- const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
- const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
- return {
- .is_tiled = regs.zeta.memory_layout.type ==
- Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
- .srgb_conversion = false,
- .is_layered = is_layered,
- .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U),
- .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U),
- .block_depth = block_depth,
- .tile_width_spacing = 1,
- .width = regs.zeta_width,
- .height = regs.zeta_height,
- .depth = is_layered ? regs.zeta_layers.Value() : 1U,
- .pitch = 0,
- .num_levels = 1,
- .emulated_levels = 1,
- .pixel_format = pixel_format,
- .type = GetFormatType(pixel_format),
- .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D,
- };
-}
-
-SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
- std::size_t index) {
- const auto& config{maxwell3d.regs.rt[index]};
- SurfaceParams params;
- params.is_tiled =
- config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
- params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
- config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB;
- params.block_width = config.memory_layout.block_width;
- params.block_height = config.memory_layout.block_height;
- params.block_depth = config.memory_layout.block_depth;
- params.tile_width_spacing = 1;
- params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
- params.type = GetFormatType(params.pixel_format);
- if (params.is_tiled) {
- params.pitch = 0;
- params.width = config.width;
- } else {
- const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
- params.pitch = config.width;
- params.width = params.pitch / bpp;
- }
- params.height = config.height;
- params.num_levels = 1;
- params.emulated_levels = 1;
-
- if (config.memory_layout.is_3d != 0) {
- params.depth = config.layers.Value();
- params.is_layered = false;
- params.target = SurfaceTarget::Texture3D;
- } else if (config.layers > 1) {
- params.depth = config.layers.Value();
- params.is_layered = true;
- params.target = SurfaceTarget::Texture2DArray;
- } else {
- params.depth = 1;
- params.is_layered = false;
- params.target = SurfaceTarget::Texture2D;
- }
- return params;
-}
-
-SurfaceParams SurfaceParams::CreateForFermiCopySurface(
- const Tegra::Engines::Fermi2D::Regs::Surface& config) {
- const bool is_tiled = !config.linear;
- const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format);
-
- SurfaceParams params{
- .is_tiled = is_tiled,
- .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
- config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
- .is_layered = false,
- .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
- .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
- .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,
- .tile_width_spacing = 1,
- .width = config.width,
- .height = config.height,
- .depth = 1,
- .pitch = config.pitch,
- .num_levels = 1,
- .emulated_levels = 1,
- .pixel_format = pixel_format,
- .type = GetFormatType(pixel_format),
- // TODO(Rodrigo): Try to guess texture arrays from parameters
- .target = SurfaceTarget::Texture2D,
- };
-
- params.is_layered = params.IsLayered();
- return params;
-}
-
-VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
- const VideoCommon::Shader::Sampler& entry) {
- return TextureTypeToSurfaceTarget(entry.type, entry.is_array);
-}
-
-VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
- const VideoCommon::Shader::Image& entry) {
- return ImageTypeToSurfaceTarget(entry.type);
-}
-
-bool SurfaceParams::IsLayered() const {
- switch (target) {
- case SurfaceTarget::Texture1DArray:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubemap:
- case SurfaceTarget::TextureCubeArray:
- return true;
- default:
- return false;
- }
-}
-
-// Auto block resizing algorithm from:
-// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
-u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
- if (level == 0) {
- return this->block_height;
- }
-
- const u32 height_new{GetMipHeight(level)};
- const u32 default_block_height{GetDefaultBlockHeight()};
- const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
- const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
- return std::clamp(block_height_new, 3U, 7U) - 3U;
-}
-
-u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
- if (level == 0) {
- return this->block_depth;
- }
- if (is_layered) {
- return 0;
- }
-
- const u32 depth_new{GetMipDepth(level)};
- const u32 block_depth_new = Common::Log2Ceil32(depth_new);
- if (block_depth_new > 4) {
- return 5 - (GetMipBlockHeight(level) >= 2);
- }
- return block_depth_new;
-}
-
-std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
- std::size_t offset = 0;
- for (u32 i = 0; i < level; i++) {
- offset += GetInnerMipmapMemorySize(i, false, false);
- }
- return offset;
-}
-
-std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
- std::size_t offset = 0;
- if (is_converted) {
- for (u32 i = 0; i < level; ++i) {
- offset += GetConvertedMipmapSize(i) * GetNumLayers();
- }
- } else {
- for (u32 i = 0; i < level; ++i) {
- offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
- }
- }
- return offset;
-}
-
-std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
- constexpr std::size_t rgba8_bpp = 4ULL;
- const std::size_t mip_width = GetMipWidth(level);
- const std::size_t mip_height = GetMipHeight(level);
- const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
- return mip_width * mip_height * mip_depth * rgba8_bpp;
-}
-
-std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
- std::size_t size = 0;
- for (u32 level = 0; level < num_levels; ++level) {
- size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
- }
- if (is_tiled && is_layered) {
- return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
- }
- return size;
-}
-
-std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
- bool uncompressed) const {
- const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
- const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
- const u32 mip_depth{is_layered ? 1U : GetMipDepth(level)};
- if (is_tiled) {
- return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), mip_width,
- mip_height, mip_depth, GetMipBlockHeight(level),
- GetMipBlockDepth(level));
- } else if (as_host_size || IsBuffer()) {
- return GetBytesPerPixel() * mip_width * mip_height * mip_depth;
- } else {
- // Linear Texture Case
- return pitch * mip_height * mip_depth;
- }
-}
-
-bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
- return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
- height, depth, pitch, num_levels, pixel_format, type, target) ==
- std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
- rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
- rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target);
-}
-
-std::string SurfaceParams::TargetName() const {
- switch (target) {
- case SurfaceTarget::Texture1D:
- return "1D";
- case SurfaceTarget::TextureBuffer:
- return "TexBuffer";
- case SurfaceTarget::Texture2D:
- return "2D";
- case SurfaceTarget::Texture3D:
- return "3D";
- case SurfaceTarget::Texture1DArray:
- return "1DArray";
- case SurfaceTarget::Texture2DArray:
- return "2DArray";
- case SurfaceTarget::TextureCubemap:
- return "Cube";
- case SurfaceTarget::TextureCubeArray:
- return "CubeArray";
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target);
- UNREACHABLE();
- return fmt::format("TUK({})", target);
- }
-}
-
-u32 SurfaceParams::GetBlockSize() const {
- const u32 x = 64U << block_width;
- const u32 y = 8U << block_height;
- const u32 z = 1U << block_depth;
- return x * y * z;
-}
-
-std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
- const u32 x_pixels = 64U / GetBytesPerPixel();
- const u32 x = x_pixels << block_width;
- const u32 y = 8U << block_height;
- return {x, y};
-}
-
-std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
- const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
- const u32 block_size = GetBlockSize();
- const u32 block_index = offset / block_size;
- const u32 gob_offset = offset % block_size;
- const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE);
- const u32 x_gob_pixels = 64U / GetBytesPerPixel();
- const u32 x_block_pixels = x_gob_pixels << block_width;
- const u32 y_block_pixels = 8U << block_height;
- const u32 z_block_pixels = 1U << block_depth;
- const u32 x_blocks = div_ceil(width, x_block_pixels);
- const u32 y_blocks = div_ceil(height, y_block_pixels);
- const u32 z_blocks = div_ceil(depth, z_block_pixels);
- const u32 base_x = block_index % x_blocks;
- const u32 base_y = (block_index / x_blocks) % y_blocks;
- const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks;
- u32 x = base_x * x_block_pixels;
- u32 y = base_y * y_block_pixels;
- u32 z = base_z * z_block_pixels;
- z += gob_index >> block_height;
- y += (gob_index * 8U) % y_block_pixels;
- return {x, y, z};
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
deleted file mode 100644
index 4466c3c34..000000000
--- a/src/video_core/texture_cache/surface_params.h
+++ /dev/null
@@ -1,294 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <utility>
-
-#include "common/alignment.h"
-#include "common/bit_util.h"
-#include "common/cityhash.h"
-#include "common/common_types.h"
-#include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/surface.h"
-#include "video_core/textures/decoders.h"
-
-namespace VideoCommon {
-
-class FormatLookupTable;
-
-class SurfaceParams {
-public:
- /// Creates SurfaceCachedParams from a texture configuration.
- static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table,
- const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Sampler& entry);
-
- /// Creates SurfaceCachedParams from an image configuration.
- static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table,
- const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Image& entry);
-
- /// Creates SurfaceCachedParams for a depth buffer configuration.
- static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
-
- /// Creates SurfaceCachedParams from a framebuffer configuration.
- static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
- std::size_t index);
-
- /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
- static SurfaceParams CreateForFermiCopySurface(
- const Tegra::Engines::Fermi2D::Regs::Surface& config);
-
- /// Obtains the texture target from a shader's sampler entry.
- static VideoCore::Surface::SurfaceTarget ExpectedTarget(
- const VideoCommon::Shader::Sampler& entry);
-
- /// Obtains the texture target from a shader's sampler entry.
- static VideoCore::Surface::SurfaceTarget ExpectedTarget(
- const VideoCommon::Shader::Image& entry);
-
- std::size_t Hash() const {
- return static_cast<std::size_t>(
- Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
- }
-
- bool operator==(const SurfaceParams& rhs) const;
-
- bool operator!=(const SurfaceParams& rhs) const {
- return !operator==(rhs);
- }
-
- std::size_t GetGuestSizeInBytes() const {
- return GetInnerMemorySize(false, false, false);
- }
-
- std::size_t GetHostSizeInBytes(bool is_converted) const {
- if (!is_converted) {
- return GetInnerMemorySize(true, false, false);
- }
- // ASTC is uncompressed in software, in emulated as RGBA8
- std::size_t host_size_in_bytes = 0;
- for (u32 level = 0; level < num_levels; ++level) {
- host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
- }
- return host_size_in_bytes;
- }
-
- u32 GetBlockAlignedWidth() const {
- return Common::AlignUp(width, 64 / GetBytesPerPixel());
- }
-
- /// Returns the width of a given mipmap level.
- u32 GetMipWidth(u32 level) const {
- return std::max(1U, width >> level);
- }
-
- /// Returns the height of a given mipmap level.
- u32 GetMipHeight(u32 level) const {
- return std::max(1U, height >> level);
- }
-
- /// Returns the depth of a given mipmap level.
- u32 GetMipDepth(u32 level) const {
- return is_layered ? depth : std::max(1U, depth >> level);
- }
-
- /// Returns the block height of a given mipmap level.
- u32 GetMipBlockHeight(u32 level) const;
-
- /// Returns the block depth of a given mipmap level.
- u32 GetMipBlockDepth(u32 level) const;
-
- /// Returns the best possible row/pitch alignment for the surface.
- u32 GetRowAlignment(u32 level, bool is_converted) const {
- const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
- return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
- }
-
- /// Returns the offset in bytes in guest memory of a given mipmap level.
- std::size_t GetGuestMipmapLevelOffset(u32 level) const;
-
- /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
- std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
-
- /// Returns the size in bytes in guest memory of a given mipmap level.
- std::size_t GetGuestMipmapSize(u32 level) const {
- return GetInnerMipmapMemorySize(level, false, false);
- }
-
- /// Returns the size in bytes in host memory (linear) of a given mipmap level.
- std::size_t GetHostMipmapSize(u32 level) const {
- return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
- }
-
- std::size_t GetConvertedMipmapSize(u32 level) const;
-
- /// Get this texture Tegra Block size in guest memory layout
- u32 GetBlockSize() const;
-
- /// Get X, Y coordinates max sizes of a single block.
- std::pair<u32, u32> GetBlockXY() const;
-
- /// Get the offset in x, y, z coordinates from a memory offset
- std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
-
- /// Returns the size of a layer in bytes in guest memory.
- std::size_t GetGuestLayerSize() const {
- return GetLayerSize(false, false);
- }
-
- /// Returns the size of a layer in bytes in host memory for a given mipmap level.
- std::size_t GetHostLayerSize(u32 level) const {
- ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D);
- return GetInnerMipmapMemorySize(level, true, false);
- }
-
- /// Returns the max possible mipmap that the texture can have in host gpu
- u32 MaxPossibleMipmap() const {
- const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U;
- const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U;
- const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h);
- if (target != VideoCore::Surface::SurfaceTarget::Texture3D)
- return max_mipmap;
- return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U);
- }
-
- /// Returns if the guest surface is a compressed surface.
- bool IsCompressed() const {
- return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1;
- }
-
- /// Returns the default block width.
- u32 GetDefaultBlockWidth() const {
- return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
- }
-
- /// Returns the default block height.
- u32 GetDefaultBlockHeight() const {
- return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
- }
-
- /// Returns the bits per pixel.
- u32 GetBitsPerPixel() const {
- return VideoCore::Surface::GetFormatBpp(pixel_format);
- }
-
- /// Returns the bytes per pixel.
- u32 GetBytesPerPixel() const {
- return VideoCore::Surface::GetBytesPerPixel(pixel_format);
- }
-
- /// Returns true if the pixel format is a depth and/or stencil format.
- bool IsPixelFormatZeta() const {
- return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
- pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
- }
-
- /// Returns is the surface is a TextureBuffer type of surface.
- bool IsBuffer() const {
- return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
- }
-
- /// Returns the number of layers in the surface.
- std::size_t GetNumLayers() const {
- return is_layered ? depth : 1;
- }
-
- /// Returns the debug name of the texture for use in graphic debuggers.
- std::string TargetName() const;
-
- // Helper used for out of class size calculations
- static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
- const u32 block_depth) {
- return Common::AlignBits(out_size,
- Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
- }
-
- /// Converts a width from a type of surface into another. This helps represent the
- /// equivalent value between compressed/non-compressed textures.
- static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from,
- VideoCore::Surface::PixelFormat pixel_format_to) {
- const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from);
- const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to);
- return (width * bw2 + bw1 - 1) / bw1;
- }
-
- /// Converts a height from a type of surface into another. This helps represent the
- /// equivalent value between compressed/non-compressed textures.
- static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from,
- VideoCore::Surface::PixelFormat pixel_format_to) {
- const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from);
- const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to);
- return (height * bh2 + bh1 - 1) / bh1;
- }
-
- // Finds the maximun possible width between 2 2D layers of different formats
- static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params,
- const u32 src_level, const u32 dst_level) {
- const u32 bw1 = src_params.GetDefaultBlockWidth();
- const u32 bw2 = dst_params.GetDefaultBlockWidth();
- const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1;
- const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2;
- return std::min(t_src_width, t_dst_width);
- }
-
- // Finds the maximun possible height between 2 2D layers of different formats
- static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params,
- const u32 src_level, const u32 dst_level) {
- const u32 bh1 = src_params.GetDefaultBlockHeight();
- const u32 bh2 = dst_params.GetDefaultBlockHeight();
- const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1;
- const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2;
- return std::min(t_src_height, t_dst_height);
- }
-
- bool is_tiled;
- bool srgb_conversion;
- bool is_layered;
- u32 block_width;
- u32 block_height;
- u32 block_depth;
- u32 tile_width_spacing;
- u32 width;
- u32 height;
- u32 depth;
- u32 pitch;
- u32 num_levels;
- u32 emulated_levels;
- VideoCore::Surface::PixelFormat pixel_format;
- VideoCore::Surface::SurfaceType type;
- VideoCore::Surface::SurfaceTarget target;
-
-private:
- /// Returns the size of a given mipmap level inside a layer.
- std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;
-
- /// Returns the size of all mipmap levels and aligns as needed.
- std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
- return GetLayerSize(as_host_size, uncompressed) *
- (layer_only ? 1U : (is_layered ? depth : 1U));
- }
-
- /// Returns the size of a layer
- std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
-
- /// Returns true if these parameters are from a layered surface.
- bool IsLayered() const;
-};
-
-} // namespace VideoCommon
-
-namespace std {
-
-template <>
-struct hash<VideoCommon::SurfaceParams> {
- std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
deleted file mode 100644
index 6b5f5984b..000000000
--- a/src/video_core/texture_cache/surface_view.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <tuple>
-
-#include "common/common_types.h"
-#include "video_core/texture_cache/surface_view.h"
-
-namespace VideoCommon {
-
-std::size_t ViewParams::Hash() const {
- return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^
- (static_cast<std::size_t>(base_level) << 24) ^
- (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
-}
-
-bool ViewParams::operator==(const ViewParams& rhs) const {
- return std::tie(base_layer, num_layers, base_level, num_levels, target) ==
- std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
-}
-
-bool ViewParams::operator!=(const ViewParams& rhs) const {
- return !operator==(rhs);
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
deleted file mode 100644
index 199f72732..000000000
--- a/src/video_core/texture_cache/surface_view.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <functional>
-
-#include "common/common_types.h"
-#include "video_core/surface.h"
-#include "video_core/texture_cache/surface_params.h"
-
-namespace VideoCommon {
-
-struct ViewParams {
- constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target_, u32 base_layer_,
- u32 num_layers_, u32 base_level_, u32 num_levels_)
- : target{target_}, base_layer{base_layer_}, num_layers{num_layers_},
- base_level{base_level_}, num_levels{num_levels_} {}
-
- std::size_t Hash() const;
-
- bool operator==(const ViewParams& rhs) const;
- bool operator!=(const ViewParams& rhs) const;
-
- bool IsLayered() const {
- switch (target) {
- case VideoCore::Surface::SurfaceTarget::Texture1DArray:
- case VideoCore::Surface::SurfaceTarget::Texture2DArray:
- case VideoCore::Surface::SurfaceTarget::TextureCubemap:
- case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
- return true;
- default:
- return false;
- }
- }
-
- VideoCore::Surface::SurfaceTarget target{};
- u32 base_layer{};
- u32 num_layers{};
- u32 base_level{};
- u32 num_levels{};
-};
-
-class ViewBase {
-public:
- constexpr explicit ViewBase(const ViewParams& view_params) : params{view_params} {}
-
- constexpr const ViewParams& GetViewParams() const {
- return params;
- }
-
-protected:
- ViewParams params;
-};
-
-} // namespace VideoCommon
-
-namespace std {
-
-template <>
-struct hash<VideoCommon::ViewParams> {
- std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 581d8dd5b..968059842 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,1298 +6,1449 @@
#include <algorithm>
#include <array>
-#include <list>
+#include <bit>
#include <memory>
#include <mutex>
-#include <set>
-#include <tuple>
+#include <optional>
+#include <span>
+#include <type_traits>
#include <unordered_map>
+#include <utility>
#include <vector>
#include <boost/container/small_vector.hpp>
-#include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range.hpp>
-#include "common/assert.h"
+#include "common/alignment.h"
+#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "common/math_util.h"
-#include "core/core.h"
-#include "core/memory.h"
-#include "core/settings.h"
+#include "common/logging/log.h"
#include "video_core/compatible_formats.h"
+#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
-#include "video_core/texture_cache/copy_params.h"
+#include "video_core/texture_cache/descriptor_table.h"
#include "video_core/texture_cache/format_lookup_table.h"
-#include "video_core/texture_cache/surface_base.h"
-#include "video_core/texture_cache/surface_params.h"
-#include "video_core/texture_cache/surface_view.h"
-
-namespace Tegra::Texture {
-struct FullTextureInfo;
-}
-
-namespace VideoCore {
-class RasterizerInterface;
-}
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/image_base.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_base.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/render_targets.h"
+#include "video_core/texture_cache/samples_helper.h"
+#include "video_core/texture_cache/slot_vector.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/texture.h"
namespace VideoCommon {
-using VideoCore::Surface::FormatCompatibility;
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+using VideoCore::Surface::GetFormatType;
+using VideoCore::Surface::IsCopyCompatible;
using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::SurfaceTarget;
-using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using VideoCore::Surface::SurfaceType;
-template <typename TSurface, typename TView>
+template <class P>
class TextureCache {
- using VectorSurface = boost::container::small_vector<TSurface, 1>;
+ /// Address shift for caching images into a hash table
+ static constexpr u64 PAGE_SHIFT = 20;
+
+ /// Enables debugging features to the texture cache
+ static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
+ /// Implement blits as copies between framebuffers
+ static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
+ /// True when some copies have to be emulated
+ static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
+
+ /// Image view ID for null descriptors
+ static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
+ /// Sampler ID for bugged sampler ids
+ static constexpr SamplerId NULL_SAMPLER_ID{0};
+
+ using Runtime = typename P::Runtime;
+ using Image = typename P::Image;
+ using ImageAlloc = typename P::ImageAlloc;
+ using ImageView = typename P::ImageView;
+ using Sampler = typename P::Sampler;
+ using Framebuffer = typename P::Framebuffer;
+
+ struct BlitImages {
+ ImageId dst_id;
+ ImageId src_id;
+ PixelFormat dst_format;
+ PixelFormat src_format;
+ };
+
+ template <typename T>
+ struct IdentityHash {
+ [[nodiscard]] size_t operator()(T value) const noexcept {
+ return static_cast<size_t>(value);
+ }
+ };
public:
- void InvalidateRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
+ Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
- for (const auto& surface : GetSurfacesInRegion(addr, size)) {
- Unregister(surface);
- }
- }
+ /// Notify the cache that a new frame has been queued
+ void TickFrame();
- void OnCPUWrite(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ /// Return an unique mutually exclusive lock for the cache
+ [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
- for (const auto& surface : GetSurfacesInRegion(addr, size)) {
- if (surface->IsMemoryMarked()) {
- UnmarkMemory(surface);
- surface->SetSyncPending(true);
- marked_for_unregister.emplace_back(surface);
- }
- }
- }
+ /// Return a constant reference to the given image view id
+ [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
- void SyncGuestHost() {
- std::lock_guard lock{mutex};
+ /// Return a reference to the given image view id
+ [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
- for (const auto& surface : marked_for_unregister) {
- if (surface->IsRegistered()) {
- surface->SetSyncPending(false);
- Unregister(surface);
- }
- }
- marked_for_unregister.clear();
- }
+ /// Fill image_view_ids with the graphics images in indices
+ void FillGraphicsImageViews(std::span<const u32> indices,
+ std::span<ImageViewId> image_view_ids);
- /**
- * Guarantees that rendertargets don't unregister themselves if the
- * collide. Protection is currently only done on 3D slices.
- */
- void GuardRenderTargets(bool new_guard) {
- guard_render_targets = new_guard;
- }
+ /// Fill image_view_ids with the compute images in indices
+ void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
- void GuardSamplers(bool new_guard) {
- guard_samplers = new_guard;
- }
+ /// Get the sampler from the graphics descriptor table in the specified index
+ Sampler* GetGraphicsSampler(u32 index);
- void FlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ /// Get the sampler from the compute descriptor table in the specified index
+ Sampler* GetComputeSampler(u32 index);
- auto surfaces = GetSurfacesInRegion(addr, size);
- if (surfaces.empty()) {
- return;
- }
- std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
- return a->GetModificationTick() < b->GetModificationTick();
- });
- for (const auto& surface : surfaces) {
- mutex.unlock();
- FlushSurface(surface);
- mutex.lock();
- }
- }
+ /// Refresh the state for graphics image view and sampler descriptors
+ void SynchronizeGraphicsDescriptors();
- bool MustFlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ /// Refresh the state for compute image view and sampler descriptors
+ void SynchronizeComputeDescriptors();
- const auto surfaces = GetSurfacesInRegion(addr, size);
- return std::any_of(surfaces.cbegin(), surfaces.cend(),
- [](const TSurface& surface) { return surface->IsModified(); });
- }
+ /// Update bound render targets and upload memory if necessary
+ /// @param is_clear True when the render targets are being used for clears
+ void UpdateRenderTargets(bool is_clear);
- TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Sampler& entry) {
- std::lock_guard lock{mutex};
- const auto gpu_addr{tic.Address()};
- if (!gpu_addr) {
- return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
- }
+ /// Find a framebuffer with the currently bound render targets
+ /// UpdateRenderTargets should be called before this
+ Framebuffer* GetFramebuffer();
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
- }
+ /// Mark images in a range as modified from the CPU
+ void WriteMemory(VAddr cpu_addr, size_t size);
- if (!IsTypeCompatible(tic.texture_type, entry)) {
- return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
- }
+ /// Download contents of host images to guest memory in a region
+ void DownloadMemory(VAddr cpu_addr, size_t size);
- const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
- const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
- if (guard_samplers) {
- sampled_textures.push_back(surface);
- }
- return view;
- }
+ /// Remove images in a region
+ void UnmapMemory(VAddr cpu_addr, size_t size);
- TView GetImageSurface(const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Image& entry) {
- std::lock_guard lock{mutex};
- const auto gpu_addr{tic.Address()};
- if (!gpu_addr) {
- return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
- }
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
- }
- const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
- const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
- if (guard_samplers) {
- sampled_textures.push_back(surface);
- }
- return view;
- }
+ /// Blit an image with the given parameters
+ void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+ const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Config& copy);
- bool TextureBarrier() {
- const bool any_rt =
- std::any_of(sampled_textures.begin(), sampled_textures.end(),
- [](const auto& surface) { return surface->IsRenderTarget(); });
- sampled_textures.clear();
- return any_rt;
- }
+ /// Invalidate the contents of the color buffer index
+ /// These contents become unspecified, the cache can assume aggressive optimizations.
+ void InvalidateColorBuffer(size_t index);
- TView GetDepthBufferSurface(bool preserve_contents) {
- std::lock_guard lock{mutex};
- auto& dirty = maxwell3d.dirty;
- if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
- return depth_buffer.view;
- }
- dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
+ /// Invalidate the contents of the depth buffer
+ /// These contents become unspecified, the cache can assume aggressive optimizations.
+ void InvalidateDepthBuffer();
- const auto& regs{maxwell3d.regs};
- const auto gpu_addr{regs.zeta.Address()};
- if (!gpu_addr || !regs.zeta_enable) {
- SetEmptyDepthBuffer();
- return {};
- }
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- SetEmptyDepthBuffer();
- return {};
- }
- const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
- auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
- if (depth_buffer.target)
- depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
- depth_buffer.target = surface_view.first;
- depth_buffer.view = surface_view.second;
- if (depth_buffer.target)
- depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
- return surface_view.second;
- }
-
- TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
- std::lock_guard lock{mutex};
- ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
- if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
- return render_targets[index].view;
- }
- maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
+ /// Try to find a cached image view in the given CPU address
+ [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
- const auto& regs{maxwell3d.regs};
- if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
- regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
- SetEmptyColorBuffer(index);
- return {};
- }
+ /// Return true when there are uncommitted images to be downloaded
+ [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
- const auto& config{regs.rt[index]};
- const auto gpu_addr{config.Address()};
- if (!gpu_addr) {
- SetEmptyColorBuffer(index);
- return {};
- }
+ /// Return true when the caller should wait for async downloads
+ [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- SetEmptyColorBuffer(index);
- return {};
- }
+ /// Commit asynchronous downloads
+ void CommitAsyncFlushes();
+
+ /// Pop asynchronous downloads
+ void PopAsyncFlushes();
+
+ /// Return true when a CPU region is modified from the GPU
+ [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
- auto surface_view =
- GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index),
- preserve_contents, true);
- if (render_targets[index].target) {
- auto& surface = render_targets[index].target;
- surface->MarkAsRenderTarget(false, NO_RT);
- const auto& cr_params = surface->GetSurfaceParams();
- if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- AsyncFlushSurface(surface);
+private:
+ /// Iterate over all page indices in a range
+ template <typename Func>
+ static void ForEachPage(VAddr addr, size_t size, Func&& func) {
+ static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
+ const u64 page_end = (addr + size - 1) >> PAGE_SHIFT;
+ for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+ if constexpr (RETURNS_BOOL) {
+ if (func(page)) {
+ break;
+ }
+ } else {
+ func(page);
}
}
- render_targets[index].target = surface_view.first;
- render_targets[index].view = surface_view.second;
- if (render_targets[index].target)
- render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
- return surface_view.second;
}
- void MarkColorBufferInUse(std::size_t index) {
- if (auto& render_target = render_targets[index].target) {
- render_target->MarkAsModified(true, Tick());
- }
- }
+ /// Fills image_view_ids in the image views in indices
+ void FillImageViews(DescriptorTable<TICEntry>& table,
+ std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
+ std::span<ImageViewId> image_view_ids);
- void MarkDepthBufferInUse() {
- if (depth_buffer.target) {
- depth_buffer.target->MarkAsModified(true, Tick());
- }
- }
+ /// Find or create an image view in the guest descriptor table
+ ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
+ std::span<ImageViewId> cached_image_view_ids, u32 index);
- void SetEmptyDepthBuffer() {
- if (depth_buffer.target == nullptr) {
- return;
- }
- depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
- depth_buffer.target = nullptr;
- depth_buffer.view = nullptr;
- }
+ /// Find or create a framebuffer with the given render target parameters
+ FramebufferId GetFramebufferId(const RenderTargets& key);
- void SetEmptyColorBuffer(std::size_t index) {
- if (render_targets[index].target == nullptr) {
- return;
- }
- render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
- render_targets[index].target = nullptr;
- render_targets[index].view = nullptr;
- }
-
- void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
- const Tegra::Engines::Fermi2D::Config& copy_config) {
- std::lock_guard lock{mutex};
- SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
- SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
- const GPUVAddr src_gpu_addr = src_config.Address();
- const GPUVAddr dst_gpu_addr = dst_config.Address();
- DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
-
- const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
- const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
- std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
- TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
- ImageBlit(src_surface, dst_surface.second, copy_config);
- dst_surface.first->MarkAsModified(true, Tick());
- }
-
- TSurface TryFindFramebufferSurface(VAddr addr) const {
- if (!addr) {
- return nullptr;
- }
- const VAddr page = addr >> registry_page_bits;
- const auto it = registry.find(page);
- if (it == registry.end()) {
- return nullptr;
- }
- const auto& list = it->second;
- const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
- return surface->GetCpuAddr() == addr;
- });
- return found != list.end() ? *found : nullptr;
- }
+ /// Refresh the contents (pixel data) of an image
+ void RefreshContents(Image& image);
- u64 Tick() {
- return ++ticks;
- }
+ /// Upload data from guest to an image
+ template <typename MapBuffer>
+ void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset);
- void CommitAsyncFlushes() {
- committed_flushes.push_back(uncommitted_flushes);
- uncommitted_flushes.reset();
- }
+ /// Find or create an image view from a guest descriptor
+ [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
- bool HasUncommittedFlushes() const {
- return uncommitted_flushes != nullptr;
- }
+ /// Create a new image view from a guest descriptor
+ [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
- bool ShouldWaitAsyncFlushes() const {
- return !committed_flushes.empty() && committed_flushes.front() != nullptr;
- }
+ /// Find or create an image from the given parameters
+ [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options = RelaxedOptions{});
- void PopAsyncFlushes() {
- if (committed_flushes.empty()) {
- return;
- }
- auto& flush_list = committed_flushes.front();
- if (!flush_list) {
- committed_flushes.pop_front();
- return;
- }
- for (TSurface& surface : *flush_list) {
- FlushSurface(surface);
- }
- committed_flushes.pop_front();
- }
+ /// Find an image from the given parameters
+ [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options);
-protected:
- explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
- bool is_astc_supported_)
- : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
- gpu_memory{gpu_memory_} {
- for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- SetEmptyColorBuffer(i);
- }
+ /// Create an image from the given parameters
+ [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options);
- SetEmptyDepthBuffer();
- staging_cache.SetSize(2);
+ /// Create a new image and join perfectly matching existing images
+ /// Remove joined images from the cache
+ [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
- const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
- siblings_table[static_cast<std::size_t>(a)] = b;
- siblings_table[static_cast<std::size_t>(b)] = a;
- };
- std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
- make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
- make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
- make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);
+ /// Return a blit image pair from the given guest blit parameters
+ [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
+ const Tegra::Engines::Fermi2D::Surface& src);
- sampled_textures.reserve(64);
- }
+ /// Find or create a sampler from a guest descriptor sampler
+ [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
- ~TextureCache() = default;
+ /// Find or create an image view for the given color buffer index
+ [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
- virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;
+ /// Find or create an image view for the depth buffer
+ [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
- virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
- const CopyParams& copy_params) = 0;
+ /// Find or create a view for a render target with the given image parameters
+ [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
+ bool is_clear);
- virtual void ImageBlit(TView& src_view, TView& dst_view,
- const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
+ /// Iterates over all the images in a region calling func
+ template <typename Func>
+ void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
- // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
- // and reading it from a separate buffer.
- virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
+ /// Find or create an image view in the given image with the passed parameters
+ [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
- void ManageRenderTargetUnregister(TSurface& surface) {
- auto& dirty = maxwell3d.dirty;
- const u32 index = surface->GetRenderTarget();
- if (index == DEPTH_RT) {
- dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
- } else {
- dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true;
- }
- dirty.flags[VideoCommon::Dirty::RenderTargets] = true;
+ /// Register image in the page table
+ void RegisterImage(ImageId image);
+
+ /// Unregister image from the page table
+ void UnregisterImage(ImageId image);
+
+ /// Track CPU reads and writes for image
+ void TrackImage(ImageBase& image);
+
+ /// Stop tracking CPU reads and writes for image
+ void UntrackImage(ImageBase& image);
+
+ /// Delete image from the cache
+ void DeleteImage(ImageId image);
+
+ /// Remove image views references from the cache
+ void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
+
+ /// Remove framebuffers using the given image views from the cache
+ void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
+
+ /// Mark an image as modified from the GPU
+ void MarkModification(ImageBase& image) noexcept;
+
+ /// Synchronize image aliases, copying data if needed
+ void SynchronizeAliases(ImageId image_id);
+
+ /// Prepare an image to be used
+ void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
+
+ /// Prepare an image view to be used
+ void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
+
+ /// Execute copies from one image to the other, even if they are incompatible
+ void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
+
+ /// Bind an image view as render target, downloading resources preemtively if needed
+ void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
+
+ /// Create a render target from a given image and image view parameters
+ [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
+ ImageId, const ImageViewInfo& view_info);
+
+ /// Returns true if the current clear parameters clear the whole image of a given image view
+ [[nodiscard]] bool IsFullClear(ImageViewId id);
+
+ Runtime& runtime;
+ VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+
+ DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
+ DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
+ std::vector<SamplerId> graphics_sampler_ids;
+ std::vector<ImageViewId> graphics_image_view_ids;
+
+ DescriptorTable<TICEntry> compute_image_table{gpu_memory};
+ DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
+ std::vector<SamplerId> compute_sampler_ids;
+ std::vector<ImageViewId> compute_image_view_ids;
+
+ RenderTargets render_targets;
+
+ std::mutex mutex;
+
+ std::unordered_map<TICEntry, ImageViewId> image_views;
+ std::unordered_map<TSCEntry, SamplerId> samplers;
+ std::unordered_map<RenderTargets, FramebufferId> framebuffers;
+
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
+
+ bool has_deleted_images = false;
+
+ SlotVector<Image> slot_images;
+ SlotVector<ImageView> slot_image_views;
+ SlotVector<ImageAlloc> slot_image_allocs;
+ SlotVector<Sampler> slot_samplers;
+ SlotVector<Framebuffer> slot_framebuffers;
+
+ // TODO: This data structure is not optimal and it should be reworked
+ std::vector<ImageId> uncommitted_downloads;
+ std::queue<std::vector<ImageId>> committed_downloads;
+
+ static constexpr size_t TICKS_TO_DESTROY = 6;
+ DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
+ DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
+ DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
+
+ std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
+
+ u64 modification_tick = 0;
+ u64 frame_tick = 0;
+};
+
+template <class P>
+TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_)
+ : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
+ // Configure null sampler
+ TSCEntry sampler_descriptor{};
+ sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
+ sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
+ sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
+ sampler_descriptor.cubemap_anisotropy.Assign(1);
+
+ // Make sure the first index is reserved for the null resources
+ // This way the null resource becomes a compile time constant
+ void(slot_image_views.insert(runtime, NullImageParams{}));
+ void(slot_samplers.insert(runtime, sampler_descriptor));
+}
+
+template <class P>
+void TextureCache<P>::TickFrame() {
+ // Tick sentenced resources in this order to ensure they are destroyed in the right order
+ sentenced_images.Tick();
+ sentenced_framebuffers.Tick();
+ sentenced_image_view.Tick();
+ ++frame_tick;
+}
+
+template <class P>
+std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
+ return std::unique_lock{mutex};
+}
+
+template <class P>
+const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
+ return slot_image_views[id];
+}
+
+template <class P>
+typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
+ return slot_image_views[id];
+}
+
+template <class P>
+void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
+ std::span<ImageViewId> image_view_ids) {
+ FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
+}
+
+template <class P>
+void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
+ std::span<ImageViewId> image_view_ids) {
+ FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
+}
+
+template <class P>
+typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
+ [[unlikely]] if (index > graphics_sampler_table.Limit()) {
+ LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+ return &slot_samplers[NULL_SAMPLER_ID];
+ }
+ const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
+ SamplerId& id = graphics_sampler_ids[index];
+ [[unlikely]] if (is_new) {
+ id = FindSampler(descriptor);
}
+ return &slot_samplers[id];
+}
- void Register(TSurface surface) {
- const GPUVAddr gpu_addr = surface->GetGpuAddr();
- const std::size_t size = surface->GetSizeInBytes();
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
- gpu_addr);
- return;
- }
- surface->SetCpuAddr(*cpu_addr);
- RegisterInnerCache(surface);
- surface->MarkAsRegistered(true);
- surface->SetMemoryMarked(true);
- rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
+template <class P>
+typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
+ [[unlikely]] if (index > compute_sampler_table.Limit()) {
+ LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+ return &slot_samplers[NULL_SAMPLER_ID];
+ }
+ const auto [descriptor, is_new] = compute_sampler_table.Read(index);
+ SamplerId& id = compute_sampler_ids[index];
+ [[unlikely]] if (is_new) {
+ id = FindSampler(descriptor);
}
+ return &slot_samplers[id];
+}
- void UnmarkMemory(TSurface surface) {
- if (!surface->IsMemoryMarked()) {
- return;
- }
- const std::size_t size = surface->GetSizeInBytes();
- const VAddr cpu_addr = surface->GetCpuAddr();
- rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
- surface->SetMemoryMarked(false);
+template <class P>
+void TextureCache<P>::SynchronizeGraphicsDescriptors() {
+ using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
+ const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+ const u32 tic_limit = maxwell3d.regs.tic.limit;
+ const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
+ if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
+ graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
}
+ if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
+ graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+ }
+}
- void Unregister(TSurface surface) {
- if (guard_render_targets && surface->IsProtected()) {
- return;
- }
- if (!guard_render_targets && surface->IsRenderTarget()) {
- ManageRenderTargetUnregister(surface);
- }
- UnmarkMemory(surface);
- if (surface->IsSyncPending()) {
- marked_for_unregister.remove(surface);
- surface->SetSyncPending(false);
- }
- UnregisterInnerCache(surface);
- surface->MarkAsRegistered(false);
- ReserveSurface(surface->GetSurfaceParams(), surface);
+template <class P>
+void TextureCache<P>::SynchronizeComputeDescriptors() {
+ const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
+ const u32 tic_limit = kepler_compute.regs.tic.limit;
+ const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
+ const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
+ if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+ compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
}
+ if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
+ compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+ }
+}
- TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
- if (const auto surface = TryGetReservedSurface(params); surface) {
- surface->SetGpuAddr(gpu_addr);
- return surface;
- }
- // No reserved surface available, create a new one and reserve it
- auto new_surface{CreateSurface(gpu_addr, params)};
- return new_surface;
+template <class P>
+void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
+ using namespace VideoCommon::Dirty;
+ auto& flags = maxwell3d.dirty.flags;
+ if (!flags[Dirty::RenderTargets]) {
+ return;
}
+ flags[Dirty::RenderTargets] = false;
- const bool is_astc_supported;
+    // Render target control is used on all render targets, so force lookups when it is updated
+ const bool force = flags[Dirty::RenderTargetControl];
+ flags[Dirty::RenderTargetControl] = false;
-private:
- enum class RecycleStrategy : u32 {
- Ignore = 0,
- Flush = 1,
- BufferCopy = 3,
- };
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+ if (flags[Dirty::ColorBuffer0 + index] || force) {
+ flags[Dirty::ColorBuffer0 + index] = false;
+ BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+ }
+ PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
+ }
+ if (flags[Dirty::ZetaBuffer] || force) {
+ flags[Dirty::ZetaBuffer] = false;
+ BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+ }
+ const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
+ PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
- enum class DeductionType : u32 {
- DeductionComplete,
- DeductionIncomplete,
- DeductionFailed,
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+ }
+ render_targets.size = Extent2D{
+ maxwell3d.regs.render_area.width,
+ maxwell3d.regs.render_area.height,
};
+}
- struct Deduction {
- DeductionType type{DeductionType::DeductionFailed};
- TSurface surface{};
+template <class P>
+typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
+ return &slot_framebuffers[GetFramebufferId(render_targets)];
+}
- bool Failed() const {
- return type == DeductionType::DeductionFailed;
- }
+template <class P>
+void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
+ std::span<ImageViewId> cached_image_view_ids,
+ std::span<const u32> indices,
+ std::span<ImageViewId> image_view_ids) {
+ ASSERT(indices.size() <= image_view_ids.size());
+ do {
+ has_deleted_images = false;
+ std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
+ return VisitImageView(table, cached_image_view_ids, index);
+ });
+ } while (has_deleted_images);
+}
- bool Incomplete() const {
- return type == DeductionType::DeductionIncomplete;
- }
+template <class P>
+ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
+ std::span<ImageViewId> cached_image_view_ids,
+ u32 index) {
+ if (index > table.Limit()) {
+ LOG_ERROR(HW_GPU, "Invalid image view index={}", index);
+ return NULL_IMAGE_VIEW_ID;
+ }
+ const auto [descriptor, is_new] = table.Read(index);
+ ImageViewId& image_view_id = cached_image_view_ids[index];
+ if (is_new) {
+ image_view_id = FindImageView(descriptor);
+ }
+ if (image_view_id != NULL_IMAGE_VIEW_ID) {
+ PrepareImageView(image_view_id, false, false);
+ }
+ return image_view_id;
+}
- bool IsDepth() const {
- return surface->GetSurfaceParams().IsPixelFormatZeta();
- }
- };
+template <class P>
+FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
+ const auto [pair, is_new] = framebuffers.try_emplace(key);
+ FramebufferId& framebuffer_id = pair->second;
+ if (!is_new) {
+ return framebuffer_id;
+ }
+ std::array<ImageView*, NUM_RT> color_buffers;
+ std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
+ [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
+ ImageView* const depth_buffer =
+ key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
+ framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
+ return framebuffer_id;
+}
- /**
- * Takes care of selecting a proper strategy to deal with a texture recycle.
- *
- * @param overlaps The overlapping surfaces registered in the cache.
- * @param params The parameters on the new surface.
- * @param gpu_addr The starting address of the new surface.
- * @param untopological Indicates to the recycler that the texture has no way
- * to match the overlaps due to topological reasons.
- **/
- RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
- const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
- if (Settings::IsGPULevelExtreme()) {
- return RecycleStrategy::Flush;
- }
- // 3D Textures decision
- if (params.target == SurfaceTarget::Texture3D) {
- return RecycleStrategy::Flush;
- }
- for (const auto& s : overlaps) {
- const auto& s_params = s->GetSurfaceParams();
- if (s_params.target == SurfaceTarget::Texture3D) {
- return RecycleStrategy::Flush;
- }
- }
- // Untopological decision
- if (untopological == MatchTopologyResult::CompressUnmatch) {
- return RecycleStrategy::Flush;
- }
- if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
- return RecycleStrategy::Flush;
- }
- return RecycleStrategy::Ignore;
- }
-
- /**
- * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented
- * strategies: Ignore and Flush.
- *
- * - Ignore: Just unregisters all the overlaps and loads the new texture.
- * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
- *
- * @param overlaps The overlapping surfaces registered in the cache.
- * @param params The parameters for the new surface.
- * @param gpu_addr The starting address of the new surface.
- * @param preserve_contents Indicates that the new surface should be loaded from memory or left
- * blank.
- * @param untopological Indicates to the recycler that the texture has no way to match the
- * overlaps due to topological reasons.
- **/
- std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
- const GPUVAddr gpu_addr, const bool preserve_contents,
- const MatchTopologyResult untopological) {
- const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
- for (auto& surface : overlaps) {
- Unregister(surface);
- }
- switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
- case RecycleStrategy::Ignore: {
- return InitializeSurface(gpu_addr, params, do_load);
- }
- case RecycleStrategy::Flush: {
- std::sort(overlaps.begin(), overlaps.end(),
- [](const TSurface& a, const TSurface& b) -> bool {
- return a->GetModificationTick() < b->GetModificationTick();
- });
- for (auto& surface : overlaps) {
- FlushSurface(surface);
- }
- return InitializeSurface(gpu_addr, params, preserve_contents);
+template <class P>
+void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
+ ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
+ if (True(image.flags & ImageFlagBits::CpuModified)) {
+ return;
}
- case RecycleStrategy::BufferCopy: {
- auto new_surface = GetUncachedSurface(gpu_addr, params);
- BufferCopy(overlaps[0], new_surface);
- return {new_surface, new_surface->GetMainView()};
+ image.flags |= ImageFlagBits::CpuModified;
+ UntrackImage(image);
+ });
+}
+
+template <class P>
+void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
+ std::vector<ImageId> images;
+ ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
+ // Skip images that were not modified from the GPU
+ if (False(image.flags & ImageFlagBits::GpuModified)) {
+ return;
}
- default: {
- UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
- return InitializeSurface(gpu_addr, params, do_load);
+        // Skip images that *are* modified from the CPU
+ // We don't want to write sensitive data from the guest
+ if (True(image.flags & ImageFlagBits::CpuModified)) {
+ return;
}
+ if (image.info.num_samples > 1) {
+ LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+ return;
}
+ image.flags &= ~ImageFlagBits::GpuModified;
+ images.push_back(image_id);
+ });
+ if (images.empty()) {
+ return;
+ }
+ std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
+ return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
+ });
+ for (const ImageId image_id : images) {
+ Image& image = slot_images[image_id];
+ auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes);
+ const auto copies = FullDownloadCopies(image.info);
+ image.DownloadMemory(map, 0, copies);
+ runtime.Finish();
+ SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());
}
+}
- /**
- * Takes a single surface and recreates into another that may differ in
- * format, target or width alignment.
- *
- * @param current_surface The registered surface in the cache which we want to convert.
- * @param params The new surface params which we'll use to recreate the surface.
- * @param is_render Whether or not the surface is a render target.
- **/
- std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
- bool is_render) {
- const auto gpu_addr = current_surface->GetGpuAddr();
- const auto& cr_params = current_surface->GetSurfaceParams();
- TSurface new_surface;
- if (cr_params.pixel_format != params.pixel_format && !is_render &&
- GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
- SurfaceParams new_params = params;
- new_params.pixel_format = cr_params.pixel_format;
- new_params.type = cr_params.type;
- new_surface = GetUncachedSurface(gpu_addr, new_params);
- } else {
- new_surface = GetUncachedSurface(gpu_addr, params);
- }
- const SurfaceParams& final_params = new_surface->GetSurfaceParams();
- if (cr_params.type != final_params.type) {
- if (Settings::IsGPULevelExtreme()) {
- BufferCopy(current_surface, new_surface);
- }
- } else {
- std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
- for (auto& brick : bricks) {
- TryCopyImage(current_surface, new_surface, brick);
- }
- }
- Unregister(current_surface);
- Register(new_surface);
- new_surface->MarkAsModified(current_surface->IsModified(), Tick());
- return {new_surface, new_surface->GetMainView()};
- }
-
- /**
- * Takes a single surface and checks with the new surface's params if it's an exact
- * match, we return the main view of the registered surface. If its formats don't
- * match, we rebuild the surface. We call this last method a `Mirage`. If formats
- * match but the targets don't, we create an overview View of the registered surface.
- *
- * @param current_surface The registered surface in the cache which we want to convert.
- * @param params The new surface params which we want to check.
- * @param is_render Whether or not the surface is a render target.
- **/
- std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
- const SurfaceParams& params, bool is_render) {
- const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
- const bool matches_target = current_surface->MatchTarget(params.target);
- const auto match_check = [&]() -> std::pair<TSurface, TView> {
- if (matches_target) {
- return {current_surface, current_surface->GetMainView()};
- }
- return {current_surface, current_surface->EmplaceOverview(params)};
- };
- if (!is_mirage) {
- return match_check();
- }
- if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
- return match_check();
- }
- return RebuildSurface(current_surface, params, is_render);
- }
-
- /**
- * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate
- * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
- * of the new surface, if they all match we end up recreating a surface for them,
- * else we return nothing.
- *
- * @param overlaps The overlapping surfaces registered in the cache.
- * @param params The parameters on the new surface.
- * @param gpu_addr The starting address of the new surface.
- **/
- std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
- const SurfaceParams& params,
- GPUVAddr gpu_addr) {
- if (params.target == SurfaceTarget::Texture3D) {
- return std::nullopt;
- }
- const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
- TSurface new_surface = GetUncachedSurface(gpu_addr, params);
+template <class P>
+void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
+ std::vector<ImageId> deleted_images;
+ ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
+ for (const ImageId id : deleted_images) {
+ Image& image = slot_images[id];
+ if (True(image.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(image);
+ }
+ UnregisterImage(id);
+ DeleteImage(id);
+ }
+}
- if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
- LoadSurface(new_surface);
- for (const auto& surface : overlaps) {
- Unregister(surface);
- }
- Register(new_surface);
- return {{new_surface, new_surface->GetMainView()}};
- }
+template <class P>
+void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+ const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Config& copy) {
+ const BlitImages images = GetBlitImages(dst, src);
+ const ImageId dst_id = images.dst_id;
+ const ImageId src_id = images.src_id;
+ PrepareImage(src_id, false, false);
+ PrepareImage(dst_id, true, false);
+
+ ImageBase& dst_image = slot_images[dst_id];
+ const ImageBase& src_image = slot_images[src_id];
+
+ // TODO: Deduplicate
+ const std::optional dst_base = dst_image.TryFindBase(dst.Address());
+ const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
+ const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
+ const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+ const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
+ const std::array src_region{
+ Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
+ Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
+ };
- std::size_t passed_tests = 0;
- for (auto& surface : overlaps) {
- const SurfaceParams& src_params = surface->GetSurfaceParams();
- const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
- if (!mipmap_layer) {
- continue;
- }
- const auto [base_layer, base_mipmap] = *mipmap_layer;
- if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
- continue;
- }
- ++passed_tests;
-
- // Copy all mipmaps and layers
- const u32 block_width = params.GetDefaultBlockWidth();
- const u32 block_height = params.GetDefaultBlockHeight();
- for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
- const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
- const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
- if (width < block_width || height < block_height) {
- // Current APIs forbid copying small compressed textures, avoid errors
- break;
- }
- const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
- src_params.depth);
- TryCopyImage(surface, new_surface, copy_params);
- }
- }
- if (passed_tests == 0) {
- return std::nullopt;
- }
- if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
- // In Accurate GPU all tests should pass, else we recycle
- return std::nullopt;
- }
+ const std::optional src_base = src_image.TryFindBase(src.Address());
+ const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
+ const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
+ const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
+ const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+ const std::array dst_region{
+ Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
+ Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
+ };
- const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
- for (const auto& surface : overlaps) {
- Unregister(surface);
- }
+ // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
+ Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
+ if constexpr (FRAMEBUFFER_BLITS) {
+ // OpenGL blits from framebuffers, not images
+ Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
+ runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
+ copy.filter, copy.operation);
+ } else {
+ // Vulkan can blit images, but it lacks format reinterpretations
+ // Provide a framebuffer in case it's necessary
+ ImageView& dst_view = slot_image_views[dst_view_id];
+ ImageView& src_view = slot_image_views[src_view_id];
+ runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
+ copy.operation);
+ }
+}
- new_surface->MarkAsModified(modified, Tick());
- Register(new_surface);
- return {{new_surface, new_surface->GetMainView()}};
- }
-
- /**
- * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D
- * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of
- * the HLE methods.
- *
- * @param overlaps The overlapping surfaces registered in the cache.
- * @param params The parameters on the new surface.
- * @param gpu_addr The starting address of the new surface.
- * @param cpu_addr The starting address of the new surface on physical memory.
- * @param preserve_contents Indicates that the new surface should be loaded from memory or
- * left blank.
- */
- std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
- const SurfaceParams& params,
- GPUVAddr gpu_addr, VAddr cpu_addr,
- bool preserve_contents) {
- if (params.target != SurfaceTarget::Texture3D) {
- for (const auto& surface : overlaps) {
- if (!surface->MatchTarget(params.target)) {
- if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
- if (Settings::IsGPULevelExtreme()) {
- return std::nullopt;
- }
- Unregister(surface);
- return InitializeSurface(gpu_addr, params, preserve_contents);
- }
- return std::nullopt;
- }
- if (surface->GetCpuAddr() != cpu_addr) {
- continue;
- }
- if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
- return std::make_pair(surface, surface->GetMainView());
- }
- }
- return InitializeSurface(gpu_addr, params, preserve_contents);
- }
+template <class P>
+void TextureCache<P>::InvalidateColorBuffer(size_t index) {
+ ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+ color_buffer_id = FindColorBuffer(index, false);
+ if (!color_buffer_id) {
+ LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
+ return;
+ }
+ // When invalidating a color buffer, the old contents are no longer relevant
+ ImageView& color_buffer = slot_image_views[color_buffer_id];
+ Image& image = slot_images[color_buffer.image_id];
+ image.flags &= ~ImageFlagBits::CpuModified;
+ image.flags &= ~ImageFlagBits::GpuModified;
- if (params.num_levels > 1) {
- // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
- return std::nullopt;
- }
+ runtime.InvalidateColorBuffer(color_buffer, index);
+}
- if (overlaps.size() == 1) {
- const auto& surface = overlaps[0];
- const SurfaceParams& overlap_params = surface->GetSurfaceParams();
- // Don't attempt to render to textures with more than one level for now
- // The texture has to be to the right or the sample address if we want to render to it
- if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
- const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
- const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
- if (slice < overlap_params.depth) {
- auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
- return std::make_pair(std::move(surface), std::move(view));
- }
- }
- }
+template <class P>
+void TextureCache<P>::InvalidateDepthBuffer() {
+ ImageViewId& depth_buffer_id = render_targets.depth_buffer_id;
+ depth_buffer_id = FindDepthBuffer(false);
+ if (!depth_buffer_id) {
+ LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
+ return;
+ }
+ // When invalidating the depth buffer, the old contents are no longer relevant
+ ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id];
+ image.flags &= ~ImageFlagBits::CpuModified;
+ image.flags &= ~ImageFlagBits::GpuModified;
- TSurface new_surface = GetUncachedSurface(gpu_addr, params);
- bool modified = false;
+ ImageView& depth_buffer = slot_image_views[depth_buffer_id];
+ runtime.InvalidateDepthBuffer(depth_buffer);
+}
- for (auto& surface : overlaps) {
- const SurfaceParams& src_params = surface->GetSurfaceParams();
- if (src_params.target != SurfaceTarget::Texture2D ||
- src_params.height != params.height ||
- src_params.block_depth != params.block_depth ||
- src_params.block_height != params.block_height) {
- return std::nullopt;
- }
- modified |= surface->IsModified();
-
- const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
- const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
- const u32 width = params.width;
- const u32 height = params.height;
- const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
- TryCopyImage(surface, new_surface, copy_params);
+template <class P>
+typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
+ // TODO: Properly implement this
+ const auto it = page_table.find(cpu_addr >> PAGE_SHIFT);
+ if (it == page_table.end()) {
+ return nullptr;
+ }
+ const auto& image_ids = it->second;
+ for (const ImageId image_id : image_ids) {
+ const ImageBase& image = slot_images[image_id];
+ if (image.cpu_addr != cpu_addr) {
+ continue;
}
- for (const auto& surface : overlaps) {
- Unregister(surface);
+ if (image.image_view_ids.empty()) {
+ continue;
}
- new_surface->MarkAsModified(modified, Tick());
- Register(new_surface);
-
- TView view = new_surface->GetMainView();
- return std::make_pair(std::move(new_surface), std::move(view));
- }
-
- /**
- * Gets the starting address and parameters of a candidate surface and tries
- * to find a matching surface within the cache. This is done in 3 big steps:
- *
- * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
- *
- * 2. Check if there are any overlaps at all, if there are none, we just load the texture from
- * memory else we move to step 3.
- *
- * 3. Consists of figuring out the relationship between the candidate texture and the
- * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If
- * there's many, we just try to reconstruct a new surface out of them based on the
- * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we
- * have to check if the candidate is a view (layer/mipmap) of the overlap or if the
- * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
- * a new surface.
- *
- * @param gpu_addr The starting address of the candidate surface.
- * @param params The parameters on the candidate surface.
- * @param preserve_contents Indicates that the new surface should be loaded from memory or
- * left blank.
- * @param is_render Whether or not the surface is a render target.
- **/
- std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
- const SurfaceParams& params, bool preserve_contents,
- bool is_render) {
- // Step 1
- // Check Level 1 Cache for a fast structural match. If candidate surface
- // matches at certain level we are pretty much done.
- if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
- TSurface& current_surface = iter->second;
- const auto topological_result = current_surface->MatchesTopology(params);
- if (topological_result != MatchTopologyResult::FullMatch) {
- VectorSurface overlaps{current_surface};
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
- topological_result);
- }
+ return &slot_image_views[image.image_view_ids.at(0)];
+ }
+ return nullptr;
+}
- const auto struct_result = current_surface->MatchesStructure(params);
- if (struct_result != MatchStructureResult::None) {
- const auto& old_params = current_surface->GetSurfaceParams();
- const bool not_3d = params.target != SurfaceTarget::Texture3D &&
- old_params.target != SurfaceTarget::Texture3D;
- if (not_3d || current_surface->MatchTarget(params.target)) {
- if (struct_result == MatchStructureResult::FullMatch) {
- return ManageStructuralMatch(current_surface, params, is_render);
- } else {
- return RebuildSurface(current_surface, params, is_render);
- }
- }
- }
- }
+template <class P>
+bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
+ return !uncommitted_downloads.empty();
+}
- // Step 2
- // Obtain all possible overlaps in the memory region
- const std::size_t candidate_size = params.GetGuestSizeInBytes();
- auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
+template <class P>
+bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+ return !committed_downloads.empty() && !committed_downloads.front().empty();
+}
- // If none are found, we are done. we just load the surface and create it.
- if (overlaps.empty()) {
- return InitializeSurface(gpu_addr, params, preserve_contents);
- }
+template <class P>
+void TextureCache<P>::CommitAsyncFlushes() {
+ // This is intentionally passing the value by copy
+ committed_downloads.push(uncommitted_downloads);
+ uncommitted_downloads.clear();
+}
- // Step 3
- // Now we need to figure the relationship between the texture and its overlaps
- // we do a topological test to ensure we can find some relationship. If it fails
- // immediately recycle the texture
- for (const auto& surface : overlaps) {
- const auto topological_result = surface->MatchesTopology(params);
- if (topological_result != MatchTopologyResult::FullMatch) {
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
- topological_result);
- }
- }
+template <class P>
+void TextureCache<P>::PopAsyncFlushes() {
+ if (committed_downloads.empty()) {
+ return;
+ }
+ const std::span<const ImageId> download_ids = committed_downloads.front();
+ if (download_ids.empty()) {
+ committed_downloads.pop();
+ return;
+ }
+ size_t total_size_bytes = 0;
+ for (const ImageId image_id : download_ids) {
+ total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
+ }
+ auto download_map = runtime.MapDownloadBuffer(total_size_bytes);
+ size_t buffer_offset = 0;
+ for (const ImageId image_id : download_ids) {
+ Image& image = slot_images[image_id];
+ const auto copies = FullDownloadCopies(image.info);
+ image.DownloadMemory(download_map, buffer_offset, copies);
+ buffer_offset += image.unswizzled_size_bytes;
+ }
+ // Wait for downloads to finish
+ runtime.Finish();
+
+ buffer_offset = 0;
+ const std::span<u8> download_span = download_map.Span();
+ for (const ImageId image_id : download_ids) {
+ const ImageBase& image = slot_images[image_id];
+ const auto copies = FullDownloadCopies(image.info);
+ const std::span<u8> image_download_span = download_span.subspan(buffer_offset);
+ SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span);
+ buffer_offset += image.unswizzled_size_bytes;
+ }
+ committed_downloads.pop();
+}
- // Manage 3D textures
- if (params.block_depth > 0) {
- auto surface =
- Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
- if (surface) {
- return *surface;
- }
+template <class P>
+bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+ bool is_modified = false;
+ ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
+ if (False(image.flags & ImageFlagBits::GpuModified)) {
+ return false;
}
+ is_modified = true;
+ return true;
+ });
+ return is_modified;
+}
- // Split cases between 1 overlap or many.
- if (overlaps.size() == 1) {
- TSurface current_surface = overlaps[0];
- // First check if the surface is within the overlap. If not, it means
- // two things either the candidate surface is a supertexture of the overlap
- // or they don't match in any known way.
- if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
- const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
- if (view) {
- return *view;
- }
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
- MatchTopologyResult::FullMatch);
- }
- // Now we check if the candidate is a mipmap/layer of the overlap
- std::optional<TView> view =
- current_surface->EmplaceView(params, gpu_addr, candidate_size);
- if (view) {
- const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
- if (is_mirage) {
- // On a mirage view, we need to recreate the surface under this new view
- // and then obtain a view again.
- SurfaceParams new_params = current_surface->GetSurfaceParams();
- const u32 wh = SurfaceParams::ConvertWidth(
- new_params.width, new_params.pixel_format, params.pixel_format);
- const u32 hh = SurfaceParams::ConvertHeight(
- new_params.height, new_params.pixel_format, params.pixel_format);
- new_params.width = wh;
- new_params.height = hh;
- new_params.pixel_format = params.pixel_format;
- std::pair<TSurface, TView> pair =
- RebuildSurface(current_surface, new_params, is_render);
- std::optional<TView> mirage_view =
- pair.first->EmplaceView(params, gpu_addr, candidate_size);
- if (mirage_view)
- return {pair.first, *mirage_view};
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
- MatchTopologyResult::FullMatch);
- }
- return {current_surface, *view};
- }
- } else {
- // If there are many overlaps, odds are they are subtextures of the candidate
- // surface. We try to construct a new surface based on the candidate parameters,
- // using the overlaps. If a single overlap fails, this will fail.
- std::optional<std::pair<TSurface, TView>> view =
- TryReconstructSurface(overlaps, params, gpu_addr);
- if (view) {
- return *view;
- }
- }
- // We failed all the tests, recycle the overlaps into a new texture.
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
- MatchTopologyResult::FullMatch);
- }
-
- /**
- * Gets the starting address and parameters of a candidate surface and tries to find a
- * matching surface within the cache that's similar to it. If there are many textures
- * or the texture found if entirely incompatible, it will fail. If no texture is found, the
- * blit will be unsuccessful.
- *
- * @param gpu_addr The starting address of the candidate surface.
- * @param params The parameters on the candidate surface.
- **/
- Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-
- if (!cpu_addr) {
- Deduction result{};
- result.type = DeductionType::DeductionFailed;
- return result;
- }
+template <class P>
+void TextureCache<P>::RefreshContents(Image& image) {
+ if (False(image.flags & ImageFlagBits::CpuModified)) {
+ // Only upload modified images
+ return;
+ }
+ image.flags &= ~ImageFlagBits::CpuModified;
+ TrackImage(image);
- if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
- TSurface& current_surface = iter->second;
- const auto topological_result = current_surface->MatchesTopology(params);
- if (topological_result != MatchTopologyResult::FullMatch) {
- Deduction result{};
- result.type = DeductionType::DeductionFailed;
- return result;
- }
- const auto struct_result = current_surface->MatchesStructure(params);
- if (struct_result != MatchStructureResult::None &&
- current_surface->MatchTarget(params.target)) {
- Deduction result{};
- result.type = DeductionType::DeductionComplete;
- result.surface = current_surface;
- return result;
- }
- }
+ if (image.info.num_samples > 1) {
+ LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
+ return;
+ }
+ auto map = runtime.MapUploadBuffer(MapSizeBytes(image));
+ UploadImageContents(image, map, 0);
+ runtime.InsertUploadMemoryBarrier();
+}
- const std::size_t candidate_size = params.GetGuestSizeInBytes();
- auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
+template <class P>
+template <typename MapBuffer>
+void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
+ const std::span<u8> mapped_span = map.Span().subspan(buffer_offset);
+ const GPUVAddr gpu_addr = image.gpu_addr;
+
+ if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
+ gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+ const auto uploads = FullUploadSwizzles(image.info);
+ runtime.AccelerateImageUpload(image, map, buffer_offset, uploads);
+ } else if (True(image.flags & ImageFlagBits::Converted)) {
+ std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
+ auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
+ ConvertImage(unswizzled_data, image.info, mapped_span, copies);
+ image.UploadMemory(map, buffer_offset, copies);
+ } else if (image.info.type == ImageType::Buffer) {
+ const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
+ image.UploadMemory(map, buffer_offset, copies);
+ } else {
+ const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
+ image.UploadMemory(map, buffer_offset, copies);
+ }
+}
- if (overlaps.empty()) {
- Deduction result{};
- result.type = DeductionType::DeductionIncomplete;
- return result;
- }
+template <class P>
+ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
+ if (!IsValidAddress(gpu_memory, config)) {
+ return NULL_IMAGE_VIEW_ID;
+ }
+ const auto [pair, is_new] = image_views.try_emplace(config);
+ ImageViewId& image_view_id = pair->second;
+ if (is_new) {
+ image_view_id = CreateImageView(config);
+ }
+ return image_view_id;
+}
- if (overlaps.size() > 1) {
- Deduction result{};
- result.type = DeductionType::DeductionFailed;
- return result;
- } else {
- Deduction result{};
- result.type = DeductionType::DeductionComplete;
- result.surface = overlaps[0];
- return result;
- }
+template <class P>
+ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
+ const ImageInfo info(config);
+ const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride;
+ const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
+ if (!image_id) {
+ return NULL_IMAGE_VIEW_ID;
}
+ ImageBase& image = slot_images[image_id];
+ const SubresourceBase base = image.TryFindBase(config.Address()).value();
+ ASSERT(base.level == 0);
+ const ImageViewInfo view_info(config, base.layer);
+ const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
+ ImageViewBase& image_view = slot_image_views[image_view_id];
+ image_view.flags |= ImageViewFlagBits::Strong;
+ image.flags |= ImageFlagBits::Strong;
+ return image_view_id;
+}
- /**
- * Gets a null surface based on a target texture.
- * @param target The target of the null surface.
- */
- TView GetNullSurface(SurfaceTarget target) {
- const u32 i_target = static_cast<u32>(target);
- if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) {
- return it->second->GetMainView();
- }
- SurfaceParams params{};
- params.target = target;
- params.is_tiled = false;
- params.srgb_conversion = false;
- params.is_layered =
- target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
- target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
- params.block_width = 0;
- params.block_height = 0;
- params.block_depth = 0;
- params.tile_width_spacing = 1;
- params.width = 1;
- params.height = 1;
- params.depth = 1;
- if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
- params.depth = 6;
- }
- params.pitch = 4;
- params.num_levels = 1;
- params.emulated_levels = 1;
- params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM;
- params.type = VideoCore::Surface::SurfaceType::ColorTexture;
- auto surface = CreateSurface(0ULL, params);
- invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
- surface->UploadTexture(invalid_memory);
- surface->MarkAsModified(false, Tick());
- invalid_cache.emplace(i_target, surface);
- return surface->GetMainView();
- }
-
- /**
- * Gets the a source and destination starting address and parameters,
- * and tries to deduce if they are supposed to be depth textures. If so, their
- * parameters are modified and fixed into so.
- *
- * @param src_params The parameters of the candidate surface.
- * @param dst_params The parameters of the destination surface.
- * @param src_gpu_addr The starting address of the candidate surface.
- * @param dst_gpu_addr The starting address of the destination surface.
- **/
- void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
- const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
- auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
- auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
- if (deduced_src.Failed() || deduced_dst.Failed()) {
- return;
+template <class P>
+ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options) {
+ if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
+ return image_id;
+ }
+ return InsertImage(info, gpu_addr, options);
+}
+
+template <class P>
+ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
+ return ImageId{};
+ }
+ ImageId image_id;
+ const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+ if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
+ const bool strict_size = False(options & RelaxedOptions::Size) &&
+ True(existing_image.flags & ImageFlagBits::Strong);
+ const ImageInfo& existing = existing_image.info;
+ if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
+ existing.pitch == info.pitch &&
+ IsPitchLinearSameSize(existing, info, strict_size) &&
+ IsViewCompatible(existing.format, info.format)) {
+ image_id = existing_image_id;
+ return true;
+ }
+ } else if (IsSubresource(info, existing_image, gpu_addr, options)) {
+ image_id = existing_image_id;
+ return true;
}
+ return false;
+ };
+ ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
+ return image_id;
+}
- const bool incomplete_src = deduced_src.Incomplete();
- const bool incomplete_dst = deduced_dst.Incomplete();
+template <class P>
+ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
+ const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
+ const Image& image = slot_images[image_id];
+ // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
+ const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
+ if (is_new) {
+ it->second = slot_image_allocs.insert();
+ }
+ slot_image_allocs[it->second].images.push_back(image_id);
+ return image_id;
+}
- if (incomplete_src && incomplete_dst) {
+template <class P>
+ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
+ ImageInfo new_info = info;
+ const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
+ std::vector<ImageId> overlap_ids;
+ std::vector<ImageId> left_aliased_ids;
+ std::vector<ImageId> right_aliased_ids;
+ ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
+ if (info.type != overlap.info.type) {
return;
}
-
- const bool any_incomplete = incomplete_src || incomplete_dst;
-
- if (!any_incomplete) {
- if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
- return;
- }
- } else {
- if (incomplete_src && !(deduced_dst.IsDepth())) {
- return;
- }
-
- if (incomplete_dst && !(deduced_src.IsDepth())) {
- return;
+ if (info.type == ImageType::Linear) {
+ if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
+ // Alias linear images with the same pitch
+ left_aliased_ids.push_back(overlap_id);
}
+ return;
+ }
+ const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true);
+ if (solution) {
+ gpu_addr = solution->gpu_addr;
+ cpu_addr = solution->cpu_addr;
+ new_info.resources = solution->resources;
+ overlap_ids.push_back(overlap_id);
+ return;
+ }
+ static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
+ const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
+ if (IsSubresource(new_info, overlap, gpu_addr, options)) {
+ left_aliased_ids.push_back(overlap_id);
+ } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) {
+ right_aliased_ids.push_back(overlap_id);
}
+ });
+ const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
+ Image& new_image = slot_images[new_image_id];
- const auto inherit_format = [](SurfaceParams& to, TSurface from) {
- const SurfaceParams& params = from->GetSurfaceParams();
- to.pixel_format = params.pixel_format;
- to.type = params.type;
- };
- // Now we got the cases where one or both is Depth and the other is not known
- if (!incomplete_src) {
- inherit_format(src_params, deduced_src.surface);
+ // TODO: Only upload what we need
+ RefreshContents(new_image);
+
+ for (const ImageId overlap_id : overlap_ids) {
+ Image& overlap = slot_images[overlap_id];
+ if (overlap.info.num_samples != new_image.info.num_samples) {
+ LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
} else {
- inherit_format(src_params, deduced_dst.surface);
+ const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
+ const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
+ runtime.CopyImage(new_image, overlap, copies);
}
- if (!incomplete_dst) {
- inherit_format(dst_params, deduced_dst.surface);
- } else {
- inherit_format(dst_params, deduced_src.surface);
+ if (True(overlap.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(overlap);
}
+ UnregisterImage(overlap_id);
+ DeleteImage(overlap_id);
+ }
+ ImageBase& new_image_base = new_image;
+ for (const ImageId aliased_id : right_aliased_ids) {
+ ImageBase& aliased = slot_images[aliased_id];
+ AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
+ }
+ for (const ImageId aliased_id : left_aliased_ids) {
+ ImageBase& aliased = slot_images[aliased_id];
+ AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
}
+ RegisterImage(new_image_id);
+ return new_image_id;
+}
- std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
- bool preserve_contents) {
- auto new_surface{GetUncachedSurface(gpu_addr, params)};
- Register(new_surface);
- if (preserve_contents) {
- LoadSurface(new_surface);
- }
- return {new_surface, new_surface->GetMainView()};
+template <class P>
+typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
+ const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
+ static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
+ const GPUVAddr dst_addr = dst.Address();
+ const GPUVAddr src_addr = src.Address();
+ ImageInfo dst_info(dst);
+ ImageInfo src_info(src);
+ ImageId dst_id;
+ ImageId src_id;
+ do {
+ has_deleted_images = false;
+ dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
+ src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
+ const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
+ const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
+ DeduceBlitImages(dst_info, src_info, dst_image, src_image);
+ if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
+ continue;
+ }
+ if (!dst_id) {
+ dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
+ }
+ if (!src_id) {
+ src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
+ }
+ } while (has_deleted_images);
+ return BlitImages{
+ .dst_id = dst_id,
+ .src_id = src_id,
+ .dst_format = dst_info.format,
+ .src_format = src_info.format,
+ };
+}
+
+template <class P>
+SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
+ if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
+ return NULL_SAMPLER_ID;
+ }
+ const auto [pair, is_new] = samplers.try_emplace(config);
+ if (is_new) {
+ pair->second = slot_samplers.insert(runtime, config);
}
+ return pair->second;
+}
- void LoadSurface(const TSurface& surface) {
- staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
- surface->LoadBuffer(gpu_memory, staging_cache);
- surface->UploadTexture(staging_cache.GetBuffer(0));
- surface->MarkAsModified(false, Tick());
+template <class P>
+ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
+ const auto& regs = maxwell3d.regs;
+ if (index >= regs.rt_control.count) {
+ return ImageViewId{};
+ }
+ const auto& rt = regs.rt[index];
+ const GPUVAddr gpu_addr = rt.Address();
+ if (gpu_addr == 0) {
+ return ImageViewId{};
+ }
+ if (rt.format == Tegra::RenderTargetFormat::NONE) {
+ return ImageViewId{};
}
+ const ImageInfo info(regs, index);
+ return FindRenderTargetView(info, gpu_addr, is_clear);
+}
- void FlushSurface(const TSurface& surface) {
- if (!surface->IsModified()) {
- return;
- }
- staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
- surface->DownloadTexture(staging_cache.GetBuffer(0));
- surface->FlushBuffer(gpu_memory, staging_cache);
- surface->MarkAsModified(false, Tick());
- }
-
- void RegisterInnerCache(TSurface& surface) {
- const VAddr cpu_addr = surface->GetCpuAddr();
- VAddr start = cpu_addr >> registry_page_bits;
- const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
- l1_cache[cpu_addr] = surface;
- while (start <= end) {
- registry[start].push_back(surface);
- start++;
- }
+template <class P>
+ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
+ const auto& regs = maxwell3d.regs;
+ if (!regs.zeta_enable) {
+ return ImageViewId{};
+ }
+ const GPUVAddr gpu_addr = regs.zeta.Address();
+ if (gpu_addr == 0) {
+ return ImageViewId{};
}
+ const ImageInfo info(regs);
+ return FindRenderTargetView(info, gpu_addr, is_clear);
+}
- void UnregisterInnerCache(TSurface& surface) {
- const VAddr cpu_addr = surface->GetCpuAddr();
- VAddr start = cpu_addr >> registry_page_bits;
- const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
- l1_cache.erase(cpu_addr);
- while (start <= end) {
- auto& reg{registry[start]};
- reg.erase(std::find(reg.begin(), reg.end(), surface));
- start++;
- }
+template <class P>
+ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
+ bool is_clear) {
+ const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
+ const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
+ if (!image_id) {
+ return NULL_IMAGE_VIEW_ID;
+ }
+ Image& image = slot_images[image_id];
+ const ImageViewType view_type = RenderTargetImageViewType(info);
+ SubresourceBase base;
+ if (image.info.type == ImageType::Linear) {
+ base = SubresourceBase{.level = 0, .layer = 0};
+ } else {
+ base = image.TryFindBase(gpu_addr).value();
}
+ const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
+ const SubresourceRange range{
+ .base = base,
+ .extent = {.levels = 1, .layers = layers},
+ };
+ return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
+}
- VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
- if (size == 0) {
- return {};
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+ static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+ boost::container::small_vector<ImageId, 32> images;
+ ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
+ const auto it = page_table.find(page);
+ if (it == page_table.end()) {
+ if constexpr (BOOL_BREAK) {
+ return false;
+ } else {
+ return;
+ }
}
- const VAddr cpu_addr_end = cpu_addr + size;
- const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
- VectorSurface surfaces;
- for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
- const auto it = registry.find(start);
- if (it == registry.end()) {
+ for (const ImageId image_id : it->second) {
+ Image& image = slot_images[image_id];
+ if (True(image.flags & ImageFlagBits::Picked)) {
continue;
}
- for (auto& surface : it->second) {
- if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
- continue;
+ if (!image.Overlaps(cpu_addr, size)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Picked;
+ images.push_back(image_id);
+ if constexpr (BOOL_BREAK) {
+ if (func(image_id, image)) {
+ return true;
}
- surface->MarkAsPicked(true);
- surfaces.push_back(surface);
+ } else {
+ func(image_id, image);
}
}
- for (auto& surface : surfaces) {
- surface->MarkAsPicked(false);
+ if constexpr (BOOL_BREAK) {
+ return false;
}
- return surfaces;
+ });
+ for (const ImageId image_id : images) {
+ slot_images[image_id].flags &= ~ImageFlagBits::Picked;
}
+}
- void ReserveSurface(const SurfaceParams& params, TSurface surface) {
- surface_reserve[params].push_back(std::move(surface));
+template <class P>
+ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
+ Image& image = slot_images[image_id];
+ if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
+ return image_view_id;
}
+ const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
+ image.InsertView(info, image_view_id);
+ return image_view_id;
+}
+
+template <class P>
+void TextureCache<P>::RegisterImage(ImageId image_id) {
+ ImageBase& image = slot_images[image_id];
+ ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
+ "Trying to register an already registered image");
+ image.flags |= ImageFlagBits::Registered;
+ ForEachPage(image.cpu_addr, image.guest_size_bytes,
+ [this, image_id](u64 page) { page_table[page].push_back(image_id); });
+}
- TSurface TryGetReservedSurface(const SurfaceParams& params) {
- auto search{surface_reserve.find(params)};
- if (search == surface_reserve.end()) {
- return {};
+template <class P>
+void TextureCache<P>::UnregisterImage(ImageId image_id) {
+ Image& image = slot_images[image_id];
+ ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
+ "Trying to unregister an already registered image");
+ image.flags &= ~ImageFlagBits::Registered;
+ ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+ const auto page_it = page_table.find(page);
+ if (page_it == page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT);
+ return;
}
- for (auto& surface : search->second) {
- if (!surface->IsRegistered()) {
- return surface;
- }
+ std::vector<ImageId>& image_ids = page_it->second;
+ const auto vector_it = std::ranges::find(image_ids, image_id);
+ if (vector_it == image_ids.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT);
+ return;
}
- return {};
- }
+ image_ids.erase(vector_it);
+ });
+}
- /// Try to do an image copy logging when formats are incompatible.
- void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) {
- const SurfaceParams& src_params = src->GetSurfaceParams();
- const SurfaceParams& dst_params = dst->GetSurfaceParams();
- if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) {
- LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format,
- src_params.pixel_format);
- return;
+template <class P>
+void TextureCache<P>::TrackImage(ImageBase& image) {
+ ASSERT(False(image.flags & ImageFlagBits::Tracked));
+ image.flags |= ImageFlagBits::Tracked;
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+}
+
+template <class P>
+void TextureCache<P>::UntrackImage(ImageBase& image) {
+ ASSERT(True(image.flags & ImageFlagBits::Tracked));
+ image.flags &= ~ImageFlagBits::Tracked;
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+}
+
+template <class P>
+void TextureCache<P>::DeleteImage(ImageId image_id) {
+ ImageBase& image = slot_images[image_id];
+ const GPUVAddr gpu_addr = image.gpu_addr;
+ const auto alloc_it = image_allocs_table.find(gpu_addr);
+ if (alloc_it == image_allocs_table.end()) {
+ UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
+ gpu_addr);
+ return;
+ }
+ const ImageAllocId alloc_id = alloc_it->second;
+ std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
+ const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
+ if (alloc_image_it == alloc_images.end()) {
+ UNREACHABLE_MSG("Trying to delete an image that does not exist");
+ return;
+ }
+ ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
+ ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
+
+ // Mark render targets as dirty
+ auto& dirty = maxwell3d.dirty.flags;
+ dirty[Dirty::RenderTargets] = true;
+ dirty[Dirty::ZetaBuffer] = true;
+ for (size_t rt = 0; rt < NUM_RT; ++rt) {
+ dirty[Dirty::ColorBuffer0 + rt] = true;
+ }
+ const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
+ for (const ImageViewId image_view_id : image_view_ids) {
+ std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
+ if (render_targets.depth_buffer_id == image_view_id) {
+ render_targets.depth_buffer_id = ImageViewId{};
}
- ImageCopy(src, dst, copy);
}
+ RemoveImageViewReferences(image_view_ids);
+ RemoveFramebuffers(image_view_ids);
+
+ for (const AliasedImage& alias : image.aliased_images) {
+ ImageBase& other_image = slot_images[alias.id];
+ [[maybe_unused]] const size_t num_removed_aliases =
+ std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
+ return other_alias.id == image_id;
+ });
+ ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
+ num_removed_aliases);
+ }
+ for (const ImageViewId image_view_id : image_view_ids) {
+ sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+ slot_image_views.erase(image_view_id);
+ }
+ sentenced_images.Push(std::move(slot_images[image_id]));
+ slot_images.erase(image_id);
- constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
- return siblings_table[static_cast<std::size_t>(format)];
+ alloc_images.erase(alloc_image_it);
+ if (alloc_images.empty()) {
+ image_allocs_table.erase(alloc_it);
}
+ if constexpr (ENABLE_VALIDATION) {
+ std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
+ std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+ }
+ graphics_image_table.Invalidate();
+ compute_image_table.Invalidate();
+ has_deleted_images = true;
+}
- /// Returns true the shader sampler entry is compatible with the TIC texture type.
- static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
- const VideoCommon::Shader::Sampler& entry) {
- const auto shader_type = entry.type;
- switch (tic_type) {
- case Tegra::Texture::TextureType::Texture1D:
- case Tegra::Texture::TextureType::Texture1DArray:
- return shader_type == Tegra::Shader::TextureType::Texture1D;
- case Tegra::Texture::TextureType::Texture1DBuffer:
- // TODO(Rodrigo): Assume as valid for now
- return true;
- case Tegra::Texture::TextureType::Texture2D:
- case Tegra::Texture::TextureType::Texture2DNoMipmap:
- return shader_type == Tegra::Shader::TextureType::Texture2D;
- case Tegra::Texture::TextureType::Texture2DArray:
- return shader_type == Tegra::Shader::TextureType::Texture2D ||
- shader_type == Tegra::Shader::TextureType::TextureCube;
- case Tegra::Texture::TextureType::Texture3D:
- return shader_type == Tegra::Shader::TextureType::Texture3D;
- case Tegra::Texture::TextureType::TextureCubeArray:
- case Tegra::Texture::TextureType::TextureCubemap:
- if (shader_type == Tegra::Shader::TextureType::TextureCube) {
- return true;
- }
- return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array;
+template <class P>
+void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
+ auto it = image_views.begin();
+ while (it != image_views.end()) {
+ const auto found = std::ranges::find(removed_views, it->second);
+ if (found != removed_views.end()) {
+ it = image_views.erase(it);
+ } else {
+ ++it;
}
- UNREACHABLE();
- return true;
}
+}
- struct FramebufferTargetInfo {
- TSurface target;
- TView view;
- };
-
- void AsyncFlushSurface(TSurface& surface) {
- if (!uncommitted_flushes) {
- uncommitted_flushes = std::make_shared<std::list<TSurface>>();
+template <class P>
+void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
+ auto it = framebuffers.begin();
+ while (it != framebuffers.end()) {
+ if (it->first.Contains(removed_views)) {
+ it = framebuffers.erase(it);
+ } else {
+ ++it;
}
- uncommitted_flushes->push_back(surface);
}
+}
- VideoCore::RasterizerInterface& rasterizer;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::MemoryManager& gpu_memory;
-
- FormatLookupTable format_lookup_table;
- FormatCompatibility format_compatibility;
-
- u64 ticks{};
-
- // Guards the cache for protection conflicts.
- bool guard_render_targets{};
- bool guard_samplers{};
-
- // The siblings table is for formats that can inter exchange with one another
- // without causing issues. This is only valid when a conflict occurs on a non
- // rendering use.
- std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;
-
- // The internal Cache is different for the Texture Cache. It's based on buckets
- // of 1MB. This fits better for the purpose of this cache as textures are normaly
- // large in size.
- static constexpr u64 registry_page_bits{20};
- static constexpr u64 registry_page_size{1 << registry_page_bits};
- std::unordered_map<VAddr, std::vector<TSurface>> registry;
+template <class P>
+void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
+ image.flags |= ImageFlagBits::GpuModified;
+ image.modification_tick = ++modification_tick;
+}
- static constexpr u32 DEPTH_RT = 8;
- static constexpr u32 NO_RT = 0xFFFFFFFF;
+template <class P>
+void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
+ boost::container::small_vector<const AliasedImage*, 1> aliased_images;
+ ImageBase& image = slot_images[image_id];
+ u64 most_recent_tick = image.modification_tick;
+ for (const AliasedImage& aliased : image.aliased_images) {
+ ImageBase& aliased_image = slot_images[aliased.id];
+ if (image.modification_tick < aliased_image.modification_tick) {
+ most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
+ aliased_images.push_back(&aliased);
+ }
+ }
+ if (aliased_images.empty()) {
+ return;
+ }
+ image.modification_tick = most_recent_tick;
+ std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
+ const ImageBase& lhs_image = slot_images[lhs->id];
+ const ImageBase& rhs_image = slot_images[rhs->id];
+ return lhs_image.modification_tick < rhs_image.modification_tick;
+ });
+ for (const AliasedImage* const aliased : aliased_images) {
+ CopyImage(image_id, aliased->id, aliased->copies);
+ }
+}
- // The L1 Cache is used for fast texture lookup before checking the overlaps
- // This avoids calculating size and other stuffs.
- std::unordered_map<VAddr, TSurface> l1_cache;
+template <class P>
+void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
+ Image& image = slot_images[image_id];
+ if (invalidate) {
+ image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
+ if (False(image.flags & ImageFlagBits::Tracked)) {
+ TrackImage(image);
+ }
+ } else {
+ RefreshContents(image);
+ SynchronizeAliases(image_id);
+ }
+ if (is_modification) {
+ MarkModification(image);
+ }
+ image.frame_tick = frame_tick;
+}
- /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
- /// previously been used. This is to prevent surfaces from being constantly created and
- /// destroyed when used with different surface parameters.
- std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
- std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
- render_targets;
- FramebufferTargetInfo depth_buffer;
+template <class P>
+void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
+ bool invalidate) {
+ if (!image_view_id) {
+ return;
+ }
+ const ImageViewBase& image_view = slot_image_views[image_view_id];
+ PrepareImage(image_view.image_id, is_modification, invalidate);
+}
- std::vector<TSurface> sampled_textures;
+template <class P>
+void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
+ Image& dst = slot_images[dst_id];
+ Image& src = slot_images[src_id];
+ const auto dst_format_type = GetFormatType(dst.info.format);
+ const auto src_format_type = GetFormatType(src.info.format);
+ if (src_format_type == dst_format_type) {
+ if constexpr (HAS_EMULATED_COPIES) {
+ if (!runtime.CanImageBeCopied(dst, src)) {
+ return runtime.EmulateCopyImage(dst, src, copies);
+ }
+ }
+ return runtime.CopyImage(dst, src, copies);
+ }
+ UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
+ UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
+ for (const ImageCopy& copy : copies) {
+ UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
+ UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
+ UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
+ UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
+
+ const SubresourceBase dst_base{
+ .level = copy.dst_subresource.base_level,
+ .layer = copy.dst_subresource.base_layer,
+ };
+ const SubresourceBase src_base{
+ .level = copy.src_subresource.base_level,
+ .layer = copy.src_subresource.base_layer,
+ };
+ const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
+ const SubresourceExtent src_extent{.levels = 1, .layers = 1};
+ const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
+ const SubresourceRange src_range{.base = src_base, .extent = src_extent};
+ const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
+ const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
+ const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+ Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
+ const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
+ ImageView& dst_view = slot_image_views[dst_view_id];
+ ImageView& src_view = slot_image_views[src_view_id];
+ [[maybe_unused]] const Extent3D expected_size{
+ .width = std::min(dst_view.size.width, src_view.size.width),
+ .height = std::min(dst_view.size.height, src_view.size.height),
+ .depth = std::min(dst_view.size.depth, src_view.size.depth),
+ };
+ UNIMPLEMENTED_IF(copy.extent != expected_size);
- /// This cache stores null surfaces in order to be used as a placeholder
- /// for invalid texture calls.
- std::unordered_map<u32, TSurface> invalid_cache;
- std::vector<u8> invalid_memory;
+ runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
+ }
+}
- std::list<TSurface> marked_for_unregister;
+template <class P>
+void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
+ if (*old_id == new_id) {
+ return;
+ }
+ if (*old_id) {
+ const ImageViewBase& old_view = slot_image_views[*old_id];
+ if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
+ uncommitted_downloads.push_back(old_view.image_id);
+ }
+ }
+ *old_id = new_id;
+}
- std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
- std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;
+template <class P>
+std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
+ ImageId image_id, const ImageViewInfo& view_info) {
+ const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
+ const ImageBase& image = slot_images[image_id];
+ const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
+ const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
+ const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
+ const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
+ const u32 num_samples = image.info.num_samples;
+ const auto [samples_x, samples_y] = SamplesLog2(num_samples);
+ const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
+ .color_buffer_ids = {color_view_id},
+ .depth_buffer_id = depth_view_id,
+ .size = {extent.width >> samples_x, extent.height >> samples_y},
+ });
+ return {framebuffer_id, view_id};
+}
- StagingCache staging_cache;
- std::recursive_mutex mutex;
-};
+template <class P>
+bool TextureCache<P>::IsFullClear(ImageViewId id) {
+ if (!id) {
+ return true;
+ }
+ const ImageViewBase& image_view = slot_image_views[id];
+ const ImageBase& image = slot_images[image_view.image_id];
+ const Extent3D size = image_view.size;
+ const auto& regs = maxwell3d.regs;
+ const auto& scissor = regs.scissor_test[0];
+ if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
+ // Images with multiple resources can't be cleared in a single call
+ return false;
+ }
+ if (regs.clear_flags.scissor == 0) {
+ // If scissor testing is disabled, the clear is always full
+ return true;
+ }
+ // Make sure the clear covers all texels in the subresource
+ return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
+ scissor.max_y >= size.height;
+}
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
new file mode 100644
index 000000000..2ad2d72a6
--- /dev/null
+++ b/src/video_core/texture_cache/types.h
@@ -0,0 +1,140 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/texture_cache/slot_vector.h"
+
+namespace VideoCommon {
+
+constexpr size_t NUM_RT = 8;
+constexpr size_t MAX_MIP_LEVELS = 14;
+
+constexpr SlotId CORRUPT_ID{0xfffffffe};
+
+using ImageId = SlotId;
+using ImageViewId = SlotId;
+using ImageAllocId = SlotId;
+using SamplerId = SlotId;
+using FramebufferId = SlotId;
+
+enum class ImageType : u32 {
+ e1D,
+ e2D,
+ e3D,
+ Linear,
+ Buffer,
+};
+
+enum class ImageViewType : u32 {
+ e1D,
+ e2D,
+ Cube,
+ e3D,
+ e1DArray,
+ e2DArray,
+ CubeArray,
+ Rect,
+ Buffer,
+};
+constexpr size_t NUM_IMAGE_VIEW_TYPES = 9;
+
+enum class RelaxedOptions : u32 {
+ Size = 1 << 0,
+ Format = 1 << 1,
+ Samples = 1 << 2,
+};
+DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
+
+struct Offset2D {
+ constexpr auto operator<=>(const Offset2D&) const noexcept = default;
+
+ s32 x;
+ s32 y;
+};
+
+struct Offset3D {
+ constexpr auto operator<=>(const Offset3D&) const noexcept = default;
+
+ s32 x;
+ s32 y;
+ s32 z;
+};
+
+struct Extent2D {
+ constexpr auto operator<=>(const Extent2D&) const noexcept = default;
+
+ u32 width;
+ u32 height;
+};
+
+struct Extent3D {
+ constexpr auto operator<=>(const Extent3D&) const noexcept = default;
+
+ u32 width;
+ u32 height;
+ u32 depth;
+};
+
+struct SubresourceLayers {
+ s32 base_level = 0;
+ s32 base_layer = 0;
+ s32 num_layers = 1;
+};
+
+struct SubresourceBase {
+ constexpr auto operator<=>(const SubresourceBase&) const noexcept = default;
+
+ s32 level = 0;
+ s32 layer = 0;
+};
+
+struct SubresourceExtent {
+ constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default;
+
+ s32 levels = 1;
+ s32 layers = 1;
+};
+
+struct SubresourceRange {
+ constexpr auto operator<=>(const SubresourceRange&) const noexcept = default;
+
+ SubresourceBase base;
+ SubresourceExtent extent;
+};
+
+struct ImageCopy {
+ SubresourceLayers src_subresource;
+ SubresourceLayers dst_subresource;
+ Offset3D src_offset;
+ Offset3D dst_offset;
+ Extent3D extent;
+};
+
+struct BufferImageCopy {
+ size_t buffer_offset;
+ size_t buffer_size;
+ u32 buffer_row_length;
+ u32 buffer_image_height;
+ SubresourceLayers image_subresource;
+ Offset3D image_offset;
+ Extent3D image_extent;
+};
+
+struct BufferCopy {
+ size_t src_offset;
+ size_t dst_offset;
+ size_t size;
+};
+
+struct SwizzleParameters {
+ Extent3D num_tiles;
+ Extent3D block;
+ size_t buffer_offset;
+ s32 level;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
new file mode 100644
index 000000000..9ed1fc007
--- /dev/null
+++ b/src/video_core/texture_cache/util.cpp
@@ -0,0 +1,1232 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file contains code from Ryujinx
+// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
+// The sections using code from Ryujinx are marked with a link to the original version
+
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#include <algorithm>
+#include <array>
+#include <numeric>
+#include <optional>
+#include <span>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "video_core/compatible_formats.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/decode_bc4.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/samples_helper.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/astc.h"
+#include "video_core/textures/decoders.h"
+
+namespace VideoCommon {
+
+namespace {
+
+using Tegra::Texture::GOB_SIZE;
+using Tegra::Texture::GOB_SIZE_SHIFT;
+using Tegra::Texture::GOB_SIZE_X;
+using Tegra::Texture::GOB_SIZE_X_SHIFT;
+using Tegra::Texture::GOB_SIZE_Y;
+using Tegra::Texture::GOB_SIZE_Y_SHIFT;
+using Tegra::Texture::GOB_SIZE_Z;
+using Tegra::Texture::GOB_SIZE_Z_SHIFT;
+using Tegra::Texture::MsaaMode;
+using Tegra::Texture::SwizzleTexture;
+using Tegra::Texture::TextureFormat;
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::UnswizzleTexture;
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::DefaultBlockHeight;
+using VideoCore::Surface::DefaultBlockWidth;
+using VideoCore::Surface::IsCopyCompatible;
+using VideoCore::Surface::IsPixelFormatASTC;
+using VideoCore::Surface::IsViewCompatible;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using VideoCore::Surface::SurfaceType;
+
+constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
+
+struct LevelInfo {
+ Extent3D size;
+ Extent3D block;
+ Extent2D tile_size;
+ u32 bpp_log2;
+ u32 tile_width_spacing;
+};
+
+[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
+ if (shift == 0) {
+ return 0;
+ }
+ u32 x = unit_factor << (shift - 1);
+ if (x >= dimension) {
+ while (--shift) {
+ x >>= 1;
+ if (x < dimension) {
+ break;
+ }
+ }
+ }
+ return shift;
+}
+
+[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) {
+ return std::max<u32>(size >> level, 1);
+}
+
+[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) {
+ return Extent3D{
+ .width = AdjustMipSize(size.width, level),
+ .height = AdjustMipSize(size.height, level),
+ .depth = AdjustMipSize(size.depth, level),
+ };
+}
+
+[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) {
+ const auto [samples_x, samples_y] = SamplesLog2(num_samples);
+ return Extent3D{
+ .width = size.width >> samples_x,
+ .height = size.height >> samples_y,
+ .depth = size.depth,
+ };
+}
+
+template <u32 GOB_EXTENT>
+[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) {
+ do {
+ while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) {
+ --block_size;
+ }
+ } while (level--);
+ return block_size;
+}
+
+[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
+ u32 level) {
+ return {
+ .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
+ .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
+ .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
+ };
+}
+
+[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) {
+ return {
+ .width = Common::DivCeil(size.width, tile_size.width),
+ .height = Common::DivCeil(size.height, tile_size.height),
+ .depth = size.depth,
+ };
+}
+
+[[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) {
+ return std::countl_zero(bytes_per_block) ^ 0x1F;
+}
+
+[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) {
+ return BytesPerBlockLog2(BytesPerBlock(format));
+}
+
+[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) {
+ const Extent3D num_blocks = AdjustTileSize(size, tile_size);
+ return num_blocks.width * num_blocks.height * num_blocks.depth;
+}
+
+[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) {
+ return Common::DivCeil(AdjustMipSize(size, level), block_size);
+}
+
+[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) {
+ return config.Width() * config.Height() * BytesPerBlock(format);
+}
+
+[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) {
+ switch (type) {
+ case TextureType::Texture2D:
+ case TextureType::Texture2DArray:
+ case TextureType::Texture2DNoMipmap:
+ case TextureType::Texture3D:
+ case TextureType::TextureCubeArray:
+ case TextureType::TextureCubemap:
+ return true;
+ case TextureType::Texture1D:
+ case TextureType::Texture1DArray:
+ case TextureType::Texture1DBuffer:
+ return false;
+ }
+ return false;
+}
+
+[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) {
+ switch (type) {
+ case ImageType::e2D:
+ case ImageType::e3D:
+ case ImageType::Linear:
+ return true;
+ case ImageType::e1D:
+ case ImageType::Buffer:
+ return false;
+ }
+ UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type));
+}
+
+[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
+ switch (num_samples) {
+ case 1:
+ return {1, 1};
+ case 2:
+ return {2, 1};
+ case 4:
+ return {2, 2};
+ case 8:
+ return {4, 2};
+ case 16:
+ return {4, 4};
+ }
+ UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
+ return {1, 1};
+}
+
+[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) {
+ return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
+}
+
+[[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) {
+ return Extent3D{
+ .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2,
+ .height = AdjustSize(info.size.height, level, info.tile_size.height),
+ .depth = AdjustMipSize(info.size.depth, level),
+ };
+}
+
+[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
+ const Extent3D blocks = NumLevelBlocks(info, level);
+ return Extent3D{
+ .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
+ .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height),
+ .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth),
+ };
+}
+
+[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) {
+ return Extent2D{
+ .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing,
+ .height = GOB_SIZE_Y_SHIFT + block_height,
+ };
+}
+
+[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob,
+ u32 block_depth) {
+ return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) ||
+ num_tiles.depth < (1U << block_depth);
+}
+
+[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob,
+ u32 bpp_log2) {
+ if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) {
+ return GOB_SIZE_X_SHIFT - bpp_log2;
+ } else {
+ return gob.width;
+ }
+}
+
+[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2,
+ u32 tile_width_spacing) {
+ const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing);
+ return StrideAlignment(num_tiles, block, gob, bpp_log2);
+}
+
+[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) {
+ const Extent3D blocks = NumLevelBlocks(info, level);
+ const Extent2D gobs{
+ .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT),
+ .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT),
+ };
+ const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing);
+ const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth);
+ const u32 alignment = is_small ? 0 : info.tile_width_spacing;
+ return Extent2D{
+ .width = Common::AlignBits(gobs.width, alignment),
+ .height = gobs.height,
+ };
+}
+
+[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) {
+ const Extent3D blocks = NumLevelBlocks(info, level);
+ const Extent3D tile_shift = TileShift(info, level);
+ const Extent2D gobs = NumGobs(info, level);
+ return Extent3D{
+ .width = Common::DivCeilLog2(gobs.width, tile_shift.width),
+ .height = Common::DivCeilLog2(gobs.height, tile_shift.height),
+ .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth),
+ };
+}
+
+[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) {
+ const Extent3D tile_shift = TileShift(info, level);
+ const Extent3D tiles = LevelTiles(info, level);
+ const u32 num_tiles = tiles.width * tiles.height * tiles.depth;
+ const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth;
+ return num_tiles << shift;
+}
+
+[[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info,
+ u32 num_levels) {
+ ASSERT(num_levels <= MAX_MIP_LEVELS);
+ std::array<u32, MAX_MIP_LEVELS> sizes{};
+ for (u32 level = 0; level < num_levels; ++level) {
+ sizes[level] = CalculateLevelSize(info, level);
+ }
+ return sizes;
+}
+
+[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
+ u32 num_samples, u32 tile_width_spacing) {
+ const auto [samples_x, samples_y] = Samples(num_samples);
+ const u32 bytes_per_block = BytesPerBlock(format);
+ return {
+ .size =
+ {
+ .width = size.width * samples_x,
+ .height = size.height * samples_y,
+ .depth = size.depth,
+ },
+ .block = block,
+ .tile_size = DefaultBlockSize(format),
+ .bpp_log2 = BytesPerBlockLog2(bytes_per_block),
+ .tile_width_spacing = tile_width_spacing,
+ };
+}
+
+[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
+ return MakeLevelInfo(info.format, info.size, info.block, info.num_samples,
+ info.tile_width_spacing);
+}
+
+[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
+ u32 num_samples, u32 tile_width_spacing,
+ u32 level) {
+ const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing);
+ u32 offset = 0;
+ for (u32 current_level = 0; current_level < level; ++current_level) {
+ offset += CalculateLevelSize(info, current_level);
+ }
+ return offset;
+}
+
+[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block,
+ u32 tile_size_y, u32 tile_width_spacing) {
+ // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134
+ if (tile_width_spacing > 0) {
+ const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth;
+ return Common::AlignBits(size_bytes, alignment_log2);
+ }
+ const u32 aligned_height = Common::AlignUp(size.height, tile_size_y);
+ while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) {
+ --block.height;
+ }
+ while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) {
+ --block.depth;
+ }
+ const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth;
+ const u32 num_blocks = size_bytes >> block_shift;
+ if (size_bytes != num_blocks << block_shift) {
+ return (num_blocks + 1) << block_shift;
+ }
+ return size_bytes;
+}
+
+[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info,
+ const ImageBase& overlap,
+ bool strict_size) {
+ const ImageInfo& info = overlap.info;
+ if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) {
+ return std::nullopt;
+ }
+ if (new_info.block != info.block) {
+ return std::nullopt;
+ }
+ const SubresourceExtent resources = new_info.resources;
+ return SubresourceExtent{
+ .levels = std::max(resources.levels, info.resources.levels),
+ .layers = std::max(resources.layers, info.resources.layers),
+ };
+}
+
+[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
+ const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
+ const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
+ const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
+ const auto it = std::ranges::find(slice_offsets, diff);
+ if (it == slice_offsets.end()) {
+ return std::nullopt;
+ }
+ const std::vector subresources = CalculateSliceSubresources(new_info);
+ const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
+ const ImageInfo& info = overlap.info;
+ if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
+ return std::nullopt;
+ }
+ const u32 mip_depth = std::max(1U, new_info.size.depth << base.level);
+ if (mip_depth < info.size.depth + base.layer) {
+ return std::nullopt;
+ }
+ if (MipBlockSize(new_info, base.level) != info.block) {
+ return std::nullopt;
+ }
+ return SubresourceExtent{
+ .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
+ .layers = 1,
+ };
+}
+
+[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
+ const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
+ const u32 layer_stride = new_info.layer_stride;
+ const s32 new_size = layer_stride * new_info.resources.layers;
+ const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
+ if (diff > new_size) {
+ return std::nullopt;
+ }
+ const s32 base_layer = diff / layer_stride;
+ const s32 mip_offset = diff % layer_stride;
+ const std::array offsets = CalculateMipLevelOffsets(new_info);
+ const auto end = offsets.begin() + new_info.resources.levels;
+ const auto it = std::find(offsets.begin(), end, mip_offset);
+ if (it == end) {
+ // Mipmap is not aligned to any valid size
+ return std::nullopt;
+ }
+ const SubresourceBase base{
+ .level = static_cast<s32>(std::distance(offsets.begin(), it)),
+ .layer = base_layer,
+ };
+ const ImageInfo& info = overlap.info;
+ if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
+ return std::nullopt;
+ }
+ if (MipBlockSize(new_info, base.level) != info.block) {
+ return std::nullopt;
+ }
+ return SubresourceExtent{
+ .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
+ .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer),
+ };
+}
+
+[[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info,
+ GPUVAddr gpu_addr,
+ VAddr cpu_addr,
+ const ImageBase& overlap,
+ bool strict_size) {
+ std::optional<SubresourceExtent> resources;
+ if (new_info.type != ImageType::e3D) {
+ resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size);
+ } else {
+ resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size);
+ }
+ if (!resources) {
+ return std::nullopt;
+ }
+ return OverlapResult{
+ .gpu_addr = gpu_addr,
+ .cpu_addr = cpu_addr,
+ .resources = *resources,
+ };
+}
+
+[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info,
+ GPUVAddr gpu_addr,
+ VAddr cpu_addr,
+ const ImageBase& overlap,
+ bool strict_size) {
+ const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr);
+ if (!base) {
+ return std::nullopt;
+ }
+ const ImageInfo& info = overlap.info;
+ if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) {
+ return std::nullopt;
+ }
+ if (new_info.block != MipBlockSize(info, base->level)) {
+ return std::nullopt;
+ }
+ const SubresourceExtent resources = new_info.resources;
+ s32 layers = 1;
+ if (info.type != ImageType::e3D) {
+ layers = std::max(resources.layers, info.resources.layers + base->layer);
+ }
+ return OverlapResult{
+ .gpu_addr = overlap.gpu_addr,
+ .cpu_addr = overlap.cpu_addr,
+ .resources =
+ {
+ .levels = std::max(resources.levels + base->level, info.resources.levels),
+ .layers = layers,
+ },
+ };
+}
+
+[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) {
+ // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212
+ static constexpr u32 STRIDE_ALIGNMENT = 32;
+ ASSERT(info.type == ImageType::Linear);
+ const Extent2D num_tiles{
+ .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)),
+ .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)),
+ };
+ const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format);
+ return Extent2D{
+ .width = Common::AlignUp(num_tiles.width, width_alignment),
+ .height = num_tiles.height,
+ };
+}
+
+[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) {
+ // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176
+ ASSERT(info.type != ImageType::Linear);
+ const Extent3D size = AdjustMipSize(info.size, level);
+ const Extent3D num_tiles{
+ .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)),
+ .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)),
+ .depth = size.depth,
+ };
+ const u32 bpp_log2 = BytesPerBlockLog2(info.format);
+ const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
+ const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
+ return Extent3D{
+ .width = Common::AlignBits(num_tiles.width, alignment),
+ .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
+ .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth),
+ };
+}
+
+[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept {
+ u32 num_blocks = 0;
+ for (s32 level = 0; level < info.resources.levels; ++level) {
+ const Extent3D mip_size = AdjustMipSize(info.size, level);
+ num_blocks += NumBlocks(mip_size, tile_size);
+ }
+ return num_blocks;
+}
+
+[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept {
+ ASSERT(info.type == ImageType::e3D);
+ u32 num_slices = 0;
+ for (s32 level = 0; level < info.resources.levels; ++level) {
+ num_slices += AdjustMipSize(info.size.depth, level);
+ }
+ return num_slices;
+}
+
+void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
+ const ImageInfo& info, const BufferImageCopy& copy,
+ std::span<const u8> memory) {
+ ASSERT(copy.image_offset.z == 0);
+ ASSERT(copy.image_extent.depth == 1);
+ ASSERT(copy.image_subresource.base_level == 0);
+ ASSERT(copy.image_subresource.base_layer == 0);
+ ASSERT(copy.image_subresource.num_layers == 1);
+
+ const u32 bytes_per_block = BytesPerBlock(info.format);
+ const u32 row_length = copy.image_extent.width * bytes_per_block;
+ const u32 guest_offset_x = copy.image_offset.x * bytes_per_block;
+
+ for (u32 line = 0; line < copy.image_extent.height; ++line) {
+ const u32 host_offset_y = line * info.pitch;
+ const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch;
+ const u32 guest_offset = guest_offset_x + guest_offset_y;
+ gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y,
+ row_length);
+ }
+}
+
+void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
+ const ImageInfo& info, const BufferImageCopy& copy,
+ std::span<const u8> input) {
+ const Extent3D size = info.size;
+ const LevelInfo level_info = MakeLevelInfo(info);
+ const Extent2D tile_size = DefaultBlockSize(info.format);
+ const u32 bytes_per_block = BytesPerBlock(info.format);
+
+ const s32 level = copy.image_subresource.base_level;
+ const Extent3D level_size = AdjustMipSize(size, level);
+ const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
+ const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
+
+ UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
+
+ UNIMPLEMENTED_IF(copy.image_offset.x != 0);
+ UNIMPLEMENTED_IF(copy.image_offset.y != 0);
+ UNIMPLEMENTED_IF(copy.image_offset.z != 0);
+ UNIMPLEMENTED_IF(copy.image_extent != level_size);
+
+ const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
+ const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+
+ size_t host_offset = copy.buffer_offset;
+
+ const u32 num_levels = info.resources.levels;
+ const std::array sizes = CalculateLevelSizes(level_info, num_levels);
+ size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0);
+ const size_t layer_stride =
+ AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size,
+ level_info.block, tile_size.height, info.tile_width_spacing);
+ const size_t subresource_size = sizes[level];
+
+ const auto dst_data = std::make_unique<u8[]>(subresource_size);
+ const std::span<u8> dst(dst_data.get(), subresource_size);
+
+ for (s32 layer = 0; layer < info.resources.layers; ++layer) {
+ const std::span<const u8> src = input.subspan(host_offset);
+ SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
+ num_tiles.depth, block.height, block.depth);
+
+ gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
+
+ host_offset += host_bytes_per_layer;
+ guest_offset += layer_stride;
+ }
+ ASSERT(host_offset - copy.buffer_offset == copy.buffer_size);
+}
+
+} // Anonymous namespace
+
+u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept {
+ if (info.type == ImageType::Buffer) {
+ return info.size.width * BytesPerBlock(info.format);
+ }
+ if (info.type == ImageType::Linear) {
+ return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
+ }
+ if (info.resources.layers > 1) {
+ ASSERT(info.layer_stride != 0);
+ return info.layer_stride * info.resources.layers;
+ } else {
+ return CalculateLayerSize(info);
+ }
+}
+
+u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {
+ if (info.type == ImageType::Buffer) {
+ return info.size.width * BytesPerBlock(info.format);
+ }
+ if (info.num_samples > 1) {
+ // Multisample images can't be uploaded or downloaded to the host
+ return 0;
+ }
+ if (info.type == ImageType::Linear) {
+ return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
+ }
+ const Extent2D tile_size = DefaultBlockSize(info.format);
+ return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format);
+}
+
+u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
+ if (info.type == ImageType::Buffer) {
+ return info.size.width * BytesPerBlock(info.format);
+ }
+ static constexpr Extent2D TILE_SIZE{1, 1};
+ return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
+}
+
+// Returns the aligned byte distance between consecutive array layers in guest
+// memory. Alignment of the raw layer size is delegated to AlignLayerSize.
+u32 CalculateLayerStride(const ImageInfo& info) noexcept {
+    ASSERT(info.type != ImageType::Linear);
+    const u32 layer_size = CalculateLayerSize(info);
+    const Extent3D size = info.size;
+    const Extent3D block = info.block;
+    const u32 tile_size_y = DefaultBlockHeight(info.format);
+    return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing);
+}
+
+// Returns the unaligned size in bytes of a single array layer: the offset of
+// the one-past-the-last mip level is the sum of all level sizes.
+u32 CalculateLayerSize(const ImageInfo& info) noexcept {
+    ASSERT(info.type != ImageType::Linear);
+    return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples,
+                                info.tile_width_spacing, info.resources.levels);
+}
+
+// Returns the byte offset of each mip level relative to the start of a layer.
+// Entries past the last level remain zero.
+std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
+    ASSERT(info.resources.levels <= MAX_MIP_LEVELS);
+    const LevelInfo level_info = MakeLevelInfo(info);
+    std::array<u32, MAX_MIP_LEVELS> offsets{};
+    u32 accumulated = 0;
+    s32 level = 0;
+    while (level < info.resources.levels) {
+        offsets[level] = accumulated;
+        accumulated += CalculateLevelSize(level_info, level);
+        ++level;
+    }
+    return offsets;
+}
+
+// Returns the guest byte offset of every depth slice of a 3D image, ordered by
+// mip level and then by slice index within the level.
+std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
+    ASSERT(info.type == ImageType::e3D);
+    std::vector<u32> offsets;
+    offsets.reserve(NumSlices(info));
+
+    const LevelInfo level_info = MakeLevelInfo(info);
+    u32 mip_offset = 0;
+    for (s32 level = 0; level < info.resources.levels; ++level) {
+        const Extent3D tile_shift = TileShift(level_info, level);
+        const Extent3D tiles = LevelTiles(level_info, level);
+        const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT;
+        const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift;
+        const u32 z_mask = (1U << tile_shift.depth) - 1;
+        const u32 depth = AdjustMipSize(info.size.depth, level);
+        for (u32 slice = 0; slice < depth; ++slice) {
+            // Split the slice index into the part inside the current z-block
+            // (z_low) and the whole z-blocks before it (z_high); each scales
+            // the offset differently in the block-linear layout.
+            const u32 z_low = slice & z_mask;
+            const u32 z_high = slice & ~z_mask;
+            offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size));
+        }
+        // Advance past this mip level for the next iteration
+        mip_offset += CalculateLevelSize(level_info, level);
+    }
+    return offsets;
+}
+
+// Lists the (level, slice) pair of every depth slice contained in a 3D image,
+// matching the ordering produced by CalculateSliceOffsets.
+std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
+    ASSERT(info.type == ImageType::e3D);
+    std::vector<SubresourceBase> result;
+    result.reserve(NumSlices(info));
+    for (s32 mip = 0; mip < info.resources.levels; ++mip) {
+        const s32 num_slices = AdjustMipSize(info.size.depth, mip);
+        for (s32 slice = 0; slice < num_slices; ++slice) {
+            result.push_back(SubresourceBase{
+                .level = mip,
+                .layer = slice,
+            });
+        }
+    }
+    return result;
+}
+
+// Returns the stride alignment required by the given mip level, derived from
+// the level's tile count and adjusted block dimensions.
+u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) {
+    const Extent2D tile_size = DefaultBlockSize(info.format);
+    const Extent3D level_size = AdjustMipSize(info.size, level);
+    const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
+    const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level);
+    const u32 bpp_log2 = BytesPerBlockLog2(info.format);
+    return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing);
+}
+
+// Translates a TIC entry's format, per-component types and sRGB flag into the
+// host pixel format.
+PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept {
+    return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
+                                      config.a_type, config.srgb_conversion);
+}
+
+// Selects the image view type used when binding the image as a render target.
+ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
+    if (info.type == ImageType::e2D) {
+        return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D;
+    }
+    if (info.type == ImageType::e3D) {
+        // 3D images are rendered to as layered 2D arrays
+        return ImageViewType::e2DArray;
+    }
+    if (info.type == ImageType::Linear) {
+        return ImageViewType::e2D;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type));
+    return ImageViewType{};
+}
+
+// Builds per-level image copies that move the whole of `src` into `dst`,
+// placed at subresource `base` of the destination. For 3D destinations the
+// source's layer placement is expressed as a z offset instead of a base layer.
+std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
+                                             SubresourceBase base) {
+    ASSERT(dst.resources.levels >= src.resources.levels);
+    ASSERT(dst.num_samples == src.num_samples);
+
+    const bool is_dst_3d = dst.type == ImageType::e3D;
+    if (is_dst_3d) {
+        ASSERT(src.type == ImageType::e3D);
+        ASSERT(src.resources.levels == 1);
+    }
+
+    std::vector<ImageCopy> copies;
+    copies.reserve(src.resources.levels);
+    for (s32 level = 0; level < src.resources.levels; ++level) {
+        ImageCopy& copy = copies.emplace_back();
+        copy.src_subresource = SubresourceLayers{
+            .base_level = level,
+            .base_layer = 0,
+            .num_layers = src.resources.layers,
+        };
+        copy.dst_subresource = SubresourceLayers{
+            .base_level = base.level + level,
+            .base_layer = is_dst_3d ? 0 : base.layer,
+            .num_layers = is_dst_3d ? 1 : src.resources.layers,
+        };
+        copy.src_offset = Offset3D{
+            .x = 0,
+            .y = 0,
+            .z = 0,
+        };
+        copy.dst_offset = Offset3D{
+            .x = 0,
+            .y = 0,
+            // For 3D images, base.layer is the destination depth slice
+            .z = is_dst_3d ? base.layer : 0,
+        };
+        // Extent follows the destination's mip chain at the placed level
+        const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level);
+        copy.extent = AdjustSamplesSize(mip_size, dst.num_samples);
+        if (is_dst_3d) {
+            copy.extent.depth = src.size.depth;
+        }
+    }
+    return copies;
+}
+
+// Returns true when the TIC entry points to a non-null address inside the
+// 48-bit GPU address space that is mapped to CPU memory.
+bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
+    const u64 address = config.Address();
+    if (address == 0 || address > (u64(1) << 48)) {
+        return false;
+    }
+    return gpu_memory.GpuToCpuAddress(address).has_value();
+}
+
+// Reads the image from guest memory, deswizzles it into `output`, and returns
+// one BufferImageCopy per mip level describing where each level landed in the
+// output buffer. Pitch-linear images are copied verbatim with a single copy.
+std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
+                                            const ImageInfo& info, std::span<u8> output) {
+    const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
+    const u32 bpp_log2 = BytesPerBlockLog2(info.format);
+    const Extent3D size = info.size;
+
+    if (info.type == ImageType::Linear) {
+        gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
+
+        // The pitch must be an exact multiple of the block size
+        ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
+        return {{
+            .buffer_offset = 0,
+            .buffer_size = guest_size_bytes,
+            .buffer_row_length = info.pitch >> bpp_log2,
+            .buffer_image_height = size.height,
+            .image_subresource =
+                {
+                    .base_level = 0,
+                    .base_layer = 0,
+                    .num_layers = 1,
+                },
+            .image_offset = {0, 0, 0},
+            .image_extent = size,
+        }};
+    }
+    // Stage the swizzled guest data before deswizzling it level by level
+    const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
+    gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
+    const std::span<const u8> input(input_data.get(), guest_size_bytes);
+
+    const LevelInfo level_info = MakeLevelInfo(info);
+    const s32 num_layers = info.resources.layers;
+    const s32 num_levels = info.resources.levels;
+    const Extent2D tile_size = DefaultBlockSize(info.format);
+    const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
+    const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
+    // Guest layers are separated by the aligned sum of all level sizes
+    const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0);
+    const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height,
+                                            info.tile_width_spacing);
+    size_t guest_offset = 0;
+    u32 host_offset = 0;
+    std::vector<BufferImageCopy> copies(num_levels);
+
+    for (s32 level = 0; level < num_levels; ++level) {
+        const Extent3D level_size = AdjustMipSize(size, level);
+        const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
+        const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2;
+        copies[level] = BufferImageCopy{
+            .buffer_offset = host_offset,
+            .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers,
+            .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width),
+            .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height),
+            .image_subresource =
+                {
+                    .base_level = level,
+                    .base_layer = 0,
+                    .num_layers = info.resources.layers,
+                },
+            .image_offset = {0, 0, 0},
+            .image_extent = level_size,
+        };
+        const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
+        const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+        const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
+        size_t guest_layer_offset = 0;
+
+        // Deswizzle this level for every layer; host data is packed densely
+        // while guest layers are layer_stride apart
+        for (s32 layer = 0; layer < info.resources.layers; ++layer) {
+            const std::span<u8> dst = output.subspan(host_offset);
+            const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset);
+            UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height,
+                             num_tiles.depth, block.height, block.depth, stride_alignment);
+            guest_layer_offset += layer_stride;
+            host_offset += host_bytes_per_layer;
+        }
+        guest_offset += level_sizes[level];
+    }
+    return copies;
+}
+
+// Reads a buffer-type image from guest memory into `output` and returns the
+// single whole-buffer copy describing the upload.
+BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
+                            const ImageBase& image, std::span<u8> output) {
+    gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
+    return BufferCopy{
+        .src_offset = 0,
+        .dst_offset = 0,
+        .size = image.guest_size_bytes,
+    };
+}
+
+// Decompresses each copy region in `input` (ASTC, otherwise BC4 — the only two
+// conversion paths here) into `output`, and rewrites the copies in place so
+// they address the converted, tightly packed data.
+void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
+                  std::span<BufferImageCopy> copies) {
+    u32 output_offset = 0;
+
+    const Extent2D tile_size = DefaultBlockSize(info.format);
+    for (BufferImageCopy& copy : copies) {
+        const u32 level = copy.image_subresource.base_level;
+        const Extent3D mip_size = AdjustMipSize(info.size, level);
+        // The input copies are expected to cover whole, block-aligned levels
+        ASSERT(copy.image_offset == Offset3D{});
+        ASSERT(copy.image_subresource.base_layer == 0);
+        ASSERT(copy.image_extent == mip_size);
+        ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
+        ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
+
+        if (IsPixelFormatASTC(info.format)) {
+            ASSERT(copy.image_extent.depth == 1);
+            Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
+                                             copy.image_extent.width, copy.image_extent.height,
+                                             copy.image_subresource.num_layers, tile_size.width,
+                                             tile_size.height, output.subspan(output_offset));
+        } else {
+            DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
+                          output.subspan(output_offset));
+        }
+        // Retarget the copy at the converted data: no block alignment remains
+        copy.buffer_offset = output_offset;
+        copy.buffer_row_length = mip_size.width;
+        copy.buffer_image_height = mip_size.height;
+
+        output_offset += copy.image_extent.width * copy.image_extent.height *
+                         copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
+    }
+}
+
+// Builds the buffer-image copies needed to download the entire image from the
+// host, one copy per mip level (or a single copy for pitch-linear images).
+std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
+    const Extent3D size = info.size;
+    const u32 bytes_per_block = BytesPerBlock(info.format);
+    if (info.type == ImageType::Linear) {
+        ASSERT(info.pitch % bytes_per_block == 0);
+        return {{
+            .buffer_offset = 0,
+            .buffer_size = static_cast<size_t>(info.pitch) * size.height,
+            .buffer_row_length = info.pitch / bytes_per_block,
+            .buffer_image_height = size.height,
+            .image_subresource =
+                {
+                    .base_level = 0,
+                    .base_layer = 0,
+                    .num_layers = 1,
+                },
+            .image_offset = {0, 0, 0},
+            .image_extent = size,
+        }};
+    }
+    // Downloads with tile width spacing are not handled yet
+    UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
+
+    const s32 num_layers = info.resources.layers;
+    const s32 num_levels = info.resources.levels;
+    const Extent2D tile_size = DefaultBlockSize(info.format);
+
+    u32 host_offset = 0;
+
+    std::vector<BufferImageCopy> copies(num_levels);
+    for (s32 level = 0; level < num_levels; ++level) {
+        const Extent3D level_size = AdjustMipSize(size, level);
+        const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
+        // Each copy covers all layers of one level, packed back to back
+        const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers;
+        copies[level] = BufferImageCopy{
+            .buffer_offset = host_offset,
+            .buffer_size = host_bytes_per_level,
+            .buffer_row_length = level_size.width,
+            .buffer_image_height = level_size.height,
+            .image_subresource =
+                {
+                    .base_level = level,
+                    .base_layer = 0,
+                    .num_layers = info.resources.layers,
+                },
+            .image_offset = {0, 0, 0},
+            .image_extent = level_size,
+        };
+        host_offset += host_bytes_per_level;
+    }
+    return copies;
+}
+
+// Public wrapper over the file-local AdjustMipSize helper.
+Extent3D MipSize(Extent3D size, u32 level) {
+    return AdjustMipSize(size, level);
+}
+
+// Returns the block dimensions of the given mip level, shrunk from the base
+// block configuration as the level's tile count decreases.
+Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
+    const LevelInfo level_info = MakeLevelInfo(info);
+    const Extent2D tile_size = DefaultBlockSize(info.format);
+    const Extent3D level_size = AdjustMipSize(info.size, level);
+    const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
+    return AdjustMipBlockSize(num_tiles, level_info.block, level);
+}
+
+// Builds per-level swizzle parameters (tile counts, block sizes and guest
+// buffer offsets) for uploading the whole image. Pitch-linear images yield a
+// single parameter set with no block configuration.
+std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
+    const Extent2D tile_size = DefaultBlockSize(info.format);
+    if (info.type == ImageType::Linear) {
+        return std::vector{SwizzleParameters{
+            .num_tiles = AdjustTileSize(info.size, tile_size),
+            .block = {},
+            .buffer_offset = 0,
+            .level = 0,
+        }};
+    }
+    const LevelInfo level_info = MakeLevelInfo(info);
+    const Extent3D size = info.size;
+    const s32 num_levels = info.resources.levels;
+
+    u32 guest_offset = 0;
+    std::vector<SwizzleParameters> params(num_levels);
+    for (s32 level = 0; level < num_levels; ++level) {
+        const Extent3D level_size = AdjustMipSize(size, level);
+        const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
+        const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+        params[level] = SwizzleParameters{
+            .num_tiles = num_tiles,
+            .block = block,
+            .buffer_offset = guest_offset,
+            .level = level,
+        };
+        // Levels are stored consecutively in guest memory
+        guest_offset += CalculateLevelSize(level_info, level);
+    }
+    return params;
+}
+
+// Swizzles the host data described by `copies` back into guest memory,
+// dispatching on the image's memory layout.
+void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
+                  std::span<const BufferImageCopy> copies, std::span<const u8> memory) {
+    for (const BufferImageCopy& copy : copies) {
+        if (info.type == ImageType::Linear) {
+            SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory);
+        } else {
+            SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory);
+        }
+    }
+}
+
+// Compares the 2D dimensions of two block-linear images at the given mip
+// levels, either exactly (strict) or after block-linear alignment (relaxed).
+bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level,
+                                 u32 rhs_level, bool strict_size) noexcept {
+    ASSERT(lhs.type != ImageType::Linear);
+    ASSERT(rhs.type != ImageType::Linear);
+    const Extent3D lhs_size =
+        strict_size ? AdjustMipSize(lhs.size, lhs_level) : BlockLinearAlignedSize(lhs, lhs_level);
+    const Extent3D rhs_size =
+        strict_size ? AdjustMipSize(rhs.size, rhs_level) : BlockLinearAlignedSize(rhs, rhs_level);
+    return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
+}
+
+// Compares the dimensions of two pitch-linear images, exactly or after pitch
+// alignment depending on `strict_size`.
+bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept {
+    ASSERT(lhs.type == ImageType::Linear);
+    ASSERT(rhs.type == ImageType::Linear);
+    if (!strict_size) {
+        return PitchLinearAlignedSize(lhs) == PitchLinearAlignedSize(rhs);
+    }
+    return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height;
+}
+
+// Decides whether a new image overlapping `overlap` in guest memory can be
+// merged with it, dispatching on the relative position of their base
+// addresses. Returns the merged placement, or std::nullopt when incompatible.
+std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
+                                            VAddr cpu_addr, const ImageBase& overlap,
+                                            bool strict_size) {
+    ASSERT(new_info.type != ImageType::Linear);
+    ASSERT(overlap.info.type != ImageType::Linear);
+    if (!IsLayerStrideCompatible(new_info, overlap.info)) {
+        return std::nullopt;
+    }
+    if (!IsViewCompatible(overlap.info.format, new_info.format)) {
+        return std::nullopt;
+    }
+    if (gpu_addr == overlap.gpu_addr) {
+        // Same base address: only the subresource extents need reconciling
+        const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size);
+        if (!solution) {
+            return std::nullopt;
+        }
+        return OverlapResult{
+            .gpu_addr = gpu_addr,
+            .cpu_addr = cpu_addr,
+            .resources = *solution,
+        };
+    }
+    if (overlap.gpu_addr > gpu_addr) {
+        return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
+    }
+    // if overlap.gpu_addr < gpu_addr
+    return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
+}
+
+// Decides whether two images can alias the same memory given their layer
+// strides.
+bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
+    // A zero stride generally comes from render targets; assume compatibility
+    if (lhs.layer_stride == 0 || rhs.layer_stride == 0) {
+        return true;
+    }
+    // Matching aligned strides are trivially compatible
+    if (lhs.layer_stride == rhs.layer_stride) {
+        return true;
+    }
+    // Single-layer images may carry an unaligned stride; compare that as well
+    return lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride;
+}
+
+// Tries to locate `candidate` as a subresource (mip level + layer) of `image`
+// starting at `candidate_addr`. `options` relaxes individual compatibility
+// checks. Returns std::nullopt when the candidate does not fit.
+std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
+                                               GPUVAddr candidate_addr, RelaxedOptions options) {
+    const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
+    if (!base) {
+        return std::nullopt;
+    }
+    const ImageInfo& existing = image.info;
+    if (False(options & RelaxedOptions::Format)) {
+        if (!IsViewCompatible(existing.format, candidate.format)) {
+            return std::nullopt;
+        }
+    }
+    if (!IsLayerStrideCompatible(existing, candidate)) {
+        return std::nullopt;
+    }
+    if (existing.type != candidate.type) {
+        return std::nullopt;
+    }
+    if (False(options & RelaxedOptions::Samples)) {
+        if (existing.num_samples != candidate.num_samples) {
+            return std::nullopt;
+        }
+    }
+    if (existing.resources.levels < candidate.resources.levels + base->level) {
+        return std::nullopt;
+    }
+    if (existing.type == ImageType::e3D) {
+        // Mip dimensions halve per level, so the depth of the base level must
+        // be shifted *right* (the previous left shift overestimated the depth
+        // of non-zero mip levels, accepting candidates that do not fit).
+        const u32 mip_depth = std::max(1U, existing.size.depth >> base->level);
+        if (mip_depth < candidate.size.depth + base->layer) {
+            return std::nullopt;
+        }
+    } else {
+        if (existing.resources.layers < candidate.resources.layers + base->layer) {
+            return std::nullopt;
+        }
+    }
+    const bool strict_size = False(options & RelaxedOptions::Size);
+    if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) {
+        return std::nullopt;
+    }
+    // TODO: compare block sizes
+    return base;
+}
+
+// Returns true when `candidate` can be located as a subresource of `image`
+// (thin predicate wrapper over FindSubresource).
+bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
+                   RelaxedOptions options) {
+    return FindSubresource(candidate, image, candidate_addr, options).has_value();
+}
+
+// Refines the guessed formats of a blit's endpoints: when an existing image on
+// either side has a non-color (depth/stencil) format, that format wins, and it
+// is propagated to a missing endpoint from the one that exists.
+void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
+                      const ImageBase* src) {
+    if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
+        src_info.format = src->info.format;
+    }
+    if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
+        dst_info.format = dst->info.format;
+    }
+    if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
+        dst_info.format = src->info.format;
+    }
+    if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
+        // Fix: this branch runs only when src is null, so the format must come
+        // from dst (the old code dereferenced the null src pointer).
+        src_info.format = dst->info.format;
+    }
+}
+
+// Returns the number of staging bytes needed to transfer this image, which
+// depends on how the data is moved to the host.
+u32 MapSizeBytes(const ImageBase& image) {
+    if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
+        // GPU-side swizzling consumes the guest bytes verbatim
+        return image.guest_size_bytes;
+    }
+    if (True(image.flags & ImageFlagBits::Converted)) {
+        return image.converted_size_bytes;
+    }
+    return image.unswizzled_size_bytes;
+}
+
+// Compile-time regression checks of the size math against known-good values.
+using P = PixelFormat;
+
+static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000);
+static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
+
+static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00);
+static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) ==
+              0x50d200);
+
+// Full mip chain offsets of a 1024x1024 RGBA8 image
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0);
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000);
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000);
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000);
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000);
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000);
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000);
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400);
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600);
+static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800);
+
+// Computes the aligned layer size of a synthetic 2D image, exercising both
+// CalculateLevelOffset and AlignLayerSize at compile time.
+constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height,
+                                u32 tile_width_spacing, u32 level) {
+    const Extent3D size{width, height, 1};
+    const Extent3D block{0, block_height, 0};
+    const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level);
+    return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing);
+}
+
+static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800);
+static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000);
+static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000);
+
+static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000,
+              "Tile width spacing is not working");
+static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000,
+              "Compressed tile width spacing is not working");
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
new file mode 100644
index 000000000..dbbbd33cd
--- /dev/null
+++ b/src/video_core/texture_cache/util.h
@@ -0,0 +1,107 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+#include <span>
+
+#include "common/common_types.h"
+
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/image_base.h"
+#include "video_core/texture_cache/image_view_base.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::TICEntry;
+
+// Result of merging a new image with an overlapping existing one: the merged
+// placement in GPU and CPU memory plus the combined subresource extents.
+struct OverlapResult {
+    GPUVAddr gpu_addr;
+    VAddr cpu_addr;
+    SubresourceExtent resources;
+};
+
+// Size calculations for the different staging representations of an image
+[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept;
+
+[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;
+
+[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept;
+
+// Layout queries: layer strides, mip offsets, 3D slice addressing
+[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept;
+
+[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept;
+
+[[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(
+    const ImageInfo& info) noexcept;
+
+[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
+
+[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
+
+[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
+
+[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC(
+    const Tegra::Texture::TICEntry& config) noexcept;
+
+[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
+
+[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
+                                                           const ImageInfo& src,
+                                                           SubresourceBase base);
+
+[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
+
+// Upload/download helpers between guest memory and host staging buffers
+[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
+                                                          GPUVAddr gpu_addr, const ImageInfo& info,
+                                                          std::span<u8> output);
+
+[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
+                                          const ImageBase& image, std::span<u8> output);
+
+void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
+                  std::span<BufferImageCopy> copies);
+
+[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
+
+[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
+
+[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
+
+[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
+
+void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
+                  std::span<const BufferImageCopy> copies, std::span<const u8> memory);
+
+// Aliasing/overlap resolution between images sharing guest memory
+[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info,
+                                               const ImageInfo& overlap_info, u32 new_level,
+                                               u32 overlap_level, bool strict_size) noexcept;
+
+[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs,
+                                         bool strict_size) noexcept;
+
+[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
+                                                          GPUVAddr gpu_addr, VAddr cpu_addr,
+                                                          const ImageBase& overlap,
+                                                          bool strict_size);
+
+[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
+
+[[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate,
+                                                             const ImageBase& image,
+                                                             GPUVAddr candidate_addr,
+                                                             RelaxedOptions options);
+
+[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
+                                 GPUVAddr candidate_addr, RelaxedOptions options);
+
+void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
+                      const ImageBase* src);
+
+[[nodiscard]] u32 MapSizeBytes(const ImageBase& image);
+
+} // namespace VideoCommon