summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/macro/macro_hle.cpp7
-rw-r--r--src/video_core/rasterizer_interface.h8
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp11
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp11
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h6
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h27
-rw-r--r--src/video_core/texture_cache/decode_bc.cpp50
-rw-r--r--src/video_core/texture_cache/decode_bc.h2
-rw-r--r--src/video_core/texture_cache/util.cpp16
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp16
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp4
13 files changed, 136 insertions, 44 deletions
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 046c8085e..46e853e04 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -327,12 +327,13 @@ public:
explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
+ const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback();
+
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0xFFFFU);
- if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
+ if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) {
Fallback(parameters);
return;
}
-
auto& params = maxwell3d.draw_manager->GetIndirectParams();
params.is_byte_count = true;
params.is_indexed = false;
@@ -503,6 +504,8 @@ public:
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)),
regs.transform_feedback.controls[0].stride, true);
+
+ maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address());
}
};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index af1469147..49224ca85 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -173,5 +173,13 @@ public:
virtual void BindChannel(Tegra::Control::ChannelState& channel) {}
virtual void ReleaseChannel(s32 channel_id) {}
+
+ /// Register the address as a Transform Feedback Object
+ virtual void RegisterTransformFeedback(GPUVAddr tfb_object_addr) {}
+
+ /// Returns true when the rasterizer has Draw Transform Feedback capabilities
+ virtual bool HasDrawTransformFeedback() {
+ return false;
+ }
};
} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index b787b6994..517ac14dd 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -376,4 +376,15 @@ void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, P
*image_handles++ = buffer.View(offset, size, format);
}
+void BufferCacheRuntime::BindTransformFeedbackObject(GPUVAddr tfb_object_addr) {
+ OGLTransformFeedback& tfb_object = tfb_objects[tfb_object_addr];
+ tfb_object.Create();
+ glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, tfb_object.handle);
+}
+
+GLuint BufferCacheRuntime::GetTransformFeedbackObject(GPUVAddr tfb_object_addr) {
+ ASSERT(tfb_objects.contains(tfb_object_addr));
+ return tfb_objects[tfb_object_addr].handle;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 1e8708f59..2c18de166 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -5,6 +5,7 @@
#include <array>
#include <span>
+#include <unordered_map>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache_base.h"
@@ -121,6 +122,9 @@ public:
void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
+ void BindTransformFeedbackObject(GPUVAddr tfb_object_addr);
+ GLuint GetTransformFeedbackObject(GPUVAddr tfb_object_addr);
+
u64 GetDeviceMemoryUsage() const;
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
@@ -233,6 +237,7 @@ private:
u32 index_buffer_offset = 0;
u64 device_access_memory;
+ std::unordered_map<GPUVAddr, OGLTransformFeedback> tfb_objects;
};
struct BufferCacheParams {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 339950d2e..7a5fad735 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -309,6 +309,13 @@ void RasterizerOpenGL::DrawIndirect() {
const auto& params = maxwell3d->draw_manager->GetIndirectParams();
buffer_cache.SetDrawIndirect(&params);
PrepareDraw(params.is_indexed, [this, &params](GLenum primitive_mode) {
+ if (params.is_byte_count) {
+ const GPUVAddr tfb_object_base_addr = params.indirect_start_address - 4U;
+ const GLuint tfb_object =
+ buffer_cache_runtime.GetTransformFeedbackObject(tfb_object_base_addr);
+ glDrawTransformFeedback(primitive_mode, tfb_object);
+ return;
+ }
const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer();
const GLvoid* const gl_offset =
reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(offset));
@@ -1371,6 +1378,10 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) {
query_cache.EraseChannel(channel_id);
}
+void RasterizerOpenGL::RegisterTransformFeedback(GPUVAddr tfb_object_addr) {
+ buffer_cache_runtime.BindTransformFeedbackObject(tfb_object_addr);
+}
+
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_)
: buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index b79d7a70c..ce3460938 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -139,6 +139,12 @@ public:
void ReleaseChannel(s32 channel_id) override;
+ void RegisterTransformFeedback(GPUVAddr tfb_object_addr) override;
+
+ bool HasDrawTransformFeedback() override {
+ return true;
+ }
+
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index eae8fd110..1d2c9b70a 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -207,4 +207,21 @@ void OGLQuery::Release() {
handle = 0;
}
+void OGLTransformFeedback::Create() {
+ if (handle != 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ glCreateTransformFeedbacks(1, &handle);
+}
+
+void OGLTransformFeedback::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteTransformFeedbacks(1, &handle);
+ handle = 0;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 77362acd2..6ca8227bd 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -323,4 +323,31 @@ public:
GLuint handle = 0;
};
+class OGLTransformFeedback final {
+public:
+ YUZU_NON_COPYABLE(OGLTransformFeedback);
+
+ OGLTransformFeedback() = default;
+
+ OGLTransformFeedback(OGLTransformFeedback&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
+ ~OGLTransformFeedback() {
+ Release();
+ }
+
+ OGLTransformFeedback& operator=(OGLTransformFeedback&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
+ return *this;
+ }
+
+ /// Creates a new internal OpenGL resource and stores the handle
+ void Create();
+
+ /// Deletes the internal OpenGL resource
+ void Release();
+
+ GLuint handle = 0;
+};
+
} // namespace OpenGL
diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp
index 3e26474a3..a018c6df4 100644
--- a/src/video_core/texture_cache/decode_bc.cpp
+++ b/src/video_core/texture_cache/decode_bc.cpp
@@ -60,66 +60,72 @@ u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) {
}
template <auto decompress, PixelFormat pixel_format>
-void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
bool is_signed = false) {
const u32 out_bpp = ConvertedBytesPerBlock(pixel_format);
- const u32 block_width = std::min(extent.width, BLOCK_SIZE);
- const u32 block_height = std::min(extent.height, BLOCK_SIZE);
- const u32 pitch = extent.width * out_bpp;
+ const u32 block_size = BlockSize(pixel_format);
+ const u32 width = copy.image_extent.width;
+ const u32 height = copy.image_extent.height * copy.image_subresource.num_layers;
+ const u32 depth = copy.image_extent.depth;
+ const u32 block_width = std::min(width, BLOCK_SIZE);
+ const u32 block_height = std::min(height, BLOCK_SIZE);
+ const u32 pitch = width * out_bpp;
size_t input_offset = 0;
size_t output_offset = 0;
- for (u32 slice = 0; slice < extent.depth; ++slice) {
- for (u32 y = 0; y < extent.height; y += block_height) {
- size_t row_offset = 0;
- for (u32 x = 0; x < extent.width;
- x += block_width, row_offset += block_width * out_bpp) {
- const u8* src = input.data() + input_offset;
- u8* const dst = output.data() + output_offset + row_offset;
+ for (u32 slice = 0; slice < depth; ++slice) {
+ for (u32 y = 0; y < height; y += block_height) {
+ size_t src_offset = input_offset;
+ size_t dst_offset = output_offset;
+ for (u32 x = 0; x < width; x += block_width) {
+ const u8* src = input.data() + src_offset;
+ u8* const dst = output.data() + dst_offset;
if constexpr (IsSigned(pixel_format)) {
- decompress(src, dst, x, y, extent.width, extent.height, is_signed);
+ decompress(src, dst, x, y, width, height, is_signed);
} else {
- decompress(src, dst, x, y, extent.width, extent.height);
+ decompress(src, dst, x, y, width, height);
}
- input_offset += BlockSize(pixel_format);
+ src_offset += block_size;
+ dst_offset += block_width * out_bpp;
}
+ input_offset += copy.buffer_row_length * block_size / block_width;
output_offset += block_height * pitch;
}
}
}
-void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
VideoCore::Surface::PixelFormat pixel_format) {
switch (pixel_format) {
case PixelFormat::BC1_RGBA_UNORM:
case PixelFormat::BC1_RGBA_SRGB:
- DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent);
+ DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, copy);
break;
case PixelFormat::BC2_UNORM:
case PixelFormat::BC2_SRGB:
- DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent);
+ DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, copy);
break;
case PixelFormat::BC3_UNORM:
case PixelFormat::BC3_SRGB:
- DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent);
+ DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, copy);
break;
case PixelFormat::BC4_SNORM:
case PixelFormat::BC4_UNORM:
DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>(
- input, output, extent, pixel_format == PixelFormat::BC4_SNORM);
+ input, output, copy, pixel_format == PixelFormat::BC4_SNORM);
break;
case PixelFormat::BC5_SNORM:
case PixelFormat::BC5_UNORM:
DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>(
- input, output, extent, pixel_format == PixelFormat::BC5_SNORM);
+ input, output, copy, pixel_format == PixelFormat::BC5_SNORM);
break;
case PixelFormat::BC6H_SFLOAT:
case PixelFormat::BC6H_UFLOAT:
DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>(
- input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT);
+ input, output, copy, pixel_format == PixelFormat::BC6H_SFLOAT);
break;
case PixelFormat::BC7_SRGB:
case PixelFormat::BC7_UNORM:
- DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent);
+ DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, copy);
break;
default:
LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format);
diff --git a/src/video_core/texture_cache/decode_bc.h b/src/video_core/texture_cache/decode_bc.h
index 41d1ec0a3..4e3b9b8ac 100644
--- a/src/video_core/texture_cache/decode_bc.h
+++ b/src/video_core/texture_cache/decode_bc.h
@@ -13,7 +13,7 @@ namespace VideoCommon {
[[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format);
-void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
VideoCore::Surface::PixelFormat pixel_format);
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 15596c925..fcf70068e 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -837,6 +837,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
std::span<u8> output) {
const size_t guest_size_bytes = input.size_bytes();
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
+ const Extent2D tile_size = DefaultBlockSize(info.format);
const Extent3D size = info.size;
if (info.type == ImageType::Linear) {
@@ -847,7 +848,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
return {{
.buffer_offset = 0,
.buffer_size = guest_size_bytes,
- .buffer_row_length = info.pitch >> bpp_log2,
+ .buffer_row_length = info.pitch * tile_size.width >> bpp_log2,
.buffer_image_height = size.height,
.image_subresource =
{
@@ -862,7 +863,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
const LevelInfo level_info = MakeLevelInfo(info);
const s32 num_layers = info.resources.layers;
const s32 num_levels = info.resources.levels;
- const Extent2D tile_size = DefaultBlockSize(info.format);
const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
const u32 layer_size = CalculateLevelBytes(level_sizes, num_levels);
@@ -926,8 +926,6 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
const auto input_offset = input.subspan(copy.buffer_offset);
copy.buffer_offset = output_offset;
- copy.buffer_row_length = mip_size.width;
- copy.buffer_image_height = mip_size.height;
const auto recompression_setting = Settings::values.astc_recompression.GetValue();
const bool astc = IsPixelFormatASTC(info.format);
@@ -972,16 +970,14 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
bpp_div;
output_offset += static_cast<u32>(copy.buffer_size);
} else {
- const Extent3D image_extent{
- .width = copy.image_extent.width,
- .height = copy.image_extent.height * copy.image_subresource.num_layers,
- .depth = copy.image_extent.depth,
- };
- DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format);
+ DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format);
output_offset += copy.image_extent.width * copy.image_extent.height *
copy.image_subresource.num_layers *
ConvertedBytesPerBlock(info.format);
}
+
+ copy.buffer_row_length = mip_size.width;
+ copy.buffer_image_height = mip_size.height;
}
}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index a6fbca69e..727bbd98d 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -755,10 +755,10 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
// The wanted format is not supported by hardware, search for alternatives
const VkFormat* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
- ASSERT_MSG(false,
- "Format={} with usage={} and type={} has no defined alternatives and host "
- "hardware does not support it",
- wanted_format, wanted_usage, format_type);
+ LOG_ERROR(Render_Vulkan,
+ "Format={} with usage={} and type={} has no defined alternatives and host "
+ "hardware does not support it",
+ wanted_format, wanted_usage, format_type);
return wanted_format;
}
@@ -774,10 +774,10 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
}
// No alternatives found, panic
- ASSERT_MSG(false,
- "Format={} with usage={} and type={} is not supported by the host hardware and "
- "doesn't support any of the alternatives",
- wanted_format, wanted_usage, format_type);
+ LOG_ERROR(Render_Vulkan,
+ "Format={} with usage={} and type={} is not supported by the host hardware and "
+ "doesn't support any of the alternatives",
+ wanted_format, wanted_usage, format_type);
return wanted_format;
}
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 2f78b8af0..074aed964 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -246,7 +246,9 @@ void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjec
.objectHandle = reinterpret_cast<u64>(handle),
.pObjectName = name,
};
- Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info));
+ if (dld->vkSetDebugUtilsObjectNameEXT) {
+ Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info));
+ }
}
} // Anonymous namespace