summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt242
-rw-r--r--src/video_core/buffer_cache/buffer_base.h590
-rw-r--r--src/video_core/buffer_cache/buffer_block.h67
-rw-r--r--src/video_core/buffer_cache/buffer_cache.cpp13
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h1662
-rw-r--r--src/video_core/buffer_cache/map_interval.cpp33
-rw-r--r--src/video_core/buffer_cache/map_interval.h92
-rw-r--r--src/video_core/cdma_pusher.cpp170
-rw-r--r--src/video_core/cdma_pusher.h136
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp129
-rw-r--r--src/video_core/command_classes/codecs/codec.h70
-rw-r--r--src/video_core/command_classes/codecs/h264.cpp293
-rw-r--r--src/video_core/command_classes/codecs/h264.h118
-rw-r--r--src/video_core/command_classes/codecs/vp9.cpp989
-rw-r--r--src/video_core/command_classes/codecs/vp9.h197
-rw-r--r--src/video_core/command_classes/codecs/vp9_types.h302
-rw-r--r--src/video_core/command_classes/host1x.cpp30
-rw-r--r--src/video_core/command_classes/host1x.h37
-rw-r--r--src/video_core/command_classes/nvdec.cpp48
-rw-r--r--src/video_core/command_classes/nvdec.h38
-rw-r--r--src/video_core/command_classes/nvdec_common.h48
-rw-r--r--src/video_core/command_classes/sync_manager.cpp60
-rw-r--r--src/video_core/command_classes/sync_manager.h64
-rw-r--r--src/video_core/command_classes/vic.cpp172
-rw-r--r--src/video_core/command_classes/vic.h110
-rw-r--r--src/video_core/compatible_formats.cpp145
-rw-r--r--src/video_core/compatible_formats.h23
-rw-r--r--src/video_core/delayed_destruction_ring.h32
-rw-r--r--src/video_core/dirty_flags.cpp38
-rw-r--r--src/video_core/dirty_flags.h11
-rw-r--r--src/video_core/dma_pusher.cpp67
-rw-r--r--src/video_core/dma_pusher.h53
-rw-r--r--src/video_core/engines/engine_upload.cpp8
-rw-r--r--src/video_core/engines/engine_upload.h4
-rw-r--r--src/video_core/engines/fermi_2d.cpp98
-rw-r--r--src/video_core/engines/fermi_2d.h331
-rw-r--r--src/video_core/engines/kepler_compute.cpp31
-rw-r--r--src/video_core/engines/kepler_compute.h23
-rw-r--r--src/video_core/engines/kepler_memory.cpp5
-rw-r--r--src/video_core/engines/kepler_memory.h6
-rw-r--r--src/video_core/engines/maxwell_3d.cpp376
-rw-r--r--src/video_core/engines/maxwell_3d.h345
-rw-r--r--src/video_core/engines/maxwell_dma.cpp14
-rw-r--r--src/video_core/engines/maxwell_dma.h16
-rw-r--r--src/video_core/engines/shader_bytecode.h192
-rw-r--r--src/video_core/engines/shader_header.h51
-rw-r--r--src/video_core/fence_manager.h56
-rw-r--r--src/video_core/framebuffer_config.h31
-rw-r--r--src/video_core/gpu.cpp191
-rw-r--r--src/video_core/gpu.h175
-rw-r--r--src/video_core/gpu_asynch.cpp64
-rw-r--r--src/video_core/gpu_asynch.h46
-rw-r--r--src/video_core/gpu_synch.cpp45
-rw-r--r--src/video_core/gpu_synch.h40
-rw-r--r--src/video_core/gpu_thread.cpp72
-rw-r--r--src/video_core/gpu_thread.h54
-rw-r--r--src/video_core/guest_driver.h4
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt85
-rw-r--r--src/video_core/host_shaders/StringShaderHeader.cmake2
-rw-r--r--src/video_core/host_shaders/block_linear_unswizzle_2d.comp122
-rw-r--r--src/video_core/host_shaders/block_linear_unswizzle_3d.comp125
-rw-r--r--src/video_core/host_shaders/convert_depth_to_float.frag13
-rw-r--r--src/video_core/host_shaders/convert_float_to_depth.frag13
-rw-r--r--src/video_core/host_shaders/full_screen_triangle.vert29
-rw-r--r--src/video_core/host_shaders/opengl_copy_bc4.comp70
-rw-r--r--src/video_core/host_shaders/opengl_present.frag4
-rw-r--r--src/video_core/host_shaders/opengl_present.vert4
-rw-r--r--src/video_core/host_shaders/pitch_unswizzle.comp86
-rw-r--r--src/video_core/host_shaders/vulkan_blit_color_float.frag14
-rw-r--r--src/video_core/host_shaders/vulkan_blit_depth_stencil.frag16
-rw-r--r--src/video_core/host_shaders/vulkan_present.frag (renamed from src/video_core/renderer_vulkan/shaders/blit.frag)9
-rw-r--r--src/video_core/host_shaders/vulkan_present.vert (renamed from src/video_core/renderer_vulkan/shaders/blit.vert)9
-rw-r--r--src/video_core/host_shaders/vulkan_quad_indexed.comp (renamed from src/video_core/renderer_vulkan/shaders/quad_indexed.comp)9
-rw-r--r--src/video_core/host_shaders/vulkan_uint8.comp (renamed from src/video_core/renderer_vulkan/shaders/uint8.comp)18
-rw-r--r--src/video_core/macro/macro.cpp2
-rw-r--r--src/video_core/macro/macro_hle.cpp6
-rw-r--r--src/video_core/macro/macro_hle.h2
-rw-r--r--src/video_core/macro/macro_interpreter.cpp27
-rw-r--r--src/video_core/macro/macro_interpreter.h10
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp31
-rw-r--r--src/video_core/macro/macro_jit_x64.h4
-rw-r--r--src/video_core/memory_manager.cpp76
-rw-r--r--src/video_core/memory_manager.h22
-rw-r--r--src/video_core/morton.cpp250
-rw-r--r--src/video_core/morton.h18
-rw-r--r--src/video_core/query_cache.h45
-rw-r--r--src/video_core/rasterizer_interface.h37
-rw-r--r--src/video_core/renderer_base.h36
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp132
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp232
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h157
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp79
-rw-r--r--src/video_core/renderer_opengl/gl_device.h29
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h13
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.cpp85
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.h68
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp27
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h22
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp1187
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h146
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h3
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.cpp52
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.h25
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp84
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h26
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp149
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h18
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h6
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp26
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h71
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp104
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h61
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp1357
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h279
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h85
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp399
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h56
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp225
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h52
-rw-r--r--src/video_core/renderer_opengl/utils.cpp42
-rw-r--r--src/video_core/renderer_opengl/utils.h16
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp624
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h96
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp109
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h45
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp128
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h24
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp425
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h76
-rw-r--r--src/video_core/renderer_vulkan/shaders/quad_array.comp37
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp348
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.h54
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp380
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h136
-rw-r--r--src/video_core/renderer_vulkan/vk_command_pool.cpp46
-rw-r--r--src/video_core/renderer_vulkan/vk_command_pool.h34
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp500
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h61
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.h14
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.cpp23
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.h19
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp65
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h30
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp110
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.h30
-rw-r--r--src/video_core/renderer_vulkan/vk_image.cpp135
-rw-r--r--src/video_core/renderer_vulkan/vk_image.h84
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp56
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h75
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.cpp230
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.h126
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp135
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h43
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp83
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h43
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp1329
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h204
-rw-r--r--src/video_core/renderer_vulkan/vk_renderpass_cache.cpp158
-rw-r--r--src/video_core/renderer_vulkan/vk_renderpass_cache.h70
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.cpp311
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.h196
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.cpp63
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.h43
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp83
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.h29
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp148
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h106
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp117
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h41
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_util.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_util.h8
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp285
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.h100
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp78
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h23
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp34
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h23
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp27
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h16
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp1537
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h340
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h38
-rw-r--r--src/video_core/sampler_cache.cpp21
-rw-r--r--src/video_core/sampler_cache.h60
-rw-r--r--src/video_core/shader/ast.cpp13
-rw-r--r--src/video_core/shader/ast.h56
-rw-r--r--src/video_core/shader/async_shaders.cpp87
-rw-r--r--src/video_core/shader/async_shaders.h42
-rw-r--r--src/video_core/shader/control_flow.cpp47
-rw-r--r--src/video_core/shader/control_flow.h14
-rw-r--r--src/video_core/shader/decode.cpp12
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp6
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp3
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp9
-rw-r--r--src/video_core/shader/decode/arithmetic_integer_immediate.cpp40
-rw-r--r--src/video_core/shader/decode/conversion.cpp4
-rw-r--r--src/video_core/shader/decode/half_set.cpp14
-rw-r--r--src/video_core/shader/decode/image.cpp40
-rw-r--r--src/video_core/shader/decode/memory.cpp25
-rw-r--r--src/video_core/shader/decode/other.cpp45
-rw-r--r--src/video_core/shader/decode/shift.cpp2
-rw-r--r--src/video_core/shader/decode/texture.cpp107
-rw-r--r--src/video_core/shader/decode/warp.cpp2
-rw-r--r--src/video_core/shader/expr.h6
-rw-r--r--src/video_core/shader/memory_util.cpp7
-rw-r--r--src/video_core/shader/memory_util.h6
-rw-r--r--src/video_core/shader/node.h159
-rw-r--r--src/video_core/shader/node_helper.cpp2
-rw-r--r--src/video_core/shader/registry.cpp50
-rw-r--r--src/video_core/shader/registry.h2
-rw-r--r--src/video_core/shader/shader_ir.cpp21
-rw-r--r--src/video_core/shader/shader_ir.h31
-rw-r--r--src/video_core/shader/track.cpp4
-rw-r--r--src/video_core/surface.cpp14
-rw-r--r--src/video_core/surface.h152
-rw-r--r--src/video_core/texture_cache/accelerated_swizzle.cpp70
-rw-r--r--src/video_core/texture_cache/accelerated_swizzle.h45
-rw-r--r--src/video_core/texture_cache/copy_params.h36
-rw-r--r--src/video_core/texture_cache/decode_bc4.cpp97
-rw-r--r--src/video_core/texture_cache/decode_bc4.h16
-rw-r--r--src/video_core/texture_cache/descriptor_table.h82
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp380
-rw-r--r--src/video_core/texture_cache/format_lookup_table.h42
-rw-r--r--src/video_core/texture_cache/formatter.cpp95
-rw-r--r--src/video_core/texture_cache/formatter.h263
-rw-r--r--src/video_core/texture_cache/image_base.cpp218
-rw-r--r--src/video_core/texture_cache/image_base.h83
-rw-r--r--src/video_core/texture_cache/image_info.cpp189
-rw-r--r--src/video_core/texture_cache/image_info.h38
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp41
-rw-r--r--src/video_core/texture_cache/image_view_base.h47
-rw-r--r--src/video_core/texture_cache/image_view_info.cpp88
-rw-r--r--src/video_core/texture_cache/image_view_info.h50
-rw-r--r--src/video_core/texture_cache/render_targets.h51
-rw-r--r--src/video_core/texture_cache/samples_helper.h55
-rw-r--r--src/video_core/texture_cache/slot_vector.h156
-rw-r--r--src/video_core/texture_cache/surface_base.cpp294
-rw-r--r--src/video_core/texture_cache/surface_base.h333
-rw-r--r--src/video_core/texture_cache/surface_params.cpp445
-rw-r--r--src/video_core/texture_cache/surface_params.h293
-rw-r--r--src/video_core/texture_cache/surface_view.cpp27
-rw-r--r--src/video_core/texture_cache/surface_view.h68
-rw-r--r--src/video_core/texture_cache/texture_cache.h2402
-rw-r--r--src/video_core/texture_cache/types.h140
-rw-r--r--src/video_core/texture_cache/util.cpp1210
-rw-r--r--src/video_core/texture_cache/util.h109
-rw-r--r--src/video_core/textures/astc.cpp87
-rw-r--r--src/video_core/textures/astc.h5
-rw-r--r--src/video_core/textures/convert.cpp93
-rw-r--r--src/video_core/textures/convert.h22
-rw-r--r--src/video_core/textures/decoders.cpp255
-rw-r--r--src/video_core/textures/decoders.h44
-rw-r--r--src/video_core/textures/texture.cpp16
-rw-r--r--src/video_core/textures/texture.h239
-rw-r--r--src/video_core/video_core.cpp41
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.cpp (renamed from src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp)43
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.h (renamed from src/video_core/renderer_vulkan/nsight_aftermath_tracker.h)13
-rw-r--r--src/video_core/vulkan_common/vulkan_debug_callback.cpp46
-rw-r--r--src/video_core/vulkan_common/vulkan_debug_callback.h13
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp (renamed from src/video_core/renderer_vulkan/vk_device.cpp)528
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h (renamed from src/video_core/renderer_vulkan/vk_device.h)81
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.cpp155
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.h32
-rw-r--r--src/video_core/vulkan_common/vulkan_library.cpp36
-rw-r--r--src/video_core/vulkan_common/vulkan_library.h13
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.cpp326
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.h129
-rw-r--r--src/video_core/vulkan_common/vulkan_surface.cpp81
-rw-r--r--src/video_core/vulkan_common/vulkan_surface.h18
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp (renamed from src/video_core/renderer_vulkan/wrapper.cpp)279
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h (renamed from src/video_core/renderer_vulkan/wrapper.h)518
278 files changed, 21565 insertions, 14923 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d85f1e9d1..9b931976a 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,12 +1,30 @@
add_subdirectory(host_shaders)
add_library(video_core STATIC
- buffer_cache/buffer_block.h
+ buffer_cache/buffer_base.h
+ buffer_cache/buffer_cache.cpp
buffer_cache/buffer_cache.h
- buffer_cache/map_interval.cpp
- buffer_cache/map_interval.h
+ cdma_pusher.cpp
+ cdma_pusher.h
+ command_classes/codecs/codec.cpp
+ command_classes/codecs/codec.h
+ command_classes/codecs/h264.cpp
+ command_classes/codecs/h264.h
+ command_classes/codecs/vp9.cpp
+ command_classes/codecs/vp9.h
+ command_classes/codecs/vp9_types.h
+ command_classes/host1x.cpp
+ command_classes/host1x.h
+ command_classes/nvdec.cpp
+ command_classes/nvdec.h
+ command_classes/nvdec_common.h
+ command_classes/sync_manager.cpp
+ command_classes/sync_manager.h
+ command_classes/vic.cpp
+ command_classes/vic.h
compatible_formats.cpp
compatible_formats.h
+ delayed_destruction_ring.h
dirty_flags.cpp
dirty_flags.h
dma_pusher.cpp
@@ -29,6 +47,7 @@ add_library(video_core STATIC
engines/shader_bytecode.h
engines/shader_header.h
engines/shader_type.h
+ framebuffer_config.h
macro/macro.cpp
macro/macro.h
macro/macro_hle.cpp
@@ -40,18 +59,12 @@ add_library(video_core STATIC
fence_manager.h
gpu.cpp
gpu.h
- gpu_asynch.cpp
- gpu_asynch.h
- gpu_synch.cpp
- gpu_synch.h
gpu_thread.cpp
gpu_thread.h
guest_driver.cpp
guest_driver.h
memory_manager.cpp
memory_manager.h
- morton.cpp
- morton.h
query_cache.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
@@ -66,14 +79,10 @@ add_library(video_core STATIC
renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h
- renderer_opengl/gl_framebuffer_cache.cpp
- renderer_opengl/gl_framebuffer_cache.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_resource_manager.cpp
renderer_opengl/gl_resource_manager.h
- renderer_opengl/gl_sampler_cache.cpp
- renderer_opengl/gl_sampler_cache.h
renderer_opengl/gl_shader_cache.cpp
renderer_opengl/gl_shader_cache.h
renderer_opengl/gl_shader_decompiler.cpp
@@ -95,10 +104,58 @@ add_library(video_core STATIC
renderer_opengl/maxwell_to_gl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
- renderer_opengl/utils.cpp
- renderer_opengl/utils.h
- sampler_cache.cpp
- sampler_cache.h
+ renderer_opengl/util_shaders.cpp
+ renderer_opengl/util_shaders.h
+ renderer_vulkan/blit_image.cpp
+ renderer_vulkan/blit_image.h
+ renderer_vulkan/fixed_pipeline_state.cpp
+ renderer_vulkan/fixed_pipeline_state.h
+ renderer_vulkan/maxwell_to_vk.cpp
+ renderer_vulkan/maxwell_to_vk.h
+ renderer_vulkan/renderer_vulkan.h
+ renderer_vulkan/renderer_vulkan.cpp
+ renderer_vulkan/vk_blit_screen.cpp
+ renderer_vulkan/vk_blit_screen.h
+ renderer_vulkan/vk_buffer_cache.cpp
+ renderer_vulkan/vk_buffer_cache.h
+ renderer_vulkan/vk_command_pool.cpp
+ renderer_vulkan/vk_command_pool.h
+ renderer_vulkan/vk_compute_pass.cpp
+ renderer_vulkan/vk_compute_pass.h
+ renderer_vulkan/vk_compute_pipeline.cpp
+ renderer_vulkan/vk_compute_pipeline.h
+ renderer_vulkan/vk_descriptor_pool.cpp
+ renderer_vulkan/vk_descriptor_pool.h
+ renderer_vulkan/vk_fence_manager.cpp
+ renderer_vulkan/vk_fence_manager.h
+ renderer_vulkan/vk_graphics_pipeline.cpp
+ renderer_vulkan/vk_graphics_pipeline.h
+ renderer_vulkan/vk_master_semaphore.cpp
+ renderer_vulkan/vk_master_semaphore.h
+ renderer_vulkan/vk_pipeline_cache.cpp
+ renderer_vulkan/vk_pipeline_cache.h
+ renderer_vulkan/vk_query_cache.cpp
+ renderer_vulkan/vk_query_cache.h
+ renderer_vulkan/vk_rasterizer.cpp
+ renderer_vulkan/vk_rasterizer.h
+ renderer_vulkan/vk_resource_pool.cpp
+ renderer_vulkan/vk_resource_pool.h
+ renderer_vulkan/vk_scheduler.cpp
+ renderer_vulkan/vk_scheduler.h
+ renderer_vulkan/vk_shader_decompiler.cpp
+ renderer_vulkan/vk_shader_decompiler.h
+ renderer_vulkan/vk_shader_util.cpp
+ renderer_vulkan/vk_shader_util.h
+ renderer_vulkan/vk_staging_buffer_pool.cpp
+ renderer_vulkan/vk_staging_buffer_pool.h
+ renderer_vulkan/vk_state_tracker.cpp
+ renderer_vulkan/vk_state_tracker.h
+ renderer_vulkan/vk_swapchain.cpp
+ renderer_vulkan/vk_swapchain.h
+ renderer_vulkan/vk_texture_cache.cpp
+ renderer_vulkan/vk_texture_cache.h
+ renderer_vulkan/vk_update_descriptor.cpp
+ renderer_vulkan/vk_update_descriptor.h
shader_cache.h
shader_notify.cpp
shader_notify.h
@@ -155,119 +212,104 @@ add_library(video_core STATIC
shader/transform_feedback.h
surface.cpp
surface.h
+ texture_cache/accelerated_swizzle.cpp
+ texture_cache/accelerated_swizzle.h
+ texture_cache/decode_bc4.cpp
+ texture_cache/decode_bc4.h
+ texture_cache/descriptor_table.h
+ texture_cache/formatter.cpp
+ texture_cache/formatter.h
texture_cache/format_lookup_table.cpp
texture_cache/format_lookup_table.h
- texture_cache/surface_base.cpp
- texture_cache/surface_base.h
- texture_cache/surface_params.cpp
- texture_cache/surface_params.h
- texture_cache/surface_view.cpp
- texture_cache/surface_view.h
+ texture_cache/image_base.cpp
+ texture_cache/image_base.h
+ texture_cache/image_info.cpp
+ texture_cache/image_info.h
+ texture_cache/image_view_base.cpp
+ texture_cache/image_view_base.h
+ texture_cache/image_view_info.cpp
+ texture_cache/image_view_info.h
+ texture_cache/render_targets.h
+ texture_cache/samples_helper.h
+ texture_cache/slot_vector.h
texture_cache/texture_cache.h
+ texture_cache/types.h
+ texture_cache/util.cpp
+ texture_cache/util.h
textures/astc.cpp
textures/astc.h
- textures/convert.cpp
- textures/convert.h
textures/decoders.cpp
textures/decoders.h
textures/texture.cpp
textures/texture.h
video_core.cpp
video_core.h
+ vulkan_common/vulkan_debug_callback.cpp
+ vulkan_common/vulkan_debug_callback.h
+ vulkan_common/vulkan_device.cpp
+ vulkan_common/vulkan_device.h
+ vulkan_common/vulkan_instance.cpp
+ vulkan_common/vulkan_instance.h
+ vulkan_common/vulkan_library.cpp
+ vulkan_common/vulkan_library.h
+ vulkan_common/vulkan_memory_allocator.cpp
+ vulkan_common/vulkan_memory_allocator.h
+ vulkan_common/vulkan_surface.cpp
+ vulkan_common/vulkan_surface.h
+ vulkan_common/vulkan_wrapper.cpp
+ vulkan_common/vulkan_wrapper.h
+ vulkan_common/nsight_aftermath_tracker.cpp
+ vulkan_common/nsight_aftermath_tracker.h
)
-if (ENABLE_VULKAN)
- target_sources(video_core PRIVATE
- renderer_vulkan/fixed_pipeline_state.cpp
- renderer_vulkan/fixed_pipeline_state.h
- renderer_vulkan/maxwell_to_vk.cpp
- renderer_vulkan/maxwell_to_vk.h
- renderer_vulkan/nsight_aftermath_tracker.cpp
- renderer_vulkan/nsight_aftermath_tracker.h
- renderer_vulkan/renderer_vulkan.h
- renderer_vulkan/renderer_vulkan.cpp
- renderer_vulkan/vk_blit_screen.cpp
- renderer_vulkan/vk_blit_screen.h
- renderer_vulkan/vk_buffer_cache.cpp
- renderer_vulkan/vk_buffer_cache.h
- renderer_vulkan/vk_compute_pass.cpp
- renderer_vulkan/vk_compute_pass.h
- renderer_vulkan/vk_compute_pipeline.cpp
- renderer_vulkan/vk_compute_pipeline.h
- renderer_vulkan/vk_descriptor_pool.cpp
- renderer_vulkan/vk_descriptor_pool.h
- renderer_vulkan/vk_device.cpp
- renderer_vulkan/vk_device.h
- renderer_vulkan/vk_fence_manager.cpp
- renderer_vulkan/vk_fence_manager.h
- renderer_vulkan/vk_graphics_pipeline.cpp
- renderer_vulkan/vk_graphics_pipeline.h
- renderer_vulkan/vk_image.cpp
- renderer_vulkan/vk_image.h
- renderer_vulkan/vk_memory_manager.cpp
- renderer_vulkan/vk_memory_manager.h
- renderer_vulkan/vk_pipeline_cache.cpp
- renderer_vulkan/vk_pipeline_cache.h
- renderer_vulkan/vk_query_cache.cpp
- renderer_vulkan/vk_query_cache.h
- renderer_vulkan/vk_rasterizer.cpp
- renderer_vulkan/vk_rasterizer.h
- renderer_vulkan/vk_renderpass_cache.cpp
- renderer_vulkan/vk_renderpass_cache.h
- renderer_vulkan/vk_resource_manager.cpp
- renderer_vulkan/vk_resource_manager.h
- renderer_vulkan/vk_sampler_cache.cpp
- renderer_vulkan/vk_sampler_cache.h
- renderer_vulkan/vk_scheduler.cpp
- renderer_vulkan/vk_scheduler.h
- renderer_vulkan/vk_shader_decompiler.cpp
- renderer_vulkan/vk_shader_decompiler.h
- renderer_vulkan/vk_shader_util.cpp
- renderer_vulkan/vk_shader_util.h
- renderer_vulkan/vk_staging_buffer_pool.cpp
- renderer_vulkan/vk_staging_buffer_pool.h
- renderer_vulkan/vk_state_tracker.cpp
- renderer_vulkan/vk_state_tracker.h
- renderer_vulkan/vk_stream_buffer.cpp
- renderer_vulkan/vk_stream_buffer.h
- renderer_vulkan/vk_swapchain.cpp
- renderer_vulkan/vk_swapchain.h
- renderer_vulkan/vk_texture_cache.cpp
- renderer_vulkan/vk_texture_cache.h
- renderer_vulkan/vk_update_descriptor.cpp
- renderer_vulkan/vk_update_descriptor.h
- renderer_vulkan/wrapper.cpp
- renderer_vulkan/wrapper.h
- )
-endif()
-
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad xbyak)
+if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
+ add_dependencies(video_core ffmpeg-build)
+endif()
+
+target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
+target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
+
add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
-
-if (ENABLE_VULKAN)
- target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
- target_compile_definitions(video_core PRIVATE HAS_VULKAN)
- target_link_libraries(video_core PRIVATE sirit)
-endif()
+target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
+target_link_libraries(video_core PRIVATE sirit)
if (ENABLE_NSIGHT_AFTERMATH)
if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK})
- message(ERROR "Environment variable NSIGHT_AFTERMATH_SDK has to be provided")
+ message(FATAL_ERROR "Environment variable NSIGHT_AFTERMATH_SDK has to be provided")
endif()
if (NOT WIN32)
- message(ERROR "Nsight Aftermath doesn't support non-Windows platforms")
+ message(FATAL_ERROR "Nsight Aftermath doesn't support non-Windows platforms")
endif()
target_compile_definitions(video_core PRIVATE HAS_NSIGHT_AFTERMATH)
target_include_directories(video_core PRIVATE "$ENV{NSIGHT_AFTERMATH_SDK}/include")
endif()
if (MSVC)
- target_compile_options(video_core PRIVATE /we4267)
+ target_compile_options(video_core PRIVATE
+ /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
+ /we4456 # Declaration of 'identifier' hides previous local declaration
+ /we4457 # Declaration of 'identifier' hides function parameter
+ /we4458 # Declaration of 'identifier' hides class member
+ /we4459 # Declaration of 'identifier' hides global declaration
+ /we4715 # 'function' : not all control paths return a value
+ )
else()
- target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion)
+ target_compile_options(video_core PRIVATE
+ -Werror=conversion
+ -Wno-error=sign-conversion
+ -Werror=pessimizing-move
+ -Werror=redundant-move
+ -Werror=shadow
+ -Werror=type-limits
+
+ $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
+ $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
+ $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
+ )
endif()
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
new file mode 100644
index 000000000..0c00ae280
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -0,0 +1,590 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <bit>
+#include <limits>
+#include <utility>
+
+#include "common/alignment.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "core/memory.h"
+
+namespace VideoCommon {
+
+enum class BufferFlagBits {
+ Picked = 1 << 0,
+ CachedWrites = 1 << 1,
+};
+DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)
+
+/// Tag for creating null buffers with no storage or size
+struct NullBufferParams {};
+
+/**
+ * Range tracking buffer container.
+ *
+ * It keeps track of the modified CPU and GPU ranges on a CPU page granularity, notifying the given
+ * rasterizer about state changes in the tracking behavior of the buffer.
+ *
+ * The buffer size and address is forcefully aligned to CPU page boundaries.
+ */
+template <class RasterizerInterface>
+class BufferBase {
+ static constexpr u64 PAGES_PER_WORD = 64;
+ static constexpr u64 BYTES_PER_PAGE = Core::Memory::PAGE_SIZE;
+ static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
+
+ /// Vector tracking modified pages tightly packed with small vector optimization
+ union WordsArray {
+ /// Returns the pointer to the words state
+ [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
+ return is_short ? &stack : heap;
+ }
+
+ /// Returns the pointer to the words state
+ [[nodiscard]] u64* Pointer(bool is_short) noexcept {
+ return is_short ? &stack : heap;
+ }
+
+ u64 stack = 0; ///< Small buffers storage
+ u64* heap; ///< Not-small buffers pointer to the storage
+ };
+
+ struct Words {
+ explicit Words() = default;
+ explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
+ if (IsShort()) {
+ cpu.stack = ~u64{0};
+ gpu.stack = 0;
+ cached_cpu.stack = 0;
+ untracked.stack = ~u64{0};
+ } else {
+ // Share allocation between CPU and GPU pages and set their default values
+ const size_t num_words = NumWords();
+ u64* const alloc = new u64[num_words * 4];
+ cpu.heap = alloc;
+ gpu.heap = alloc + num_words;
+ cached_cpu.heap = alloc + num_words * 2;
+ untracked.heap = alloc + num_words * 3;
+ std::fill_n(cpu.heap, num_words, ~u64{0});
+ std::fill_n(gpu.heap, num_words, 0);
+ std::fill_n(cached_cpu.heap, num_words, 0);
+ std::fill_n(untracked.heap, num_words, ~u64{0});
+ }
+ // Clean up tailing bits
+ const u64 last_word_size = size_bytes % BYTES_PER_WORD;
+ const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
+ const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
+ const u64 last_word = (~u64{0} << shift) >> shift;
+ cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
+ untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
+ }
+
+ ~Words() {
+ Release();
+ }
+
+ Words& operator=(Words&& rhs) noexcept {
+ Release();
+ size_bytes = rhs.size_bytes;
+ cpu = rhs.cpu;
+ gpu = rhs.gpu;
+ cached_cpu = rhs.cached_cpu;
+ untracked = rhs.untracked;
+ rhs.cpu.heap = nullptr;
+ return *this;
+ }
+
+ Words(Words&& rhs) noexcept
+ : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
+ cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
+ rhs.cpu.heap = nullptr;
+ }
+
+ Words& operator=(const Words&) = delete;
+ Words(const Words&) = delete;
+
+ /// Returns true when the buffer fits in the small vector optimization
+ [[nodiscard]] bool IsShort() const noexcept {
+ return size_bytes <= BYTES_PER_WORD;
+ }
+
+ /// Returns the number of words of the buffer
+ [[nodiscard]] size_t NumWords() const noexcept {
+ return Common::DivCeil(size_bytes, BYTES_PER_WORD);
+ }
+
+ /// Release buffer resources
+ void Release() {
+ if (!IsShort()) {
+ // CPU written words is the base for the heap allocation
+ delete[] cpu.heap;
+ }
+ }
+
+ u64 size_bytes = 0;
+ WordsArray cpu;
+ WordsArray gpu;
+ WordsArray cached_cpu;
+ WordsArray untracked;
+ };
+
+ enum class Type {
+ CPU,
+ GPU,
+ CachedCPU,
+ Untracked,
+ };
+
+public:
+ explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes)
+ : rasterizer{&rasterizer_}, cpu_addr{Common::AlignDown(cpu_addr_, BYTES_PER_PAGE)},
+ words(Common::AlignUp(size_bytes + (cpu_addr_ - cpu_addr), BYTES_PER_PAGE)) {}
+
+ explicit BufferBase(NullBufferParams) {}
+
+ BufferBase& operator=(const BufferBase&) = delete;
+ BufferBase(const BufferBase&) = delete;
+
+ BufferBase& operator=(BufferBase&&) = default;
+ BufferBase(BufferBase&&) = default;
+
+ /// Returns the inclusive CPU modified range in a begin end pair
+ [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
+ u64 query_size) const noexcept {
+ const u64 offset = query_cpu_addr - cpu_addr;
+ return ModifiedRegion<Type::CPU>(offset, query_size);
+ }
+
+ /// Returns the inclusive GPU modified range in a begin end pair
+ [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
+ u64 query_size) const noexcept {
+ const u64 offset = query_cpu_addr - cpu_addr;
+ return ModifiedRegion<Type::GPU>(offset, query_size);
+ }
+
+ /// Returns true if a region has been modified from the CPU
+ [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
+ const u64 offset = query_cpu_addr - cpu_addr;
+ return IsRegionModified<Type::CPU>(offset, query_size);
+ }
+
+ /// Returns true if a region has been modified from the GPU
+ [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
+ const u64 offset = query_cpu_addr - cpu_addr;
+ return IsRegionModified<Type::GPU>(offset, query_size);
+ }
+
+ /// Mark region as CPU modified, notifying the rasterizer about this change
+ void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
+ ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
+ }
+
+ /// Unmark region as CPU modified, notifying the rasterizer about this change
+ void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
+ ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
+ }
+
+ /// Mark region as modified from the host GPU
+ void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
+ ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
+ }
+
+ /// Unmark region as modified from the host GPU
+ void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
+ ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
+ }
+
+ /// Mark region as modified from the CPU
+ /// but don't mark it as modified until FlusHCachedWrites is called.
+ void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
+ flags |= BufferFlagBits::CachedWrites;
+ ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
+ }
+
+ /// Flushes cached CPU writes, and notify the rasterizer about the deltas
+ void FlushCachedWrites() noexcept {
+ flags &= ~BufferFlagBits::CachedWrites;
+ const u64 num_words = NumWords();
+ const u64* const cached_words = Array<Type::CachedCPU>();
+ u64* const untracked_words = Array<Type::Untracked>();
+ u64* const cpu_words = Array<Type::CPU>();
+ for (u64 word_index = 0; word_index < num_words; ++word_index) {
+ const u64 cached_bits = cached_words[word_index];
+ NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
+ untracked_words[word_index] |= cached_bits;
+ cpu_words[word_index] |= cached_bits;
+ }
+ }
+
+ /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
+ template <typename Func>
+ void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
+ ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
+ }
+
+ /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
+ template <typename Func>
+ void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
+ ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
+ }
+
+ /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
+ template <typename Func>
+ void ForEachDownloadRange(Func&& func) {
+ ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
+ }
+
+ /// Mark buffer as picked
+ void Pick() noexcept {
+ flags |= BufferFlagBits::Picked;
+ }
+
+ /// Unmark buffer as picked
+ void Unpick() noexcept {
+ flags &= ~BufferFlagBits::Picked;
+ }
+
+ /// Increases the likeliness of this being a stream buffer
+ void IncreaseStreamScore(int score) noexcept {
+ stream_score += score;
+ }
+
+ /// Returns the likeliness of this being a stream buffer
+ [[nodiscard]] int StreamScore() const noexcept {
+ return stream_score;
+ }
+
+ /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
+ [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
+ return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
+ }
+
+ /// Returns true if the buffer has been marked as picked
+ [[nodiscard]] bool IsPicked() const noexcept {
+ return True(flags & BufferFlagBits::Picked);
+ }
+
+ /// Returns true when the buffer has pending cached writes
+ [[nodiscard]] bool HasCachedWrites() const noexcept {
+ return True(flags & BufferFlagBits::CachedWrites);
+ }
+
+ /// Returns the base CPU address of the buffer
+ [[nodiscard]] VAddr CpuAddr() const noexcept {
+ return cpu_addr;
+ }
+
+ /// Returns the offset relative to the given CPU address
+ /// @pre IsInBounds returns true
+ [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept {
+ return static_cast<u32>(other_cpu_addr - cpu_addr);
+ }
+
+ /// Returns the size in bytes of the buffer
+ [[nodiscard]] u64 SizeBytes() const noexcept {
+ return words.size_bytes;
+ }
+
+private:
+ template <Type type>
+ u64* Array() noexcept {
+ if constexpr (type == Type::CPU) {
+ return words.cpu.Pointer(IsShort());
+ } else if constexpr (type == Type::GPU) {
+ return words.gpu.Pointer(IsShort());
+ } else if constexpr (type == Type::CachedCPU) {
+ return words.cached_cpu.Pointer(IsShort());
+ } else if constexpr (type == Type::Untracked) {
+ return words.untracked.Pointer(IsShort());
+ }
+ }
+
+ template <Type type>
+ const u64* Array() const noexcept {
+ if constexpr (type == Type::CPU) {
+ return words.cpu.Pointer(IsShort());
+ } else if constexpr (type == Type::GPU) {
+ return words.gpu.Pointer(IsShort());
+ } else if constexpr (type == Type::CachedCPU) {
+ return words.cached_cpu.Pointer(IsShort());
+ } else if constexpr (type == Type::Untracked) {
+ return words.untracked.Pointer(IsShort());
+ }
+ }
+
+ /**
+ * Change the state of a range of pages
+ *
+ * @param dirty_addr Base address to mark or unmark as modified
+ * @param size Size in bytes to mark or unmark as modified
+ */
+ template <Type type, bool enable>
+ void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
+ const s64 difference = dirty_addr - cpu_addr;
+ const u64 offset = std::max<s64>(difference, 0);
+ size += std::min<s64>(difference, 0);
+ if (offset >= SizeBytes() || size < 0) {
+ return;
+ }
+ u64* const untracked_words = Array<Type::Untracked>();
+ u64* const state_words = Array<type>();
+ const u64 offset_end = std::min(offset + size, SizeBytes());
+ const u64 begin_page_index = offset / BYTES_PER_PAGE;
+ const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
+ const u64 end_page_index = Common::DivCeil(offset_end, BYTES_PER_PAGE);
+ const u64 end_word_index = Common::DivCeil(end_page_index, PAGES_PER_WORD);
+ u64 page_index = begin_page_index % PAGES_PER_WORD;
+ u64 word_index = begin_word_index;
+ while (word_index < end_word_index) {
+ const u64 next_word_first_page = (word_index + 1) * PAGES_PER_WORD;
+ const u64 left_offset =
+ std::min(next_word_first_page - end_page_index, PAGES_PER_WORD) % PAGES_PER_WORD;
+ const u64 right_offset = page_index;
+ u64 bits = ~u64{0};
+ bits = (bits >> right_offset) << right_offset;
+ bits = (bits << left_offset) >> left_offset;
+ if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+ NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
+ }
+ if constexpr (enable) {
+ state_words[word_index] |= bits;
+ if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+ untracked_words[word_index] |= bits;
+ }
+ } else {
+ state_words[word_index] &= ~bits;
+ if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+ untracked_words[word_index] &= ~bits;
+ }
+ }
+ page_index = 0;
+ ++word_index;
+ }
+ }
+
+ /**
+ * Notify rasterizer about changes in the CPU tracking state of a word in the buffer
+ *
+ * @param word_index Index to the word to notify to the rasterizer
+ * @param current_bits Current state of the word
+ * @param new_bits New state of the word
+ *
+ * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
+ */
+ template <bool add_to_rasterizer>
+ void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
+ u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
+ VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
+ while (changed_bits != 0) {
+ const int empty_bits = std::countr_zero(changed_bits);
+ addr += empty_bits * BYTES_PER_PAGE;
+ changed_bits >>= empty_bits;
+
+ const u32 continuous_bits = std::countr_one(changed_bits);
+ const u64 size = continuous_bits * BYTES_PER_PAGE;
+ const VAddr begin_addr = addr;
+ addr += size;
+ changed_bits = continuous_bits < PAGES_PER_WORD ? (changed_bits >> continuous_bits) : 0;
+ rasterizer->UpdatePagesCachedCount(begin_addr, size, add_to_rasterizer ? 1 : -1);
+ }
+ }
+
+ /**
+ * Loop over each page in the given range, turn off those bits and notify the rasterizer if
+ * needed. Call the given function on each turned off range.
+ *
+ * @param query_cpu_range Base CPU address to loop over
+ * @param size Size in bytes of the CPU range to loop over
+ * @param func Function to call for each turned off region
+ */
+ template <Type type, typename Func>
+ void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
+ static_assert(type != Type::Untracked);
+
+ const s64 difference = query_cpu_range - cpu_addr;
+ const u64 query_begin = std::max<s64>(difference, 0);
+ size += std::min<s64>(difference, 0);
+ if (query_begin >= SizeBytes() || size < 0) {
+ return;
+ }
+ u64* const untracked_words = Array<Type::Untracked>();
+ u64* const state_words = Array<type>();
+ const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
+ u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
+ u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
+
+ const auto modified = [](u64 word) { return word != 0; };
+ const auto first_modified_word = std::find_if(words_begin, words_end, modified);
+ if (first_modified_word == words_end) {
+ // Exit early when the buffer is not modified
+ return;
+ }
+ const auto last_modified_word = std::find_if_not(first_modified_word, words_end, modified);
+
+ const u64 word_index_begin = std::distance(state_words, first_modified_word);
+ const u64 word_index_end = std::distance(state_words, last_modified_word);
+
+ const unsigned local_page_begin = std::countr_zero(*first_modified_word);
+ const unsigned local_page_end =
+ static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
+ const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
+ const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
+ const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
+ const u64 query_page_end = Common::DivCeil(query_end, BYTES_PER_PAGE);
+ const u64 page_index_begin = std::max(word_page_begin + local_page_begin, query_page_begin);
+ const u64 page_index_end = std::min(word_page_end + local_page_end, query_page_end);
+ const u64 first_word_page_begin = page_index_begin % PAGES_PER_WORD;
+ const u64 last_word_page_end = (page_index_end - 1) % PAGES_PER_WORD + 1;
+
+ u64 page_begin = first_word_page_begin;
+ u64 current_base = 0;
+ u64 current_size = 0;
+ bool on_going = false;
+ for (u64 word_index = word_index_begin; word_index < word_index_end; ++word_index) {
+ const bool is_last_word = word_index + 1 == word_index_end;
+ const u64 page_end = is_last_word ? last_word_page_end : PAGES_PER_WORD;
+ const u64 right_offset = page_begin;
+ const u64 left_offset = PAGES_PER_WORD - page_end;
+ u64 bits = ~u64{0};
+ bits = (bits >> right_offset) << right_offset;
+ bits = (bits << left_offset) >> left_offset;
+
+ const u64 current_word = state_words[word_index] & bits;
+ state_words[word_index] &= ~bits;
+
+ if constexpr (type == Type::CPU) {
+ const u64 current_bits = untracked_words[word_index] & bits;
+ untracked_words[word_index] &= ~bits;
+ NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
+ }
+ // Exclude CPU modified pages when visiting GPU pages
+ const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
+ u64 page = page_begin;
+ page_begin = 0;
+
+ while (page < page_end) {
+ const int empty_bits = std::countr_zero(word >> page);
+ if (on_going && empty_bits != 0) {
+ InvokeModifiedRange(func, current_size, current_base);
+ current_size = 0;
+ on_going = false;
+ }
+ page += empty_bits;
+
+ const int continuous_bits = std::countr_one(word >> page);
+ if (!on_going && continuous_bits != 0) {
+ current_base = word_index * PAGES_PER_WORD + page;
+ on_going = true;
+ }
+ current_size += continuous_bits;
+ page += continuous_bits;
+ }
+ }
+ if (on_going && current_size > 0) {
+ InvokeModifiedRange(func, current_size, current_base);
+ }
+ }
+
+ template <typename Func>
+ void InvokeModifiedRange(Func&& func, u64 current_size, u64 current_base) {
+ const u64 current_size_bytes = current_size * BYTES_PER_PAGE;
+ const u64 offset_begin = current_base * BYTES_PER_PAGE;
+ const u64 offset_end = std::min(offset_begin + current_size_bytes, SizeBytes());
+ func(offset_begin, offset_end - offset_begin);
+ }
+
+ /**
+ * Returns true when a region has been modified
+ *
+ * @param offset Offset in bytes from the start of the buffer
+ * @param size Size in bytes of the region to query for modifications
+ */
+ template <Type type>
+ [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
+ static_assert(type != Type::Untracked);
+
+ const u64* const untracked_words = Array<Type::Untracked>();
+ const u64* const state_words = Array<type>();
+ const u64 num_query_words = size / BYTES_PER_WORD + 1;
+ const u64 word_begin = offset / BYTES_PER_WORD;
+ const u64 word_end = std::min(word_begin + num_query_words, NumWords());
+ const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
+ u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
+ for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
+ const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
+ const u64 word = state_words[word_index] & ~off_word;
+ if (word == 0) {
+ continue;
+ }
+ const u64 page_end = std::min((word_index + 1) * PAGES_PER_WORD, page_limit);
+ const u64 local_page_end = page_end % PAGES_PER_WORD;
+ const u64 page_end_shift = (PAGES_PER_WORD - local_page_end) % PAGES_PER_WORD;
+ if (((word >> page_index) << page_index) << page_end_shift != 0) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Returns a begin end pair with the inclusive modified region
+ *
+ * @param offset Offset in bytes from the start of the buffer
+ * @param size Size in bytes of the region to query for modifications
+ */
+ template <Type type>
+ [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
+ static_assert(type != Type::Untracked);
+
+ const u64* const untracked_words = Array<Type::Untracked>();
+ const u64* const state_words = Array<type>();
+ const u64 num_query_words = size / BYTES_PER_WORD + 1;
+ const u64 word_begin = offset / BYTES_PER_WORD;
+ const u64 word_end = std::min(word_begin + num_query_words, NumWords());
+ const u64 page_base = offset / BYTES_PER_PAGE;
+ const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
+ u64 begin = std::numeric_limits<u64>::max();
+ u64 end = 0;
+ for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
+ const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
+ const u64 word = state_words[word_index] & ~off_word;
+ if (word == 0) {
+ continue;
+ }
+ const u64 local_page_begin = std::countr_zero(word);
+ const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word);
+ const u64 page_index = word_index * PAGES_PER_WORD;
+ const u64 page_begin = std::max(page_index + local_page_begin, page_base);
+ const u64 page_end = std::min(page_index + local_page_end, page_limit);
+ begin = std::min(begin, page_begin);
+ end = std::max(end, page_end);
+ }
+ static constexpr std::pair<u64, u64> EMPTY{0, 0};
+ return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY;
+ }
+
+ /// Returns the number of words of the buffer
+ [[nodiscard]] size_t NumWords() const noexcept {
+ return words.NumWords();
+ }
+
+ /// Returns true when the buffer fits in the small vector optimization
+ [[nodiscard]] bool IsShort() const noexcept {
+ return words.IsShort();
+ }
+
+ RasterizerInterface* rasterizer = nullptr;
+ VAddr cpu_addr = 0;
+ Words words;
+ BufferFlagBits flags{};
+ int stream_score = 0;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
deleted file mode 100644
index e64170e66..000000000
--- a/src/video_core/buffer_cache/buffer_block.h
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <unordered_set>
-#include <utility>
-
-#include "common/alignment.h"
-#include "common/common_types.h"
-#include "video_core/gpu.h"
-
-namespace VideoCommon {
-
-class BufferBlock {
-public:
- bool Overlaps(VAddr start, VAddr end) const {
- return (cpu_addr < end) && (cpu_addr_end > start);
- }
-
- bool IsInside(VAddr other_start, VAddr other_end) const {
- return cpu_addr <= other_start && other_end <= cpu_addr_end;
- }
-
- std::size_t Offset(VAddr in_addr) const {
- return static_cast<std::size_t>(in_addr - cpu_addr);
- }
-
- VAddr CpuAddr() const {
- return cpu_addr;
- }
-
- VAddr CpuAddrEnd() const {
- return cpu_addr_end;
- }
-
- void SetCpuAddr(VAddr new_addr) {
- cpu_addr = new_addr;
- cpu_addr_end = new_addr + size;
- }
-
- std::size_t Size() const {
- return size;
- }
-
- u64 Epoch() const {
- return epoch;
- }
-
- void SetEpoch(u64 new_epoch) {
- epoch = new_epoch;
- }
-
-protected:
- explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
- SetCpuAddr(cpu_addr_);
- }
-
-private:
- VAddr cpu_addr{};
- VAddr cpu_addr_end{};
- std::size_t size{};
- u64 epoch{};
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
new file mode 100644
index 000000000..ab32294c8
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -0,0 +1,13 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/microprofile.h"
+
+namespace VideoCommon {
+
+MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b5dc68902..2a6844ab1 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -4,597 +4,1289 @@
#pragma once
-#include <list>
+#include <algorithm>
+#include <array>
+#include <deque>
#include <memory>
#include <mutex>
+#include <span>
#include <unordered_map>
-#include <unordered_set>
-#include <utility>
#include <vector>
#include <boost/container/small_vector.hpp>
-#include <boost/icl/interval_set.hpp>
-#include <boost/intrusive/set.hpp>
-#include "common/alignment.h"
-#include "common/assert.h"
#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "core/core.h"
+#include "common/div_ceil.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
#include "core/memory.h"
#include "core/settings.h"
-#include "video_core/buffer_cache/buffer_block.h"
-#include "video_core/buffer_cache/map_interval.h"
+#include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/delayed_destruction_ring.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/texture_cache/slot_vector.h"
+#include "video_core/texture_cache/types.h"
namespace VideoCommon {
-template <typename Buffer, typename BufferType, typename StreamBuffer>
+MICROPROFILE_DECLARE(GPU_PrepareBuffers);
+MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
+MICROPROFILE_DECLARE(GPU_DownloadMemory);
+
+using BufferId = SlotId;
+
+constexpr u32 NUM_VERTEX_BUFFERS = 32;
+constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
+constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
+constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
+constexpr u32 NUM_STORAGE_BUFFERS = 16;
+constexpr u32 NUM_STAGES = 5;
+
+template <typename P>
class BufferCache {
- using IntervalSet = boost::icl::interval_set<VAddr>;
- using IntervalType = typename IntervalSet::interval_type;
- using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
+ // Page size for caching purposes.
+ // This is unrelated to the CPU page size and it can be changed as it seems optimal.
+ static constexpr u32 PAGE_BITS = 16;
+ static constexpr u64 PAGE_SIZE = u64{1} << PAGE_BITS;
+
+ static constexpr bool IS_OPENGL = P::IS_OPENGL;
+ static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
+ P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
+ static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
+ P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
+ static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
+ static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
+ static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
+
+ static constexpr BufferId NULL_BUFFER_ID{0};
+
+ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+ using Runtime = typename P::Runtime;
+ using Buffer = typename P::Buffer;
+
+ struct Empty {};
+
+ struct OverlapResult {
+ std::vector<BufferId> ids;
+ VAddr begin;
+ VAddr end;
+ bool has_stream_leap = false;
+ };
- static constexpr u64 WRITE_PAGE_BIT = 11;
- static constexpr u64 BLOCK_PAGE_BITS = 21;
- static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
+ struct Binding {
+ VAddr cpu_addr{};
+ u32 size{};
+ BufferId buffer_id;
+ };
-public:
- struct BufferInfo {
- BufferType handle;
- u64 offset;
- u64 address;
+ static constexpr Binding NULL_BINDING{
+ .cpu_addr = 0,
+ .size = 0,
+ .buffer_id = NULL_BUFFER_ID,
};
- BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
- bool is_written = false, bool use_fast_cbuf = false) {
- std::lock_guard lock{mutex};
+public:
+ static constexpr u32 SKIP_CACHE_SIZE = 4096;
- auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr_opt) {
- return GetEmptyBuffer(size);
- }
- const VAddr cpu_addr = *cpu_addr_opt;
-
- // Cache management is a big overhead, so only cache entries with a given size.
- // TODO: Figure out which size is the best for given games.
- constexpr std::size_t max_stream_size = 0x800;
- if (use_fast_cbuf || size < max_stream_size) {
- if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
- const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
- if (use_fast_cbuf) {
- u8* dest;
- if (is_granular) {
- dest = memory_manager.GetPointer(gpu_addr);
- } else {
- staging_buffer.resize(size);
- dest = staging_buffer.data();
- memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
- }
- return ConstBufferUpload(dest, size);
- }
- if (is_granular) {
- u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
- return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
- std::memcpy(dest, host_ptr, size);
- });
- } else {
- return StreamBufferUpload(
- size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
- memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
- });
- }
- }
- }
+ explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ Runtime& runtime_);
- Buffer* const block = GetBlock(cpu_addr, size);
- MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
- if (!map) {
- return GetEmptyBuffer(size);
- }
- if (is_written) {
- map->MarkAsModified(true, GetModifiedTicks());
- if (Settings::IsGPULevelHigh() &&
- Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- MarkForAsyncFlush(map);
- }
- if (!map->is_written) {
- map->is_written = true;
- MarkRegionAsWritten(map->start, map->end - 1);
- }
- }
+ void TickFrame();
- return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
- }
+ void WriteMemory(VAddr cpu_addr, u64 size);
- /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
- BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
- std::size_t alignment = 4) {
- std::lock_guard lock{mutex};
- return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
- std::memcpy(dest, raw_pointer, size);
- });
- }
+ void CachedWriteMemory(VAddr cpu_addr, u64 size);
- /// Prepares the buffer cache for data uploading
- /// @param max_size Maximum number of bytes that will be uploaded
- /// @return True when a stream buffer invalidation was required, false otherwise
- bool Map(std::size_t max_size) {
- std::lock_guard lock{mutex};
+ void DownloadMemory(VAddr cpu_addr, u64 size);
- bool invalidated;
- std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
- buffer_offset = buffer_offset_base;
+ void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
- return invalidated;
- }
+ void UpdateGraphicsBuffers(bool is_indexed);
- /// Finishes the upload stream
- void Unmap() {
- std::lock_guard lock{mutex};
- stream_buffer->Unmap(buffer_offset - buffer_offset_base);
- }
+ void UpdateComputeBuffers();
- /// Function called at the end of each frame, inteded for deferred operations
- void TickFrame() {
- ++epoch;
+ void BindHostGeometryBuffers(bool is_indexed);
- while (!pending_destruction.empty()) {
- // Delay at least 4 frames before destruction.
- // This is due to triple buffering happening on some drivers.
- static constexpr u64 epochs_to_destroy = 5;
- if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
- break;
- }
- pending_destruction.pop();
- }
- }
+ void BindHostStageBuffers(size_t stage);
- /// Write any cached resources overlapping the specified region back to memory
- void FlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ void BindHostComputeBuffers();
- VectorMapInterval objects = GetMapsInRange(addr, size);
- std::sort(objects.begin(), objects.end(),
- [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
- for (MapInterval* object : objects) {
- if (object->is_modified && object->is_registered) {
- mutex.unlock();
- FlushMap(object);
- mutex.lock();
- }
- }
- }
+ void SetEnabledUniformBuffers(size_t stage, u32 enabled);
- bool MustFlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ void SetEnabledComputeUniformBuffers(u32 enabled);
- const VectorMapInterval objects = GetMapsInRange(addr, size);
- return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
- return map->is_modified && map->is_registered;
- });
- }
+ void UnbindGraphicsStorageBuffers(size_t stage);
- /// Mark the specified region as being invalidated
- void InvalidateRegion(VAddr addr, u64 size) {
- std::lock_guard lock{mutex};
+ void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+ bool is_written);
- for (auto& object : GetMapsInRange(addr, size)) {
- if (object->is_registered) {
- Unregister(object);
- }
- }
- }
+ void UnbindComputeStorageBuffers();
- void OnCPUWrite(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+ bool is_written);
- for (MapInterval* object : GetMapsInRange(addr, size)) {
- if (object->is_memory_marked && object->is_registered) {
- UnmarkMemory(object);
- object->is_sync_pending = true;
- marked_for_unregister.emplace_back(object);
- }
- }
- }
+ void FlushCachedWrites();
- void SyncGuestHost() {
- std::lock_guard lock{mutex};
+ /// Return true when there are uncommitted buffers to be downloaded
+ [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
- for (auto& object : marked_for_unregister) {
- if (object->is_registered) {
- object->is_sync_pending = false;
- Unregister(object);
- }
+ /// Return true when the caller should wait for async downloads
+ [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
+
+ /// Commit asynchronous downloads
+ void CommitAsyncFlushes();
+
+ /// Pop asynchronous downloads
+ void PopAsyncFlushes();
+
+ /// Return true when a CPU region is modified from the GPU
+ [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+
+ std::mutex mutex;
+
+private:
+ template <typename Func>
+ static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
+ for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
+ const int disabled_bits = std::countr_zero(enabled_mask);
+ index += disabled_bits;
+ enabled_mask >>= disabled_bits;
+ func(index);
}
- marked_for_unregister.clear();
}
- void CommitAsyncFlushes() {
- if (uncommitted_flushes) {
- auto commit_list = std::make_shared<std::list<MapInterval*>>();
- for (MapInterval* map : *uncommitted_flushes) {
- if (map->is_registered && map->is_modified) {
- // TODO(Blinkhawk): Implement backend asynchronous flushing
- // AsyncFlushMap(map)
- commit_list->push_back(map);
- }
- }
- if (!commit_list->empty()) {
- committed_flushes.push_back(commit_list);
- } else {
- committed_flushes.emplace_back();
+ template <typename Func>
+ void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
+ const u64 page_end = Common::DivCeil(cpu_addr + size, PAGE_SIZE);
+ for (u64 page = cpu_addr >> PAGE_BITS; page < page_end;) {
+ const BufferId buffer_id = page_table[page];
+ if (!buffer_id) {
+ ++page;
+ continue;
}
- } else {
- committed_flushes.emplace_back();
+ Buffer& buffer = slot_buffers[buffer_id];
+ func(buffer_id, buffer);
+
+ const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
}
- uncommitted_flushes.reset();
}
- bool ShouldWaitAsyncFlushes() const {
- return !committed_flushes.empty() && committed_flushes.front() != nullptr;
+ static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
+ return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
+ ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
}
- bool HasUncommittedFlushes() const {
- return uncommitted_flushes != nullptr;
- }
+ void BindHostIndexBuffer();
- void PopAsyncFlushes() {
- if (committed_flushes.empty()) {
- return;
- }
- auto& flush_list = committed_flushes.front();
- if (!flush_list) {
- committed_flushes.pop_front();
- return;
- }
- for (MapInterval* map : *flush_list) {
- if (map->is_registered) {
- // TODO(Blinkhawk): Replace this for reading the asynchronous flush
- FlushMap(map);
- }
- }
- committed_flushes.pop_front();
- }
+ void BindHostVertexBuffers();
- virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
+ void BindHostGraphicsUniformBuffers(size_t stage);
-protected:
- explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- std::unique_ptr<StreamBuffer> stream_buffer)
- : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
+ void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
- ~BufferCache() = default;
+ void BindHostGraphicsStorageBuffers(size_t stage);
- virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
+ void BindHostTransformFeedbackBuffers();
- virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
- return {};
- }
+ void BindHostComputeUniformBuffers();
- /// Register an object into the cache
- MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
- const VAddr cpu_addr = new_map.start;
- if (!cpu_addr) {
- LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
- new_map.gpu_addr);
- return nullptr;
- }
- const std::size_t size = new_map.end - new_map.start;
- new_map.is_registered = true;
- rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
- new_map.is_memory_marked = true;
- if (inherit_written) {
- MarkRegionAsWritten(new_map.start, new_map.end - 1);
- new_map.is_written = true;
- }
- MapInterval* const storage = mapped_addresses_allocator.Allocate();
- *storage = new_map;
- mapped_addresses.insert(*storage);
- return storage;
- }
+ void BindHostComputeStorageBuffers();
- void UnmarkMemory(MapInterval* map) {
- if (!map->is_memory_marked) {
- return;
- }
- const std::size_t size = map->end - map->start;
- rasterizer.UpdatePagesCachedCount(map->start, size, -1);
- map->is_memory_marked = false;
- }
-
- /// Unregisters an object from the cache
- void Unregister(MapInterval* map) {
- UnmarkMemory(map);
- map->is_registered = false;
- if (map->is_sync_pending) {
- map->is_sync_pending = false;
- marked_for_unregister.remove(map);
+ void DoUpdateGraphicsBuffers(bool is_indexed);
+
+ void DoUpdateComputeBuffers();
+
+ void UpdateIndexBuffer();
+
+ void UpdateVertexBuffers();
+
+ void UpdateVertexBuffer(u32 index);
+
+ void UpdateUniformBuffers(size_t stage);
+
+ void UpdateStorageBuffers(size_t stage);
+
+ void UpdateTransformFeedbackBuffers();
+
+ void UpdateTransformFeedbackBuffer(u32 index);
+
+ void UpdateComputeUniformBuffers();
+
+ void UpdateComputeStorageBuffers();
+
+ void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
+
+ [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
+
+ [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
+
+ void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
+
+ [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
+
+ void Register(BufferId buffer_id);
+
+ void Unregister(BufferId buffer_id);
+
+ template <bool insert>
+ void ChangeRegister(BufferId buffer_id);
+
+ void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
+
+ void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
+
+ void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
+ std::span<BufferCopy> copies);
+
+ void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
+ std::span<const BufferCopy> copies);
+
+ void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
+
+ void DeleteBuffer(BufferId buffer_id);
+
+ void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
+
+ void NotifyBufferDeletion();
+
+ [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
+
+ [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
+
+ [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
+
+ [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
+
+ VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+ Core::Memory::Memory& cpu_memory;
+ Runtime& runtime;
+
+ SlotVector<Buffer> slot_buffers;
+ DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
+
+ u32 last_index_count = 0;
+
+ Binding index_buffer;
+ std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
+ std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
+ std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
+ std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
+
+ std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
+ std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
+
+ std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
+ u32 enabled_compute_uniform_buffers = 0;
+
+ std::array<u32, NUM_STAGES> enabled_storage_buffers{};
+ std::array<u32, NUM_STAGES> written_storage_buffers{};
+ u32 enabled_compute_storage_buffers = 0;
+ u32 written_compute_storage_buffers = 0;
+
+ std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
+
+ bool has_deleted_buffers = false;
+
+ std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
+ dirty_uniform_buffers{};
+
+ std::vector<BufferId> cached_write_buffer_ids;
+
+ // TODO: This data structure is not optimal and it should be reworked
+ std::vector<BufferId> uncommitted_downloads;
+ std::deque<std::vector<BufferId>> committed_downloads;
+
+ size_t immediate_buffer_capacity = 0;
+ std::unique_ptr<u8[]> immediate_buffer_alloc;
+
+ std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
+};
+
+template <class P>
+BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ Runtime& runtime_)
+ : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
+ gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
+ // Ensure the first slot is used for the null buffer
+ void(slot_buffers.insert(runtime, NullBufferParams{}));
+}
+
+template <class P>
+void BufferCache<P>::TickFrame() {
+ delayed_destruction_ring.Tick();
+}
+
+template <class P>
+void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
+ buffer.MarkRegionAsCpuModified(cpu_addr, size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+ if (!buffer.HasCachedWrites()) {
+ cached_write_buffer_ids.push_back(buffer_id);
}
- if (map->is_written) {
- UnmarkRegionAsWritten(map->start, map->end - 1);
+ buffer.CachedCpuWrite(cpu_addr, size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
+ boost::container::small_vector<BufferCopy, 1> copies;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = range_offset,
+ .dst_offset = total_size_bytes,
+ .size = range_size,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ if (total_size_bytes == 0) {
+ return;
}
- const auto it = mapped_addresses.find(*map);
- ASSERT(it != mapped_addresses.end());
- mapped_addresses.erase(it);
- mapped_addresses_allocator.Release(map);
- }
-
-private:
- MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
- const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
- if (overlaps.empty()) {
- auto& memory_manager = system.GPU().MemoryManager();
- const VAddr cpu_addr_end = cpu_addr + size;
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- u8* host_ptr = memory_manager.GetPointer(gpu_addr);
- block->Upload(block->Offset(cpu_addr), size, host_ptr);
- } else {
- staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
+ MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+ if constexpr (USE_MEMORY_MAPS) {
+ auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+ const u8* const mapped_memory = download_staging.mapped_span.data();
+ const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
+ for (BufferCopy& copy : copies) {
+ // Modify copies to have the staging offset in mind
+ copy.dst_offset += download_staging.offset;
}
- return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
- }
-
- const VAddr cpu_addr_end = cpu_addr + size;
- if (overlaps.size() == 1) {
- MapInterval* const current_map = overlaps[0];
- if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
- return current_map;
+ runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+ runtime.Finish();
+ for (const BufferCopy& copy : copies) {
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ // Undo the modified offset
+ const u64 dst_offset = copy.dst_offset - download_staging.offset;
+ const u8* copy_mapped_memory = mapped_memory + dst_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
+ }
+ } else {
+ const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+ for (const BufferCopy& copy : copies) {
+ buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
}
}
- VAddr new_start = cpu_addr;
- VAddr new_end = cpu_addr_end;
- bool write_inheritance = false;
- bool modified_inheritance = false;
- // Calculate new buffer parameters
- for (MapInterval* overlap : overlaps) {
- new_start = std::min(overlap->start, new_start);
- new_end = std::max(overlap->end, new_end);
- write_inheritance |= overlap->is_written;
- modified_inheritance |= overlap->is_modified;
+ });
+}
+
+template <class P>
+void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
+ uniform_buffers[stage][index] = NULL_BINDING;
+ return;
+ }
+ const Binding binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = BufferId{},
+ };
+ uniform_buffers[stage][index] = binding;
+}
+
+template <class P>
+void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
+ MICROPROFILE_SCOPE(GPU_PrepareBuffers);
+ do {
+ has_deleted_buffers = false;
+ DoUpdateGraphicsBuffers(is_indexed);
+ } while (has_deleted_buffers);
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeBuffers() {
+ MICROPROFILE_SCOPE(GPU_PrepareBuffers);
+ do {
+ has_deleted_buffers = false;
+ DoUpdateComputeBuffers();
+ } while (has_deleted_buffers);
+}
+
+template <class P>
+void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
+ MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+ if (is_indexed) {
+ BindHostIndexBuffer();
+ } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
+ const auto& regs = maxwell3d.regs;
+ if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
+ runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
}
- GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
- for (auto& overlap : overlaps) {
- Unregister(overlap);
+ }
+ BindHostVertexBuffers();
+ BindHostTransformFeedbackBuffers();
+}
+
+template <class P>
+void BufferCache<P>::BindHostStageBuffers(size_t stage) {
+ MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+ BindHostGraphicsUniformBuffers(stage);
+ BindHostGraphicsStorageBuffers(stage);
+}
+
+template <class P>
+void BufferCache<P>::BindHostComputeBuffers() {
+ MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+ BindHostComputeUniformBuffers();
+ BindHostComputeStorageBuffers();
+}
+
+template <class P>
+void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) {
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ if (enabled_uniform_buffers[stage] != enabled) {
+ dirty_uniform_buffers[stage] = ~u32{0};
}
- UpdateBlock(block, new_start, new_end, overlaps);
-
- const MapInterval new_map{new_start, new_end, new_gpu_addr};
- MapInterval* const map = Register(new_map, write_inheritance);
- if (!map) {
- return nullptr;
+ }
+ enabled_uniform_buffers[stage] = enabled;
+}
+
+template <class P>
+void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
+ enabled_compute_uniform_buffers = enabled;
+}
+
+template <class P>
+void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
+ enabled_storage_buffers[stage] = 0;
+ written_storage_buffers[stage] = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
+ u32 cbuf_offset, bool is_written) {
+ enabled_storage_buffers[stage] |= 1U << ssbo_index;
+ written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
+
+ const auto& cbufs = maxwell3d.state.shader_stages[stage];
+ const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
+ storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
+}
+
+template <class P>
+void BufferCache<P>::UnbindComputeStorageBuffers() {
+ enabled_compute_storage_buffers = 0;
+ written_compute_storage_buffers = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+ bool is_written) {
+ enabled_compute_storage_buffers |= 1U << ssbo_index;
+ written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
+
+ const auto& launch_desc = kepler_compute.launch_description;
+ ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
+
+ const auto& cbufs = launch_desc.const_buffer_config;
+ const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
+ compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
+}
+
+template <class P>
+void BufferCache<P>::FlushCachedWrites() {
+ for (const BufferId buffer_id : cached_write_buffer_ids) {
+ slot_buffers[buffer_id].FlushCachedWrites();
+ }
+ cached_write_buffer_ids.clear();
+}
+
+template <class P>
+bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
+ return !uncommitted_downloads.empty();
+}
+
+template <class P>
+bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+ return !committed_downloads.empty() && !committed_downloads.front().empty();
+}
+
+template <class P>
+void BufferCache<P>::CommitAsyncFlushes() {
+ // This is intentionally passing the value by copy
+ committed_downloads.push_front(uncommitted_downloads);
+ uncommitted_downloads.clear();
+}
+
+template <class P>
+void BufferCache<P>::PopAsyncFlushes() {
+ if (committed_downloads.empty()) {
+ return;
+ }
+ auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); });
+ const std::span<const BufferId> download_ids = committed_downloads.back();
+ if (download_ids.empty()) {
+ return;
+ }
+ MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+ boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ for (const BufferId buffer_id : download_ids) {
+ slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) {
+ downloads.push_back({
+ BufferCopy{
+ .src_offset = range_offset,
+ .dst_offset = total_size_bytes,
+ .size = range_size,
+ },
+ buffer_id,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ }
+ if (downloads.empty()) {
+ return;
+ }
+ if constexpr (USE_MEMORY_MAPS) {
+ auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+ for (auto& [copy, buffer_id] : downloads) {
+ // Have in mind the staging buffer offset for the copy
+ copy.dst_offset += download_staging.offset;
+ const std::array copies{copy};
+ runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
}
- if (modified_inheritance) {
- map->MarkAsModified(true, GetModifiedTicks());
- if (Settings::IsGPULevelHigh() &&
- Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- MarkForAsyncFlush(map);
- }
+ runtime.Finish();
+ for (const auto [copy, buffer_id] : downloads) {
+ const Buffer& buffer = slot_buffers[buffer_id];
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ // Undo the modified offset
+ const u64 dst_offset = copy.dst_offset - download_staging.offset;
+ const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
+ cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
+ }
+ } else {
+ const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+ for (const auto [copy, buffer_id] : downloads) {
+ Buffer& buffer = slot_buffers[buffer_id];
+ buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
}
- return map;
}
-
- void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {
- const IntervalType base_interval{start, end};
- IntervalSet interval_set{};
- interval_set.add(base_interval);
- for (auto& overlap : overlaps) {
- const IntervalType subtract{overlap->start, overlap->end};
- interval_set.subtract(subtract);
+}
+
+template <class P>
+bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+ const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
+ for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const BufferId image_id = page_table[page];
+ if (!image_id) {
+ ++page;
+ continue;
}
- for (auto& interval : interval_set) {
- const std::size_t size = interval.upper() - interval.lower();
- if (size == 0) {
- continue;
- }
- staging_buffer.resize(size);
- system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
- block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
+ Buffer& buffer = slot_buffers[image_id];
+ if (buffer.IsRegionGpuModified(addr, size)) {
+ return true;
}
+ const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
}
-
- VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
- VectorMapInterval result;
- if (size == 0) {
- return result;
+ return false;
+}
+
+template <class P>
+void BufferCache<P>::BindHostIndexBuffer() {
+ Buffer& buffer = slot_buffers[index_buffer.buffer_id];
+ const u32 offset = buffer.Offset(index_buffer.cpu_addr);
+ const u32 size = index_buffer.size;
+ SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
+ if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
+ runtime.BindIndexBuffer(buffer, offset, size);
+ } else {
+ runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
+ maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
+ buffer, offset, size);
+ }
+}
+
+template <class P>
+void BufferCache<P>::BindHostVertexBuffers() {
+ auto& flags = maxwell3d.dirty.flags;
+ for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ const Binding& binding = vertex_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
+ if (!flags[Dirty::VertexBuffer0 + index]) {
+ continue;
}
+ flags[Dirty::VertexBuffer0 + index] = false;
+
+ const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
+ }
+}
- const VAddr addr_end = addr + size;
- auto it = mapped_addresses.lower_bound(addr);
- if (it != mapped_addresses.begin()) {
- --it;
+template <class P>
+void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
+ u32 dirty = ~0U;
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ dirty = std::exchange(dirty_uniform_buffers[stage], 0);
+ }
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+ const bool needs_bind = ((dirty >> index) & 1) != 0;
+ BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
+ if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+ ++binding_index;
}
- while (it != mapped_addresses.end() && it->start < addr_end) {
- if (it->Overlaps(addr, addr_end)) {
- result.push_back(&*it);
+ });
+}
+
+template <class P>
+void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
+ bool needs_bind) {
+ const Binding& binding = uniform_buffers[stage][index];
+ const VAddr cpu_addr = binding.cpu_addr;
+ const u32 size = binding.size;
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
+ if constexpr (IS_OPENGL) {
+ if (runtime.HasFastBufferSubData()) {
+ // Fast path for Nvidia
+ if (!HasFastUniformBufferBound(stage, binding_index)) {
+ // We only have to bind when the currently bound buffer is not the fast version
+ runtime.BindFastUniformBuffer(stage, binding_index, size);
+ }
+ const auto span = ImmediateBufferWithData(cpu_addr, size);
+ runtime.PushFastUniformBuffer(stage, binding_index, span);
+ return;
}
- ++it;
}
- return result;
- }
+ fast_bound_uniform_buffers[stage] |= 1U << binding_index;
- /// Returns a ticks counter used for tracking when cached objects were last modified
- u64 GetModifiedTicks() {
- return ++modified_ticks;
+ // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
+ const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
+ cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
+ return;
}
-
- void FlushMap(MapInterval* map) {
- const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
- ASSERT_OR_EXECUTE(it != blocks.end(), return;);
-
- std::shared_ptr<Buffer> block = it->second;
-
- const std::size_t size = map->end - map->start;
- staging_buffer.resize(size);
- block->Download(block->Offset(map->start), size, staging_buffer.data());
- system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
- map->MarkAsModified(false, 0);
+ // Classic cached path
+ SynchronizeBuffer(buffer, cpu_addr, size);
+ if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
+ // Skip binding if it's not needed and if the bound buffer is not the fast version
+ // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+ return;
}
+ fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
- template <typename Callable>
- BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
- AlignBuffer(alignment);
- const std::size_t uploaded_offset = buffer_offset;
- callable(buffer_ptr);
-
- buffer_ptr += size;
- buffer_offset += size;
- return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
+ const u32 offset = buffer.Offset(cpu_addr);
+ if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+ runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
+ } else {
+ runtime.BindUniformBuffer(buffer, offset, size);
}
+}
+
+template <class P>
+void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+ const Binding& binding = storage_buffers[stage][index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
+ if constexpr (NEEDS_BIND_STORAGE_INDEX) {
+ runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
+ ++binding_index;
+ } else {
+ runtime.BindStorageBuffer(buffer, offset, size, is_written);
+ }
+ });
+}
- void AlignBuffer(std::size_t alignment) {
- // Align the offset, not the mapped pointer
- const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
- buffer_ptr += offset_aligned - buffer_offset;
- buffer_offset = offset_aligned;
+template <class P>
+void BufferCache<P>::BindHostTransformFeedbackBuffers() {
+ if (maxwell3d.regs.tfb_enabled == 0) {
+ return;
}
+ for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
+ const Binding& binding = transform_feedback_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ runtime.BindTransformFeedbackBuffer(index, buffer, offset, size);
+ }
+}
- std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
- const std::size_t old_size = buffer->Size();
- const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
- const VAddr cpu_addr = buffer->CpuAddr();
- std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
- new_buffer->CopyFrom(*buffer, 0, 0, old_size);
- QueueDestruction(std::move(buffer));
-
- const VAddr cpu_addr_end = cpu_addr + new_size - 1;
- const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
- for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
- blocks.insert_or_assign(page_start, new_buffer);
+template <class P>
+void BufferCache<P>::BindHostComputeUniformBuffers() {
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ // Mark all uniform buffers as dirty
+ dirty_uniform_buffers.fill(~u32{0});
+ }
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+ const Binding& binding = compute_uniform_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+ runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
+ ++binding_index;
+ } else {
+ runtime.BindUniformBuffer(buffer, offset, size);
}
+ });
+}
+
+template <class P>
+void BufferCache<P>::BindHostComputeStorageBuffers() {
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+ const Binding& binding = compute_storage_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
+ if constexpr (NEEDS_BIND_STORAGE_INDEX) {
+ runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
+ ++binding_index;
+ } else {
+ runtime.BindStorageBuffer(buffer, offset, size, is_written);
+ }
+ });
+}
- return new_buffer;
+template <class P>
+void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
+ if (is_indexed) {
+ UpdateIndexBuffer();
}
+ UpdateVertexBuffers();
+ UpdateTransformFeedbackBuffers();
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ UpdateUniformBuffers(stage);
+ UpdateStorageBuffers(stage);
+ }
+}
+
+template <class P>
+void BufferCache<P>::DoUpdateComputeBuffers() {
+ UpdateComputeUniformBuffers();
+ UpdateComputeStorageBuffers();
+}
+
+template <class P>
+void BufferCache<P>::UpdateIndexBuffer() {
+ // We have to check for the dirty flags and index count
+ // The index count is currently changed without updating the dirty flags
+ const auto& index_array = maxwell3d.regs.index_array;
+ auto& flags = maxwell3d.dirty.flags;
+ if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
+ return;
+ }
+ flags[Dirty::IndexBuffer] = false;
+ last_index_count = index_array.count;
+
+ const GPUVAddr gpu_addr_begin = index_array.StartAddress();
+ const GPUVAddr gpu_addr_end = index_array.EndAddress();
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+ const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+ const u32 draw_size = index_array.count * index_array.FormatSizeInBytes();
+ const u32 size = std::min(address_size, draw_size);
+ if (size == 0 || !cpu_addr) {
+ index_buffer = NULL_BINDING;
+ return;
+ }
+ index_buffer = Binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = FindBuffer(*cpu_addr, size),
+ };
+}
- std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
- std::shared_ptr<Buffer> second) {
- const std::size_t size_1 = first->Size();
- const std::size_t size_2 = second->Size();
- const VAddr first_addr = first->CpuAddr();
- const VAddr second_addr = second->CpuAddr();
- const VAddr new_addr = std::min(first_addr, second_addr);
- const std::size_t new_size = size_1 + size_2;
-
- std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
- new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
- new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
- QueueDestruction(std::move(first));
- QueueDestruction(std::move(second));
+template <class P>
+void BufferCache<P>::UpdateVertexBuffers() {
+ auto& flags = maxwell3d.dirty.flags;
+ if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+ return;
+ }
+ flags[Dirty::VertexBuffers] = false;
- const VAddr cpu_addr_end = new_addr + new_size - 1;
- const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
- for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
- blocks.insert_or_assign(page_start, new_buffer);
- }
- return new_buffer;
+ for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ UpdateVertexBuffer(index);
}
+}
- Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
- std::shared_ptr<Buffer> found;
+template <class P>
+void BufferCache<P>::UpdateVertexBuffer(u32 index) {
+ if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+ return;
+ }
+ const auto& array = maxwell3d.regs.vertex_array[index];
+ const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+ const GPUVAddr gpu_addr_begin = array.StartAddress();
+ const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+ const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+ const u32 size = address_size; // TODO: Analyze stride and number of vertices
+ if (array.enable == 0 || size == 0 || !cpu_addr) {
+ vertex_buffers[index] = NULL_BINDING;
+ return;
+ }
+ vertex_buffers[index] = Binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = FindBuffer(*cpu_addr, size),
+ };
+}
+
+template <class P>
+void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
+ ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+ Binding& binding = uniform_buffers[stage][index];
+ if (binding.buffer_id) {
+ // Already updated
+ return;
+ }
+ // Mark as dirty
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ dirty_uniform_buffers[stage] |= 1U << index;
+ }
+ // Resolve buffer
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
+ const u32 written_mask = written_storage_buffers[stage];
+ ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+ // Resolve buffer
+ Binding& binding = storage_buffers[stage][index];
+ const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ binding.buffer_id = buffer_id;
+ // Mark buffer as written if needed
+ if (((written_mask >> index) & 1) != 0) {
+ MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+ }
+ });
+}
- const VAddr cpu_addr_end = cpu_addr + size - 1;
- const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
- for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
- auto it = blocks.find(page_start);
- if (it == blocks.end()) {
- if (found) {
- found = EnlargeBlock(found);
- continue;
- }
- const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
- found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
- blocks.insert_or_assign(page_start, found);
- continue;
- }
- if (!found) {
- found = it->second;
- continue;
- }
- if (found != it->second) {
- found = MergeBlocks(std::move(found), it->second);
+template <class P>
+void BufferCache<P>::UpdateTransformFeedbackBuffers() {
+ if (maxwell3d.regs.tfb_enabled == 0) {
+ return;
+ }
+ for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
+ UpdateTransformFeedbackBuffer(index);
+ }
+}
+
+template <class P>
+void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
+ const auto& binding = maxwell3d.regs.tfb_bindings[index];
+ const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
+ const u32 size = binding.buffer_size;
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
+ transform_feedback_buffers[index] = NULL_BINDING;
+ return;
+ }
+ const BufferId buffer_id = FindBuffer(*cpu_addr, size);
+ transform_feedback_buffers[index] = Binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = buffer_id,
+ };
+ MarkWrittenBuffer(buffer_id, *cpu_addr, size);
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeUniformBuffers() {
+ ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+ Binding& binding = compute_uniform_buffers[index];
+ binding = NULL_BINDING;
+ const auto& launch_desc = kepler_compute.launch_description;
+ if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
+ const auto& cbuf = launch_desc.const_buffer_config[index];
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+ if (cpu_addr) {
+ binding.cpu_addr = *cpu_addr;
+ binding.size = cbuf.size;
}
}
- return found.get();
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeStorageBuffers() {
+ ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+ // Resolve buffer
+ Binding& binding = compute_storage_buffers[index];
+ const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ binding.buffer_id = buffer_id;
+ // Mark as written if needed
+ if (((written_compute_storage_buffers >> index) & 1) != 0) {
+ MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+ }
+ });
+}
+
+template <class P>
+void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
+ Buffer& buffer = slot_buffers[buffer_id];
+ buffer.MarkRegionAsGpuModified(cpu_addr, size);
+
+ const bool is_accuracy_high = Settings::IsGPULevelHigh();
+ const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
+ if (!is_accuracy_high || !is_async) {
+ return;
+ }
+ if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) {
+ // Already inserted
+ return;
}
+ uncommitted_downloads.push_back(buffer_id);
+}
- void MarkRegionAsWritten(VAddr start, VAddr end) {
- const u64 page_end = end >> WRITE_PAGE_BIT;
- for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
- if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
- ++it->second;
- }
+template <class P>
+BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
+ if (cpu_addr == 0) {
+ return NULL_BUFFER_ID;
+ }
+ const u64 page = cpu_addr >> PAGE_BITS;
+ const BufferId buffer_id = page_table[page];
+ if (!buffer_id) {
+ return CreateBuffer(cpu_addr, size);
+ }
+ const Buffer& buffer = slot_buffers[buffer_id];
+ if (buffer.IsInBounds(cpu_addr, size)) {
+ return buffer_id;
+ }
+ return CreateBuffer(cpu_addr, size);
+}
+
+template <class P>
+typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
+ u32 wanted_size) {
+ static constexpr int STREAM_LEAP_THRESHOLD = 16;
+ std::vector<BufferId> overlap_ids;
+ VAddr begin = cpu_addr;
+ VAddr end = cpu_addr + wanted_size;
+ int stream_score = 0;
+ bool has_stream_leap = false;
+ for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) {
+ const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
+ if (!overlap_id) {
+ continue;
+ }
+ Buffer& overlap = slot_buffers[overlap_id];
+ if (overlap.IsPicked()) {
+ continue;
+ }
+ overlap_ids.push_back(overlap_id);
+ overlap.Pick();
+ const VAddr overlap_cpu_addr = overlap.CpuAddr();
+ if (overlap_cpu_addr < begin) {
+ cpu_addr = begin = overlap_cpu_addr;
+ }
+ end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
+
+ stream_score += overlap.StreamScore();
+ if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
+ // When this memory region has been joined a bunch of times, we assume it's being used
+ // as a stream buffer. Increase the size to skip constantly recreating buffers.
+ has_stream_leap = true;
+ end += PAGE_SIZE * 256;
}
}
-
- void UnmarkRegionAsWritten(VAddr start, VAddr end) {
- const u64 page_end = end >> WRITE_PAGE_BIT;
- for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
- auto it = written_pages.find(page_start);
- if (it != written_pages.end()) {
- if (it->second > 1) {
- --it->second;
- } else {
- written_pages.erase(it);
- }
- }
+ return OverlapResult{
+ .ids = std::move(overlap_ids),
+ .begin = begin,
+ .end = end,
+ .has_stream_leap = has_stream_leap,
+ };
+}
+
+template <class P>
+void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
+ bool accumulate_stream_score) {
+ Buffer& new_buffer = slot_buffers[new_buffer_id];
+ Buffer& overlap = slot_buffers[overlap_id];
+ if (accumulate_stream_score) {
+ new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
+ }
+ std::vector<BufferCopy> copies;
+ const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
+ overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = begin,
+ .dst_offset = dst_base_offset + begin,
+ .size = range_size,
+ });
+ new_buffer.UnmarkRegionAsCpuModified(begin, range_size);
+ new_buffer.MarkRegionAsGpuModified(begin, range_size);
+ });
+ if (!copies.empty()) {
+ runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
+ }
+ ReplaceBufferDownloads(overlap_id, new_buffer_id);
+ DeleteBuffer(overlap_id);
+}
+
+template <class P>
+BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
+ const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
+ const u32 size = static_cast<u32>(overlap.end - overlap.begin);
+ const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+ for (const BufferId overlap_id : overlap.ids) {
+ JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
+ }
+ Register(new_buffer_id);
+ return new_buffer_id;
+}
+
+template <class P>
+void BufferCache<P>::Register(BufferId buffer_id) {
+ ChangeRegister<true>(buffer_id);
+}
+
+template <class P>
+void BufferCache<P>::Unregister(BufferId buffer_id) {
+ ChangeRegister<false>(buffer_id);
+}
+
+template <class P>
+template <bool insert>
+void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
+ const Buffer& buffer = slot_buffers[buffer_id];
+ const VAddr cpu_addr_begin = buffer.CpuAddr();
+ const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
+ const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
+ const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
+ for (u64 page = page_begin; page != page_end; ++page) {
+ if constexpr (insert) {
+ page_table[page] = buffer_id;
+ } else {
+ page_table[page] = BufferId{};
}
}
+}
- bool IsRegionWritten(VAddr start, VAddr end) const {
- const u64 page_end = end >> WRITE_PAGE_BIT;
- for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
- if (written_pages.count(page_start) > 0) {
- return true;
+template <class P>
+void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
+ if (buffer.CpuAddr() == 0) {
+ return;
+ }
+ SynchronizeBufferImpl(buffer, cpu_addr, size);
+}
+
+template <class P>
+void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
+ boost::container::small_vector<BufferCopy, 4> copies;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = total_size_bytes,
+ .dst_offset = range_offset,
+ .size = range_size,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ if (total_size_bytes == 0) {
+ return;
+ }
+ const std::span<BufferCopy> copies_span(copies.data(), copies.size());
+ UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
+}
+
+template <class P>
+void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
+ std::span<BufferCopy> copies) {
+ if constexpr (USE_MEMORY_MAPS) {
+ MappedUploadMemory(buffer, total_size_bytes, copies);
+ } else {
+ ImmediateUploadMemory(buffer, largest_copy, copies);
+ }
+}
+
+template <class P>
+void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
+ std::span<const BufferCopy> copies) {
+ std::span<u8> immediate_buffer;
+ for (const BufferCopy& copy : copies) {
+ std::span<const u8> upload_span;
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+ if (IsRangeGranular(cpu_addr, copy.size)) {
+ upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
+ } else {
+ if (immediate_buffer.empty()) {
+ immediate_buffer = ImmediateBuffer(largest_copy);
}
+ cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+ upload_span = immediate_buffer.subspan(0, copy.size);
}
- return false;
+ buffer.ImmediateUpload(copy.dst_offset, upload_span);
}
-
- void QueueDestruction(std::shared_ptr<Buffer> buffer) {
- buffer->SetEpoch(epoch);
- pending_destruction.push(std::move(buffer));
+}
+
+template <class P>
+void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
+ std::span<BufferCopy> copies) {
+ auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
+ const std::span<u8> staging_pointer = upload_staging.mapped_span;
+ for (BufferCopy& copy : copies) {
+ u8* const src_pointer = staging_pointer.data() + copy.src_offset;
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+ cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
+
+ // Apply the staging offset
+ copy.src_offset += upload_staging.offset;
}
-
- void MarkForAsyncFlush(MapInterval* map) {
- if (!uncommitted_flushes) {
- uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
+ runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+}
+
+template <class P>
+void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
+ const auto scalar_replace = [buffer_id](Binding& binding) {
+ if (binding.buffer_id == buffer_id) {
+ binding.buffer_id = BufferId{};
+ }
+ };
+ const auto replace = [scalar_replace](std::span<Binding> bindings) {
+ std::ranges::for_each(bindings, scalar_replace);
+ };
+ scalar_replace(index_buffer);
+ replace(vertex_buffers);
+ std::ranges::for_each(uniform_buffers, replace);
+ std::ranges::for_each(storage_buffers, replace);
+ replace(transform_feedback_buffers);
+ replace(compute_uniform_buffers);
+ replace(compute_storage_buffers);
+ std::erase(cached_write_buffer_ids, buffer_id);
+
+ // Mark the whole buffer as CPU written to stop tracking CPU writes
+ Buffer& buffer = slot_buffers[buffer_id];
+ buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
+
+ Unregister(buffer_id);
+ delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
+
+ NotifyBufferDeletion();
+}
+
+template <class P>
+void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) {
+ const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) {
+ std::ranges::replace(buffers, old_buffer_id, new_buffer_id);
+ if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) {
+ buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end());
}
- uncommitted_flushes->insert(map);
+ };
+ replace(uncommitted_downloads);
+ std::ranges::for_each(committed_downloads, replace);
+}
+
+template <class P>
+void BufferCache<P>::NotifyBufferDeletion() {
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ dirty_uniform_buffers.fill(~u32{0});
}
+ auto& flags = maxwell3d.dirty.flags;
+ flags[Dirty::IndexBuffer] = true;
+ flags[Dirty::VertexBuffers] = true;
+ for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ flags[Dirty::VertexBuffer0 + index] = true;
+ }
+ has_deleted_buffers = true;
+}
+
+template <class P>
+typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
+ const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
+ const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr || size == 0) {
+ return NULL_BINDING;
+ }
+ // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range.
+ // It exists due to some games like Astral Chain operate out of bounds.
+ // Binding the whole map range would be technically correct, but games have large maps that make
+ // this approach unaffordable for now.
+ static constexpr u32 arbitrary_extra_bytes = 0xc000;
+ const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr));
+ const Binding binding{
+ .cpu_addr = *cpu_addr,
+ .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end),
+ .buffer_id = BufferId{},
+ };
+ return binding;
+}
+
+template <class P>
+std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
+ u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
+ if (IsRangeGranular(cpu_addr, size) ||
+ base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) {
+ return std::span(base_pointer, size);
+ } else {
+ const std::span<u8> span = ImmediateBuffer(size);
+ cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
+ return span;
+ }
+}
- VideoCore::RasterizerInterface& rasterizer;
- Core::System& system;
-
- std::unique_ptr<StreamBuffer> stream_buffer;
- BufferType stream_buffer_handle;
-
- u8* buffer_ptr = nullptr;
- u64 buffer_offset = 0;
- u64 buffer_offset_base = 0;
-
- MapIntervalAllocator mapped_addresses_allocator;
- boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
- mapped_addresses;
-
- std::unordered_map<u64, u32> written_pages;
- std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
-
- std::queue<std::shared_ptr<Buffer>> pending_destruction;
- u64 epoch = 0;
- u64 modified_ticks = 0;
-
- std::vector<u8> staging_buffer;
-
- std::list<MapInterval*> marked_for_unregister;
-
- std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
- std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
-
- std::recursive_mutex mutex;
-};
+template <class P>
+std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
+ if (wanted_capacity > immediate_buffer_capacity) {
+ immediate_buffer_capacity = wanted_capacity;
+ immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity);
+ }
+ return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity);
+}
+
+template <class P>
+bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
+ if constexpr (IS_OPENGL) {
+ return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
+ } else {
+ // Only OpenGL has fast uniform buffers
+ return false;
+ }
+}
} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
deleted file mode 100644
index 62587e18a..000000000
--- a/src/video_core/buffer_cache/map_interval.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <memory>
-
-#include "video_core/buffer_cache/map_interval.h"
-
-namespace VideoCommon {
-
-MapIntervalAllocator::MapIntervalAllocator() {
- FillFreeList(first_chunk);
-}
-
-MapIntervalAllocator::~MapIntervalAllocator() = default;
-
-void MapIntervalAllocator::AllocateNewChunk() {
- *new_chunk = std::make_unique<Chunk>();
- FillFreeList(**new_chunk);
- new_chunk = &(*new_chunk)->next;
-}
-
-void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
- const std::size_t old_size = free_list.size();
- free_list.resize(old_size + chunk.data.size());
- std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
- [](MapInterval& interval) { return &interval; });
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
deleted file mode 100644
index fe0bcd1d8..000000000
--- a/src/video_core/buffer_cache/map_interval.h
+++ /dev/null
@@ -1,92 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <memory>
-#include <vector>
-
-#include <boost/intrusive/set_hook.hpp>
-
-#include "common/common_types.h"
-#include "video_core/gpu.h"
-
-namespace VideoCommon {
-
-struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
- MapInterval() = default;
-
- /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
-
- explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
- : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
-
- bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
- return start <= other_start && other_end <= end;
- }
-
- bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
- return start < other_end && other_start < end;
- }
-
- void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
- is_modified = is_modified_;
- ticks = ticks_;
- }
-
- boost::intrusive::set_member_hook<> member_hook_;
- VAddr start = 0;
- VAddr end = 0;
- GPUVAddr gpu_addr = 0;
- u64 ticks = 0;
- bool is_written = false;
- bool is_modified = false;
- bool is_registered = false;
- bool is_memory_marked = false;
- bool is_sync_pending = false;
-};
-
-struct MapIntervalCompare {
- constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
- return lhs.start < rhs.start;
- }
-};
-
-class MapIntervalAllocator {
-public:
- MapIntervalAllocator();
- ~MapIntervalAllocator();
-
- MapInterval* Allocate() {
- if (free_list.empty()) {
- AllocateNewChunk();
- }
- MapInterval* const interval = free_list.back();
- free_list.pop_back();
- return interval;
- }
-
- void Release(MapInterval* interval) {
- free_list.push_back(interval);
- }
-
-private:
- struct Chunk {
- std::unique_ptr<Chunk> next;
- std::array<MapInterval, 0x8000> data;
- };
-
- void AllocateNewChunk();
-
- void FillFreeList(Chunk& chunk);
-
- std::vector<MapInterval*> free_list;
- std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
-
- Chunk first_chunk;
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
new file mode 100644
index 000000000..33b3c060b
--- /dev/null
+++ b/src/video_core/cdma_pusher.cpp
@@ -0,0 +1,170 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#include <bit>
+#include "command_classes/host1x.h"
+#include "command_classes/nvdec.h"
+#include "command_classes/vic.h"
+#include "video_core/cdma_pusher.h"
+#include "video_core/command_classes/nvdec_common.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra {
+CDmaPusher::CDmaPusher(GPU& gpu_)
+ : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
+ vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
+ host1x_processor(std::make_unique<Host1x>(gpu)),
+ sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
+
+CDmaPusher::~CDmaPusher() = default;
+
+void CDmaPusher::Push(ChCommandHeaderList&& entries) {
+ cdma_queue.push(std::move(entries));
+}
+
+void CDmaPusher::DispatchCalls() {
+ while (!cdma_queue.empty()) {
+ Step();
+ }
+}
+
+void CDmaPusher::Step() {
+ const auto entries{cdma_queue.front()};
+ cdma_queue.pop();
+
+ std::vector<u32> values(entries.size());
+ std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32));
+
+ for (const u32 value : values) {
+ if (mask != 0) {
+ const auto lbs = static_cast<u32>(std::countr_zero(mask));
+ mask &= ~(1U << lbs);
+ ExecuteCommand(static_cast<u32>(offset + lbs), value);
+ continue;
+ } else if (count != 0) {
+ --count;
+ ExecuteCommand(static_cast<u32>(offset), value);
+ if (incrementing) {
+ ++offset;
+ }
+ continue;
+ }
+ const auto mode = static_cast<ChSubmissionMode>((value >> 28) & 0xf);
+ switch (mode) {
+ case ChSubmissionMode::SetClass: {
+ mask = value & 0x3f;
+ offset = (value >> 16) & 0xfff;
+ current_class = static_cast<ChClassId>((value >> 6) & 0x3ff);
+ break;
+ }
+ case ChSubmissionMode::Incrementing:
+ case ChSubmissionMode::NonIncrementing:
+ count = value & 0xffff;
+ offset = (value >> 16) & 0xfff;
+ incrementing = mode == ChSubmissionMode::Incrementing;
+ break;
+ case ChSubmissionMode::Mask:
+ mask = value & 0xffff;
+ offset = (value >> 16) & 0xfff;
+ break;
+ case ChSubmissionMode::Immediate: {
+ const u32 data = value & 0xfff;
+ offset = (value >> 16) & 0xfff;
+ ExecuteCommand(static_cast<u32>(offset), data);
+ break;
+ }
+ default:
+ UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast<u32>(mode));
+ break;
+ }
+ }
+}
+
+void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
+ switch (current_class) {
+ case ChClassId::NvDec:
+ ThiStateWrite(nvdec_thi_state, state_offset, {data});
+ switch (static_cast<ThiMethod>(state_offset)) {
+ case ThiMethod::IncSyncpt: {
+ LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
+ const auto syncpoint_id = static_cast<u32>(data & 0xFF);
+ const auto cond = static_cast<u32>((data >> 8) & 0xFF);
+ if (cond == 0) {
+ sync_manager->Increment(syncpoint_id);
+ } else {
+ sync_manager->SignalDone(
+ sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
+ }
+ break;
+ }
+ case ThiMethod::SetMethod1:
+ LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
+ static_cast<u32>(nvdec_thi_state.method_0));
+ nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0),
+ {data});
+ break;
+ default:
+ break;
+ }
+ break;
+ case ChClassId::GraphicsVic:
+ ThiStateWrite(vic_thi_state, static_cast<u32>(state_offset), {data});
+ switch (static_cast<ThiMethod>(state_offset)) {
+ case ThiMethod::IncSyncpt: {
+ LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
+ const auto syncpoint_id = static_cast<u32>(data & 0xFF);
+ const auto cond = static_cast<u32>((data >> 8) & 0xFF);
+ if (cond == 0) {
+ sync_manager->Increment(syncpoint_id);
+ } else {
+ sync_manager->SignalDone(
+ sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
+ }
+ break;
+ }
+ case ThiMethod::SetMethod1:
+ LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
+ static_cast<u32>(vic_thi_state.method_0), data);
+ vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), {data});
+ break;
+ default:
+ break;
+ }
+ break;
+ case ChClassId::Host1x:
+ // This device is mainly for syncpoint synchronization
+ LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
+ host1x_processor->ProcessMethod(static_cast<Host1x::Method>(state_offset), {data});
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
+ break;
+ }
+}
+
+void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset,
+ const std::vector<u32>& arguments) {
+ u8* const state_offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset;
+ std::memcpy(state_offset_ptr, arguments.data(), sizeof(u32) * arguments.size());
+}
+
+} // namespace Tegra
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
new file mode 100644
index 000000000..e5f212c1a
--- /dev/null
+++ b/src/video_core/cdma_pusher.h
@@ -0,0 +1,136 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+#include <queue>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "video_core/command_classes/sync_manager.h"
+
+namespace Tegra {
+
+class GPU;
+class Nvdec;
+class Vic;
+class Host1x;
+
+enum class ChSubmissionMode : u32 {
+ SetClass = 0,
+ Incrementing = 1,
+ NonIncrementing = 2,
+ Mask = 3,
+ Immediate = 4,
+ Restart = 5,
+ Gather = 6,
+};
+
+enum class ChClassId : u32 {
+ NoClass = 0x0,
+ Host1x = 0x1,
+ VideoEncodeMpeg = 0x20,
+ VideoEncodeNvEnc = 0x21,
+ VideoStreamingVi = 0x30,
+ VideoStreamingIsp = 0x32,
+ VideoStreamingIspB = 0x34,
+ VideoStreamingViI2c = 0x36,
+ GraphicsVic = 0x5d,
+ Graphics3D = 0x60,
+ GraphicsGpu = 0x61,
+ Tsec = 0xe0,
+ TsecB = 0xe1,
+ NvJpg = 0xc0,
+ NvDec = 0xf0
+};
+
+enum class ChMethod : u32 {
+ Empty = 0,
+ SetMethod = 0x10,
+ SetData = 0x11,
+};
+
+union ChCommandHeader {
+ u32 raw;
+ BitField<0, 16, u32> value;
+ BitField<16, 12, ChMethod> method_offset;
+ BitField<28, 4, ChSubmissionMode> submission_mode;
+};
+static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size");
+
+struct ChCommand {
+ ChClassId class_id{};
+ int method_offset{};
+ std::vector<u32> arguments;
+};
+
+using ChCommandHeaderList = std::vector<ChCommandHeader>;
+using ChCommandList = std::vector<ChCommand>;
+
+struct ThiRegisters {
+ u32_le increment_syncpt{};
+ INSERT_PADDING_WORDS(1);
+ u32_le increment_syncpt_error{};
+ u32_le ctx_switch_incremement_syncpt{};
+ INSERT_PADDING_WORDS(4);
+ u32_le ctx_switch{};
+ INSERT_PADDING_WORDS(1);
+ u32_le ctx_syncpt_eof{};
+ INSERT_PADDING_WORDS(5);
+ u32_le method_0{};
+ u32_le method_1{};
+ INSERT_PADDING_WORDS(12);
+ u32_le int_status{};
+ u32_le int_mask{};
+};
+
+enum class ThiMethod : u32 {
+ IncSyncpt = offsetof(ThiRegisters, increment_syncpt) / sizeof(u32),
+ SetMethod0 = offsetof(ThiRegisters, method_0) / sizeof(u32),
+ SetMethod1 = offsetof(ThiRegisters, method_1) / sizeof(u32),
+};
+
+class CDmaPusher {
+public:
+ explicit CDmaPusher(GPU& gpu_);
+ ~CDmaPusher();
+
+ /// Push NVDEC command buffer entries into queue
+ void Push(ChCommandHeaderList&& entries);
+
+ /// Process queued command buffer entries
+ void DispatchCalls();
+
+ /// Process one queue element
+ void Step();
+
+ /// Invoke command class devices to execute the command based on the current state
+ void ExecuteCommand(u32 state_offset, u32 data);
+
+private:
+ /// Write arguments value to the ThiRegisters member at the specified offset
+ void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
+
+ GPU& gpu;
+ std::shared_ptr<Tegra::Nvdec> nvdec_processor;
+ std::unique_ptr<Tegra::Vic> vic_processor;
+ std::unique_ptr<Tegra::Host1x> host1x_processor;
+ std::unique_ptr<SyncptIncrManager> sync_manager;
+ ChClassId current_class{};
+ ThiRegisters vic_thi_state{};
+ ThiRegisters nvdec_thi_state{};
+
+ s32 count{};
+ s32 offset{};
+ u32 mask{};
+ bool incrementing{};
+
+ // Queue of command lists to be processed
+ std::queue<ChCommandHeaderList> cdma_queue;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
new file mode 100644
index 000000000..39bc923a5
--- /dev/null
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -0,0 +1,129 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <fstream>
+#include <vector>
+#include "common/assert.h"
+#include "video_core/command_classes/codecs/codec.h"
+#include "video_core/command_classes/codecs/h264.h"
+#include "video_core/command_classes/codecs/vp9.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+
+extern "C" {
+#include <libavutil/opt.h>
+}
+
+namespace Tegra {
+
+void AVFrameDeleter(AVFrame* ptr) {
+ av_frame_unref(ptr);
+ av_free(ptr);
+}
+
+Codec::Codec(GPU& gpu_)
+ : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
+ vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
+
+Codec::~Codec() {
+ if (!initialized) {
+ return;
+ }
+ // Free libav memory
+ AVFrame* av_frame{nullptr};
+ avcodec_send_packet(av_codec_ctx, nullptr);
+ av_frame = av_frame_alloc();
+ avcodec_receive_frame(av_codec_ctx, av_frame);
+ avcodec_flush_buffers(av_codec_ctx);
+
+ av_frame_unref(av_frame);
+ av_free(av_frame);
+ avcodec_close(av_codec_ctx);
+}
+
+void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
+ LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec);
+ current_codec = codec;
+}
+
+void Codec::StateWrite(u32 offset, u64 arguments) {
+ u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
+ std::memcpy(state_offset, &arguments, sizeof(u64));
+}
+
+void Codec::Decode() {
+ bool is_first_frame = false;
+
+ if (!initialized) {
+ if (current_codec == NvdecCommon::VideoCodec::H264) {
+ av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
+ } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
+ av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
+ } else {
+ LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
+ return;
+ }
+
+ av_codec_ctx = avcodec_alloc_context3(av_codec);
+ av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
+
+ // TODO(ameerj): libavcodec gpu hw acceleration
+
+ const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
+ if (av_error < 0) {
+ LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
+ avcodec_close(av_codec_ctx);
+ return;
+ }
+ initialized = true;
+ is_first_frame = true;
+ }
+ bool vp9_hidden_frame = false;
+
+ AVPacket packet{};
+ av_init_packet(&packet);
+ std::vector<u8> frame_data;
+
+ if (current_codec == NvdecCommon::VideoCodec::H264) {
+ frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
+ } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
+ frame_data = vp9_decoder->ComposeFrameHeader(state);
+ vp9_hidden_frame = vp9_decoder->WasFrameHidden();
+ }
+
+ packet.data = frame_data.data();
+ packet.size = static_cast<int>(frame_data.size());
+
+ avcodec_send_packet(av_codec_ctx, &packet);
+
+ if (!vp9_hidden_frame) {
+ // Only receive/store visible frames
+ AVFramePtr frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
+ avcodec_receive_frame(av_codec_ctx, frame.get());
+ av_frames.push(std::move(frame));
+ // Limit queue to 10 frames. Workaround for ZLA decode and queue spam
+ if (av_frames.size() > 10) {
+ av_frames.pop();
+ }
+ }
+}
+
+AVFramePtr Codec::GetCurrentFrame() {
+ // Sometimes VIC will request more frames than have been decoded.
+ // in this case, return a nullptr and don't overwrite previous frame data
+ if (av_frames.empty()) {
+ return AVFramePtr{nullptr, AVFrameDeleter};
+ }
+
+ AVFramePtr frame = std::move(av_frames.front());
+ av_frames.pop();
+ return frame;
+}
+
+NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
+ return current_codec;
+}
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
new file mode 100644
index 000000000..8a2a6c360
--- /dev/null
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <queue>
+#include "common/common_types.h"
+#include "video_core/command_classes/nvdec_common.h"
+
+extern "C" {
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+#include <libavcodec/avcodec.h>
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+}
+
+namespace Tegra {
+class GPU;
+struct VicRegisters;
+
+void AVFrameDeleter(AVFrame* ptr);
+using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
+
+namespace Decoder {
+class H264;
+class VP9;
+} // namespace Decoder
+
+class Codec {
+public:
+ explicit Codec(GPU& gpu);
+ ~Codec();
+
+ /// Sets NVDEC video stream codec
+ void SetTargetCodec(NvdecCommon::VideoCodec codec);
+
+ /// Populate NvdecRegisters state with argument value at the provided offset
+ void StateWrite(u32 offset, u64 arguments);
+
+ /// Call decoders to construct headers, decode AVFrame with ffmpeg
+ void Decode();
+
+ /// Returns next decoded frame
+ [[nodiscard]] AVFramePtr GetCurrentFrame();
+
+ /// Returns the value of current_codec
+ [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+
+private:
+ bool initialized{};
+ NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
+
+ AVCodec* av_codec{nullptr};
+ AVCodecContext* av_codec_ctx{nullptr};
+
+ GPU& gpu;
+ std::unique_ptr<Decoder::H264> h264_decoder;
+ std::unique_ptr<Decoder::VP9> vp9_decoder;
+
+ NvdecCommon::NvdecRegisters state{};
+ std::queue<AVFramePtr> av_frames{};
+};
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
new file mode 100644
index 000000000..fea6aed98
--- /dev/null
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -0,0 +1,293 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#include <array>
+#include <bit>
+#include "video_core/command_classes/codecs/h264.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Decoder {
+namespace {
+// ZigZag LUTs from libavcodec.
+constexpr std::array<u8, 64> zig_zag_direct{
+ 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48,
+ 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
+ 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
+};
+
+constexpr std::array<u8, 16> zig_zag_scan{
+ 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
+ 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
+};
+} // Anonymous namespace
+
+H264::H264(GPU& gpu_) : gpu(gpu_) {}
+
+H264::~H264() = default;
+
+const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
+ bool is_first_frame) {
+ H264DecoderContext context{};
+ gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
+
+ const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
+ if (!is_first_frame && frame_number != 0) {
+ frame.resize(context.frame_data_size);
+
+ gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
+ } else {
+ /// Encode header
+ H264BitWriter writer{};
+ writer.WriteU(1, 24);
+ writer.WriteU(0, 1);
+ writer.WriteU(3, 2);
+ writer.WriteU(7, 5);
+ writer.WriteU(100, 8);
+ writer.WriteU(0, 8);
+ writer.WriteU(31, 8);
+ writer.WriteUe(0);
+ const auto chroma_format_idc =
+ static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
+ writer.WriteUe(chroma_format_idc);
+ if (chroma_format_idc == 3) {
+ writer.WriteBit(false);
+ }
+
+ writer.WriteUe(0);
+ writer.WriteUe(0);
+ writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
+ writer.WriteBit(false); // Scaling matrix present flag
+
+ const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
+ writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
+ writer.WriteUe(order_cnt_type);
+ if (order_cnt_type == 0) {
+ writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
+ } else if (order_cnt_type == 1) {
+ writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
+
+ writer.WriteSe(0);
+ writer.WriteSe(0);
+ writer.WriteUe(0);
+ }
+
+ const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
+ (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
+
+ writer.WriteUe(16);
+ writer.WriteBit(false);
+ writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
+ writer.WriteUe(pic_height - 1);
+ writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
+
+ if (!context.h264_parameter_set.frame_mbs_only_flag) {
+ writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
+ }
+
+ writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
+ writer.WriteBit(false); // Frame cropping flag
+ writer.WriteBit(false); // VUI parameter present flag
+
+ writer.End();
+
+ // H264 PPS
+ writer.WriteU(1, 24);
+ writer.WriteU(0, 1);
+ writer.WriteU(3, 2);
+ writer.WriteU(8, 5);
+
+ writer.WriteUe(0);
+ writer.WriteUe(0);
+
+ writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
+ writer.WriteBit(false);
+ writer.WriteUe(0);
+ writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
+ writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
+ writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
+ writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
+ s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
+ pic_init_qp = (pic_init_qp << 26) >> 26;
+ writer.WriteSe(pic_init_qp);
+ writer.WriteSe(0);
+ s32 chroma_qp_index_offset =
+ static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
+ chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
+
+ writer.WriteSe(chroma_qp_index_offset);
+ writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
+ writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
+ writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
+ writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
+
+ writer.WriteBit(true);
+
+ for (s32 index = 0; index < 6; index++) {
+ writer.WriteBit(true);
+ const auto matrix_x4 =
+ std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
+ writer.WriteScalingList(matrix_x4, index * 16, 16);
+ }
+
+ if (context.h264_parameter_set.transform_8x8_mode_flag) {
+ for (s32 index = 0; index < 2; index++) {
+ writer.WriteBit(true);
+ const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
+ context.scaling_matrix_8.end());
+
+ writer.WriteScalingList(matrix_x8, index * 64, 64);
+ }
+ }
+
+ s32 chroma_qp_index_offset2 =
+ static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
+ chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
+
+ writer.WriteSe(chroma_qp_index_offset2);
+
+ writer.End();
+
+ const auto& encoded_header = writer.GetByteArray();
+ frame.resize(encoded_header.size() + context.frame_data_size);
+ std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
+
+ gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
+ frame.data() + encoded_header.size(),
+ context.frame_data_size);
+ }
+
+ return frame;
+}
+
+H264BitWriter::H264BitWriter() = default;
+
+H264BitWriter::~H264BitWriter() = default;
+
+void H264BitWriter::WriteU(s32 value, s32 value_sz) {
+ WriteBits(value, value_sz);
+}
+
+void H264BitWriter::WriteSe(s32 value) {
+ WriteExpGolombCodedInt(value);
+}
+
+void H264BitWriter::WriteUe(u32 value) {
+ WriteExpGolombCodedUInt(value);
+}
+
+void H264BitWriter::End() {
+ WriteBit(true);
+ Flush();
+}
+
+void H264BitWriter::WriteBit(bool state) {
+ WriteBits(state ? 1 : 0, 1);
+}
+
+void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
+ std::vector<u8> scan(count);
+ if (count == 16) {
+ std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
+ } else {
+ std::memcpy(scan.data(), zig_zag_direct.data(), scan.size());
+ }
+ u8 last_scale = 8;
+
+ for (s32 index = 0; index < count; index++) {
+ const u8 value = list[start + scan[index]];
+ const s32 delta_scale = static_cast<s32>(value - last_scale);
+
+ WriteSe(delta_scale);
+
+ last_scale = value;
+ }
+}
+
+std::vector<u8>& H264BitWriter::GetByteArray() {
+ return byte_array;
+}
+
+const std::vector<u8>& H264BitWriter::GetByteArray() const {
+ return byte_array;
+}
+
+void H264BitWriter::WriteBits(s32 value, s32 bit_count) {
+ s32 value_pos = 0;
+
+ s32 remaining = bit_count;
+
+ while (remaining > 0) {
+ s32 copy_size = remaining;
+
+ const s32 free_bits = GetFreeBufferBits();
+
+ if (copy_size > free_bits) {
+ copy_size = free_bits;
+ }
+
+ const s32 mask = (1 << copy_size) - 1;
+
+ const s32 src_shift = (bit_count - value_pos) - copy_size;
+ const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
+
+ buffer |= ((value >> src_shift) & mask) << dst_shift;
+
+ value_pos += copy_size;
+ buffer_pos += copy_size;
+ remaining -= copy_size;
+ }
+}
+
+void H264BitWriter::WriteExpGolombCodedInt(s32 value) {
+ const s32 sign = value <= 0 ? 0 : 1;
+ if (value < 0) {
+ value = -value;
+ }
+ value = (value << 1) - sign;
+ WriteExpGolombCodedUInt(value);
+}
+
+void H264BitWriter::WriteExpGolombCodedUInt(u32 value) {
+ const s32 size = 32 - std::countl_zero(value + 1);
+ WriteBits(1, size);
+
+ value -= (1U << (size - 1)) - 1;
+ WriteBits(static_cast<s32>(value), size - 1);
+}
+
+s32 H264BitWriter::GetFreeBufferBits() {
+ if (buffer_pos == buffer_size) {
+ Flush();
+ }
+
+ return buffer_size - buffer_pos;
+}
+
+void H264BitWriter::Flush() {
+ if (buffer_pos == 0) {
+ return;
+ }
+ byte_array.push_back(static_cast<u8>(buffer));
+
+ buffer = 0;
+ buffer_pos = 0;
+}
+} // namespace Tegra::Decoder
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
new file mode 100644
index 000000000..0f3a1d9f3
--- /dev/null
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -0,0 +1,118 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#pragma once
+
+#include <vector>
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/command_classes/nvdec_common.h"
+
+namespace Tegra {
+class GPU;
+namespace Decoder {
+
+class H264BitWriter {
+public:
+ H264BitWriter();
+ ~H264BitWriter();
+
+ /// The following Write methods are based on clause 9.1 in the H.264 specification.
+ /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
+ void WriteU(s32 value, s32 value_sz);
+ void WriteSe(s32 value);
+ void WriteUe(u32 value);
+
+ /// Finalize the bitstream
+ void End();
+
+ /// append a bit to the stream, equivalent value to the state parameter
+ void WriteBit(bool state);
+
+ /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
+ /// Writes the scaling matrices of the sream
+ void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
+
+ /// Return the bitstream as a vector.
+ [[nodiscard]] std::vector<u8>& GetByteArray();
+ [[nodiscard]] const std::vector<u8>& GetByteArray() const;
+
+private:
+ void WriteBits(s32 value, s32 bit_count);
+ void WriteExpGolombCodedInt(s32 value);
+ void WriteExpGolombCodedUInt(u32 value);
+ [[nodiscard]] s32 GetFreeBufferBits();
+ void Flush();
+
+ s32 buffer_size{8};
+
+ s32 buffer{};
+ s32 buffer_pos{};
+ std::vector<u8> byte_array;
+};
+
+class H264 {
+public:
+ explicit H264(GPU& gpu);
+ ~H264();
+
+ /// Compose the H264 header of the frame for FFmpeg decoding
+ [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(
+ const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
+
+private:
+ struct H264ParameterSet {
+ u32 log2_max_pic_order_cnt{};
+ u32 delta_pic_order_always_zero_flag{};
+ u32 frame_mbs_only_flag{};
+ u32 pic_width_in_mbs{};
+ u32 pic_height_in_map_units{};
+ INSERT_PADDING_WORDS(1);
+ u32 entropy_coding_mode_flag{};
+ u32 bottom_field_pic_order_flag{};
+ u32 num_refidx_l0_default_active{};
+ u32 num_refidx_l1_default_active{};
+ u32 deblocking_filter_control_flag{};
+ u32 redundant_pic_count_flag{};
+ u32 transform_8x8_mode_flag{};
+ INSERT_PADDING_WORDS(9);
+ u64 flags{};
+ u32 frame_number{};
+ u32 frame_number2{};
+ };
+ static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
+
+ struct H264DecoderContext {
+ INSERT_PADDING_BYTES(0x48);
+ u32 frame_data_size{};
+ INSERT_PADDING_BYTES(0xc);
+ H264ParameterSet h264_parameter_set{};
+ INSERT_PADDING_BYTES(0x100);
+ std::array<u8, 0x60> scaling_matrix_4;
+ std::array<u8, 0x80> scaling_matrix_8;
+ };
+ static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
+
+ std::vector<u8> frame;
+ GPU& gpu;
+};
+
+} // namespace Decoder
+} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
new file mode 100644
index 000000000..59e586695
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -0,0 +1,989 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring> // for std::memcpy
+#include <numeric>
+#include "video_core/command_classes/codecs/vp9.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Decoder {
+namespace {
+// Default compressed header probabilities once frame context resets
+constexpr Vp9EntropyProbs default_probs{
+ .y_mode_prob{
+ 65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78,
+ 173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29,
+ },
+ .partition_prob{
+ 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0,
+ 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0,
+ 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0,
+ 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0,
+ },
+ .coef_probs{
+ 195, 29, 183, 84, 49, 136, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 31, 107, 169, 35, 99, 159, 17, 82, 140, 8, 66, 114, 2, 44, 76, 1, 19, 32,
+ 40, 132, 201, 29, 114, 187, 13, 91, 157, 7, 75, 127, 3, 58, 95, 1, 28, 47,
+ 69, 142, 221, 42, 122, 201, 15, 91, 159, 6, 67, 121, 1, 42, 77, 1, 17, 31,
+ 102, 148, 228, 67, 117, 204, 17, 82, 154, 6, 59, 114, 2, 39, 75, 1, 15, 29,
+ 156, 57, 233, 119, 57, 212, 58, 48, 163, 29, 40, 124, 12, 30, 81, 3, 12, 31,
+ 191, 107, 226, 124, 117, 204, 25, 99, 155, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 29, 148, 210, 37, 126, 194, 8, 93, 157, 2, 68, 118, 1, 39, 69, 1, 17, 33,
+ 41, 151, 213, 27, 123, 193, 3, 82, 144, 1, 58, 105, 1, 32, 60, 1, 13, 26,
+ 59, 159, 220, 23, 126, 198, 4, 88, 151, 1, 66, 114, 1, 38, 71, 1, 18, 34,
+ 114, 136, 232, 51, 114, 207, 11, 83, 155, 3, 56, 105, 1, 33, 65, 1, 17, 34,
+ 149, 65, 234, 121, 57, 215, 61, 49, 166, 28, 36, 114, 12, 25, 76, 3, 16, 42,
+ 214, 49, 220, 132, 63, 188, 42, 65, 137, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 85, 137, 221, 104, 131, 216, 49, 111, 192, 21, 87, 155, 2, 49, 87, 1, 16, 28,
+ 89, 163, 230, 90, 137, 220, 29, 100, 183, 10, 70, 135, 2, 42, 81, 1, 17, 33,
+ 108, 167, 237, 55, 133, 222, 15, 97, 179, 4, 72, 135, 1, 45, 85, 1, 19, 38,
+ 124, 146, 240, 66, 124, 224, 17, 88, 175, 4, 58, 122, 1, 36, 75, 1, 18, 37,
+ 141, 79, 241, 126, 70, 227, 66, 58, 182, 30, 44, 136, 12, 34, 96, 2, 20, 47,
+ 229, 99, 249, 143, 111, 235, 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 82, 158, 236, 94, 146, 224, 25, 117, 191, 9, 87, 149, 3, 56, 99, 1, 33, 57,
+ 83, 167, 237, 68, 145, 222, 10, 103, 177, 2, 72, 131, 1, 41, 79, 1, 20, 39,
+ 99, 167, 239, 47, 141, 224, 10, 104, 178, 2, 73, 133, 1, 44, 85, 1, 22, 47,
+ 127, 145, 243, 71, 129, 228, 17, 93, 177, 3, 61, 124, 1, 41, 84, 1, 21, 52,
+ 157, 78, 244, 140, 72, 231, 69, 58, 184, 31, 44, 137, 14, 38, 105, 8, 23, 61,
+ 125, 34, 187, 52, 41, 133, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 37, 109, 153, 51, 102, 147, 23, 87, 128, 8, 67, 101, 1, 41, 63, 1, 19, 29,
+ 31, 154, 185, 17, 127, 175, 6, 96, 145, 2, 73, 114, 1, 51, 82, 1, 28, 45,
+ 23, 163, 200, 10, 131, 185, 2, 93, 148, 1, 67, 111, 1, 41, 69, 1, 14, 24,
+ 29, 176, 217, 12, 145, 201, 3, 101, 156, 1, 69, 111, 1, 39, 63, 1, 14, 23,
+ 57, 192, 233, 25, 154, 215, 6, 109, 167, 3, 78, 118, 1, 48, 69, 1, 21, 29,
+ 202, 105, 245, 108, 106, 216, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 33, 172, 219, 64, 149, 206, 14, 117, 177, 5, 90, 141, 2, 61, 95, 1, 37, 57,
+ 33, 179, 220, 11, 140, 198, 1, 89, 148, 1, 60, 104, 1, 33, 57, 1, 12, 21,
+ 30, 181, 221, 8, 141, 198, 1, 87, 145, 1, 58, 100, 1, 31, 55, 1, 12, 20,
+ 32, 186, 224, 7, 142, 198, 1, 86, 143, 1, 58, 100, 1, 31, 55, 1, 12, 22,
+ 57, 192, 227, 20, 143, 204, 3, 96, 154, 1, 68, 112, 1, 42, 69, 1, 19, 32,
+ 212, 35, 215, 113, 47, 169, 29, 48, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 74, 129, 203, 106, 120, 203, 49, 107, 178, 19, 84, 144, 4, 50, 84, 1, 15, 25,
+ 71, 172, 217, 44, 141, 209, 15, 102, 173, 6, 76, 133, 2, 51, 89, 1, 24, 42,
+ 64, 185, 231, 31, 148, 216, 8, 103, 175, 3, 74, 131, 1, 46, 81, 1, 18, 30,
+ 65, 196, 235, 25, 157, 221, 5, 105, 174, 1, 67, 120, 1, 38, 69, 1, 15, 30,
+ 65, 204, 238, 30, 156, 224, 7, 107, 177, 2, 70, 124, 1, 42, 73, 1, 18, 34,
+ 225, 86, 251, 144, 104, 235, 42, 99, 181, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 85, 175, 239, 112, 165, 229, 29, 136, 200, 12, 103, 162, 6, 77, 123, 2, 53, 84,
+ 75, 183, 239, 30, 155, 221, 3, 106, 171, 1, 74, 128, 1, 44, 76, 1, 17, 28,
+ 73, 185, 240, 27, 159, 222, 2, 107, 172, 1, 75, 127, 1, 42, 73, 1, 17, 29,
+ 62, 190, 238, 21, 159, 222, 2, 107, 172, 1, 72, 122, 1, 40, 71, 1, 18, 32,
+ 61, 199, 240, 27, 161, 226, 4, 113, 180, 1, 76, 129, 1, 46, 80, 1, 23, 41,
+ 7, 27, 153, 5, 30, 95, 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 50, 75, 127, 57, 75, 124, 27, 67, 108, 10, 54, 86, 1, 33, 52, 1, 12, 18,
+ 43, 125, 151, 26, 108, 148, 7, 83, 122, 2, 59, 89, 1, 38, 60, 1, 17, 27,
+ 23, 144, 163, 13, 112, 154, 2, 75, 117, 1, 50, 81, 1, 31, 51, 1, 14, 23,
+ 18, 162, 185, 6, 123, 171, 1, 78, 125, 1, 51, 86, 1, 31, 54, 1, 14, 23,
+ 15, 199, 227, 3, 150, 204, 1, 91, 146, 1, 55, 95, 1, 30, 53, 1, 11, 20,
+ 19, 55, 240, 19, 59, 196, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 41, 166, 207, 104, 153, 199, 31, 123, 181, 14, 101, 152, 5, 72, 106, 1, 36, 52,
+ 35, 176, 211, 12, 131, 190, 2, 88, 144, 1, 60, 101, 1, 36, 60, 1, 16, 28,
+ 28, 183, 213, 8, 134, 191, 1, 86, 142, 1, 56, 96, 1, 30, 53, 1, 12, 20,
+ 20, 190, 215, 4, 135, 192, 1, 84, 139, 1, 53, 91, 1, 28, 49, 1, 11, 20,
+ 13, 196, 216, 2, 137, 192, 1, 86, 143, 1, 57, 99, 1, 32, 56, 1, 13, 24,
+ 211, 29, 217, 96, 47, 156, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 78, 120, 193, 111, 116, 186, 46, 102, 164, 15, 80, 128, 2, 49, 76, 1, 18, 28,
+ 71, 161, 203, 42, 132, 192, 10, 98, 150, 3, 69, 109, 1, 44, 70, 1, 18, 29,
+ 57, 186, 211, 30, 140, 196, 4, 93, 146, 1, 62, 102, 1, 38, 65, 1, 16, 27,
+ 47, 199, 217, 14, 145, 196, 1, 88, 142, 1, 57, 98, 1, 36, 62, 1, 15, 26,
+ 26, 219, 229, 5, 155, 207, 1, 94, 151, 1, 60, 104, 1, 36, 62, 1, 16, 28,
+ 233, 29, 248, 146, 47, 220, 43, 52, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 100, 163, 232, 179, 161, 222, 63, 142, 204, 37, 113, 174, 26, 89, 137, 18, 68, 97,
+ 85, 181, 230, 32, 146, 209, 7, 100, 164, 3, 71, 121, 1, 45, 77, 1, 18, 30,
+ 65, 187, 230, 20, 148, 207, 2, 97, 159, 1, 68, 116, 1, 40, 70, 1, 14, 29,
+ 40, 194, 227, 8, 147, 204, 1, 94, 155, 1, 65, 112, 1, 39, 66, 1, 14, 26,
+ 16, 208, 228, 3, 151, 207, 1, 98, 160, 1, 67, 117, 1, 41, 74, 1, 17, 31,
+ 17, 38, 140, 7, 34, 80, 1, 17, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 37, 75, 128, 41, 76, 128, 26, 66, 116, 12, 52, 94, 2, 32, 55, 1, 10, 16,
+ 50, 127, 154, 37, 109, 152, 16, 82, 121, 5, 59, 85, 1, 35, 54, 1, 13, 20,
+ 40, 142, 167, 17, 110, 157, 2, 71, 112, 1, 44, 72, 1, 27, 45, 1, 11, 17,
+ 30, 175, 188, 9, 124, 169, 1, 74, 116, 1, 48, 78, 1, 30, 49, 1, 11, 18,
+ 10, 222, 223, 2, 150, 194, 1, 83, 128, 1, 48, 79, 1, 27, 45, 1, 11, 17,
+ 36, 41, 235, 29, 36, 193, 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 85, 165, 222, 177, 162, 215, 110, 135, 195, 57, 113, 168, 23, 83, 120, 10, 49, 61,
+ 85, 190, 223, 36, 139, 200, 5, 90, 146, 1, 60, 103, 1, 38, 65, 1, 18, 30,
+ 72, 202, 223, 23, 141, 199, 2, 86, 140, 1, 56, 97, 1, 36, 61, 1, 16, 27,
+ 55, 218, 225, 13, 145, 200, 1, 86, 141, 1, 57, 99, 1, 35, 61, 1, 13, 22,
+ 15, 235, 212, 1, 132, 184, 1, 84, 139, 1, 57, 97, 1, 34, 56, 1, 14, 23,
+ 181, 21, 201, 61, 37, 123, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 47, 106, 172, 95, 104, 173, 42, 93, 159, 18, 77, 131, 4, 50, 81, 1, 17, 23,
+ 62, 147, 199, 44, 130, 189, 28, 102, 154, 18, 75, 115, 2, 44, 65, 1, 12, 19,
+ 55, 153, 210, 24, 130, 194, 3, 93, 146, 1, 61, 97, 1, 31, 50, 1, 10, 16,
+ 49, 186, 223, 17, 148, 204, 1, 96, 142, 1, 53, 83, 1, 26, 44, 1, 11, 17,
+ 13, 217, 212, 2, 136, 180, 1, 78, 124, 1, 50, 83, 1, 29, 49, 1, 14, 23,
+ 197, 13, 247, 82, 17, 222, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 126, 186, 247, 234, 191, 243, 176, 177, 234, 104, 158, 220, 66, 128, 186, 55, 90, 137,
+ 111, 197, 242, 46, 158, 219, 9, 104, 171, 2, 65, 125, 1, 44, 80, 1, 17, 91,
+ 104, 208, 245, 39, 168, 224, 3, 109, 162, 1, 79, 124, 1, 50, 102, 1, 43, 102,
+ 84, 220, 246, 31, 177, 231, 2, 115, 180, 1, 79, 134, 1, 55, 77, 1, 60, 79,
+ 43, 243, 240, 8, 180, 217, 1, 115, 166, 1, 84, 121, 1, 51, 67, 1, 16, 6,
+ },
+ .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144},
+ .inter_mode_prob{
+ 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94,
+ 66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0,
+ },
+ .intra_inter_prob{9, 102, 187, 225},
+ .comp_inter_prob{9, 102, 187, 225, 0},
+ .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247},
+ .comp_ref_prob{50, 126, 123, 221, 226},
+ .tx_32x32_prob{3, 136, 37, 5, 52, 13},
+ .tx_16x16_prob{20, 152, 15, 101},
+ .tx_8x8_prob{100, 66},
+ .skip_probs{192, 128, 64},
+ .joints{32, 64, 96},
+ .sign{128, 128},
+ .classes{
+ 224, 144, 192, 168, 192, 176, 192, 198, 198, 245,
+ 216, 128, 176, 160, 176, 176, 192, 198, 198, 208,
+ },
+ .class_0{216, 208},
+ .prob_bits{
+ 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
+ 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
+ },
+ .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64},
+ .fr{64, 96, 64, 64, 96, 64},
+ .class_0_hp{160, 160},
+ .high_precision{128, 128},
+};
+
+constexpr std::array<s32, 256> norm_lut{
+ 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+constexpr std::array<s32, 254> map_lut{
+ 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
+ 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54,
+ 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
+ 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
+ 108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124,
+ 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142,
+ 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159,
+ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177,
+ 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212,
+ 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17,
+ 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 19,
+};
+
+// 6.2.14 Tile size calculation
+
+[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) {
+ const s32 sb64_cols = (frame_width + 63) / 64;
+ s32 min_log2 = 0;
+
+ while ((64 << min_log2) < sb64_cols) {
+ min_log2++;
+ }
+
+ return min_log2;
+}
+
+[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) {
+ const s32 sb64_cols = (frame_width + 63) / 64;
+ s32 max_log2 = 1;
+
+ while ((sb64_cols >> max_log2) >= 4) {
+ max_log2++;
+ }
+
+ return max_log2 - 1;
+}
+
+// Recenters probability. Based on section 6.3.6 of VP9 Specification
+[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) {
+ if (new_prob > old_prob * 2) {
+ return new_prob;
+ }
+
+ if (new_prob >= old_prob) {
+ return (new_prob - old_prob) * 2;
+ }
+
+ return (old_prob - new_prob) * 2 - 1;
+}
+
+// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
+[[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) {
+ new_prob--;
+ old_prob--;
+
+ std::size_t index{};
+
+ if (old_prob * 2 <= 0xff) {
+ index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
+ } else {
+ index = static_cast<std::size_t>(
+ std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
+ }
+
+ return map_lut[index];
+}
+} // Anonymous namespace
+
+VP9::VP9(GPU& gpu_) : gpu{gpu_} {}
+
+VP9::~VP9() = default;
+
+void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
+ const bool update = new_prob != old_prob;
+
+ writer.Write(update, diff_update_probability);
+
+ if (update) {
+ WriteProbabilityDelta(writer, new_prob, old_prob);
+ }
+}
+template <typename T, std::size_t N>
+void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
+ const std::array<T, N>& old_prob) {
+ for (std::size_t offset = 0; offset < new_prob.size(); ++offset) {
+ WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]);
+ }
+}
+
+template <typename T, std::size_t N>
+void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
+ const std::array<T, N>& old_prob) {
+ for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) {
+ WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]);
+ WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]);
+ WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]);
+ }
+}
+
+void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
+ const int delta = RemapProbability(new_prob, old_prob);
+
+ EncodeTermSubExp(writer, delta);
+}
+
+void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) {
+ if (WriteLessThan(writer, value, 16)) {
+ writer.Write(value, 4);
+ } else if (WriteLessThan(writer, value, 32)) {
+ writer.Write(value - 16, 4);
+ } else if (WriteLessThan(writer, value, 64)) {
+ writer.Write(value - 32, 5);
+ } else {
+ value -= 64;
+
+ constexpr s32 size = 8;
+
+ const s32 mask = (1 << size) - 191;
+
+ const s32 delta = value - mask;
+
+ if (delta < 0) {
+ writer.Write(value, size - 1);
+ } else {
+ writer.Write(delta / 2 + mask, size - 1);
+ writer.Write(delta & 1, 1);
+ }
+ }
+}
+
+bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) {
+ const bool is_lt = value < test;
+ writer.Write(!is_lt);
+ return is_lt;
+}
+
+void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
+ const std::array<u8, 1728>& new_prob,
+ const std::array<u8, 1728>& old_prob) {
+ constexpr u32 block_bytes = 2 * 2 * 6 * 6 * 3;
+
+ const auto needs_update = [&](u32 base_index) {
+ return !std::equal(new_prob.begin() + base_index,
+ new_prob.begin() + base_index + block_bytes,
+ old_prob.begin() + base_index);
+ };
+
+ for (u32 block_index = 0; block_index < 4; block_index++) {
+ const u32 base_index = block_index * block_bytes;
+ const bool update = needs_update(base_index);
+ writer.Write(update);
+
+ if (update) {
+ u32 index = base_index;
+ for (s32 i = 0; i < 2; i++) {
+ for (s32 j = 0; j < 2; j++) {
+ for (s32 k = 0; k < 6; k++) {
+ for (s32 l = 0; l < 6; l++) {
+ if (k != 0 || l < 3) {
+ WriteProbabilityUpdate(writer, new_prob[index + 0],
+ old_prob[index + 0]);
+ WriteProbabilityUpdate(writer, new_prob[index + 1],
+ old_prob[index + 1]);
+ WriteProbabilityUpdate(writer, new_prob[index + 2],
+ old_prob[index + 2]);
+ }
+ index += 3;
+ }
+ }
+ }
+ }
+ }
+ if (block_index == static_cast<u32>(tx_mode)) {
+ break;
+ }
+ }
+}
+
+void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
+ const bool update = new_prob != old_prob;
+ writer.Write(update, diff_update_probability);
+
+ if (update) {
+ writer.Write(new_prob >> 1, 7);
+ }
+}
+
+Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
+ PictureInfo picture_info{};
+ gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
+ Vp9PictureInfo vp9_info = picture_info.Convert();
+
+ InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
+
+ // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
+ // order: last, golden, altref, current. It may be worthwhile to track the updates done here
+ // to avoid buffering frame data needed for reference frame updating in the header composition.
+ std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64));
+
+ return vp9_info;
+}
+
+void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
+ EntropyProbs entropy{};
+ gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
+ entropy.Convert(dst);
+}
+
+Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) {
+ Vp9FrameContainer current_frame{};
+ {
+ gpu.SyncGuestHost();
+ current_frame.info = GetVp9PictureInfo(state);
+ current_frame.bit_stream.resize(current_frame.info.bitstream_size);
+ gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
+ current_frame.info.bitstream_size);
+ }
+ // Buffer two frames, saving the last show frame info
+ if (!next_next_frame.bit_stream.empty()) {
+ Vp9FrameContainer temp{
+ .info = current_frame.info,
+ .bit_stream = std::move(current_frame.bit_stream),
+ };
+ next_next_frame.info.show_frame = current_frame.info.last_frame_shown;
+ current_frame.info = next_next_frame.info;
+ current_frame.bit_stream = std::move(next_next_frame.bit_stream);
+ next_next_frame = std::move(temp);
+
+ if (!next_frame.bit_stream.empty()) {
+ Vp9FrameContainer temp2{
+ .info = current_frame.info,
+ .bit_stream = std::move(current_frame.bit_stream),
+ };
+ next_frame.info.show_frame = current_frame.info.last_frame_shown;
+ current_frame.info = next_frame.info;
+ current_frame.bit_stream = std::move(next_frame.bit_stream);
+ next_frame = std::move(temp2);
+ } else {
+ next_frame.info = current_frame.info;
+ next_frame.bit_stream = std::move(current_frame.bit_stream);
+ }
+ } else {
+ next_next_frame.info = current_frame.info;
+ next_next_frame.bit_stream = std::move(current_frame.bit_stream);
+ }
+ return current_frame;
+}
+
+std::vector<u8> VP9::ComposeCompressedHeader() {
+ VpxRangeEncoder writer{};
+ const bool update_probs = current_frame_info.show_frame && !current_frame_info.is_key_frame;
+ if (!current_frame_info.lossless) {
+ if (static_cast<u32>(current_frame_info.transform_mode) >= 3) {
+ writer.Write(3, 2);
+ writer.Write(current_frame_info.transform_mode == 4);
+ } else {
+ writer.Write(current_frame_info.transform_mode, 2);
+ }
+ }
+
+ if (current_frame_info.transform_mode == 4) {
+ // tx_mode_probs() in the spec
+ WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob,
+ prev_frame_probs.tx_8x8_prob);
+ WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob,
+ prev_frame_probs.tx_16x16_prob);
+ WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob,
+ prev_frame_probs.tx_32x32_prob);
+ if (update_probs) {
+ prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob;
+ prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob;
+ prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob;
+ }
+ }
+ // read_coef_probs() in the spec
+ WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode,
+ current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs);
+ // read_skip_probs() in the spec
+ WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs,
+ prev_frame_probs.skip_probs);
+
+ if (update_probs) {
+ prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs;
+ prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs;
+ }
+
+ if (!current_frame_info.intra_only) {
+ // read_inter_probs() in the spec
+ WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob,
+ prev_frame_probs.inter_mode_prob);
+
+ if (current_frame_info.interp_filter == 4) {
+ // read_interp_filter_probs() in the spec
+ WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob,
+ prev_frame_probs.switchable_interp_prob);
+ if (update_probs) {
+ prev_frame_probs.switchable_interp_prob =
+ current_frame_info.entropy.switchable_interp_prob;
+ }
+ }
+
+ // read_is_inter_probs() in the spec
+ WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob,
+ prev_frame_probs.intra_inter_prob);
+
+ // frame_reference_mode() in the spec
+ if ((current_frame_info.ref_frame_sign_bias[1] & 1) !=
+ (current_frame_info.ref_frame_sign_bias[2] & 1) ||
+ (current_frame_info.ref_frame_sign_bias[1] & 1) !=
+ (current_frame_info.ref_frame_sign_bias[3] & 1)) {
+ if (current_frame_info.reference_mode >= 1) {
+ writer.Write(1, 1);
+ writer.Write(current_frame_info.reference_mode == 2);
+ } else {
+ writer.Write(0, 1);
+ }
+ }
+
+ // frame_reference_mode_probs() in the spec
+ if (current_frame_info.reference_mode == 2) {
+ WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob,
+ prev_frame_probs.comp_inter_prob);
+ if (update_probs) {
+ prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob;
+ }
+ }
+
+ if (current_frame_info.reference_mode != 1) {
+ WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob,
+ prev_frame_probs.single_ref_prob);
+ if (update_probs) {
+ prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob;
+ }
+ }
+
+ if (current_frame_info.reference_mode != 0) {
+ WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob,
+ prev_frame_probs.comp_ref_prob);
+ if (update_probs) {
+ prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob;
+ }
+ }
+
+ // read_y_mode_probs
+ for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size();
+ ++index) {
+ WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index],
+ prev_frame_probs.y_mode_prob[index]);
+ }
+
+ // read_partition_probs
+ WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob,
+ prev_frame_probs.partition_prob);
+
+ // mv_probs
+ for (s32 i = 0; i < 3; i++) {
+ WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i],
+ prev_frame_probs.joints[i]);
+ }
+ if (update_probs) {
+ prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob;
+ prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob;
+ prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob;
+ prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob;
+ prev_frame_probs.joints = current_frame_info.entropy.joints;
+ }
+
+ for (s32 i = 0; i < 2; i++) {
+ WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i],
+ prev_frame_probs.sign[i]);
+ for (s32 j = 0; j < 10; j++) {
+ const int index = i * 10 + j;
+ WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index],
+ prev_frame_probs.classes[index]);
+ }
+ WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i],
+ prev_frame_probs.class_0[i]);
+
+ for (s32 j = 0; j < 10; j++) {
+ const int index = i * 10 + j;
+ WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index],
+ prev_frame_probs.prob_bits[index]);
+ }
+ }
+
+ for (s32 i = 0; i < 2; i++) {
+ for (s32 j = 0; j < 2; j++) {
+ for (s32 k = 0; k < 3; k++) {
+ const int index = i * 2 * 3 + j * 3 + k;
+ WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index],
+ prev_frame_probs.class_0_fr[index]);
+ }
+ }
+
+ for (s32 j = 0; j < 3; j++) {
+ const int index = i * 3 + j;
+ WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index],
+ prev_frame_probs.fr[index]);
+ }
+ }
+
+ if (current_frame_info.allow_high_precision_mv) {
+ for (s32 index = 0; index < 2; index++) {
+ WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index],
+ prev_frame_probs.class_0_hp[index]);
+ WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index],
+ prev_frame_probs.high_precision[index]);
+ }
+ }
+
+ // save previous probs
+ if (update_probs) {
+ prev_frame_probs.sign = current_frame_info.entropy.sign;
+ prev_frame_probs.classes = current_frame_info.entropy.classes;
+ prev_frame_probs.class_0 = current_frame_info.entropy.class_0;
+ prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits;
+ prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr;
+ prev_frame_probs.fr = current_frame_info.entropy.fr;
+ prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp;
+ prev_frame_probs.high_precision = current_frame_info.entropy.high_precision;
+ }
+ }
+ writer.End();
+ return writer.GetBuffer();
+}
+
+VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
+ VpxBitStreamWriter uncomp_writer{};
+
+ uncomp_writer.WriteU(2, 2); // Frame marker.
+ uncomp_writer.WriteU(0, 2); // Profile.
+ uncomp_writer.WriteBit(false); // Show existing frame.
+ uncomp_writer.WriteBit(!current_frame_info.is_key_frame); // is key frame?
+ uncomp_writer.WriteBit(current_frame_info.show_frame); // show frame?
+ uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // error reslience
+
+ if (current_frame_info.is_key_frame) {
+ uncomp_writer.WriteU(frame_sync_code, 24);
+ uncomp_writer.WriteU(0, 3); // Color space.
+ uncomp_writer.WriteU(0, 1); // Color range.
+ uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
+ uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
+ uncomp_writer.WriteBit(false); // Render and frame size different.
+
+ // Reset context
+ prev_frame_probs = default_probs;
+ swap_next_golden = false;
+ loop_filter_ref_deltas.fill(0);
+ loop_filter_mode_deltas.fill(0);
+
+ // allow frames offsets to stabilize before checking for golden frames
+ grace_period = 4;
+
+ // On key frames, all frame slots are set to the current frame,
+ // so the value of the selected slot doesn't really matter.
+ frame_ctxs.fill({current_frame_number, false, default_probs});
+
+ // intra only, meaning the frame can be recreated with no other references
+ current_frame_info.intra_only = true;
+
+ } else {
+
+ if (!current_frame_info.show_frame) {
+ uncomp_writer.WriteBit(current_frame_info.intra_only);
+ if (!current_frame_info.last_frame_was_key) {
+ swap_next_golden = !swap_next_golden;
+ }
+ } else {
+ current_frame_info.intra_only = false;
+ }
+ if (!current_frame_info.error_resilient_mode) {
+ uncomp_writer.WriteU(0, 2); // Reset frame context.
+ }
+
+ // Last, Golden, Altref frames
+ std::array<s32, 3> ref_frame_index{0, 1, 2};
+
+ // Set when next frame is hidden
+ // altref and golden references are swapped
+ if (swap_next_golden) {
+ ref_frame_index = std::array<s32, 3>{0, 2, 1};
+ }
+
+ // update Last Frame
+ u64 refresh_frame_flags = 1;
+
+ // golden frame may refresh, determined if the next golden frame offset is changed
+ bool golden_refresh = false;
+ if (grace_period <= 0) {
+ for (s32 index = 1; index < 3; ++index) {
+ if (current_frame_info.frame_offsets[index] !=
+ next_frame.info.frame_offsets[index]) {
+ current_frame_info.refresh_frame[index] = true;
+ golden_refresh = true;
+ grace_period = 3;
+ }
+ }
+ }
+
+ if (current_frame_info.show_frame &&
+ (!next_frame.info.show_frame || next_frame.info.is_key_frame)) {
+ // Update golden frame
+ refresh_frame_flags = swap_next_golden ? 2 : 4;
+ }
+
+ if (!current_frame_info.show_frame) {
+ // Update altref
+ refresh_frame_flags = swap_next_golden ? 2 : 4;
+ } else if (golden_refresh) {
+ refresh_frame_flags = 3;
+ }
+
+ if (current_frame_info.intra_only) {
+ uncomp_writer.WriteU(frame_sync_code, 24);
+ uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
+ uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
+ uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
+ uncomp_writer.WriteBit(false); // Render and frame size different.
+ } else {
+ uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
+
+ for (s32 index = 1; index < 4; index++) {
+ uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
+ uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
+ }
+
+ uncomp_writer.WriteBit(true); // Frame size with refs.
+ uncomp_writer.WriteBit(false); // Render and frame size different.
+ uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
+ uncomp_writer.WriteBit(current_frame_info.interp_filter == 4);
+
+ if (current_frame_info.interp_filter != 4) {
+ uncomp_writer.WriteU(current_frame_info.interp_filter, 2);
+ }
+ }
+ }
+
+ if (!current_frame_info.error_resilient_mode) {
+ uncomp_writer.WriteBit(true); // Refresh frame context. where do i get this info from?
+ uncomp_writer.WriteBit(true); // Frame parallel decoding mode.
+ }
+
+ int frame_ctx_idx = 0;
+ if (!current_frame_info.show_frame) {
+ frame_ctx_idx = 1;
+ }
+
+ uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
+ prev_frame_probs =
+ frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header
+ frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy};
+
+ uncomp_writer.WriteU(current_frame_info.first_level, 6);
+ uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
+ uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled);
+
+ if (current_frame_info.mode_ref_delta_enabled) {
+ // check if ref deltas are different, update accordingly
+ std::array<bool, 4> update_loop_filter_ref_deltas;
+ std::array<bool, 2> update_loop_filter_mode_deltas;
+
+ bool loop_filter_delta_update = false;
+
+ for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
+ const s8 old_deltas = loop_filter_ref_deltas[index];
+ const s8 new_deltas = current_frame_info.ref_deltas[index];
+ const bool differing_delta = old_deltas != new_deltas;
+
+ update_loop_filter_ref_deltas[index] = differing_delta;
+ loop_filter_delta_update |= differing_delta;
+ }
+
+ for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
+ const s8 old_deltas = loop_filter_mode_deltas[index];
+ const s8 new_deltas = current_frame_info.mode_deltas[index];
+ const bool differing_delta = old_deltas != new_deltas;
+
+ update_loop_filter_mode_deltas[index] = differing_delta;
+ loop_filter_delta_update |= differing_delta;
+ }
+
+ uncomp_writer.WriteBit(loop_filter_delta_update);
+
+ if (loop_filter_delta_update) {
+ for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
+ uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]);
+
+ if (update_loop_filter_ref_deltas[index]) {
+ uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6);
+ }
+ }
+
+ for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
+ uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]);
+
+ if (update_loop_filter_mode_deltas[index]) {
+ uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6);
+ }
+ }
+ // save new deltas
+ loop_filter_ref_deltas = current_frame_info.ref_deltas;
+ loop_filter_mode_deltas = current_frame_info.mode_deltas;
+ }
+ }
+
+ uncomp_writer.WriteU(current_frame_info.base_q_index, 8);
+
+ uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q);
+ uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
+ uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
+
+ uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
+
+ const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
+ const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width);
+
+ const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2;
+ const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1;
+
+ // If it's less than the maximum, we need to add an extra 0 on the bitstream
+ // to indicate that it should stop reading.
+ if (current_frame_info.log2_tile_cols < max_tile_cols_log2) {
+ uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1);
+ } else {
+ uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff);
+ }
+
+ const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0;
+
+ uncomp_writer.WriteBit(tile_rows_log2_is_nonzero);
+
+ if (tile_rows_log2_is_nonzero) {
+ uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1);
+ }
+
+ return uncomp_writer;
+}
+
+const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state) {
+ std::vector<u8> bitstream;
+ {
+ Vp9FrameContainer curr_frame = GetCurrentFrame(state);
+ current_frame_info = curr_frame.info;
+ bitstream = std::move(curr_frame.bit_stream);
+ }
+
+ // The uncompressed header routine sets PrevProb parameters needed for the compressed header
+ auto uncomp_writer = ComposeUncompressedHeader();
+ std::vector<u8> compressed_header = ComposeCompressedHeader();
+
+ uncomp_writer.WriteU(static_cast<s32>(compressed_header.size()), 16);
+ uncomp_writer.Flush();
+ std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray();
+
+ // Write headers and frame to buffer
+ frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
+ std::memcpy(frame.data(), uncompressed_header.data(), uncompressed_header.size());
+ std::memcpy(frame.data() + uncompressed_header.size(), compressed_header.data(),
+ compressed_header.size());
+ std::memcpy(frame.data() + uncompressed_header.size() + compressed_header.size(),
+ bitstream.data(), bitstream.size());
+
+ // keep track of frame number
+ current_frame_number++;
+ grace_period--;
+
+ // don't display hidden frames
+ hidden = !current_frame_info.show_frame;
+ return frame;
+}
+
+VpxRangeEncoder::VpxRangeEncoder() {
+ Write(false);
+}
+
+VpxRangeEncoder::~VpxRangeEncoder() = default;
+
+void VpxRangeEncoder::Write(s32 value, s32 value_size) {
+ for (s32 bit = value_size - 1; bit >= 0; bit--) {
+ Write(((value >> bit) & 1) != 0);
+ }
+}
+
+void VpxRangeEncoder::Write(bool bit) {
+ Write(bit, half_probability);
+}
+
+void VpxRangeEncoder::Write(bool bit, s32 probability) {
+ u32 local_range = range;
+ const u32 split = 1 + (((local_range - 1) * static_cast<u32>(probability)) >> 8);
+ local_range = split;
+
+ if (bit) {
+ low_value += split;
+ local_range = range - split;
+ }
+
+ s32 shift = norm_lut[local_range];
+ local_range <<= shift;
+ count += shift;
+
+ if (count >= 0) {
+ const s32 offset = shift - count;
+
+ if (((low_value << (offset - 1)) >> 31) != 0) {
+ const s32 current_pos = static_cast<s32>(base_stream.GetPosition());
+ base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
+ while (PeekByte() == 0xff) {
+ base_stream.WriteByte(0);
+
+ base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos);
+ }
+ base_stream.WriteByte(static_cast<u8>((PeekByte() + 1)));
+ base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin);
+ }
+ base_stream.WriteByte(static_cast<u8>((low_value >> (24 - offset))));
+
+ low_value <<= offset;
+ shift = count;
+ low_value &= 0xffffff;
+ count -= 8;
+ }
+
+ low_value <<= shift;
+ range = local_range;
+}
+
+void VpxRangeEncoder::End() {
+ for (std::size_t index = 0; index < 32; ++index) {
+ Write(false);
+ }
+}
+
+u8 VpxRangeEncoder::PeekByte() {
+ const u8 value = base_stream.ReadByte();
+ base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
+
+ return value;
+}
+
+VpxBitStreamWriter::VpxBitStreamWriter() = default;
+
+VpxBitStreamWriter::~VpxBitStreamWriter() = default;
+
+void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) {
+ WriteBits(value, value_size);
+}
+
+void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) {
+ const bool sign = value < 0;
+ if (sign) {
+ value = -value;
+ }
+
+ WriteBits(static_cast<u32>(value << 1) | (sign ? 1 : 0), value_size + 1);
+}
+
+void VpxBitStreamWriter::WriteDeltaQ(u32 value) {
+ const bool delta_coded = value != 0;
+ WriteBit(delta_coded);
+
+ if (delta_coded) {
+ WriteBits(value, 4);
+ }
+}
+
+void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) {
+ s32 value_pos = 0;
+ s32 remaining = bit_count;
+
+ while (remaining > 0) {
+ s32 copy_size = remaining;
+
+ const s32 free = GetFreeBufferBits();
+
+ if (copy_size > free) {
+ copy_size = free;
+ }
+
+ const s32 mask = (1 << copy_size) - 1;
+
+ const s32 src_shift = (bit_count - value_pos) - copy_size;
+ const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
+
+ buffer |= ((value >> src_shift) & mask) << dst_shift;
+
+ value_pos += copy_size;
+ buffer_pos += copy_size;
+ remaining -= copy_size;
+ }
+}
+
+void VpxBitStreamWriter::WriteBit(bool state) {
+ WriteBits(state ? 1 : 0, 1);
+}
+
+s32 VpxBitStreamWriter::GetFreeBufferBits() {
+ if (buffer_pos == buffer_size) {
+ Flush();
+ }
+
+ return buffer_size - buffer_pos;
+}
+
+void VpxBitStreamWriter::Flush() {
+ if (buffer_pos == 0) {
+ return;
+ }
+ byte_array.push_back(static_cast<u8>(buffer));
+ buffer = 0;
+ buffer_pos = 0;
+}
+
+std::vector<u8>& VpxBitStreamWriter::GetByteArray() {
+ return byte_array;
+}
+
+const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
+ return byte_array;
+}
+
+} // namespace Tegra::Decoder
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
new file mode 100644
index 000000000..8396c8105
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -0,0 +1,197 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/stream.h"
+#include "video_core/command_classes/codecs/vp9_types.h"
+#include "video_core/command_classes/nvdec_common.h"
+
+namespace Tegra {
+class GPU;
+enum class FrameType { KeyFrame = 0, InterFrame = 1 };
+namespace Decoder {
+
+/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
+/// VP9 header bitstreams.
+
+class VpxRangeEncoder {
+public:
+ VpxRangeEncoder();
+ ~VpxRangeEncoder();
+
+ VpxRangeEncoder(const VpxRangeEncoder&) = delete;
+ VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete;
+
+ VpxRangeEncoder(VpxRangeEncoder&&) = default;
+ VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default;
+
+ /// Writes the rightmost value_size bits from value into the stream
+ void Write(s32 value, s32 value_size);
+
+ /// Writes a single bit with half probability
+ void Write(bool bit);
+
+ /// Writes a bit to the base_stream encoded with probability
+ void Write(bool bit, s32 probability);
+
+ /// Signal the end of the bitstream
+ void End();
+
+ [[nodiscard]] std::vector<u8>& GetBuffer() {
+ return base_stream.GetBuffer();
+ }
+
+ [[nodiscard]] const std::vector<u8>& GetBuffer() const {
+ return base_stream.GetBuffer();
+ }
+
+private:
+ u8 PeekByte();
+ Common::Stream base_stream{};
+ u32 low_value{};
+ u32 range{0xff};
+ s32 count{-24};
+ s32 half_probability{128};
+};
+
+class VpxBitStreamWriter {
+public:
+ VpxBitStreamWriter();
+ ~VpxBitStreamWriter();
+
+ VpxBitStreamWriter(const VpxBitStreamWriter&) = delete;
+ VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete;
+
+ VpxBitStreamWriter(VpxBitStreamWriter&&) = default;
+ VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default;
+
+ /// Write an unsigned integer value
+ void WriteU(u32 value, u32 value_size);
+
+ /// Write a signed integer value
+ void WriteS(s32 value, u32 value_size);
+
+ /// Based on 6.2.10 of VP9 Spec, writes a delta coded value
+ void WriteDeltaQ(u32 value);
+
+ /// Write a single bit.
+ void WriteBit(bool state);
+
+ /// Pushes current buffer into buffer_array, resets buffer
+ void Flush();
+
+ /// Returns byte_array
+ [[nodiscard]] std::vector<u8>& GetByteArray();
+
+ /// Returns const byte_array
+ [[nodiscard]] const std::vector<u8>& GetByteArray() const;
+
+private:
+ /// Write bit_count bits from value into buffer
+ void WriteBits(u32 value, u32 bit_count);
+
+ /// Gets next available position in buffer, invokes Flush() if buffer is full
+ s32 GetFreeBufferBits();
+
+ s32 buffer_size{8};
+
+ s32 buffer{};
+ s32 buffer_pos{};
+ std::vector<u8> byte_array;
+};
+
+class VP9 {
+public:
+ explicit VP9(GPU& gpu_);
+ ~VP9();
+
+ VP9(const VP9&) = delete;
+ VP9& operator=(const VP9&) = delete;
+
+ VP9(VP9&&) = default;
+ VP9& operator=(VP9&&) = delete;
+
+ /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
+ /// documentation
+ [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(
+ const NvdecCommon::NvdecRegisters& state);
+
+ /// Returns true if the most recent frame was a hidden frame.
+ [[nodiscard]] bool WasFrameHidden() const {
+ return hidden;
+ }
+
+private:
+ /// Generates compressed header probability updates in the bitstream writer
+ template <typename T, std::size_t N>
+ void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
+ const std::array<T, N>& old_prob);
+
+ /// Generates compressed header probability updates in the bitstream writer
+ /// If probs are not equal, WriteProbabilityDelta is invoked
+ void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
+
+ /// Generates compressed header probability deltas in the bitstream writer
+ void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
+
+ /// Inverse of 6.3.4 Decode term subexp
+ void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
+
+ /// Writes if the value is less than the test value
+ bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test);
+
+ /// Writes probability updates for the Coef probabilities
+ void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
+ const std::array<u8, 1728>& new_prob,
+ const std::array<u8, 1728>& old_prob);
+
+ /// Write probabilities for 4-byte aligned structures
+ template <typename T, std::size_t N>
+ void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
+ const std::array<T, N>& old_prob);
+
+ /// Write motion vector probability updates. 6.3.17 in the spec
+ void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
+
+ /// Returns VP9 information from NVDEC provided offset and size
+ [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
+
+ /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
+ void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
+
+ /// Returns frame to be decoded after buffering
+ [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
+
+ /// Use NVDEC providied information to compose the headers for the current frame
+ [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
+ [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
+
+ GPU& gpu;
+ std::vector<u8> frame;
+
+ std::array<s8, 4> loop_filter_ref_deltas{};
+ std::array<s8, 2> loop_filter_mode_deltas{};
+
+ bool hidden = false;
+ s64 current_frame_number = -2; // since we buffer 2 frames
+ s32 grace_period = 6; // frame offsets need to stabilize
+ std::array<FrameContexts, 4> frame_ctxs{};
+ Vp9FrameContainer next_frame{};
+ Vp9FrameContainer next_next_frame{};
+ bool swap_next_golden{};
+
+ Vp9PictureInfo current_frame_info{};
+ Vp9EntropyProbs prev_frame_probs{};
+
+ s32 diff_update_probability = 252;
+ s32 frame_sync_code = 0x498342;
+};
+
+} // namespace Decoder
+} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
new file mode 100644
index 000000000..139501a1c
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -0,0 +1,302 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstring>
+#include <vector>
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class GPU;
+
+namespace Decoder {
+struct Vp9FrameDimensions {
+ s16 width{};
+ s16 height{};
+ s16 luma_pitch{};
+ s16 chroma_pitch{};
+};
+static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
+
+enum FrameFlags : u32 {
+ IsKeyFrame = 1 << 0,
+ LastFrameIsKeyFrame = 1 << 1,
+ FrameSizeChanged = 1 << 2,
+ ErrorResilientMode = 1 << 3,
+ LastShowFrame = 1 << 4,
+ IntraOnly = 1 << 5,
+};
+
+enum class TxSize {
+ Tx4x4 = 0, // 4x4 transform
+ Tx8x8 = 1, // 8x8 transform
+ Tx16x16 = 2, // 16x16 transform
+ Tx32x32 = 3, // 32x32 transform
+ TxSizes = 4
+};
+
+enum class TxMode {
+ Only4X4 = 0, // Only 4x4 transform used
+ Allow8X8 = 1, // Allow block transform size up to 8x8
+ Allow16X16 = 2, // Allow block transform size up to 16x16
+ Allow32X32 = 3, // Allow block transform size up to 32x32
+ TxModeSelect = 4, // Transform specified for each block
+ TxModes = 5
+};
+
+struct Segmentation {
+ u8 enabled{};
+ u8 update_map{};
+ u8 temporal_update{};
+ u8 abs_delta{};
+ std::array<u32, 8> feature_mask{};
+ std::array<std::array<s16, 4>, 8> feature_data{};
+};
+static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
+
+struct LoopFilter {
+ u8 mode_ref_delta_enabled{};
+ std::array<s8, 4> ref_deltas{};
+ std::array<s8, 2> mode_deltas{};
+};
+static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
+
+struct Vp9EntropyProbs {
+ std::array<u8, 36> y_mode_prob{};
+ std::array<u8, 64> partition_prob{};
+ std::array<u8, 1728> coef_probs{};
+ std::array<u8, 8> switchable_interp_prob{};
+ std::array<u8, 28> inter_mode_prob{};
+ std::array<u8, 4> intra_inter_prob{};
+ std::array<u8, 5> comp_inter_prob{};
+ std::array<u8, 10> single_ref_prob{};
+ std::array<u8, 5> comp_ref_prob{};
+ std::array<u8, 6> tx_32x32_prob{};
+ std::array<u8, 4> tx_16x16_prob{};
+ std::array<u8, 2> tx_8x8_prob{};
+ std::array<u8, 3> skip_probs{};
+ std::array<u8, 3> joints{};
+ std::array<u8, 2> sign{};
+ std::array<u8, 20> classes{};
+ std::array<u8, 2> class_0{};
+ std::array<u8, 20> prob_bits{};
+ std::array<u8, 12> class_0_fr{};
+ std::array<u8, 6> fr{};
+ std::array<u8, 2> class_0_hp{};
+ std::array<u8, 2> high_precision{};
+};
+static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
+
+struct Vp9PictureInfo {
+ bool is_key_frame{};
+ bool intra_only{};
+ bool last_frame_was_key{};
+ bool frame_size_changed{};
+ bool error_resilient_mode{};
+ bool last_frame_shown{};
+ bool show_frame{};
+ std::array<s8, 4> ref_frame_sign_bias{};
+ s32 base_q_index{};
+ s32 y_dc_delta_q{};
+ s32 uv_dc_delta_q{};
+ s32 uv_ac_delta_q{};
+ bool lossless{};
+ s32 transform_mode{};
+ bool allow_high_precision_mv{};
+ s32 interp_filter{};
+ s32 reference_mode{};
+ s8 comp_fixed_ref{};
+ std::array<s8, 2> comp_var_ref{};
+ s32 log2_tile_cols{};
+ s32 log2_tile_rows{};
+ bool segment_enabled{};
+ bool segment_map_update{};
+ bool segment_map_temporal_update{};
+ s32 segment_abs_delta{};
+ std::array<u32, 8> segment_feature_enable{};
+ std::array<std::array<s16, 4>, 8> segment_feature_data{};
+ bool mode_ref_delta_enabled{};
+ bool use_prev_in_find_mv_refs{};
+ std::array<s8, 4> ref_deltas{};
+ std::array<s8, 2> mode_deltas{};
+ Vp9EntropyProbs entropy{};
+ Vp9FrameDimensions frame_size{};
+ u8 first_level{};
+ u8 sharpness_level{};
+ u32 bitstream_size{};
+ std::array<u64, 4> frame_offsets{};
+ std::array<bool, 4> refresh_frame{};
+};
+
+struct Vp9FrameContainer {
+ Vp9PictureInfo info{};
+ std::vector<u8> bit_stream;
+};
+
+struct PictureInfo {
+ INSERT_PADDING_WORDS(12);
+ u32 bitstream_size{};
+ INSERT_PADDING_WORDS(5);
+ Vp9FrameDimensions last_frame_size{};
+ Vp9FrameDimensions golden_frame_size{};
+ Vp9FrameDimensions alt_frame_size{};
+ Vp9FrameDimensions current_frame_size{};
+ u32 vp9_flags{};
+ std::array<s8, 4> ref_frame_sign_bias{};
+ u8 first_level{};
+ u8 sharpness_level{};
+ u8 base_q_index{};
+ u8 y_dc_delta_q{};
+ u8 uv_ac_delta_q{};
+ u8 uv_dc_delta_q{};
+ u8 lossless{};
+ u8 tx_mode{};
+ u8 allow_high_precision_mv{};
+ u8 interp_filter{};
+ u8 reference_mode{};
+ s8 comp_fixed_ref{};
+ std::array<s8, 2> comp_var_ref{};
+ u8 log2_tile_cols{};
+ u8 log2_tile_rows{};
+ Segmentation segmentation{};
+ LoopFilter loop_filter{};
+ INSERT_PADDING_BYTES(5);
+ u32 surface_params{};
+ INSERT_PADDING_WORDS(3);
+
+ [[nodiscard]] Vp9PictureInfo Convert() const {
+ return {
+ .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
+ .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
+ .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
+ .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
+ .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
+ .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
+ .ref_frame_sign_bias = ref_frame_sign_bias,
+ .base_q_index = base_q_index,
+ .y_dc_delta_q = y_dc_delta_q,
+ .uv_dc_delta_q = uv_dc_delta_q,
+ .uv_ac_delta_q = uv_ac_delta_q,
+ .lossless = lossless != 0,
+ .transform_mode = tx_mode,
+ .allow_high_precision_mv = allow_high_precision_mv != 0,
+ .interp_filter = interp_filter,
+ .reference_mode = reference_mode,
+ .comp_fixed_ref = comp_fixed_ref,
+ .comp_var_ref = comp_var_ref,
+ .log2_tile_cols = log2_tile_cols,
+ .log2_tile_rows = log2_tile_rows,
+ .segment_enabled = segmentation.enabled != 0,
+ .segment_map_update = segmentation.update_map != 0,
+ .segment_map_temporal_update = segmentation.temporal_update != 0,
+ .segment_abs_delta = segmentation.abs_delta,
+ .segment_feature_enable = segmentation.feature_mask,
+ .segment_feature_data = segmentation.feature_data,
+ .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
+ .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) &&
+ !(vp9_flags == (FrameFlags::FrameSizeChanged)) &&
+ !(vp9_flags == (FrameFlags::IntraOnly)) &&
+ (vp9_flags == (FrameFlags::LastShowFrame)) &&
+ !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
+ .ref_deltas = loop_filter.ref_deltas,
+ .mode_deltas = loop_filter.mode_deltas,
+ .frame_size = current_frame_size,
+ .first_level = first_level,
+ .sharpness_level = sharpness_level,
+ .bitstream_size = bitstream_size,
+ };
+ }
+};
+static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
+
+struct EntropyProbs {
+ INSERT_PADDING_BYTES(1024);
+ std::array<u8, 28> inter_mode_prob{};
+ std::array<u8, 4> intra_inter_prob{};
+ INSERT_PADDING_BYTES(80);
+ std::array<u8, 2> tx_8x8_prob{};
+ std::array<u8, 4> tx_16x16_prob{};
+ std::array<u8, 6> tx_32x32_prob{};
+ std::array<u8, 4> y_mode_prob_e8{};
+ std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
+ INSERT_PADDING_BYTES(64);
+ std::array<u8, 64> partition_prob{};
+ INSERT_PADDING_BYTES(10);
+ std::array<u8, 8> switchable_interp_prob{};
+ std::array<u8, 5> comp_inter_prob{};
+ std::array<u8, 3> skip_probs{};
+ INSERT_PADDING_BYTES(1);
+ std::array<u8, 3> joints{};
+ std::array<u8, 2> sign{};
+ std::array<u8, 2> class_0{};
+ std::array<u8, 6> fr{};
+ std::array<u8, 2> class_0_hp{};
+ std::array<u8, 2> high_precision{};
+ std::array<u8, 20> classes{};
+ std::array<u8, 12> class_0_fr{};
+ std::array<u8, 20> pred_bits{};
+ std::array<u8, 10> single_ref_prob{};
+ std::array<u8, 5> comp_ref_prob{};
+ INSERT_PADDING_BYTES(17);
+ std::array<u8, 2304> coef_probs{};
+
+ void Convert(Vp9EntropyProbs& fc) {
+ fc.inter_mode_prob = inter_mode_prob;
+ fc.intra_inter_prob = intra_inter_prob;
+ fc.tx_8x8_prob = tx_8x8_prob;
+ fc.tx_16x16_prob = tx_16x16_prob;
+ fc.tx_32x32_prob = tx_32x32_prob;
+
+ for (std::size_t i = 0; i < 4; i++) {
+ for (std::size_t j = 0; j < 9; j++) {
+ fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i];
+ }
+ }
+
+ fc.partition_prob = partition_prob;
+ fc.switchable_interp_prob = switchable_interp_prob;
+ fc.comp_inter_prob = comp_inter_prob;
+ fc.skip_probs = skip_probs;
+ fc.joints = joints;
+ fc.sign = sign;
+ fc.class_0 = class_0;
+ fc.fr = fr;
+ fc.class_0_hp = class_0_hp;
+ fc.high_precision = high_precision;
+ fc.classes = classes;
+ fc.class_0_fr = class_0_fr;
+ fc.prob_bits = pred_bits;
+ fc.single_ref_prob = single_ref_prob;
+ fc.comp_ref_prob = comp_ref_prob;
+
+ // Skip the 4th element as it goes unused
+ for (std::size_t i = 0; i < coef_probs.size(); i += 4) {
+ const std::size_t j = i - i / 4;
+ fc.coef_probs[j] = coef_probs[i];
+ fc.coef_probs[j + 1] = coef_probs[i + 1];
+ fc.coef_probs[j + 2] = coef_probs[i + 2];
+ }
+ }
+};
+static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size");
+
+enum class Ref { Last, Golden, AltRef };
+
+struct RefPoolElement {
+ s64 frame{};
+ Ref ref{};
+ bool refresh{};
+};
+
+struct FrameContexts {
+ s64 from{};
+ bool adapted{};
+ Vp9EntropyProbs probs{};
+};
+
+}; // namespace Decoder
+}; // namespace Tegra
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
new file mode 100644
index 000000000..b12494528
--- /dev/null
+++ b/src/video_core/command_classes/host1x.cpp
@@ -0,0 +1,30 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/command_classes/host1x.h"
+#include "video_core/gpu.h"
+
+Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
+
+Tegra::Host1x::~Host1x() = default;
+
+void Tegra::Host1x::ProcessMethod(Method method, u32 argument) {
+ switch (method) {
+ case Method::LoadSyncptPayload32:
+ syncpoint_value = argument;
+ break;
+ case Method::WaitSyncpt:
+ case Method::WaitSyncpt32:
+ Execute(argument);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
+ break;
+ }
+}
+
+void Tegra::Host1x::Execute(u32 data) {
+ gpu.WaitFence(data, syncpoint_value);
+}
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h
new file mode 100644
index 000000000..7e94799dd
--- /dev/null
+++ b/src/video_core/command_classes/host1x.h
@@ -0,0 +1,37 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class GPU;
+class Nvdec;
+
+class Host1x {
+public:
+ enum class Method : u32 {
+ WaitSyncpt = 0x8,
+ LoadSyncptPayload32 = 0x4e,
+ WaitSyncpt32 = 0x50,
+ };
+
+ explicit Host1x(GPU& gpu);
+ ~Host1x();
+
+ /// Writes the method into the state, Invoke Execute() if encountered
+ void ProcessMethod(Method method, u32 argument);
+
+private:
+ /// For Host1x, execute is waiting on a syncpoint previously written into the state
+ void Execute(u32 data);
+
+ u32 syncpoint_value{};
+ GPU& gpu;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
new file mode 100644
index 000000000..79e1f4e13
--- /dev/null
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -0,0 +1,48 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/command_classes/nvdec.h"
+#include "video_core/gpu.h"
+
+namespace Tegra {
+
+Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
+
+Nvdec::~Nvdec() = default;
+
+void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) {
+ if (method == Method::SetVideoCodec) {
+ codec->StateWrite(static_cast<u32>(method), arguments[0]);
+ } else {
+ codec->StateWrite(static_cast<u32>(method), static_cast<u64>(arguments[0]) << 8);
+ }
+
+ switch (method) {
+ case Method::SetVideoCodec:
+ codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0]));
+ break;
+ case Method::Execute:
+ Execute();
+ break;
+ }
+}
+
+AVFramePtr Nvdec::GetFrame() {
+ return codec->GetCurrentFrame();
+}
+
+void Nvdec::Execute() {
+ switch (codec->GetCurrentCodec()) {
+ case NvdecCommon::VideoCodec::H264:
+ case NvdecCommon::VideoCodec::Vp9:
+ codec->Decode();
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unknown codec {}", static_cast<u32>(codec->GetCurrentCodec()));
+ break;
+ }
+}
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
new file mode 100644
index 000000000..e4877c533
--- /dev/null
+++ b/src/video_core/command_classes/nvdec.h
@@ -0,0 +1,38 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/command_classes/codecs/codec.h"
+
+namespace Tegra {
+class GPU;
+
+class Nvdec {
+public:
+ enum class Method : u32 {
+ SetVideoCodec = 0x80,
+ Execute = 0xc0,
+ };
+
+ explicit Nvdec(GPU& gpu);
+ ~Nvdec();
+
+ /// Writes the method into the state, Invoke Execute() if encountered
+ void ProcessMethod(Method method, const std::vector<u32>& arguments);
+
+ /// Return most recently decoded frame
+ [[nodiscard]] AVFramePtr GetFrame();
+
+private:
+ /// Invoke codec to decode a frame
+ void Execute();
+
+ GPU& gpu;
+ std::unique_ptr<Codec> codec;
+};
+} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h
new file mode 100644
index 000000000..01b5e086d
--- /dev/null
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -0,0 +1,48 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra::NvdecCommon {
+
+struct NvdecRegisters {
+ INSERT_PADDING_WORDS(256);
+ u64 set_codec_id{};
+ INSERT_PADDING_WORDS(254);
+ u64 set_platform_id{};
+ u64 picture_info_offset{};
+ u64 frame_bitstream_offset{};
+ u64 frame_number{};
+ u64 h264_slice_data_offsets{};
+ u64 h264_mv_dump_offset{};
+ INSERT_PADDING_WORDS(6);
+ u64 frame_stats_offset{};
+ u64 h264_last_surface_luma_offset{};
+ u64 h264_last_surface_chroma_offset{};
+ std::array<u64, 17> surface_luma_offset{};
+ std::array<u64, 17> surface_chroma_offset{};
+ INSERT_PADDING_WORDS(132);
+ u64 vp9_entropy_probs_offset{};
+ u64 vp9_backward_updates_offset{};
+ u64 vp9_last_frame_segmap_offset{};
+ u64 vp9_curr_frame_segmap_offset{};
+ INSERT_PADDING_WORDS(2);
+ u64 vp9_last_frame_mvs_offset{};
+ u64 vp9_curr_frame_mvs_offset{};
+ INSERT_PADDING_WORDS(2);
+};
+static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
+
+enum class VideoCodec : u32 {
+ None = 0x0,
+ H264 = 0x3,
+ Vp8 = 0x5,
+ H265 = 0x7,
+ Vp9 = 0x9,
+};
+
+} // namespace Tegra::NvdecCommon
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp
new file mode 100644
index 000000000..19dc9e0ab
--- /dev/null
+++ b/src/video_core/command_classes/sync_manager.cpp
@@ -0,0 +1,60 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#include <algorithm>
+#include "sync_manager.h"
+#include "video_core/gpu.h"
+
+namespace Tegra {
+SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
+SyncptIncrManager::~SyncptIncrManager() = default;
+
+void SyncptIncrManager::Increment(u32 id) {
+ increments.emplace_back(0, 0, id, true);
+ IncrementAllDone();
+}
+
+u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
+ const u32 handle = current_id++;
+ increments.emplace_back(handle, class_id, id);
+ return handle;
+}
+
+void SyncptIncrManager::SignalDone(u32 handle) {
+ const auto done_incr =
+ std::find_if(increments.begin(), increments.end(),
+ [handle](const SyncptIncr& incr) { return incr.id == handle; });
+ if (done_incr != increments.cend()) {
+ done_incr->complete = true;
+ }
+ IncrementAllDone();
+}
+
+void SyncptIncrManager::IncrementAllDone() {
+ std::size_t done_count = 0;
+ for (; done_count < increments.size(); ++done_count) {
+ if (!increments[done_count].complete) {
+ break;
+ }
+ gpu.IncrementSyncPoint(increments[done_count].syncpt_id);
+ }
+ increments.erase(increments.begin(), increments.begin() + done_count);
+}
+} // namespace Tegra
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h
new file mode 100644
index 000000000..2c321ec58
--- /dev/null
+++ b/src/video_core/command_classes/sync_manager.h
@@ -0,0 +1,64 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#pragma once
+
+#include <mutex>
+#include <vector>
+#include "common/common_types.h"
+
+namespace Tegra {
+class GPU;
+struct SyncptIncr {
+ u32 id;
+ u32 class_id;
+ u32 syncpt_id;
+ bool complete;
+
+ SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
+ : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
+};
+
+class SyncptIncrManager {
+public:
+ explicit SyncptIncrManager(GPU& gpu);
+ ~SyncptIncrManager();
+
+ /// Add syncpoint id and increment all
+ void Increment(u32 id);
+
+ /// Returns a handle to increment later
+ u32 IncrementWhenDone(u32 class_id, u32 id);
+
+ /// IncrememntAllDone, including handle
+ void SignalDone(u32 handle);
+
+ /// Increment all sequential pending increments that are already done.
+ void IncrementAllDone();
+
+private:
+ std::vector<SyncptIncr> increments;
+ std::mutex increment_lock;
+ u32 current_id{};
+
+ GPU& gpu;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
new file mode 100644
index 000000000..2b7569335
--- /dev/null
+++ b/src/video_core/command_classes/vic.cpp
@@ -0,0 +1,172 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include "common/assert.h"
+#include "video_core/command_classes/nvdec.h"
+#include "video_core/command_classes/vic.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/textures/decoders.h"
+
+extern "C" {
+#include <libswscale/swscale.h>
+}
+
+namespace Tegra {
+
+Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
+ : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {}
+Vic::~Vic() = default;
+
+void Vic::VicStateWrite(u32 offset, u32 arguments) {
+ u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32);
+ std::memcpy(state_offset, &arguments, sizeof(u32));
+}
+
+void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
+ LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", method);
+ VicStateWrite(static_cast<u32>(method), arguments[0]);
+ const u64 arg = static_cast<u64>(arguments[0]) << 8;
+ switch (method) {
+ case Method::Execute:
+ Execute();
+ break;
+ case Method::SetConfigStructOffset:
+ config_struct_address = arg;
+ break;
+ case Method::SetOutputSurfaceLumaOffset:
+ output_surface_luma_address = arg;
+ break;
+ case Method::SetOutputSurfaceChromaUOffset:
+ output_surface_chroma_u_address = arg;
+ break;
+ case Method::SetOutputSurfaceChromaVOffset:
+ output_surface_chroma_v_address = arg;
+ break;
+ default:
+ break;
+ }
+}
+
+void Vic::Execute() {
+ if (output_surface_luma_address == 0) {
+ LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}",
+ vic_state.output_surface.luma_offset);
+ return;
+ }
+ const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
+ const AVFramePtr frame_ptr = nvdec_processor->GetFrame();
+ const auto* frame = frame_ptr.get();
+ if (!frame || frame->width == 0 || frame->height == 0) {
+ return;
+ }
+ const VideoPixelFormat pixel_format =
+ static_cast<VideoPixelFormat>(config.pixel_format.Value());
+ switch (pixel_format) {
+ case VideoPixelFormat::BGRA8:
+ case VideoPixelFormat::RGBA8: {
+ LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
+
+ if (scaler_ctx == nullptr || frame->width != scaler_width ||
+ frame->height != scaler_height) {
+ const AVPixelFormat target_format =
+ (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA;
+
+ sws_freeContext(scaler_ctx);
+ scaler_ctx = nullptr;
+
+ // FFmpeg returns all frames in YUV420, convert it into expected format
+ scaler_ctx =
+ sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width,
+ frame->height, target_format, 0, nullptr, nullptr, nullptr);
+
+ scaler_width = frame->width;
+ scaler_height = frame->height;
+ }
+ // Get Converted frame
+ const std::size_t linear_size = frame->width * frame->height * 4;
+
+ using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
+ AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free};
+
+ const int converted_stride{frame->width * 4};
+ u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
+
+ sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
+ &converted_frame_buf_addr, &converted_stride);
+
+ const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
+ if (blk_kind != 0) {
+ // swizzle pitch linear to block linear
+ const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
+ const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
+ block_height, 0);
+ std::vector<u8> swizzled_data(size);
+ Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
+ frame->width, 4, swizzled_data.data(),
+ converted_frame_buffer.get(), block_height, 0, 0);
+
+ gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
+ } else {
+ // send pitch linear frame
+ gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
+ linear_size);
+ }
+ break;
+ }
+ case VideoPixelFormat::Yuv420: {
+ LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
+
+ const std::size_t surface_width = config.surface_width_minus1 + 1;
+ const std::size_t surface_height = config.surface_height_minus1 + 1;
+ const std::size_t half_width = surface_width / 2;
+ const std::size_t half_height = config.surface_height_minus1 / 2;
+ const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;
+
+ const auto* luma_ptr = frame->data[0];
+ const auto* chroma_b_ptr = frame->data[1];
+ const auto* chroma_r_ptr = frame->data[2];
+ const auto stride = frame->linesize[0];
+ const auto half_stride = frame->linesize[1];
+
+ std::vector<u8> luma_buffer(aligned_width * surface_height);
+ std::vector<u8> chroma_buffer(aligned_width * half_height);
+
+ // Populate luma buffer
+ for (std::size_t y = 0; y < surface_height - 1; ++y) {
+ std::size_t src = y * stride;
+ std::size_t dst = y * aligned_width;
+
+ std::size_t size = surface_width;
+
+ for (std::size_t offset = 0; offset < size; ++offset) {
+ luma_buffer[dst + offset] = luma_ptr[src + offset];
+ }
+ }
+ gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
+ luma_buffer.size());
+
+ // Populate chroma buffer from both channels with interleaving.
+ for (std::size_t y = 0; y < half_height; ++y) {
+ std::size_t src = y * half_stride;
+ std::size_t dst = y * aligned_width;
+
+ for (std::size_t x = 0; x < half_width; ++x) {
+ chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
+ chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x];
+ }
+ }
+ gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
+ chroma_buffer.size());
+ break;
+ }
+ default:
+ UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value());
+ break;
+ }
+}
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
new file mode 100644
index 000000000..8c4e284a1
--- /dev/null
+++ b/src/video_core/command_classes/vic.h
@@ -0,0 +1,110 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+struct SwsContext;
+
+namespace Tegra {
+class GPU;
+class Nvdec;
+
+struct PlaneOffsets {
+ u32 luma_offset{};
+ u32 chroma_u_offset{};
+ u32 chroma_v_offset{};
+};
+
+struct VicRegisters {
+ INSERT_PADDING_WORDS(64);
+ u32 nop{};
+ INSERT_PADDING_WORDS(15);
+ u32 pm_trigger{};
+ INSERT_PADDING_WORDS(47);
+ u32 set_application_id{};
+ u32 set_watchdog_timer{};
+ INSERT_PADDING_WORDS(17);
+ u32 context_save_area{};
+ u32 context_switch{};
+ INSERT_PADDING_WORDS(43);
+ u32 execute{};
+ INSERT_PADDING_WORDS(63);
+ std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{};
+ u32 picture_index{};
+ u32 control_params{};
+ u32 config_struct_offset{};
+ u32 filter_struct_offset{};
+ u32 palette_offset{};
+ u32 hist_offset{};
+ u32 context_id{};
+ u32 fce_ucode_size{};
+ PlaneOffsets output_surface{};
+ u32 fce_ucode_offset{};
+ INSERT_PADDING_WORDS(4);
+ std::array<u32, 8> slot_context_id{};
+ INSERT_PADDING_WORDS(16);
+};
+static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size");
+
+class Vic {
+public:
+ enum class Method : u32 {
+ Execute = 0xc0,
+ SetControlParams = 0x1c1,
+ SetConfigStructOffset = 0x1c2,
+ SetOutputSurfaceLumaOffset = 0x1c8,
+ SetOutputSurfaceChromaUOffset = 0x1c9,
+ SetOutputSurfaceChromaVOffset = 0x1ca
+ };
+
+ explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
+ ~Vic();
+
+ /// Write to the device state.
+ void ProcessMethod(Method method, const std::vector<u32>& arguments);
+
+private:
+ void Execute();
+
+ void VicStateWrite(u32 offset, u32 arguments);
+ VicRegisters vic_state{};
+
+ enum class VideoPixelFormat : u64_le {
+ RGBA8 = 0x1f,
+ BGRA8 = 0x20,
+ Yuv420 = 0x44,
+ };
+
+ union VicConfig {
+ u64_le raw{};
+ BitField<0, 7, u64_le> pixel_format;
+ BitField<7, 2, u64_le> chroma_loc_horiz;
+ BitField<9, 2, u64_le> chroma_loc_vert;
+ BitField<11, 4, u64_le> block_linear_kind;
+ BitField<15, 4, u64_le> block_linear_height_log2;
+ BitField<19, 3, u64_le> reserved0;
+ BitField<22, 10, u64_le> reserved1;
+ BitField<32, 14, u64_le> surface_width_minus1;
+ BitField<46, 14, u64_le> surface_height_minus1;
+ };
+
+ GPU& gpu;
+ std::shared_ptr<Tegra::Nvdec> nvdec_processor;
+
+ GPUVAddr config_struct_address{};
+ GPUVAddr output_surface_luma_address{};
+ GPUVAddr output_surface_chroma_u_address{};
+ GPUVAddr output_surface_chroma_v_address{};
+
+ SwsContext* scaler_ctx{};
+ s32 scaler_width{};
+ s32 scaler_height{};
+};
+
+} // namespace Tegra
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index b06c32c84..acf2668dc 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -3,33 +3,33 @@
// Refer to the license.txt file included.
#include <array>
-#include <bitset>
#include <cstddef>
+#include "common/common_types.h"
#include "video_core/compatible_formats.h"
#include "video_core/surface.h"
namespace VideoCore::Surface {
-
namespace {
+using Table = std::array<std::array<u64, 2>, MaxPixelFormat>;
// Compatibility table taken from Table 3.X.2 in:
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
-constexpr std::array VIEW_CLASS_128_BITS = {
+constexpr std::array VIEW_CLASS_128_BITS{
PixelFormat::R32G32B32A32_FLOAT,
PixelFormat::R32G32B32A32_UINT,
PixelFormat::R32G32B32A32_SINT,
};
-constexpr std::array VIEW_CLASS_96_BITS = {
+constexpr std::array VIEW_CLASS_96_BITS{
PixelFormat::R32G32B32_FLOAT,
};
// Missing formats:
// PixelFormat::RGB32UI,
// PixelFormat::RGB32I,
-constexpr std::array VIEW_CLASS_64_BITS = {
+constexpr std::array VIEW_CLASS_64_BITS{
PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT,
PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
@@ -38,7 +38,7 @@ constexpr std::array VIEW_CLASS_64_BITS = {
// TODO: How should we handle 48 bits?
-constexpr std::array VIEW_CLASS_32_BITS = {
+constexpr std::array VIEW_CLASS_32_BITS{
PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
@@ -50,43 +50,105 @@ constexpr std::array VIEW_CLASS_32_BITS = {
// TODO: How should we handle 24 bits?
-constexpr std::array VIEW_CLASS_16_BITS = {
+constexpr std::array VIEW_CLASS_16_BITS{
PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT,
PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT,
};
-constexpr std::array VIEW_CLASS_8_BITS = {
+constexpr std::array VIEW_CLASS_8_BITS{
PixelFormat::R8_UINT,
PixelFormat::R8_UNORM,
PixelFormat::R8_SINT,
PixelFormat::R8_SNORM,
};
-constexpr std::array VIEW_CLASS_RGTC1_RED = {
+constexpr std::array VIEW_CLASS_RGTC1_RED{
PixelFormat::BC4_UNORM,
PixelFormat::BC4_SNORM,
};
-constexpr std::array VIEW_CLASS_RGTC2_RG = {
+constexpr std::array VIEW_CLASS_RGTC2_RG{
PixelFormat::BC5_UNORM,
PixelFormat::BC5_SNORM,
};
-constexpr std::array VIEW_CLASS_BPTC_UNORM = {
+constexpr std::array VIEW_CLASS_BPTC_UNORM{
PixelFormat::BC7_UNORM,
PixelFormat::BC7_SRGB,
};
-constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
+constexpr std::array VIEW_CLASS_BPTC_FLOAT{
PixelFormat::BC6H_SFLOAT,
PixelFormat::BC6H_UFLOAT,
};
+constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{
+ PixelFormat::ASTC_2D_4X4_UNORM,
+ PixelFormat::ASTC_2D_4X4_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{
+ PixelFormat::ASTC_2D_5X4_UNORM,
+ PixelFormat::ASTC_2D_5X4_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{
+ PixelFormat::ASTC_2D_5X5_UNORM,
+ PixelFormat::ASTC_2D_5X5_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{
+ PixelFormat::ASTC_2D_6X5_UNORM,
+ PixelFormat::ASTC_2D_6X5_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{
+ PixelFormat::ASTC_2D_6X6_UNORM,
+ PixelFormat::ASTC_2D_6X6_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{
+ PixelFormat::ASTC_2D_8X5_UNORM,
+ PixelFormat::ASTC_2D_8X5_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
+ PixelFormat::ASTC_2D_8X8_UNORM,
+ PixelFormat::ASTC_2D_8X8_SRGB,
+};
+
+// Missing formats:
+// PixelFormat::ASTC_2D_10X5_UNORM
+// PixelFormat::ASTC_2D_10X5_SRGB
+
+// Missing formats:
+// PixelFormat::ASTC_2D_10X6_UNORM
+// PixelFormat::ASTC_2D_10X6_SRGB
+
+constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
+ PixelFormat::ASTC_2D_10X8_UNORM,
+ PixelFormat::ASTC_2D_10X8_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
+ PixelFormat::ASTC_2D_10X10_UNORM,
+ PixelFormat::ASTC_2D_10X10_SRGB,
+};
+
+// Missing formats
+// ASTC_2D_12X10_UNORM,
+// ASTC_2D_12X10_SRGB,
+
+constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
+ PixelFormat::ASTC_2D_12X12_UNORM,
+ PixelFormat::ASTC_2D_12X12_SRGB,
+};
+
// Compatibility table taken from Table 4.X.1 in:
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
-constexpr std::array COPY_CLASS_128_BITS = {
+constexpr std::array COPY_CLASS_128_BITS{
PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM,
PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM,
@@ -97,7 +159,7 @@ constexpr std::array COPY_CLASS_128_BITS = {
// PixelFormat::RGBA32I
// COMPRESSED_RG_RGTC2
-constexpr std::array COPY_CLASS_64_BITS = {
+constexpr std::array COPY_CLASS_64_BITS{
PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT,
@@ -110,32 +172,36 @@ constexpr std::array COPY_CLASS_64_BITS = {
// COMPRESSED_RGBA_S3TC_DXT1_EXT
// COMPRESSED_SIGNED_RED_RGTC1
-void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) {
- compatiblity[format_a][format_b] = true;
- compatiblity[format_b][format_a] = true;
+constexpr void Enable(Table& table, size_t format_a, size_t format_b) {
+ table[format_a][format_b / 64] |= u64(1) << (format_b % 64);
+ table[format_b][format_a / 64] |= u64(1) << (format_a % 64);
}
-void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
- Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
+constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) {
+ Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
}
template <typename Range>
-void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
+constexpr void EnableRange(Table& table, const Range& range) {
for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
for (auto it_b = it_a; it_b != range.end(); ++it_b) {
- Enable(compatibility, *it_a, *it_b);
+ Enable(table, *it_a, *it_b);
}
}
}
-} // Anonymous namespace
+constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) {
+ const size_t a = static_cast<size_t>(format_a);
+ const size_t b = static_cast<size_t>(format_b);
+ return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
+}
-FormatCompatibility::FormatCompatibility() {
+constexpr Table MakeViewTable() {
+ Table view{};
for (size_t i = 0; i < MaxPixelFormat; ++i) {
// Identity is allowed
Enable(view, i, i);
}
-
EnableRange(view, VIEW_CLASS_128_BITS);
EnableRange(view, VIEW_CLASS_96_BITS);
EnableRange(view, VIEW_CLASS_64_BITS);
@@ -146,10 +212,39 @@ FormatCompatibility::FormatCompatibility() {
EnableRange(view, VIEW_CLASS_RGTC2_RG);
EnableRange(view, VIEW_CLASS_BPTC_UNORM);
EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
+ EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA);
+ EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA);
+ EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA);
+ EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA);
+ EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
+ EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
+ EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
+ EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
+ EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
+ EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
+ return view;
+}
- copy = view;
+constexpr Table MakeCopyTable() {
+ Table copy = MakeViewTable();
EnableRange(copy, COPY_CLASS_128_BITS);
EnableRange(copy, COPY_CLASS_64_BITS);
+ return copy;
+}
+} // Anonymous namespace
+
+bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) {
+ if (broken_views) {
+ // If format views are broken, only accept formats that are identical.
+ return format_a == format_b;
+ }
+ static constexpr Table TABLE = MakeViewTable();
+ return IsSupported(TABLE, format_a, format_b);
+}
+
+bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
+ static constexpr Table TABLE = MakeCopyTable();
+ return IsSupported(TABLE, format_a, format_b);
}
} // namespace VideoCore::Surface
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index 51766349b..9a0522988 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -4,31 +4,12 @@
#pragma once
-#include <array>
-#include <bitset>
-#include <cstddef>
-
#include "video_core/surface.h"
namespace VideoCore::Surface {
-class FormatCompatibility {
-public:
- using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
-
- explicit FormatCompatibility();
-
- bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
- return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
- }
-
- bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
- return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
- }
+bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views);
-private:
- Table view;
- Table copy;
-};
+bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
} // namespace VideoCore::Surface
diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h
new file mode 100644
index 000000000..4f1d29c04
--- /dev/null
+++ b/src/video_core/delayed_destruction_ring.h
@@ -0,0 +1,32 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <utility>
+#include <vector>
+
+namespace VideoCommon {
+
+/// Container to push objects to be destroyed a few ticks in the future
+template <typename T, size_t TICKS_TO_DESTROY>
+class DelayedDestructionRing {
+public:
+ void Tick() {
+ index = (index + 1) % TICKS_TO_DESTROY;
+ elements[index].clear();
+ }
+
+ void Push(T&& object) {
+ elements[index].push_back(std::move(object));
+ }
+
+private:
+ size_t index = 0;
+ std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index e16075993..7149af290 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -9,13 +9,33 @@
#include "video_core/dirty_flags.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
-#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32))
+#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace VideoCommon::Dirty {
-
+namespace {
using Tegra::Engines::Maxwell3D;
-void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
+void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) {
+ static constexpr std::size_t num_array = 3;
+ for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) {
+ const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
+ const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
+
+ FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
+ FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
+ }
+}
+
+void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) {
+ FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer);
+}
+
+void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) {
+ FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
+ FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
+}
+
+void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
static constexpr std::size_t num_per_rt = NUM(rt[0]);
static constexpr std::size_t begin = OFF(rt);
static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -23,6 +43,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
}
FillBlock(tables[1], begin, num, RenderTargets);
+ FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets);
+
+ tables[0][OFF(rt_control)] = RenderTargets;
+ tables[1][OFF(rt_control)] = RenderTargetControl;
static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {
@@ -34,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
FillBlock(table, OFF(zeta), NUM(zeta), flag);
}
}
+} // Anonymous namespace
+
+void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
+ SetupDirtyVertexBuffers(tables);
+ SetupIndexBuffer(tables);
+ SetupDirtyDescriptors(tables);
+ SetupDirtyRenderTargets(tables);
+}
} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 3f6c1d83a..702688ace 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -16,7 +16,10 @@ namespace VideoCommon::Dirty {
enum : u8 {
NullEntry = 0,
+ Descriptors,
+
RenderTargets,
+ RenderTargetControl,
ColorBuffer0,
ColorBuffer1,
ColorBuffer2,
@@ -27,6 +30,12 @@ enum : u8 {
ColorBuffer7,
ZetaBuffer,
+ VertexBuffers,
+ VertexBuffer0,
+ VertexBuffer31 = VertexBuffer0 + 31,
+
+ IndexBuffer,
+
LastCommonEntry,
};
@@ -44,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
FillBlock(tables[1], begin, num, index_b);
}
-void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
+void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index f2f96ac33..8b33c04ab 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/cityhash.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
@@ -12,7 +13,7 @@
namespace Tegra {
-DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
+DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {}
DmaPusher::~DmaPusher() = default;
@@ -22,8 +23,6 @@ void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls);
gpu.SyncGuestHost();
- // On entering GPU code, assume all memory may be touched by the ARM core.
- gpu.Maxwell3D().OnMemoryWrite();
dma_pushbuffer_subindex = 0;
@@ -45,32 +44,41 @@ bool DmaPusher::Step() {
return false;
}
- const CommandList& command_list{dma_pushbuffer.front()};
- ASSERT_OR_EXECUTE(!command_list.empty(), {
- // Somehow the command_list is empty, in order to avoid a crash
- // We ignore it and assume its size is 0.
- dma_pushbuffer.pop();
- dma_pushbuffer_subindex = 0;
- return true;
- });
- const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
- const GPUVAddr dma_get = command_list_header.addr;
-
- if (dma_pushbuffer_subindex >= command_list.size()) {
- // We've gone through the current list, remove it from the queue
- dma_pushbuffer.pop();
- dma_pushbuffer_subindex = 0;
- }
+ CommandList& command_list{dma_pushbuffer.front()};
- if (command_list_header.size == 0) {
- return true;
- }
+ ASSERT_OR_EXECUTE(
+ command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
+ // Somehow the command_list is empty, in order to avoid a crash
+ // We ignore it and assume its size is 0.
+ dma_pushbuffer.pop();
+ dma_pushbuffer_subindex = 0;
+ return true;
+ });
+
+ if (command_list.prefetch_command_list.size()) {
+ // Prefetched command list from nvdrv, used for things like synchronization
+ command_headers = std::move(command_list.prefetch_command_list);
+ dma_pushbuffer.pop();
+ } else {
+ const CommandListHeader command_list_header{
+ command_list.command_lists[dma_pushbuffer_subindex++]};
+ const GPUVAddr dma_get = command_list_header.addr;
+
+ if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
+ // We've gone through the current list, remove it from the queue
+ dma_pushbuffer.pop();
+ dma_pushbuffer_subindex = 0;
+ }
- // Push buffer non-empty, read a word
- command_headers.resize(command_list_header.size);
- gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
- command_list_header.size * sizeof(u32));
+ if (command_list_header.size == 0) {
+ return true;
+ }
+ // Push buffer non-empty, read a word
+ command_headers.resize(command_list_header.size);
+ gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
+ }
for (std::size_t index = 0; index < command_headers.size();) {
const CommandHeader& command_header = command_headers[index];
@@ -142,7 +150,12 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
void DmaPusher::CallMethod(u32 argument) const {
if (dma_state.method < non_puller_methods) {
- gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
+ gpu.CallMethod(GPU::MethodCall{
+ dma_state.method,
+ argument,
+ dma_state.subchannel,
+ dma_state.method_count,
+ });
} else {
subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
dma_state.is_last_call);
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index efa90d170..19f286fa7 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -18,6 +18,8 @@ class System;
namespace Tegra {
+class GPU;
+
enum class SubmissionMode : u32 {
IncreasingOld = 0,
Increasing = 1,
@@ -27,6 +29,31 @@ enum class SubmissionMode : u32 {
IncreaseOnce = 5
};
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
+// So the values you see in docs might be multiplied by 4.
+enum class BufferMethods : u32 {
+ BindObject = 0x0,
+ Nop = 0x2,
+ SemaphoreAddressHigh = 0x4,
+ SemaphoreAddressLow = 0x5,
+ SemaphoreSequence = 0x6,
+ SemaphoreTrigger = 0x7,
+ NotifyIntr = 0x8,
+ WrcacheFlush = 0x9,
+ Unk28 = 0xA,
+ UnkCacheFlush = 0xB,
+ RefCnt = 0x14,
+ SemaphoreAcquire = 0x1A,
+ SemaphoreRelease = 0x1B,
+ FenceValue = 0x1C,
+ FenceAction = 0x1D,
+ WaitForInterrupt = 0x1E,
+ Unk7c = 0x1F,
+ Yield = 0x20,
+ NonPullerMethods = 0x40,
+};
+
struct CommandListHeader {
union {
u64 raw;
@@ -49,9 +76,23 @@ union CommandHeader {
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
-class GPU;
+inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, SubmissionMode mode) {
+ CommandHeader result{};
+ result.method.Assign(static_cast<u32>(method));
+ result.arg_count.Assign(arg_count);
+ result.mode.Assign(mode);
+ return result;
+}
-using CommandList = std::vector<Tegra::CommandListHeader>;
+struct CommandList final {
+ CommandList() = default;
+ explicit CommandList(std::size_t size) : command_lists(size) {}
+ explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_)
+ : prefetch_command_list{std::move(prefetch_command_list_)} {}
+
+ std::vector<CommandListHeader> command_lists;
+ std::vector<CommandHeader> prefetch_command_list;
+};
/**
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -60,9 +101,9 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
* See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
* details on this implementation.
*/
-class DmaPusher {
+class DmaPusher final {
public:
- explicit DmaPusher(Core::System& system, GPU& gpu);
+ explicit DmaPusher(Core::System& system_, GPU& gpu_);
~DmaPusher();
void Push(CommandList&& entries) {
@@ -71,7 +112,7 @@ public:
void DispatchCalls();
- void BindSubchannel(Tegra::Engines::EngineInterface* engine, u32 subchannel_id) {
+ void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) {
subchannels[subchannel_id] = engine;
}
@@ -104,7 +145,7 @@ private:
bool ib_enable{true}; ///< IB mode enabled
- std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{};
+ std::array<Engines::EngineInterface*, max_subchannels> subchannels{};
GPU& gpu;
Core::System& system;
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index d44ad0cd8..71d7e1473 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -11,16 +11,16 @@
namespace Tegra::Engines::Upload {
-State::State(MemoryManager& memory_manager, Registers& regs)
- : regs{regs}, memory_manager{memory_manager} {}
+State::State(MemoryManager& memory_manager_, Registers& regs_)
+ : regs{regs_}, memory_manager{memory_manager_} {}
State::~State() = default;
-void State::ProcessExec(const bool is_linear) {
+void State::ProcessExec(const bool is_linear_) {
write_offset = 0;
copy_size = regs.line_length_in * regs.line_count;
inner_buffer.resize(copy_size);
- this->is_linear = is_linear;
+ is_linear = is_linear_;
}
void State::ProcessData(const u32 data, const bool is_last_call) {
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 462da419e..1c7f1effa 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -54,10 +54,10 @@ struct Registers {
class State {
public:
- State(MemoryManager& memory_manager, Registers& regs);
+ explicit State(MemoryManager& memory_manager_, Registers& regs_);
~State();
- void ProcessExec(bool is_linear);
+ void ProcessExec(bool is_linear_);
void ProcessData(u32 data, bool is_last_call);
private:
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 6e50661a3..0f640fdae 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,90 +10,58 @@
namespace Tegra::Engines {
-Fermi2D::Fermi2D() = default;
+Fermi2D::Fermi2D() {
+ // Nvidia's OpenGL driver seems to assume these values
+ regs.src.depth = 1;
+ regs.dst.depth = 1;
+}
Fermi2D::~Fermi2D() = default;
-void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
- rasterizer = &rasterizer_;
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
}
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Fermi2D register, increase the size of the Regs structure");
-
regs.reg_array[method] = method_argument;
- switch (method) {
- // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
- // so trigger on the second 32-bit write.
- case FERMI2D_REG_INDEX(blit_src_y) + 1: {
- HandleSurfaceCopy();
- break;
- }
+ if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
+ Blit();
}
}
void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
- for (std::size_t i = 0; i < amount; i++) {
- CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ for (u32 i = 0; i < amount; ++i) {
+ CallMethod(method, base_start[i], methods_pending - i <= 1);
}
}
-static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
- const u32 line_a = src_2 - src_1;
- const u32 line_b = dst_2 - dst_1;
- const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
- return {line_b - (excess * line_b) / line_a, excess};
-}
+void Fermi2D::Blit() {
+ LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
+ regs.src.Address(), regs.dst.Address());
-void Fermi2D::HandleSurfaceCopy() {
- LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}",
- static_cast<u32>(regs.operation));
-
- // TODO(Subv): Only raw copies are implemented.
- ASSERT(regs.operation == Operation::SrcCopy);
-
- const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
- const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
- u32 src_blit_x2, src_blit_y2;
- if (regs.blit_control.origin == Origin::Corner) {
- src_blit_x2 =
- static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
- src_blit_y2 =
- static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
- } else {
- src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
- src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
- }
- u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
- u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
- const auto [new_dst_w, src_excess_x] =
- DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
- const auto [new_dst_h, src_excess_y] =
- DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
- dst_blit_x2 = new_dst_w + regs.blit_dst_x;
- src_blit_x2 = src_blit_x2 - src_excess_x;
- dst_blit_y2 = new_dst_h + regs.blit_dst_y;
- src_blit_y2 = src_blit_y2 - src_excess_y;
- const auto [new_src_w, dst_excess_x] =
- DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
- const auto [new_src_h, dst_excess_y] =
- DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
- src_blit_x2 = new_src_w + src_blit_x1;
- dst_blit_x2 = dst_blit_x2 - dst_excess_x;
- src_blit_y2 = new_src_h + src_blit_y1;
- dst_blit_y2 = dst_blit_y2 - dst_excess_y;
- const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
- const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
- dst_blit_y2};
- Config copy_config;
- copy_config.operation = regs.operation;
- copy_config.filter = regs.blit_control.filter;
- copy_config.src_rect = src_rect;
- copy_config.dst_rect = dst_rect;
+ UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
+ UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
+ UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
+ UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
+ UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
- if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
+ const auto& args = regs.pixels_from_memory;
+ const Config config{
+ .operation = regs.operation,
+ .filter = args.sample_mode.filter,
+ .dst_x0 = args.dst_x0,
+ .dst_y0 = args.dst_y0,
+ .dst_x1 = args.dst_x0 + args.dst_width,
+ .dst_y1 = args.dst_y0 + args.dst_height,
+ .src_x0 = static_cast<s32>(args.src_x0 >> 32),
+ .src_y0 = static_cast<s32>(args.src_y0 >> 32),
+ .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
+ .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
+ };
+ if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
UNIMPLEMENTED();
}
}
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 213abfaae..c808a577d 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -38,7 +38,7 @@ public:
~Fermi2D();
/// Binds a rasterizer to this engine.
- void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
@@ -53,8 +53,8 @@ public:
};
enum class Filter : u32 {
- PointSample = 0, // Nearest
- Linear = 1,
+ Point = 0,
+ Bilinear = 1,
};
enum class Operation : u32 {
@@ -67,88 +67,235 @@ public:
BlendPremult = 6,
};
- struct Regs {
- static constexpr std::size_t NUM_REGS = 0x258;
+ enum class MemoryLayout : u32 {
+ BlockLinear = 0,
+ Pitch = 1,
+ };
- struct Surface {
- RenderTargetFormat format;
- BitField<0, 1, u32> linear;
- union {
- BitField<0, 4, u32> block_width;
- BitField<4, 4, u32> block_height;
- BitField<8, 4, u32> block_depth;
- };
- u32 depth;
- u32 layer;
- u32 pitch;
- u32 width;
- u32 height;
- u32 address_high;
- u32 address_low;
-
- GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
-
- u32 BlockWidth() const {
- return block_width.Value();
- }
-
- u32 BlockHeight() const {
- return block_height.Value();
- }
-
- u32 BlockDepth() const {
- return block_depth.Value();
- }
- };
- static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
+ enum class CpuIndexWrap : u32 {
+ Wrap = 0,
+ NoWrap = 1,
+ };
+ struct Surface {
+ RenderTargetFormat format;
+ MemoryLayout linear;
union {
- struct {
- INSERT_UNION_PADDING_WORDS(0x80);
+ BitField<0, 4, u32> block_width;
+ BitField<4, 4, u32> block_height;
+ BitField<8, 4, u32> block_depth;
+ };
+ u32 depth;
+ u32 layer;
+ u32 pitch;
+ u32 width;
+ u32 height;
+ u32 addr_upper;
+ u32 addr_lower;
+
+ [[nodiscard]] constexpr GPUVAddr Address() const noexcept {
+ return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
+ }
+ };
+ static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
- Surface dst;
+ enum class SectorPromotion : u32 {
+ NoPromotion = 0,
+ PromoteTo2V = 1,
+ PromoteTo2H = 2,
+ PromoteTo4 = 3,
+ };
- INSERT_UNION_PADDING_WORDS(2);
+ enum class NumTpcs : u32 {
+ All = 0,
+ One = 1,
+ };
- Surface src;
+ enum class RenderEnableMode : u32 {
+ False = 0,
+ True = 1,
+ Conditional = 2,
+ RenderIfEqual = 3,
+ RenderIfNotEqual = 4,
+ };
- INSERT_UNION_PADDING_WORDS(0x15);
+ enum class ColorKeyFormat : u32 {
+ A16R56G6B5 = 0,
+ A1R5G55B5 = 1,
+ A8R8G8B8 = 2,
+ A2R10G10B10 = 3,
+ Y8 = 4,
+ Y16 = 5,
+ Y32 = 6,
+ };
- Operation operation;
+ union Beta4 {
+ BitField<0, 8, u32> b;
+ BitField<8, 8, u32> g;
+ BitField<16, 8, u32> r;
+ BitField<24, 8, u32> a;
+ };
- INSERT_UNION_PADDING_WORDS(0x177);
+ struct Point {
+ u32 x;
+ u32 y;
+ };
+ enum class PatternSelect : u32 {
+ MonoChrome8x8 = 0,
+ MonoChrome64x1 = 1,
+ MonoChrome1x64 = 2,
+ Color = 3,
+ };
+
+ enum class NotifyType : u32 {
+ WriteOnly = 0,
+ WriteThenAwaken = 1,
+ };
+
+ enum class MonochromePatternColorFormat : u32 {
+ A8X8R8G6B5 = 0,
+ A1R5G5B5 = 1,
+ A8R8G8B8 = 2,
+ A8Y8 = 3,
+ A8X8Y16 = 4,
+ Y32 = 5,
+ };
+
+ enum class MonochromePatternFormat : u32 {
+ CGA6_M1 = 0,
+ LE_M1 = 1,
+ };
+
+ union Regs {
+ static constexpr std::size_t NUM_REGS = 0x258;
+ struct {
+ u32 object;
+ INSERT_PADDING_WORDS_NOINIT(0x3F);
+ u32 no_operation;
+ NotifyType notify;
+ INSERT_PADDING_WORDS_NOINIT(0x2);
+ u32 wait_for_idle;
+ INSERT_PADDING_WORDS_NOINIT(0xB);
+ u32 pm_trigger;
+ INSERT_PADDING_WORDS_NOINIT(0xF);
+ u32 context_dma_notify;
+ u32 dst_context_dma;
+ u32 src_context_dma;
+ u32 semaphore_context_dma;
+ INSERT_PADDING_WORDS_NOINIT(0x1C);
+ Surface dst;
+ CpuIndexWrap pixels_from_cpu_index_wrap;
+ u32 kind2d_check_enable;
+ Surface src;
+ SectorPromotion pixels_from_memory_sector_promotion;
+ INSERT_PADDING_WORDS_NOINIT(0x1);
+ NumTpcs num_tpcs;
+ u32 render_enable_addr_upper;
+ u32 render_enable_addr_lower;
+ RenderEnableMode render_enable_mode;
+ INSERT_PADDING_WORDS_NOINIT(0x4);
+ u32 clip_x0;
+ u32 clip_y0;
+ u32 clip_width;
+ u32 clip_height;
+ BitField<0, 1, u32> clip_enable;
+ BitField<0, 3, ColorKeyFormat> color_key_format;
+ u32 color_key;
+ BitField<0, 1, u32> color_key_enable;
+ BitField<0, 8, u32> rop;
+ u32 beta1;
+ Beta4 beta4;
+ Operation operation;
+ union {
+ BitField<0, 6, u32> x;
+ BitField<8, 6, u32> y;
+ } pattern_offset;
+ BitField<0, 2, PatternSelect> pattern_select;
+ INSERT_PADDING_WORDS_NOINIT(0xC);
+ struct {
+ BitField<0, 3, MonochromePatternColorFormat> color_format;
+ BitField<0, 1, MonochromePatternFormat> format;
+ u32 color0;
+ u32 color1;
+ u32 pattern0;
+ u32 pattern1;
+ } monochrome_pattern;
+ struct {
+ std::array<u32, 0x40> X8R8G8B8;
+ std::array<u32, 0x20> R5G6B5;
+ std::array<u32, 0x20> X1R5G5B5;
+ std::array<u32, 0x10> Y8;
+ } color_pattern;
+ INSERT_PADDING_WORDS_NOINIT(0x10);
+ struct {
+ u32 prim_mode;
+ u32 prim_color_format;
+ u32 prim_color;
+ u32 line_tie_break_bits;
+ INSERT_PADDING_WORDS_NOINIT(0x14);
+ u32 prim_point_xy;
+ INSERT_PADDING_WORDS_NOINIT(0x7);
+ std::array<Point, 0x40> prim_point;
+ } render_solid;
+ struct {
+ u32 data_type;
+ u32 color_format;
+ u32 index_format;
+ u32 mono_format;
+ u32 wrap;
+ u32 color0;
+ u32 color1;
+ u32 mono_opacity;
+ INSERT_PADDING_WORDS_NOINIT(0x6);
+ u32 src_width;
+ u32 src_height;
+ u32 dx_du_frac;
+ u32 dx_du_int;
+ u32 dx_dv_frac;
+ u32 dy_dv_int;
+ u32 dst_x0_frac;
+ u32 dst_x0_int;
+ u32 dst_y0_frac;
+ u32 dst_y0_int;
+ u32 data;
+ } pixels_from_cpu;
+ INSERT_PADDING_WORDS_NOINIT(0x3);
+ u32 big_endian_control;
+ INSERT_PADDING_WORDS_NOINIT(0x3);
+ struct {
+ BitField<0, 3, u32> block_shape;
+ BitField<0, 5, u32> corral_size;
+ BitField<0, 1, u32> safe_overlap;
union {
- u32 raw;
BitField<0, 1, Origin> origin;
BitField<4, 1, Filter> filter;
- } blit_control;
-
- INSERT_UNION_PADDING_WORDS(0x8);
-
- u32 blit_dst_x;
- u32 blit_dst_y;
- u32 blit_dst_width;
- u32 blit_dst_height;
- u64 blit_du_dx;
- u64 blit_dv_dy;
- u64 blit_src_x;
- u64 blit_src_y;
-
- INSERT_UNION_PADDING_WORDS(0x21);
- };
- std::array<u32, NUM_REGS> reg_array;
+ } sample_mode;
+ INSERT_PADDING_WORDS_NOINIT(0x8);
+ s32 dst_x0;
+ s32 dst_y0;
+ s32 dst_width;
+ s32 dst_height;
+ s64 du_dx;
+ s64 dv_dy;
+ s64 src_x0;
+ s64 src_y0;
+ } pixels_from_memory;
};
+ std::array<u32, NUM_REGS> reg_array;
} regs{};
struct Config {
Operation operation;
Filter filter;
- Common::Rectangle<u32> src_rect;
- Common::Rectangle<u32> dst_rect;
+ s32 dst_x0;
+ s32 dst_y0;
+ s32 dst_x1;
+ s32 dst_y1;
+ s32 src_x0;
+ s32 src_y0;
+ s32 src_x1;
+ s32 src_y1;
};
private:
@@ -156,25 +303,49 @@ private:
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
- void HandleSurfaceCopy();
+ void Blit();
};
#define ASSERT_REG_POSITION(field_name, position) \
- static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \
+ static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
"Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(dst, 0x80);
-ASSERT_REG_POSITION(src, 0x8C);
-ASSERT_REG_POSITION(operation, 0xAB);
-ASSERT_REG_POSITION(blit_control, 0x223);
-ASSERT_REG_POSITION(blit_dst_x, 0x22c);
-ASSERT_REG_POSITION(blit_dst_y, 0x22d);
-ASSERT_REG_POSITION(blit_dst_width, 0x22e);
-ASSERT_REG_POSITION(blit_dst_height, 0x22f);
-ASSERT_REG_POSITION(blit_du_dx, 0x230);
-ASSERT_REG_POSITION(blit_dv_dy, 0x232);
-ASSERT_REG_POSITION(blit_src_x, 0x234);
-ASSERT_REG_POSITION(blit_src_y, 0x236);
+ASSERT_REG_POSITION(object, 0x0);
+ASSERT_REG_POSITION(no_operation, 0x100);
+ASSERT_REG_POSITION(notify, 0x104);
+ASSERT_REG_POSITION(wait_for_idle, 0x110);
+ASSERT_REG_POSITION(pm_trigger, 0x140);
+ASSERT_REG_POSITION(context_dma_notify, 0x180);
+ASSERT_REG_POSITION(dst_context_dma, 0x184);
+ASSERT_REG_POSITION(src_context_dma, 0x188);
+ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
+ASSERT_REG_POSITION(dst, 0x200);
+ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
+ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
+ASSERT_REG_POSITION(src, 0x230);
+ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
+ASSERT_REG_POSITION(num_tpcs, 0x260);
+ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
+ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
+ASSERT_REG_POSITION(clip_x0, 0x280);
+ASSERT_REG_POSITION(clip_y0, 0x284);
+ASSERT_REG_POSITION(clip_width, 0x288);
+ASSERT_REG_POSITION(clip_height, 0x28c);
+ASSERT_REG_POSITION(clip_enable, 0x290);
+ASSERT_REG_POSITION(color_key_format, 0x294);
+ASSERT_REG_POSITION(color_key, 0x298);
+ASSERT_REG_POSITION(rop, 0x2A0);
+ASSERT_REG_POSITION(beta1, 0x2A4);
+ASSERT_REG_POSITION(beta4, 0x2A8);
+ASSERT_REG_POSITION(operation, 0x2AC);
+ASSERT_REG_POSITION(pattern_offset, 0x2B0);
+ASSERT_REG_POSITION(pattern_select, 0x2B4);
+ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
+ASSERT_REG_POSITION(color_pattern, 0x300);
+ASSERT_REG_POSITION(render_solid, 0x580);
+ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
+ASSERT_REG_POSITION(big_endian_control, 0x870);
+ASSERT_REG_POSITION(pixels_from_memory, 0x880);
#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 898370739..a9b75091e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage
KeplerCompute::~KeplerCompute() = default;
-void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
- rasterizer = &rasterizer_;
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
}
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
@@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
upload_state.ProcessData(method_argument, is_last_call);
if (is_last_call) {
- system.GPU().Maxwell3D().OnMemoryWrite();
}
break;
}
@@ -58,24 +57,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
}
}
-Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
- const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
- ASSERT(cbuf_mask[regs.tex_cb_index]);
-
- const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
- ASSERT(texinfo.Address() != 0);
-
- const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
- ASSERT(address < texinfo.Address() + texinfo.size);
-
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
- return GetTextureInfo(tex_handle);
-}
-
-Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
- return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
-}
-
u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
ASSERT(stage == ShaderType::Compute);
const auto& buffer = launch_description.const_buffer_config[const_buffer];
@@ -98,9 +79,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
const Texture::TextureHandle tex_handle{handle};
- const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
- SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
- result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+ const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
+ const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
+
+ SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
+ result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
return result;
}
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7f2500aab..7c40cba38 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -46,7 +46,7 @@ public:
~KeplerCompute();
/// Binds a rasterizer to this engine.
- void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
static constexpr std::size_t NumConstBuffers = 8;
@@ -55,7 +55,7 @@ public:
union {
struct {
- INSERT_UNION_PADDING_WORDS(0x60);
+ INSERT_PADDING_WORDS_NOINIT(0x60);
Upload::Registers upload;
@@ -67,7 +67,7 @@ public:
u32 data_upload;
- INSERT_UNION_PADDING_WORDS(0x3F);
+ INSERT_PADDING_WORDS_NOINIT(0x3F);
struct {
u32 address;
@@ -76,11 +76,11 @@ public:
}
} launch_desc_loc;
- INSERT_UNION_PADDING_WORDS(0x1);
+ INSERT_PADDING_WORDS_NOINIT(0x1);
u32 launch;
- INSERT_UNION_PADDING_WORDS(0x4A7);
+ INSERT_PADDING_WORDS_NOINIT(0x4A7);
struct {
u32 address_high;
@@ -92,7 +92,7 @@ public:
}
} tsc;
- INSERT_UNION_PADDING_WORDS(0x3);
+ INSERT_PADDING_WORDS_NOINIT(0x3);
struct {
u32 address_high;
@@ -104,7 +104,7 @@ public:
}
} tic;
- INSERT_UNION_PADDING_WORDS(0x22);
+ INSERT_PADDING_WORDS_NOINIT(0x22);
struct {
u32 address_high;
@@ -115,11 +115,11 @@ public:
}
} code_loc;
- INSERT_UNION_PADDING_WORDS(0x3FE);
+ INSERT_PADDING_WORDS_NOINIT(0x3FE);
u32 tex_cb_index;
- INSERT_UNION_PADDING_WORDS(0x374);
+ INSERT_PADDING_WORDS_NOINIT(0x374);
};
std::array<u32, NUM_REGS> reg_array;
};
@@ -209,11 +209,6 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
- Texture::FullTextureInfo GetTexture(std::size_t offset) const;
-
- /// Given a texture handle, returns the TSC and TIC entries.
- Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
-
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index dc71b2eec..560551157 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,8 +14,8 @@
namespace Tegra::Engines {
-KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
- : system{system}, upload_state{memory_manager, regs.upload} {}
+KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
+ : system{system_}, upload_state{memory_manager, regs.upload} {}
KeplerMemory::~KeplerMemory() = default;
@@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
case KEPLERMEMORY_REG_INDEX(data): {
upload_state.ProcessData(method_argument, is_last_call);
if (is_last_call) {
- system.GPU().Maxwell3D().OnMemoryWrite();
}
break;
}
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 5b7f71a00..19808a5c6 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -35,7 +35,7 @@ namespace Tegra::Engines {
class KeplerMemory final : public EngineInterface {
public:
- KeplerMemory(Core::System& system, MemoryManager& memory_manager);
+ explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
@@ -50,7 +50,7 @@ public:
union {
struct {
- INSERT_UNION_PADDING_WORDS(0x60);
+ INSERT_PADDING_WORDS_NOINIT(0x60);
Upload::Registers upload;
@@ -62,7 +62,7 @@ public:
u32 data;
- INSERT_UNION_PADDING_WORDS(0x11);
+ INSERT_PADDING_WORDS_NOINIT(0x11);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 33854445f..75517a4f7 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <cinttypes>
#include <cstring>
#include <optional>
#include "common/assert.h"
@@ -31,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
Maxwell3D::~Maxwell3D() = default;
-void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
- rasterizer = &rasterizer_;
+void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
}
void Maxwell3D::InitializeRegisterDefaults() {
@@ -124,6 +123,115 @@ void Maxwell3D::InitializeRegisterDefaults() {
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
}
+void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
+ if (executing_macro == 0) {
+ // A macro call must begin by writing the macro method's register, not its argument.
+ ASSERT_MSG((method % 2) == 0,
+ "Can't start macro execution by writing to the ARGS register");
+ executing_macro = method;
+ }
+
+ macro_params.insert(macro_params.end(), base_start, base_start + amount);
+
+ // Call the macro when there are no more parameters in the command buffer
+ if (is_last_call) {
+ CallMacroMethod(executing_macro, macro_params);
+ macro_params.clear();
+ }
+}
+
+u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
+ // Keep track of the register value in shadow_state when requested.
+ const auto control = shadow_state.shadow_ram_control;
+ if (control == Regs::ShadowRamControl::Track ||
+ control == Regs::ShadowRamControl::TrackWithFilter) {
+ shadow_state.reg_array[method] = argument;
+ return argument;
+ }
+ if (control == Regs::ShadowRamControl::Replay) {
+ return shadow_state.reg_array[method];
+ }
+ return argument;
+}
+
+void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
+ if (regs.reg_array[method] == argument) {
+ return;
+ }
+ regs.reg_array[method] = argument;
+
+ for (const auto& table : dirty.tables) {
+ dirty.flags[table[method]] = true;
+ }
+}
+
+void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument,
+ bool is_last_call) {
+ switch (method) {
+ case MAXWELL3D_REG_INDEX(wait_for_idle):
+ return rasterizer->WaitForIdle();
+ case MAXWELL3D_REG_INDEX(shadow_ram_control):
+ shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
+ return;
+ case MAXWELL3D_REG_INDEX(macros.data):
+ return macro_engine->AddCode(regs.macros.upload_address, argument);
+ case MAXWELL3D_REG_INDEX(macros.bind):
+ return ProcessMacroBind(argument);
+ case MAXWELL3D_REG_INDEX(firmware[4]):
+ return ProcessFirmwareCall4();
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 1:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 2:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 3:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 4:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 5:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 6:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 7:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 8:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 9:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 10:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 11:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 12:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
+ return StartCBData(method);
+ case MAXWELL3D_REG_INDEX(cb_bind[0]):
+ return ProcessCBBind(0);
+ case MAXWELL3D_REG_INDEX(cb_bind[1]):
+ return ProcessCBBind(1);
+ case MAXWELL3D_REG_INDEX(cb_bind[2]):
+ return ProcessCBBind(2);
+ case MAXWELL3D_REG_INDEX(cb_bind[3]):
+ return ProcessCBBind(3);
+ case MAXWELL3D_REG_INDEX(cb_bind[4]):
+ return ProcessCBBind(4);
+ case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
+ return DrawArrays();
+ case MAXWELL3D_REG_INDEX(clear_buffers):
+ return ProcessClearBuffers();
+ case MAXWELL3D_REG_INDEX(query.query_get):
+ return ProcessQueryGet();
+ case MAXWELL3D_REG_INDEX(condition.mode):
+ return ProcessQueryCondition();
+ case MAXWELL3D_REG_INDEX(counter_reset):
+ return ProcessCounterReset();
+ case MAXWELL3D_REG_INDEX(sync_info):
+ return ProcessSyncPoint();
+ case MAXWELL3D_REG_INDEX(exec_upload):
+ return upload_state.ProcessExec(regs.exec_upload.linear != 0);
+ case MAXWELL3D_REG_INDEX(data_upload):
+ upload_state.ProcessData(argument, is_last_call);
+ if (is_last_call) {
+ }
+ return;
+ case MAXWELL3D_REG_INDEX(fragment_barrier):
+ return rasterizer->FragmentBarrier();
+ case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
+ return rasterizer->TiledCacheBarrier();
+ }
+}
+
void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
// Reset the current macro.
executing_macro = 0;
@@ -157,142 +265,16 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method >= MacroRegistersStart) {
- // We're trying to execute a macro
- if (executing_macro == 0) {
- // A macro call must begin by writing the macro method's register, not its argument.
- ASSERT_MSG((method % 2) == 0,
- "Can't start macro execution by writing to the ARGS register");
- executing_macro = method;
- }
-
- macro_params.push_back(method_argument);
-
- // Call the macro when there are no more parameters in the command buffer
- if (is_last_call) {
- CallMacroMethod(executing_macro, macro_params);
- macro_params.clear();
- }
+ ProcessMacro(method, &method_argument, 1, is_last_call);
return;
}
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
- u32 arg = method_argument;
- // Keep track of the register value in shadow_state when requested.
- if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track ||
- shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) {
- shadow_state.reg_array[method] = arg;
- } else if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Replay) {
- arg = shadow_state.reg_array[method];
- }
-
- if (regs.reg_array[method] != arg) {
- regs.reg_array[method] = arg;
-
- for (const auto& table : dirty.tables) {
- dirty.flags[table[method]] = true;
- }
- }
-
- switch (method) {
- case MAXWELL3D_REG_INDEX(wait_for_idle): {
- rasterizer->WaitForIdle();
- break;
- }
- case MAXWELL3D_REG_INDEX(shadow_ram_control): {
- shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_argument);
- break;
- }
- case MAXWELL3D_REG_INDEX(macros.data): {
- macro_engine->AddCode(regs.macros.upload_address, arg);
- break;
- }
- case MAXWELL3D_REG_INDEX(macros.bind): {
- ProcessMacroBind(arg);
- break;
- }
- case MAXWELL3D_REG_INDEX(firmware[4]): {
- ProcessFirmwareCall4();
- break;
- }
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
- StartCBData(method);
- break;
- }
- case MAXWELL3D_REG_INDEX(cb_bind[0]): {
- ProcessCBBind(0);
- break;
- }
- case MAXWELL3D_REG_INDEX(cb_bind[1]): {
- ProcessCBBind(1);
- break;
- }
- case MAXWELL3D_REG_INDEX(cb_bind[2]): {
- ProcessCBBind(2);
- break;
- }
- case MAXWELL3D_REG_INDEX(cb_bind[3]): {
- ProcessCBBind(3);
- break;
- }
- case MAXWELL3D_REG_INDEX(cb_bind[4]): {
- ProcessCBBind(4);
- break;
- }
- case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): {
- DrawArrays();
- break;
- }
- case MAXWELL3D_REG_INDEX(clear_buffers): {
- ProcessClearBuffers();
- break;
- }
- case MAXWELL3D_REG_INDEX(query.query_get): {
- ProcessQueryGet();
- break;
- }
- case MAXWELL3D_REG_INDEX(condition.mode): {
- ProcessQueryCondition();
- break;
- }
- case MAXWELL3D_REG_INDEX(counter_reset): {
- ProcessCounterReset();
- break;
- }
- case MAXWELL3D_REG_INDEX(sync_info): {
- ProcessSyncPoint();
- break;
- }
- case MAXWELL3D_REG_INDEX(exec_upload): {
- upload_state.ProcessExec(regs.exec_upload.linear != 0);
- break;
- }
- case MAXWELL3D_REG_INDEX(data_upload): {
- upload_state.ProcessData(arg, is_last_call);
- if (is_last_call) {
- OnMemoryWrite();
- }
- break;
- }
- default:
- break;
- }
+ const u32 argument = ProcessShadowRam(method, method_argument);
+ ProcessDirtyRegisters(method, argument);
+ ProcessMethodCall(method, argument, method_argument, is_last_call);
}
void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
@@ -300,50 +282,33 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method >= MacroRegistersStart) {
- // We're trying to execute a macro
- if (executing_macro == 0) {
- // A macro call must begin by writing the macro method's register, not its argument.
- ASSERT_MSG((method % 2) == 0,
- "Can't start macro execution by writing to the ARGS register");
- executing_macro = method;
- }
-
- for (std::size_t i = 0; i < amount; i++) {
- macro_params.push_back(base_start[i]);
- }
-
- // Call the macro when there are no more parameters in the command buffer
- if (amount == methods_pending) {
- CallMacroMethod(executing_macro, macro_params);
- macro_params.clear();
- }
+ ProcessMacro(method, base_start, amount, amount == methods_pending);
return;
}
switch (method) {
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 1:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 2:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 3:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 4:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 5:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 6:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 7:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 8:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 9:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 10:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 11:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 12:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
ProcessCBMultiData(method, base_start, amount);
break;
- }
- default: {
+ default:
for (std::size_t i = 0; i < amount; i++) {
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
}
- }
+ break;
}
}
@@ -396,7 +361,7 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
}
void Maxwell3D::FlushMMEInlineDraw() {
- LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
@@ -541,8 +506,7 @@ void Maxwell3D::ProcessCounterReset() {
rasterizer->ResetCounter(QueryType::SamplesPassed);
break;
default:
- LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
- static_cast<int>(regs.counter_reset));
+ LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.counter_reset);
break;
}
}
@@ -557,7 +521,7 @@ void Maxwell3D::ProcessSyncPoint() {
}
void Maxwell3D::DrawArrays() {
- LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
@@ -595,27 +559,28 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
return 0;
case Regs::QuerySelect::SamplesPassed:
// Deferred.
- rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+ rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed,
system.GPU().GetTicks());
- return {};
+ return std::nullopt;
default:
LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
- static_cast<u32>(regs.query.query_get.select.Value()));
+ regs.query.query_get.select.Value());
return 1;
}
}
-void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
+void Maxwell3D::ProcessCBBind(size_t stage_index) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
- auto& shader = state.shader_stages[stage_index];
- auto& bind_data = regs.cb_bind[stage_index];
-
- ASSERT(bind_data.index < Regs::MaxConstBuffers);
- auto& buffer = shader.const_buffers[bind_data.index];
-
+ const auto& bind_data = regs.cb_bind[stage_index];
+ auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index];
buffer.enabled = bind_data.valid.Value() != 0;
buffer.address = regs.const_buffer.BufferAddress();
buffer.size = regs.const_buffer.cb_size;
+
+ const bool is_enabled = bind_data.valid.Value() != 0;
+ const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0;
+ const u32 size = is_enabled ? regs.const_buffer.cb_size : 0;
+ rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
}
void Maxwell3D::ProcessCBData(u32 value) {
@@ -627,7 +592,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
}
void Maxwell3D::StartCBData(u32 method) {
- constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
+ constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
cb_data_state.start_pos = regs.const_buffer.cb_pos;
cb_data_state.id = method - first_cb_data;
cb_data_state.current = method;
@@ -640,7 +605,7 @@ void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount
if (cb_data_state.current != null_cb_data) {
FinishCBData();
}
- constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
+ constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
cb_data_state.start_pos = regs.const_buffer.cb_pos;
cb_data_state.id = method - first_cb_data;
cb_data_state.current = method;
@@ -670,14 +635,13 @@ void Maxwell3D::FinishCBData() {
const u32 id = cb_data_state.id;
memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
- OnMemoryWrite();
cb_data_state.id = null_cb_data;
cb_data_state.current = null_cb_data;
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
- const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
+ const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -686,43 +650,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
}
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
- const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
+ const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
Texture::TSCEntry tsc_entry;
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
return tsc_entry;
}
-Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
- return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
-}
-
-Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
- const auto stage_index = static_cast<std::size_t>(stage);
- const auto& shader = state.shader_stages[stage_index];
- const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
- ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
-
- const GPUVAddr tex_info_address =
- tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
-
- ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
-
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
-
- return GetTextureInfo(tex_handle);
-}
-
u32 Maxwell3D::GetRegisterValue(u32 method) const {
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
return regs.reg_array[method];
}
void Maxwell3D::ProcessClearBuffers() {
- ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
- regs.clear_buffers.R == regs.clear_buffers.B &&
- regs.clear_buffers.R == regs.clear_buffers.A);
-
rasterizer->Clear();
}
@@ -730,9 +670,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse
ASSERT(stage != ShaderType::Compute);
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& buffer = shader_stage.const_buffers[const_buffer];
- u32 result;
- std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
- return result;
+ return memory_manager.Read<u32>(buffer.address + offset);
}
SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
@@ -750,9 +688,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
const Texture::TextureHandle tex_handle{handle};
- const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
- SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
- result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+ const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
+ const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
+
+ SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
+ result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
return result;
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index bc289c55d..ffed42a29 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -55,7 +55,7 @@ public:
~Maxwell3D();
/// Binds a rasterizer to this engine.
- void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -438,16 +438,6 @@ public:
DecrWrapOGL = 0x8508,
};
- enum class MemoryLayout : u32 {
- Linear = 0,
- BlockLinear = 1,
- };
-
- enum class InvMemoryLayout : u32 {
- BlockLinear = 0,
- Linear = 1,
- };
-
enum class CounterReset : u32 {
SampleCnt = 0x01,
Unk02 = 0x02,
@@ -546,7 +536,7 @@ public:
Equation equation_a;
Factor factor_source_a;
Factor factor_dest_a;
- INSERT_UNION_PADDING_WORDS(1);
+ INSERT_PADDING_WORDS_NOINIT(1);
};
enum class TessellationPrimitive : u32 {
@@ -589,26 +579,36 @@ public:
NegativeW = 7,
};
+ enum class SamplerIndex : u32 {
+ Independently = 0,
+ ViaHeaderIndex = 1,
+ };
+
+ struct TileMode {
+ union {
+ BitField<0, 4, u32> block_width;
+ BitField<4, 4, u32> block_height;
+ BitField<8, 4, u32> block_depth;
+ BitField<12, 1, u32> is_pitch_linear;
+ BitField<16, 1, u32> is_3d;
+ };
+ };
+ static_assert(sizeof(TileMode) == 4);
+
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
u32 width;
u32 height;
Tegra::RenderTargetFormat format;
+ TileMode tile_mode;
union {
- BitField<0, 3, u32> block_width;
- BitField<4, 3, u32> block_height;
- BitField<8, 3, u32> block_depth;
- BitField<12, 1, InvMemoryLayout> type;
- BitField<16, 1, u32> is_3d;
- } memory_layout;
- union {
- BitField<0, 16, u32> layers;
+ BitField<0, 16, u32> depth;
BitField<16, 1, u32> volume;
};
u32 layer_stride;
u32 base_layer;
- INSERT_UNION_PADDING_WORDS(7);
+ INSERT_PADDING_WORDS_NOINIT(7);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
@@ -640,7 +640,7 @@ public:
BitField<8, 3, ViewportSwizzle> z;
BitField<12, 3, ViewportSwizzle> w;
} swizzle;
- INSERT_UNION_PADDING_WORDS(1);
+ INSERT_PADDING_WORDS_NOINIT(1);
Common::Rectangle<f32> GetRect() const {
return {
@@ -700,7 +700,7 @@ public:
u32 address_low;
s32 buffer_size;
s32 buffer_offset;
- INSERT_UNION_PADDING_WORDS(3);
+ INSERT_PADDING_WORDS_NOINIT(3);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
@@ -713,7 +713,7 @@ public:
u32 stream;
u32 varying_count;
u32 stride;
- INSERT_UNION_PADDING_WORDS(1);
+ INSERT_PADDING_WORDS_NOINIT(1);
};
static_assert(sizeof(TransformFeedbackLayout) == 16);
@@ -731,7 +731,7 @@ public:
union {
struct {
- INSERT_UNION_PADDING_WORDS(0x44);
+ INSERT_PADDING_WORDS_NOINIT(0x44);
u32 wait_for_idle;
@@ -744,7 +744,7 @@ public:
ShadowRamControl shadow_ram_control;
- INSERT_UNION_PADDING_WORDS(0x16);
+ INSERT_PADDING_WORDS_NOINIT(0x16);
Upload::Registers upload;
struct {
@@ -755,7 +755,11 @@ public:
u32 data_upload;
- INSERT_UNION_PADDING_WORDS(0x44);
+ INSERT_PADDING_WORDS_NOINIT(0x16);
+
+ u32 force_early_fragment_tests;
+
+ INSERT_PADDING_WORDS_NOINIT(0x2D);
struct {
union {
@@ -765,7 +769,7 @@ public:
};
} sync_info;
- INSERT_UNION_PADDING_WORDS(0x15);
+ INSERT_PADDING_WORDS_NOINIT(0x15);
union {
BitField<0, 2, TessellationPrimitive> prim;
@@ -777,21 +781,21 @@ public:
std::array<f32, 4> tess_level_outer;
std::array<f32, 2> tess_level_inner;
- INSERT_UNION_PADDING_WORDS(0x10);
+ INSERT_PADDING_WORDS_NOINIT(0x10);
u32 rasterize_enable;
std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
- INSERT_UNION_PADDING_WORDS(0xC0);
+ INSERT_PADDING_WORDS_NOINIT(0xC0);
std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
- INSERT_UNION_PADDING_WORDS(0x1);
+ INSERT_PADDING_WORDS_NOINIT(0x1);
u32 tfb_enabled;
- INSERT_UNION_PADDING_WORDS(0x2E);
+ INSERT_PADDING_WORDS_NOINIT(0x2E);
std::array<RenderTargetConfig, NumRenderTargets> rt;
@@ -799,7 +803,7 @@ public:
std::array<ViewPort, NumViewports> viewports;
- INSERT_UNION_PADDING_WORDS(0x1D);
+ INSERT_PADDING_WORDS_NOINIT(0x1D);
struct {
u32 first;
@@ -811,16 +815,16 @@ public:
float clear_color[4];
float clear_depth;
- INSERT_UNION_PADDING_WORDS(0x3);
+ INSERT_PADDING_WORDS_NOINIT(0x3);
s32 clear_stencil;
- INSERT_UNION_PADDING_WORDS(0x2);
+ INSERT_PADDING_WORDS_NOINIT(0x2);
PolygonMode polygon_mode_front;
PolygonMode polygon_mode_back;
- INSERT_UNION_PADDING_WORDS(0x3);
+ INSERT_PADDING_WORDS_NOINIT(0x3);
u32 polygon_offset_point_enable;
u32 polygon_offset_line_enable;
@@ -828,46 +832,53 @@ public:
u32 patch_vertices;
- INSERT_UNION_PADDING_WORDS(0xC);
+ INSERT_PADDING_WORDS_NOINIT(0x4);
+
+ u32 fragment_barrier;
+
+ INSERT_PADDING_WORDS_NOINIT(0x7);
std::array<ScissorTest, NumViewports> scissor_test;
- INSERT_UNION_PADDING_WORDS(0x15);
+ INSERT_PADDING_WORDS_NOINIT(0x15);
s32 stencil_back_func_ref;
u32 stencil_back_mask;
u32 stencil_back_func_mask;
- INSERT_UNION_PADDING_WORDS(0xC);
+ INSERT_PADDING_WORDS_NOINIT(0x5);
+
+ u32 invalidate_texture_data_cache;
+
+ INSERT_PADDING_WORDS_NOINIT(0x1);
+
+ u32 tiled_cache_barrier;
+
+ INSERT_PADDING_WORDS_NOINIT(0x4);
u32 color_mask_common;
- INSERT_UNION_PADDING_WORDS(0x2);
+ INSERT_PADDING_WORDS_NOINIT(0x2);
f32 depth_bounds[2];
- INSERT_UNION_PADDING_WORDS(0x2);
+ INSERT_PADDING_WORDS_NOINIT(0x2);
u32 rt_separate_frag_data;
- INSERT_UNION_PADDING_WORDS(0x1);
+ INSERT_PADDING_WORDS_NOINIT(0x1);
u32 multisample_raster_enable;
u32 multisample_raster_samples;
std::array<u32, 4> multisample_sample_mask;
- INSERT_UNION_PADDING_WORDS(0x5);
+ INSERT_PADDING_WORDS_NOINIT(0x5);
struct {
u32 address_high;
u32 address_low;
Tegra::DepthFormat format;
- union {
- BitField<0, 4, u32> block_width;
- BitField<4, 4, u32> block_height;
- BitField<8, 4, u32> block_depth;
- BitField<20, 1, InvMemoryLayout> type;
- } memory_layout;
+ TileMode tile_mode;
u32 layer_stride;
GPUVAddr Address() const {
@@ -876,7 +887,18 @@ public:
}
} zeta;
- INSERT_UNION_PADDING_WORDS(0x41);
+ struct {
+ union {
+ BitField<0, 16, u32> x;
+ BitField<16, 16, u32> width;
+ };
+ union {
+ BitField<0, 16, u32> y;
+ BitField<16, 16, u32> height;
+ };
+ } render_area;
+
+ INSERT_PADDING_WORDS_NOINIT(0x3F);
union {
BitField<0, 4, u32> stencil;
@@ -885,24 +907,24 @@ public:
BitField<12, 4, u32> viewport;
} clear_flags;
- INSERT_UNION_PADDING_WORDS(0x10);
+ INSERT_PADDING_WORDS_NOINIT(0x10);
u32 fill_rectangle;
- INSERT_UNION_PADDING_WORDS(0x8);
+ INSERT_PADDING_WORDS_NOINIT(0x8);
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
std::array<MsaaSampleLocation, 4> multisample_sample_locations;
- INSERT_UNION_PADDING_WORDS(0x2);
+ INSERT_PADDING_WORDS_NOINIT(0x2);
union {
BitField<0, 1, u32> enable;
BitField<4, 3, u32> target;
} multisample_coverage_to_color;
- INSERT_UNION_PADDING_WORDS(0x8);
+ INSERT_PADDING_WORDS_NOINIT(0x8);
struct {
union {
@@ -917,7 +939,7 @@ public:
BitField<25, 3, u32> map_7;
};
- u32 GetMap(std::size_t index) const {
+ u32 Map(std::size_t index) const {
const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
map_4, map_5, map_6, map_7};
ASSERT(index < maps.size());
@@ -925,20 +947,22 @@ public:
}
} rt_control;
- INSERT_UNION_PADDING_WORDS(0x2);
+ INSERT_PADDING_WORDS_NOINIT(0x2);
u32 zeta_width;
u32 zeta_height;
union {
- BitField<0, 16, u32> zeta_layers;
+ BitField<0, 16, u32> zeta_depth;
BitField<16, 1, u32> zeta_volume;
};
- INSERT_UNION_PADDING_WORDS(0x26);
+ SamplerIndex sampler_index;
+
+ INSERT_PADDING_WORDS_NOINIT(0x25);
u32 depth_test_enable;
- INSERT_UNION_PADDING_WORDS(0x5);
+ INSERT_PADDING_WORDS_NOINIT(0x5);
u32 independent_blend_enable;
@@ -946,7 +970,7 @@ public:
u32 alpha_test_enabled;
- INSERT_UNION_PADDING_WORDS(0x6);
+ INSERT_PADDING_WORDS_NOINIT(0x6);
u32 d3d_cull_mode;
@@ -960,7 +984,8 @@ public:
float b;
float a;
} blend_color;
- INSERT_UNION_PADDING_WORDS(0x4);
+
+ INSERT_PADDING_WORDS_NOINIT(0x4);
struct {
u32 separate_alpha;
@@ -969,7 +994,7 @@ public:
Blend::Factor factor_dest_rgb;
Blend::Equation equation_a;
Blend::Factor factor_source_a;
- INSERT_UNION_PADDING_WORDS(1);
+ INSERT_PADDING_WORDS_NOINIT(1);
Blend::Factor factor_dest_a;
u32 enable_common;
@@ -985,7 +1010,7 @@ public:
u32 stencil_front_func_mask;
u32 stencil_front_mask;
- INSERT_UNION_PADDING_WORDS(0x2);
+ INSERT_PADDING_WORDS_NOINIT(0x2);
u32 frag_color_clamp;
@@ -997,12 +1022,17 @@ public:
float line_width_smooth;
float line_width_aliased;
- INSERT_UNION_PADDING_WORDS(0x1F);
+ INSERT_PADDING_WORDS_NOINIT(0x1B);
+
+ u32 invalidate_sampler_cache_no_wfi;
+ u32 invalidate_texture_header_cache_no_wfi;
+
+ INSERT_PADDING_WORDS_NOINIT(0x2);
u32 vb_element_base;
u32 vb_base_instance;
- INSERT_UNION_PADDING_WORDS(0x35);
+ INSERT_PADDING_WORDS_NOINIT(0x35);
u32 clip_distance_enabled;
@@ -1010,11 +1040,11 @@ public:
float point_size;
- INSERT_UNION_PADDING_WORDS(0x1);
+ INSERT_PADDING_WORDS_NOINIT(0x1);
u32 point_sprite_enable;
- INSERT_UNION_PADDING_WORDS(0x3);
+ INSERT_PADDING_WORDS_NOINIT(0x3);
CounterReset counter_reset;
@@ -1027,7 +1057,7 @@ public:
BitField<4, 1, u32> alpha_to_one;
} multisample_control;
- INSERT_UNION_PADDING_WORDS(0x4);
+ INSERT_PADDING_WORDS_NOINIT(0x4);
struct {
u32 address_high;
@@ -1041,34 +1071,34 @@ public:
} condition;
struct {
- u32 tsc_address_high;
- u32 tsc_address_low;
- u32 tsc_limit;
+ u32 address_high;
+ u32 address_low;
+ u32 limit;
- GPUVAddr TSCAddress() const {
- return static_cast<GPUVAddr>(
- (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low);
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
}
} tsc;
- INSERT_UNION_PADDING_WORDS(0x1);
+ INSERT_PADDING_WORDS_NOINIT(0x1);
float polygon_offset_factor;
u32 line_smooth_enable;
struct {
- u32 tic_address_high;
- u32 tic_address_low;
- u32 tic_limit;
+ u32 address_high;
+ u32 address_low;
+ u32 limit;
- GPUVAddr TICAddress() const {
- return static_cast<GPUVAddr>(
- (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low);
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
}
} tic;
- INSERT_UNION_PADDING_WORDS(0x5);
+ INSERT_PADDING_WORDS_NOINIT(0x5);
u32 stencil_two_side_enable;
StencilOp stencil_back_op_fail;
@@ -1076,17 +1106,17 @@ public:
StencilOp stencil_back_op_zpass;
ComparisonOp stencil_back_func_func;
- INSERT_UNION_PADDING_WORDS(0x4);
+ INSERT_PADDING_WORDS_NOINIT(0x4);
u32 framebuffer_srgb;
float polygon_offset_units;
- INSERT_UNION_PADDING_WORDS(0x4);
+ INSERT_PADDING_WORDS_NOINIT(0x4);
Tegra::Texture::MsaaMode multisample_mode;
- INSERT_UNION_PADDING_WORDS(0xC);
+ INSERT_PADDING_WORDS_NOINIT(0xC);
union {
BitField<2, 1, u32> coord_origin;
@@ -1102,7 +1132,7 @@ public:
(static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low);
}
} code_address;
- INSERT_UNION_PADDING_WORDS(1);
+ INSERT_PADDING_WORDS_NOINIT(1);
struct {
u32 vertex_end_gl;
@@ -1114,14 +1144,14 @@ public:
};
} draw;
- INSERT_UNION_PADDING_WORDS(0xA);
+ INSERT_PADDING_WORDS_NOINIT(0xA);
struct {
u32 enabled;
u32 index;
} primitive_restart;
- INSERT_UNION_PADDING_WORDS(0x5F);
+ INSERT_PADDING_WORDS_NOINIT(0x5F);
struct {
u32 start_addr_high;
@@ -1162,9 +1192,9 @@ public:
}
} index_array;
- INSERT_UNION_PADDING_WORDS(0x7);
+ INSERT_PADDING_WORDS_NOINIT(0x7);
- INSERT_UNION_PADDING_WORDS(0x1F);
+ INSERT_PADDING_WORDS_NOINIT(0x1F);
float polygon_offset_clamp;
@@ -1178,14 +1208,14 @@ public:
}
} instanced_arrays;
- INSERT_UNION_PADDING_WORDS(0x4);
+ INSERT_PADDING_WORDS_NOINIT(0x4);
union {
BitField<0, 1, u32> enable;
BitField<4, 8, u32> unk4;
} vp_point_size;
- INSERT_UNION_PADDING_WORDS(1);
+ INSERT_PADDING_WORDS_NOINIT(1);
u32 cull_test_enabled;
FrontFace front_face;
@@ -1193,11 +1223,11 @@ public:
u32 pixel_center_integer;
- INSERT_UNION_PADDING_WORDS(0x1);
+ INSERT_PADDING_WORDS_NOINIT(0x1);
u32 viewport_transform_enabled;
- INSERT_UNION_PADDING_WORDS(0x3);
+ INSERT_PADDING_WORDS_NOINIT(0x3);
union {
BitField<0, 1, u32> depth_range_0_1;
@@ -1206,18 +1236,18 @@ public:
BitField<11, 1, u32> depth_clamp_disabled;
} view_volume_clip_control;
- INSERT_UNION_PADDING_WORDS(0x1F);
+ INSERT_PADDING_WORDS_NOINIT(0x1F);
u32 depth_bounds_enable;
- INSERT_UNION_PADDING_WORDS(1);
+ INSERT_PADDING_WORDS_NOINIT(1);
struct {
u32 enable;
LogicOperation operation;
} logic_op;
- INSERT_UNION_PADDING_WORDS(0x1);
+ INSERT_PADDING_WORDS_NOINIT(0x1);
union {
u32 raw;
@@ -1230,9 +1260,9 @@ public:
BitField<6, 4, u32> RT;
BitField<10, 11, u32> layer;
} clear_buffers;
- INSERT_UNION_PADDING_WORDS(0xB);
+ INSERT_PADDING_WORDS_NOINIT(0xB);
std::array<ColorMask, NumRenderTargets> color_mask;
- INSERT_UNION_PADDING_WORDS(0x38);
+ INSERT_PADDING_WORDS_NOINIT(0x38);
struct {
u32 query_address_high;
@@ -1254,7 +1284,7 @@ public:
}
} query;
- INSERT_UNION_PADDING_WORDS(0x3C);
+ INSERT_PADDING_WORDS_NOINIT(0x3C);
struct {
union {
@@ -1284,8 +1314,7 @@ public:
GPUVAddr LimitAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
- limit_low) +
- 1;
+ limit_low);
}
} vertex_array_limit[NumVertexArrays];
@@ -1295,10 +1324,10 @@ public:
BitField<4, 4, ShaderProgram> program;
};
u32 offset;
- INSERT_UNION_PADDING_WORDS(14);
+ INSERT_PADDING_WORDS_NOINIT(14);
} shader_config[MaxShaderProgram];
- INSERT_UNION_PADDING_WORDS(0x60);
+ INSERT_PADDING_WORDS_NOINIT(0x60);
u32 firmware[0x20];
@@ -1307,7 +1336,7 @@ public:
u32 cb_address_high;
u32 cb_address_low;
u32 cb_pos;
- u32 cb_data[NumCBData];
+ std::array<u32, NumCBData> cb_data;
GPUVAddr BufferAddress() const {
return static_cast<GPUVAddr>(
@@ -1315,7 +1344,7 @@ public:
}
} const_buffer;
- INSERT_UNION_PADDING_WORDS(0x10);
+ INSERT_PADDING_WORDS_NOINIT(0x10);
struct {
union {
@@ -1323,18 +1352,18 @@ public:
BitField<0, 1, u32> valid;
BitField<4, 5, u32> index;
};
- INSERT_UNION_PADDING_WORDS(7);
+ INSERT_PADDING_WORDS_NOINIT(7);
} cb_bind[MaxShaderStage];
- INSERT_UNION_PADDING_WORDS(0x56);
+ INSERT_PADDING_WORDS_NOINIT(0x56);
u32 tex_cb_index;
- INSERT_UNION_PADDING_WORDS(0x7D);
+ INSERT_PADDING_WORDS_NOINIT(0x7D);
std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
- INSERT_UNION_PADDING_WORDS(0x298);
+ INSERT_PADDING_WORDS_NOINIT(0x298);
struct {
/// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1346,14 +1375,14 @@ public:
}
} ssbo_info;
- INSERT_UNION_PADDING_WORDS(0x11);
+ INSERT_PADDING_WORDS_NOINIT(0x11);
struct {
u32 address[MaxShaderStage];
u32 size[MaxShaderStage];
} tex_info_buffers;
- INSERT_UNION_PADDING_WORDS(0xCC);
+ INSERT_PADDING_WORDS_NOINIT(0xCC);
};
std::array<u32, NUM_REGS> reg_array;
};
@@ -1373,6 +1402,7 @@ public:
};
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
+
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
};
@@ -1393,12 +1423,6 @@ public:
void FlushMMEInlineDraw();
- /// Given a texture handle, returns the TSC and TIC entries.
- Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
-
- /// Returns the texture information for a specific texture in a specific shader stage.
- Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
-
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
@@ -1428,11 +1452,6 @@ public:
return *rasterizer;
}
- /// Notify a memory write has happened.
- void OnMemoryWrite() {
- dirty.flags |= dirty.on_write_stores;
- }
-
enum class MMEDrawMode : u32 {
Undefined,
Array,
@@ -1454,45 +1473,19 @@ public:
using Tables = std::array<Table, 2>;
Flags flags;
- Flags on_write_stores;
Tables tables{};
} dirty;
private:
void InitializeRegisterDefaults();
- Core::System& system;
- MemoryManager& memory_manager;
+ void ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call);
- VideoCore::RasterizerInterface* rasterizer = nullptr;
+ u32 ProcessShadowRam(u32 method, u32 argument);
- /// Start offsets of each macro in macro_memory
- std::array<u32, 0x80> macro_positions = {};
-
- std::array<bool, Regs::NUM_REGS> mme_inline{};
-
- /// Macro method that is currently being executed / being fed parameters.
- u32 executing_macro = 0;
- /// Parameters that have been submitted to the macro call so far.
- std::vector<u32> macro_params;
-
- /// Interpreter for the macro codes uploaded to the GPU.
- std::unique_ptr<MacroEngine> macro_engine;
-
- static constexpr u32 null_cb_data = 0xFFFFFFFF;
- struct {
- std::array<std::array<u32, 0x4000>, 16> buffer;
- u32 current{null_cb_data};
- u32 id{null_cb_data};
- u32 start_pos{};
- u32 counter{};
- } cb_data_state;
+ void ProcessDirtyRegisters(u32 method, u32 argument);
- Upload::State upload_state;
-
- bool execute_on{true};
-
- std::array<u8, Regs::NUM_REGS> dirty_pointers{};
+ void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
@@ -1502,8 +1495,8 @@ private:
/**
* Call a macro on this engine.
+ *
* @param method Method to call
- * @param num_parameters Number of arguments
* @param parameters Arguments to the method call
*/
void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
@@ -1542,7 +1535,7 @@ private:
void FinishCBData();
/// Handles a write to the CB_BIND register.
- void ProcessCBBind(std::size_t stage_index);
+ void ProcessCBBind(size_t stage_index);
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
@@ -1552,6 +1545,38 @@ private:
/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();
+
+ Core::System& system;
+ MemoryManager& memory_manager;
+
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+ /// Start offsets of each macro in macro_memory
+ std::array<u32, 0x80> macro_positions{};
+
+ std::array<bool, Regs::NUM_REGS> mme_inline{};
+
+ /// Macro method that is currently being executed / being fed parameters.
+ u32 executing_macro = 0;
+ /// Parameters that have been submitted to the macro call so far.
+ std::vector<u32> macro_params;
+
+ /// Interpreter for the macro codes uploaded to the GPU.
+ std::unique_ptr<MacroEngine> macro_engine;
+
+ static constexpr u32 null_cb_data = 0xFFFFFFFF;
+ struct CBDataState {
+ std::array<std::array<u32, 0x4000>, 16> buffer;
+ u32 current{null_cb_data};
+ u32 id{null_cb_data};
+ u32 start_pos{};
+ u32 counter{};
+ };
+ CBDataState cb_data_state;
+
+ Upload::State upload_state;
+
+ bool execute_on{true};
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1564,6 +1589,7 @@ ASSERT_REG_POSITION(shadow_ram_control, 0x49);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
+ASSERT_REG_POSITION(force_early_fragment_tests, 0x84);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
@@ -1586,10 +1612,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
ASSERT_REG_POSITION(patch_vertices, 0x373);
+ASSERT_REG_POSITION(fragment_barrier, 0x378);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD);
+ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
@@ -1597,6 +1626,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
ASSERT_REG_POSITION(zeta, 0x3F8);
+ASSERT_REG_POSITION(render_area, 0x3FD);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1605,7 +1635,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
-ASSERT_REG_POSITION(zeta_layers, 0x48c);
+ASSERT_REG_POSITION(zeta_depth, 0x48c);
+ASSERT_REG_POSITION(sampler_index, 0x48D);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1629,6 +1660,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
+ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509);
+ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index e88290754..a2f19559f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -16,8 +16,10 @@ namespace Tegra::Engines {
using namespace Texture;
-MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
- : system{system}, memory_manager{memory_manager} {}
+MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
+ : system{system_}, memory_manager{memory_manager_} {}
+
+MaxwellDMA::~MaxwellDMA() = default;
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
@@ -58,9 +60,6 @@ void MaxwellDMA::Launch() {
return;
}
- // All copies here update the main memory, so mark all rasterizer states as invalid.
- system.GPU().Maxwell3D().OnMemoryWrite();
-
if (is_src_pitch && is_dst_pitch) {
CopyPitchToPitch();
} else {
@@ -94,6 +93,7 @@ void MaxwellDMA::CopyPitchToPitch() {
}
void MaxwellDMA::CopyBlockLinearToPitch() {
+ UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
UNIMPLEMENTED_IF(regs.src_params.layer != 0);
@@ -114,8 +114,6 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
const u32 block_depth = src_params.block_size.depth;
const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
- const size_t src_layer_size =
- CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
@@ -135,6 +133,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
}
void MaxwellDMA::CopyPitchToBlockLinear() {
+ UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
+
const auto& dst_params = regs.dst_params;
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
const u32 width = dst_params.width;
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 50f445efc..3c59eeb13 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -72,11 +72,13 @@ public:
struct RenderEnable {
enum class Mode : u32 {
- FALSE = 0,
- TRUE = 1,
- CONDITIONAL = 2,
- RENDER_IF_EQUAL = 3,
- RENDER_IF_NOT_EQUAL = 4,
+ // Note: This uses Pascal case in order to avoid the identifiers
+ // FALSE and TRUE, which are reserved on Darwin.
+ False = 0,
+ True = 1,
+ Conditional = 2,
+ RenderIfEqual = 3,
+ RenderIfNotEqual = 4,
};
PackedGPUVAddr address;
@@ -185,8 +187,8 @@ public:
};
static_assert(sizeof(RemapConst) == 12);
- explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
- ~MaxwellDMA() = default;
+ explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
+ ~MaxwellDMA();
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d374b73cf..8b45f1b62 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -32,31 +32,31 @@ struct Register {
constexpr Register() = default;
- constexpr Register(u64 value) : value(value) {}
+ constexpr Register(u64 value_) : value(value_) {}
- constexpr operator u64() const {
+ [[nodiscard]] constexpr operator u64() const {
return value;
}
template <typename T>
- constexpr u64 operator-(const T& oth) const {
+ [[nodiscard]] constexpr u64 operator-(const T& oth) const {
return value - oth;
}
template <typename T>
- constexpr u64 operator&(const T& oth) const {
+ [[nodiscard]] constexpr u64 operator&(const T& oth) const {
return value & oth;
}
- constexpr u64 operator&(const Register& oth) const {
+ [[nodiscard]] constexpr u64 operator&(const Register& oth) const {
return value & oth.value;
}
- constexpr u64 operator~() const {
+ [[nodiscard]] constexpr u64 operator~() const {
return ~value;
}
- u64 GetSwizzledIndex(u64 elem) const {
+ [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const {
elem = (value + elem) & 3;
return (value & ~3) + elem;
}
@@ -75,7 +75,7 @@ enum class AttributeSize : u64 {
union Attribute {
Attribute() = default;
- constexpr explicit Attribute(u64 value) : value(value) {}
+ constexpr explicit Attribute(u64 value_) : value(value_) {}
enum class Index : u64 {
LayerViewportPointSize = 6,
@@ -107,7 +107,7 @@ union Attribute {
BitField<31, 1, u64> patch;
BitField<47, 3, AttributeSize> size;
- bool IsPhysical() const {
+ [[nodiscard]] bool IsPhysical() const {
return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
}
} fmt20;
@@ -124,7 +124,7 @@ union Attribute {
union Sampler {
Sampler() = default;
- constexpr explicit Sampler(u64 value) : value(value) {}
+ constexpr explicit Sampler(u64 value_) : value(value_) {}
enum class Index : u64 {
Sampler_0 = 8,
@@ -137,7 +137,7 @@ union Sampler {
union Image {
Image() = default;
- constexpr explicit Image(u64 value) : value{value} {}
+ constexpr explicit Image(u64 value_) : value{value_} {}
BitField<36, 13, u64> index;
u64 value;
@@ -505,14 +505,14 @@ struct IpaMode {
IpaInterpMode interpolation_mode;
IpaSampleMode sampling_mode;
- bool operator==(const IpaMode& a) const {
+ [[nodiscard]] bool operator==(const IpaMode& a) const {
return std::tie(interpolation_mode, sampling_mode) ==
std::tie(a.interpolation_mode, a.sampling_mode);
}
- bool operator!=(const IpaMode& a) const {
+ [[nodiscard]] bool operator!=(const IpaMode& a) const {
return !operator==(a);
}
- bool operator<(const IpaMode& a) const {
+ [[nodiscard]] bool operator<(const IpaMode& a) const {
return std::tie(interpolation_mode, sampling_mode) <
std::tie(a.interpolation_mode, a.sampling_mode);
}
@@ -658,10 +658,10 @@ union Instruction {
return *this;
}
- constexpr Instruction(u64 value) : value{value} {}
+ constexpr Instruction(u64 value_) : value{value_} {}
constexpr Instruction(const Instruction& instr) : value(instr.value) {}
- constexpr bool Bit(u64 offset) const {
+ [[nodiscard]] constexpr bool Bit(u64 offset) const {
return ((value >> offset) & 1) != 0;
}
@@ -746,34 +746,34 @@ union Instruction {
BitField<28, 8, u64> imm_lut28;
BitField<48, 8, u64> imm_lut48;
- u32 GetImmLut28() const {
+ [[nodiscard]] u32 GetImmLut28() const {
return static_cast<u32>(imm_lut28);
}
- u32 GetImmLut48() const {
+ [[nodiscard]] u32 GetImmLut48() const {
return static_cast<u32>(imm_lut48);
}
} lop3;
- u16 GetImm20_16() const {
+ [[nodiscard]] u16 GetImm20_16() const {
return static_cast<u16>(imm20_16);
}
- u32 GetImm20_19() const {
+ [[nodiscard]] u32 GetImm20_19() const {
u32 imm{static_cast<u32>(imm20_19)};
imm <<= 12;
imm |= negate_imm ? 0x80000000 : 0;
return imm;
}
- u32 GetImm20_32() const {
+ [[nodiscard]] u32 GetImm20_32() const {
return static_cast<u32>(imm20_32);
}
- s32 GetSignedImm20_20() const {
- u32 immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
+ [[nodiscard]] s32 GetSignedImm20_20() const {
+ const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
// Sign extend the 20-bit value.
- u32 mask = 1U << (20 - 1);
+ const auto mask = 1U << (20 - 1);
return static_cast<s32>((immediate ^ mask) - mask);
}
} alu;
@@ -857,7 +857,7 @@ union Instruction {
BitField<56, 1, u64> second_negate;
BitField<30, 9, u64> second;
- u32 PackImmediates() const {
+ [[nodiscard]] u32 PackImmediates() const {
// Immediates are half floats shifted.
constexpr u32 imm_shift = 6;
return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
@@ -1033,7 +1033,7 @@ union Instruction {
BitField<28, 2, AtomicType> type;
BitField<30, 22, s64> offset;
- s32 GetImmediateOffset() const {
+ [[nodiscard]] s32 GetImmediateOffset() const {
return static_cast<s32>(offset << 2);
}
} atoms;
@@ -1215,7 +1215,7 @@ union Instruction {
BitField<39, 4, u64> rounding;
// H0, H1 extract for F16 missing
BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
- F2fRoundingOp GetRoundingMode() const {
+ [[nodiscard]] F2fRoundingOp GetRoundingMode() const {
constexpr u64 rounding_mask = 0x0B;
return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
}
@@ -1239,15 +1239,15 @@ union Instruction {
BitField<54, 1, u64> aoffi_flag;
BitField<55, 3, TextureProcessMode> process_mode;
- bool IsComponentEnabled(std::size_t component) const {
- return ((1ull << component) & component_mask) != 0;
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
+ return ((1ULL << component) & component_mask) != 0;
}
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@@ -1271,15 +1271,15 @@ union Instruction {
BitField<36, 1, u64> aoffi_flag;
BitField<37, 3, TextureProcessMode> process_mode;
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@@ -1299,7 +1299,7 @@ union Instruction {
BitField<31, 4, u64> component_mask;
BitField<49, 1, u64> nodep_flag;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NODEP:
return nodep_flag != 0;
@@ -1309,7 +1309,7 @@ union Instruction {
return false;
}
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}
} txq;
@@ -1321,11 +1321,11 @@ union Instruction {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;
- bool IsComponentEnabled(std::size_t component) const {
- return ((1ull << component) & component_mask) != 0;
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
+ return ((1ULL << component) & component_mask) != 0;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return (ndv_flag != 0);
@@ -1347,7 +1347,7 @@ union Instruction {
BitField<54, 2, u64> offset_mode;
BitField<56, 2, u64> component;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return ndv_flag != 0;
@@ -1373,7 +1373,7 @@ union Instruction {
BitField<33, 2, u64> offset_mode;
BitField<37, 2, u64> component;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return ndv_flag != 0;
@@ -1399,7 +1399,7 @@ union Instruction {
BitField<52, 2, u64> component;
BitField<55, 1, u64> fp16_flag;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@@ -1422,24 +1422,27 @@ union Instruction {
BitField<53, 4, u64> texture_info;
BitField<59, 1, u64> fp32_flag;
- TextureType GetTextureType() const {
+ [[nodiscard]] TextureType GetTextureType() const {
// The TEXS instruction has a weird encoding for the texture type.
- if (texture_info == 0)
+ if (texture_info == 0) {
return TextureType::Texture1D;
- if (texture_info >= 1 && texture_info <= 9)
+ }
+ if (texture_info >= 1 && texture_info <= 9) {
return TextureType::Texture2D;
- if (texture_info >= 10 && texture_info <= 11)
+ }
+ if (texture_info >= 10 && texture_info <= 11) {
return TextureType::Texture3D;
- if (texture_info >= 12 && texture_info <= 13)
+ }
+ if (texture_info >= 12 && texture_info <= 13) {
return TextureType::TextureCube;
+ }
- LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}",
- static_cast<u32>(texture_info.Value()));
+ LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
UNREACHABLE();
return TextureType::Texture1D;
}
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
switch (texture_info) {
case 0:
case 2:
@@ -1458,7 +1461,7 @@ union Instruction {
return TextureProcessMode::None;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
@@ -1470,16 +1473,16 @@ union Instruction {
return false;
}
- bool IsArrayTexture() const {
+ [[nodiscard]] bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info >= 7 && texture_info <= 9;
}
- bool HasTwoDestinations() const {
+ [[nodiscard]] bool HasTwoDestinations() const {
return gpr28.Value() != Register::ZeroIndex;
}
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
{},
{0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
@@ -1506,7 +1509,7 @@ union Instruction {
BitField<54, 1, u64> cl;
BitField<55, 1, u64> process_mode;
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
}
} tld;
@@ -1516,7 +1519,7 @@ union Instruction {
BitField<53, 4, u64> texture_info;
BitField<59, 1, u64> fp32_flag;
- TextureType GetTextureType() const {
+ [[nodiscard]] TextureType GetTextureType() const {
// The TLDS instruction has a weird encoding for the texture type.
if (texture_info <= 1) {
return TextureType::Texture1D;
@@ -1529,19 +1532,19 @@ union Instruction {
return TextureType::Texture3D;
}
- LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}",
- static_cast<u32>(texture_info.Value()));
+ LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
UNREACHABLE();
return TextureType::Texture1D;
}
- TextureProcessMode GetTextureProcessMode() const {
- if (texture_info == 1 || texture_info == 5 || texture_info == 12)
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
+ if (texture_info == 1 || texture_info == 5 || texture_info == 12) {
return TextureProcessMode::LL;
+ }
return TextureProcessMode::LZ;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::AOFFI:
return texture_info == 12 || texture_info == 4;
@@ -1555,7 +1558,7 @@ union Instruction {
return false;
}
- bool IsArrayTexture() const {
+ [[nodiscard]] bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info == 8;
}
@@ -1567,7 +1570,7 @@ union Instruction {
BitField<35, 1, u64> aoffi_flag;
BitField<49, 1, u64> nodep_flag;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::AOFFI:
return aoffi_flag != 0;
@@ -1591,7 +1594,7 @@ union Instruction {
BitField<20, 3, StoreType> store_data_layout;
BitField<20, 4, u64> component_mask_selector;
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
ASSERT(mode == SurfaceDataMode::P);
constexpr u8 R = 0b0001;
constexpr u8 G = 0b0010;
@@ -1604,7 +1607,7 @@ union Instruction {
return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
}
- StoreType GetStoreDataLayout() const {
+ [[nodiscard]] StoreType GetStoreDataLayout() const {
ASSERT(mode == SurfaceDataMode::D_BA);
return store_data_layout;
}
@@ -1622,14 +1625,15 @@ union Instruction {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
- s32 GetBranchTarget() const {
+ [[nodiscard]] s32 GetBranchTarget() const {
// Sign extend the branch target offset
- u32 mask = 1U << (24 - 1);
- u32 value = static_cast<u32>(target);
+ const auto mask = 1U << (24 - 1);
+ const auto target_value = static_cast<u32>(target);
+ constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
+
// The branch offset is relative to the next instruction and is stored in bytes, so
// divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
- 1;
+ return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
}
} bra;
@@ -1637,14 +1641,15 @@ union Instruction {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
- s32 GetBranchExtend() const {
+ [[nodiscard]] s32 GetBranchExtend() const {
// Sign extend the branch target offset
- u32 mask = 1U << (24 - 1);
- u32 value = static_cast<u32>(target);
+ const auto mask = 1U << (24 - 1);
+ const auto target_value = static_cast<u32>(target);
+ constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
+
// The branch offset is relative to the next instruction and is stored in bytes, so
// divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
- 1;
+ return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
}
} brx;
@@ -1697,7 +1702,7 @@ union Instruction {
BitField<50, 1, u64> is_op_b_register;
BitField<51, 3, VmnmxOperation> operation;
- VmnmxType SourceFormatA() const {
+ [[nodiscard]] VmnmxType SourceFormatA() const {
switch (src_format_a) {
case 0b11:
return VmnmxType::Bits32;
@@ -1708,7 +1713,7 @@ union Instruction {
}
}
- VmnmxType SourceFormatB() const {
+ [[nodiscard]] VmnmxType SourceFormatB() const {
switch (src_format_b) {
case 0b11:
return VmnmxType::Bits32;
@@ -1739,7 +1744,7 @@ union Instruction {
BitField<20, 14, u64> shifted_offset;
BitField<34, 5, u64> index;
- u64 GetOffset() const {
+ [[nodiscard]] u64 GetOffset() const {
return shifted_offset * 4;
}
} cbuf34;
@@ -1748,7 +1753,7 @@ union Instruction {
BitField<20, 16, s64> offset;
BitField<36, 5, u64> index;
- s64 GetOffset() const {
+ [[nodiscard]] s64 GetOffset() const {
return offset;
}
} cbuf36;
@@ -1893,6 +1898,7 @@ public:
ICMP_IMM,
FCMP_RR,
FCMP_RC,
+ FCMP_IMMR,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -1996,29 +2002,29 @@ public:
/// Returns whether an opcode has an execution predicate field or not (ie, whether it can be
/// conditionally executed).
- static bool IsPredicatedInstruction(Id opcode) {
+ [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
// TODO(Subv): Add the rest of unpredicated instructions.
return opcode != Id::SSY && opcode != Id::PBK;
}
class Matcher {
public:
- constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type)
- : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
+ constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
+ : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}
- constexpr const char* GetName() const {
+ [[nodiscard]] constexpr const char* GetName() const {
return name;
}
- constexpr u16 GetMask() const {
+ [[nodiscard]] constexpr u16 GetMask() const {
return mask;
}
- constexpr Id GetId() const {
+ [[nodiscard]] constexpr Id GetId() const {
return id;
}
- constexpr Type GetType() const {
+ [[nodiscard]] constexpr Type GetType() const {
return type;
}
@@ -2027,7 +2033,7 @@ public:
* @param instruction The instruction to test
* @returns true if the given instruction matches.
*/
- constexpr bool Matches(u16 instruction) const {
+ [[nodiscard]] constexpr bool Matches(u16 instruction) const {
return (instruction & mask) == expected;
}
@@ -2039,7 +2045,8 @@ public:
Type type;
};
- static std::optional<std::reference_wrapper<const Matcher>> Decode(Instruction instr) {
+ using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
+ [[nodiscard]] static DecodeResult Decode(Instruction instr) {
static const auto table{GetDecodeTable()};
const auto matches_instruction = [instr](const auto& matcher) {
@@ -2061,7 +2068,7 @@ private:
* A '0' in a bitstring indicates that a zero must be present at that bit position.
* A '1' in a bitstring indicates that a one must be present at that bit position.
*/
- static constexpr auto GetMaskAndExpect(const char* const bitstring) {
+ [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
u16 mask = 0, expect = 0;
for (std::size_t i = 0; i < opcode_bitsize; i++) {
const std::size_t bit_position = opcode_bitsize - i - 1;
@@ -2083,14 +2090,14 @@ private:
public:
/// Creates a matcher that can match and parse instructions based on bitstring.
- static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type,
- const char* const name) {
+ [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
+ Type type, const char* const name) {
const auto [mask, expected] = GetMaskAndExpect(bitstring);
return Matcher(name, mask, expected, op, type);
}
};
- static std::vector<Matcher> GetDecodeTable() {
+ [[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
std::vector<Matcher> table = {
#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
@@ -2205,6 +2212,7 @@ private:
INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
+ INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index 72e2a33d5..e0d7b89c5 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -41,37 +41,37 @@ struct Header {
BitField<26, 1, u32> does_load_or_store;
BitField<27, 1, u32> does_fp64;
BitField<28, 4, u32> stream_out_mask;
- } common0{};
+ } common0;
union {
BitField<0, 24, u32> shader_local_memory_low_size;
BitField<24, 8, u32> per_patch_attribute_count;
- } common1{};
+ } common1;
union {
BitField<0, 24, u32> shader_local_memory_high_size;
BitField<24, 8, u32> threads_per_input_primitive;
- } common2{};
+ } common2;
union {
BitField<0, 24, u32> shader_local_memory_crs_size;
BitField<24, 4, OutputTopology> output_topology;
BitField<28, 4, u32> reserved;
- } common3{};
+ } common3;
union {
BitField<0, 12, u32> max_output_vertices;
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
BitField<20, 4, u32> reserved;
BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
- } common4{};
+ } common4;
union {
struct {
- INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
- INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
- INSERT_UNION_PADDING_BYTES(16); // ImapGenericVector[32]
- INSERT_UNION_PADDING_BYTES(2); // ImapColor
+ INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
+ INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
+ INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
union {
BitField<0, 8, u16> clip_distances;
BitField<8, 1, u16> point_sprite_s;
@@ -82,20 +82,20 @@ struct Header {
BitField<14, 1, u16> instance_id;
BitField<15, 1, u16> vertex_id;
};
- INSERT_UNION_PADDING_BYTES(5); // ImapFixedFncTexture[10]
- INSERT_UNION_PADDING_BYTES(1); // ImapReserved
- INSERT_UNION_PADDING_BYTES(3); // OmapSystemValuesA
- INSERT_UNION_PADDING_BYTES(1); // OmapSystemValuesB
- INSERT_UNION_PADDING_BYTES(16); // OmapGenericVector[32]
- INSERT_UNION_PADDING_BYTES(2); // OmapColor
- INSERT_UNION_PADDING_BYTES(2); // OmapSystemValuesC
- INSERT_UNION_PADDING_BYTES(5); // OmapFixedFncTexture[10]
- INSERT_UNION_PADDING_BYTES(1); // OmapReserved
+ INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
+ INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
+ INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
+ INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
+ INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
+ INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
+ INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC
+ INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
+ INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
} vtg;
struct {
- INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
- INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
+ INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
+ INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
union {
BitField<0, 2, PixelImap> x;
@@ -105,10 +105,10 @@ struct Header {
u8 raw;
} imap_generic_vector[32];
- INSERT_UNION_PADDING_BYTES(2); // ImapColor
- INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC
- INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
- INSERT_UNION_PADDING_BYTES(2); // ImapReserved
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
+ INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
struct {
u32 target;
@@ -145,7 +145,7 @@ struct Header {
}
} ps;
- std::array<u32, 0xF> raw{};
+ std::array<u32, 0xF> raw;
};
u64 GetLocalMemorySize() const {
@@ -153,7 +153,6 @@ struct Header {
(common2.shader_local_memory_high_size << 24));
}
};
-
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
} // namespace Tegra::Shader
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 06cc12d5a..f055b61e9 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "core/core.h"
+#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -17,11 +18,11 @@ namespace VideoCommon {
class FenceBase {
public:
- FenceBase(u32 payload, bool is_stubbed)
- : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {}
+ explicit FenceBase(u32 payload_, bool is_stubbed_)
+ : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
- FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
- : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
+ explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_)
+ : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
GPUVAddr GetAddress() const {
return address;
@@ -47,6 +48,11 @@ protected:
template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
class FenceManager {
public:
+ /// Notify the fence manager about a new frame
+ void TickFrame() {
+ delayed_destruction_ring.Tick();
+ }
+
void SignalSemaphore(GPUVAddr addr, u32 value) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
@@ -74,8 +80,6 @@ public:
}
void WaitPendingFences() {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait()) {
@@ -83,23 +87,23 @@ public:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
- memory_manager.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
+ gpu_memory.template Write<u32>(current_fence->GetAddress(),
+ current_fence->GetPayload());
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
- fences.pop();
+ PopFence();
}
}
protected:
- FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- TTextureCache& texture_cache, TTBufferCache& buffer_cache,
- TQueryCache& query_cache)
- : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
- buffer_cache{buffer_cache}, query_cache{query_cache} {}
+ explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
+ TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
+ TQueryCache& query_cache_)
+ : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
+ texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
- virtual ~FenceManager() {}
+ virtual ~FenceManager() = default;
/// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true
@@ -113,16 +117,15 @@ protected:
/// Waits until a fence has been signalled by the host GPU.
virtual void WaitFence(TFence& fence) = 0;
- Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
TTextureCache& texture_cache;
TTBufferCache& buffer_cache;
TQueryCache& query_cache;
private:
void TryReleasePendingFences() {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
@@ -130,38 +133,49 @@ private:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
- memory_manager.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
+ gpu_memory.template Write<u32>(current_fence->GetAddress(),
+ current_fence->GetPayload());
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
- fences.pop();
+ PopFence();
}
}
bool ShouldWait() const {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
query_cache.ShouldWaitAsyncFlushes();
}
bool ShouldFlush() const {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
query_cache.HasUncommittedFlushes();
}
void PopAsyncFlushes() {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.PopAsyncFlushes();
buffer_cache.PopAsyncFlushes();
query_cache.PopAsyncFlushes();
}
void CommitAsyncFlushes() {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.CommitAsyncFlushes();
buffer_cache.CommitAsyncFlushes();
query_cache.CommitAsyncFlushes();
}
+ void PopFence() {
+ delayed_destruction_ring.Push(std::move(fences.front()));
+ fences.pop();
+ }
+
std::queue<TFence> fences;
+
+ DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
};
} // namespace VideoCommon
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
new file mode 100644
index 000000000..b86c3a757
--- /dev/null
+++ b/src/video_core/framebuffer_config.h
@@ -0,0 +1,31 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+namespace Tegra {
+
+/**
+ * Struct describing framebuffer configuration
+ */
+struct FramebufferConfig {
+ enum class PixelFormat : u32 {
+ A8B8G8R8_UNORM = 1,
+ RGB565_UNORM = 4,
+ B8G8R8A8_UNORM = 5,
+ };
+
+ VAddr address{};
+ u32 offset{};
+ u32 width{};
+ u32 height{};
+ u32 stride{};
+ PixelFormat pixel_format{};
+
+ using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
+ TransformFlags transform_flags{};
+ Common::Rectangle<int> crop_rect;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index acb6e6d46..2a9bd4121 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -10,6 +10,7 @@
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/frontend/emu_window.h"
+#include "core/hardware_interrupt_manager.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/fermi_2d.h"
@@ -27,22 +28,24 @@ namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-GPU::GPU(Core::System& system_, bool is_async_)
- : system{system_}, dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
- memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
+GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
+ : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
+ dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
+ cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_},
maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
fermi_2d{std::make_unique<Engines::Fermi2D>()},
kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
- shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
+ shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
+ gpu_thread{system_, is_async_} {}
GPU::~GPU() = default;
void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
renderer = std::move(renderer_);
+ rasterizer = renderer->ReadRasterizer();
- VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
memory_manager->BindRasterizer(rasterizer);
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
@@ -77,31 +80,46 @@ DmaPusher& GPU::DmaPusher() {
return *dma_pusher;
}
+Tegra::CDmaPusher& GPU::CDmaPusher() {
+ return *cdma_pusher;
+}
+
const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
+const Tegra::CDmaPusher& GPU::CDmaPusher() const {
+ return *cdma_pusher;
+}
+
void GPU::WaitFence(u32 syncpoint_id, u32 value) {
// Synced GPU, is always in sync
if (!is_async) {
return;
}
+ if (syncpoint_id == UINT32_MAX) {
+ // TODO: Research what this does.
+ LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
+ return;
+ }
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{sync_mutex};
- sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; });
+ sync_cv.wait(lock, [=, this] { return syncpoints.at(syncpoint_id).load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
- syncpoints[syncpoint_id]++;
+ auto& syncpoint = syncpoints.at(syncpoint_id);
+ syncpoint++;
std::lock_guard lock{sync_mutex};
sync_cv.notify_all();
- if (!syncpt_interrupts[syncpoint_id].empty()) {
- u32 value = syncpoints[syncpoint_id].load();
- auto it = syncpt_interrupts[syncpoint_id].begin();
- while (it != syncpt_interrupts[syncpoint_id].end()) {
+ auto& interrupt = syncpt_interrupts.at(syncpoint_id);
+ if (!interrupt.empty()) {
+ u32 value = syncpoint.load();
+ auto it = interrupt.begin();
+ while (it != interrupt.end()) {
if (value >= *it) {
TriggerCpuInterrupt(syncpoint_id, *it);
- it = syncpt_interrupts[syncpoint_id].erase(it);
+ it = interrupt.erase(it);
continue;
}
it++;
@@ -110,22 +128,22 @@ void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
}
u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
- return syncpoints[syncpoint_id].load();
+ return syncpoints.at(syncpoint_id).load();
}
void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
- auto& interrupt = syncpt_interrupts[syncpoint_id];
+ auto& interrupt = syncpt_interrupts.at(syncpoint_id);
bool contains = std::any_of(interrupt.begin(), interrupt.end(),
[value](u32 in_value) { return in_value == value; });
if (contains) {
return;
}
- syncpt_interrupts[syncpoint_id].emplace_back(value);
+ interrupt.emplace_back(value);
}
bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
std::lock_guard lock{sync_mutex};
- auto& interrupt = syncpt_interrupts[syncpoint_id];
+ auto& interrupt = syncpt_interrupts.at(syncpoint_id);
const auto iter =
std::find_if(interrupt.begin(), interrupt.end(),
[value](u32 interrupt_value) { return value == interrupt_value; });
@@ -153,7 +171,7 @@ void GPU::TickWork() {
const std::size_t size = request.size;
flush_requests.pop_front();
flush_request_mutex.unlock();
- renderer->Rasterizer().FlushRegion(addr, size);
+ rasterizer->FlushRegion(addr, size);
current_flush_fence.store(fence);
flush_request_mutex.lock();
}
@@ -175,41 +193,13 @@ u64 GPU::GetTicks() const {
}
void GPU::FlushCommands() {
- renderer->Rasterizer().FlushCommands();
+ rasterizer->FlushCommands();
}
void GPU::SyncGuestHost() {
- renderer->Rasterizer().SyncGuestHost();
+ rasterizer->SyncGuestHost();
}
-void GPU::OnCommandListEnd() {
- renderer->Rasterizer().ReleaseFences();
-}
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
- BindObject = 0x0,
- Nop = 0x2,
- SemaphoreAddressHigh = 0x4,
- SemaphoreAddressLow = 0x5,
- SemaphoreSequence = 0x6,
- SemaphoreTrigger = 0x7,
- NotifyIntr = 0x8,
- WrcacheFlush = 0x9,
- Unk28 = 0xA,
- UnkCacheFlush = 0xB,
- RefCnt = 0x14,
- SemaphoreAcquire = 0x1A,
- SemaphoreRelease = 0x1B,
- FenceValue = 0x1C,
- FenceAction = 0x1D,
- Unk78 = 0x1E,
- Unk7c = 0x1F,
- Yield = 0x20,
- NonPullerMethods = 0x40,
-};
-
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
WriteLong = 0x2,
@@ -240,8 +230,12 @@ void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
} else {
for (std::size_t i = 0; i < amount; i++) {
- CallPullerMethod(
- {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)});
+ CallPullerMethod(MethodCall{
+ method,
+ base_start[i],
+ subchannel,
+ methods_pending - static_cast<u32>(i),
+ });
}
}
}
@@ -268,7 +262,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
+ break;
case BufferMethods::FenceAction:
+ ProcessFenceActionMethod();
+ break;
+ case BufferMethods::WaitForInterrupt:
+ ProcessWaitForInterruptMethod();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
@@ -298,8 +297,7 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
break;
}
default:
- LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
- static_cast<u32>(method));
+ LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
break;
}
}
@@ -378,10 +376,28 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
break;
default:
- UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id));
+ UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
+ }
+}
+
+void GPU::ProcessFenceActionMethod() {
+ switch (regs.fence_action.op) {
+ case FenceOperation::Acquire:
+ WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+ break;
+ case FenceOperation::Increment:
+ IncrementSyncPoint(regs.fence_action.syncpoint_id);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
}
}
+void GPU::ProcessWaitForInterruptMethod() {
+ // TODO(bunnei) ImplementMe
+ LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
void GPU::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
@@ -443,4 +459,75 @@ void GPU::ProcessSemaphoreAcquire() {
}
}
+void GPU::Start() {
+ gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
+ cpu_context = renderer->GetRenderWindow().CreateSharedContext();
+ cpu_context->MakeCurrent();
+}
+
+void GPU::ObtainContext() {
+ cpu_context->MakeCurrent();
+}
+
+void GPU::ReleaseContext() {
+ cpu_context->DoneCurrent();
+}
+
+void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
+ gpu_thread.SubmitList(std::move(entries));
+}
+
+void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
+ if (!use_nvdec) {
+ return;
+ }
+ // This condition fires when a video stream ends, clear all intermediary data
+ if (entries[0].raw == 0xDEADB33F) {
+ cdma_pusher.reset();
+ return;
+ }
+ if (!cdma_pusher) {
+ cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
+ }
+
+ // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
+ // TODO(ameerj): RE proper async nvdec operation
+ // gpu_thread.SubmitCommandBuffer(std::move(entries));
+
+ cdma_pusher->Push(std::move(entries));
+ cdma_pusher->DispatchCalls();
+}
+
+void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ gpu_thread.SwapBuffers(framebuffer);
+}
+
+void GPU::FlushRegion(VAddr addr, u64 size) {
+ gpu_thread.FlushRegion(addr, size);
+}
+
+void GPU::InvalidateRegion(VAddr addr, u64 size) {
+ gpu_thread.InvalidateRegion(addr, size);
+}
+
+void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+ gpu_thread.FlushAndInvalidateRegion(addr, size);
+}
+
+void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
+ auto& interrupt_manager = system.InterruptManager();
+ interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
+}
+
+void GPU::WaitIdle() const {
+ gpu_thread.WaitIdle();
+}
+
+void GPU::OnCommandListEnd() {
+ if (is_async) {
+ // This command only applies to asynchronous GPU mode
+ gpu_thread.OnCommandListEnd();
+ }
+}
+
} // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index c7d11deb2..b2ee45496 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -13,14 +13,17 @@
#include "common/common_types.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
+#include "video_core/cdma_pusher.h"
#include "video_core/dma_pusher.h"
+#include "video_core/framebuffer_config.h"
+#include "video_core/gpu_thread.h"
using CacheAddr = std::uintptr_t;
-inline CacheAddr ToCacheAddr(const void* host_ptr) {
+[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
return reinterpret_cast<CacheAddr>(host_ptr);
}
-inline u8* FromCacheAddr(CacheAddr cache_addr) {
+[[nodiscard]] inline u8* FromCacheAddr(CacheAddr cache_addr) {
return reinterpret_cast<u8*>(cache_addr);
}
@@ -100,28 +103,6 @@ enum class DepthFormat : u32 {
struct CommandListHeader;
class DebugContext;
-/**
- * Struct describing framebuffer configuration
- */
-struct FramebufferConfig {
- enum class PixelFormat : u32 {
- A8B8G8R8_UNORM = 1,
- RGB565_UNORM = 4,
- B8G8R8A8_UNORM = 5,
- };
-
- VAddr address;
- u32 offset;
- u32 width;
- u32 height;
- u32 stride;
- PixelFormat pixel_format;
-
- using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
- TransformFlags transform_flags;
- Common::Rectangle<int> crop_rect;
-};
-
namespace Engines {
class Fermi2D;
class Maxwell3D;
@@ -140,7 +121,7 @@ enum class EngineID {
class MemoryManager;
-class GPU {
+class GPU final {
public:
struct MethodCall {
u32 method{};
@@ -148,17 +129,17 @@ public:
u32 subchannel{};
u32 method_count{};
- bool IsLastCall() const {
+ explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
+ : method(method_), argument(argument_), subchannel(subchannel_),
+ method_count(method_count_) {}
+
+ [[nodiscard]] bool IsLastCall() const {
return method_count <= 1;
}
-
- MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0)
- : method(method), argument(argument), subchannel(subchannel),
- method_count(method_count) {}
};
- explicit GPU(Core::System& system, bool is_async);
- virtual ~GPU();
+ explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
+ ~GPU();
/// Binds a renderer to the GPU.
void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
@@ -175,13 +156,13 @@ public:
/// Synchronizes CPU writes with Host GPU memory.
void SyncGuestHost();
/// Signal the ending of command list.
- virtual void OnCommandListEnd();
+ void OnCommandListEnd();
/// Request a host GPU memory flush from the CPU.
- u64 RequestFlush(VAddr addr, std::size_t size);
+ [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
/// Obtains current flush request fence id.
- u64 CurrentFlushRequestFence() const {
+ [[nodiscard]] u64 CurrentFlushRequestFence() const {
return current_flush_fence.load(std::memory_order_relaxed);
}
@@ -189,80 +170,112 @@ public:
void TickWork();
/// Returns a reference to the Maxwell3D GPU engine.
- Engines::Maxwell3D& Maxwell3D();
+ [[nodiscard]] Engines::Maxwell3D& Maxwell3D();
/// Returns a const reference to the Maxwell3D GPU engine.
- const Engines::Maxwell3D& Maxwell3D() const;
+ [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const;
/// Returns a reference to the KeplerCompute GPU engine.
- Engines::KeplerCompute& KeplerCompute();
+ [[nodiscard]] Engines::KeplerCompute& KeplerCompute();
/// Returns a reference to the KeplerCompute GPU engine.
- const Engines::KeplerCompute& KeplerCompute() const;
+ [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const;
/// Returns a reference to the GPU memory manager.
- Tegra::MemoryManager& MemoryManager();
+ [[nodiscard]] Tegra::MemoryManager& MemoryManager();
/// Returns a const reference to the GPU memory manager.
- const Tegra::MemoryManager& MemoryManager() const;
+ [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const;
/// Returns a reference to the GPU DMA pusher.
- Tegra::DmaPusher& DmaPusher();
+ [[nodiscard]] Tegra::DmaPusher& DmaPusher();
+
+ /// Returns a const reference to the GPU DMA pusher.
+ [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const;
- VideoCore::RendererBase& Renderer() {
+ /// Returns a reference to the GPU CDMA pusher.
+ [[nodiscard]] Tegra::CDmaPusher& CDmaPusher();
+
+ /// Returns a const reference to the GPU CDMA pusher.
+ [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;
+
+ /// Returns a reference to the underlying renderer.
+ [[nodiscard]] VideoCore::RendererBase& Renderer() {
return *renderer;
}
- const VideoCore::RendererBase& Renderer() const {
+ /// Returns a const reference to the underlying renderer.
+ [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
return *renderer;
}
- VideoCore::ShaderNotify& ShaderNotify() {
+ /// Returns a reference to the shader notifier.
+ [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
return *shader_notify;
}
- const VideoCore::ShaderNotify& ShaderNotify() const {
+ /// Returns a const reference to the shader notifier.
+ [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
return *shader_notify;
}
// Waits for the GPU to finish working
- virtual void WaitIdle() const = 0;
+ void WaitIdle() const;
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
void WaitFence(u32 syncpoint_id, u32 value);
void IncrementSyncPoint(u32 syncpoint_id);
- u32 GetSyncpointValue(u32 syncpoint_id) const;
+ [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const;
void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
- bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
+ [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
- u64 GetTicks() const;
+ [[nodiscard]] u64 GetTicks() const;
- std::unique_lock<std::mutex> LockSync() {
+ [[nodiscard]] std::unique_lock<std::mutex> LockSync() {
return std::unique_lock{sync_mutex};
}
- bool IsAsync() const {
+ [[nodiscard]] bool IsAsync() const {
return is_async;
}
- /// Returns a const reference to the GPU DMA pusher.
- const Tegra::DmaPusher& DmaPusher() const;
+ [[nodiscard]] bool UseNvdec() const {
+ return use_nvdec;
+ }
+
+ enum class FenceOperation : u32 {
+ Acquire = 0,
+ Increment = 1,
+ };
+
+ union FenceAction {
+ u32 raw;
+ BitField<0, 1, FenceOperation> op;
+ BitField<8, 24, u32> syncpoint_id;
+
+ [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+ FenceAction result{};
+ result.op.Assign(op);
+ result.syncpoint_id.Assign(syncpoint_id);
+ return {result.raw};
+ }
+ };
struct Regs {
static constexpr size_t NUM_REGS = 0x40;
union {
struct {
- INSERT_UNION_PADDING_WORDS(0x4);
+ INSERT_PADDING_WORDS_NOINIT(0x4);
struct {
u32 address_high;
u32 address_low;
- GPUVAddr SemaphoreAddress() const {
+ [[nodiscard]] GPUVAddr SemaphoreAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
@@ -270,21 +283,18 @@ public:
u32 semaphore_sequence;
u32 semaphore_trigger;
- INSERT_UNION_PADDING_WORDS(0xC);
+ INSERT_PADDING_WORDS_NOINIT(0xC);
// The pusher and the puller share the reference counter, the pusher only has read
// access
u32 reference_count;
- INSERT_UNION_PADDING_WORDS(0x5);
+ INSERT_PADDING_WORDS_NOINIT(0x5);
u32 semaphore_acquire;
u32 semaphore_release;
u32 fence_value;
- union {
- BitField<4, 4, u32> operation;
- BitField<8, 8, u32> id;
- } fence_action;
- INSERT_UNION_PADDING_WORDS(0xE2);
+ FenceAction fence_action;
+ INSERT_PADDING_WORDS_NOINIT(0xE2);
// Puller state
u32 acquire_mode;
@@ -300,34 +310,39 @@ public:
/// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary
/// core timing events.
- virtual void Start() = 0;
+ void Start();
/// Obtain the CPU Context
- virtual void ObtainContext() = 0;
+ void ObtainContext();
/// Release the CPU Context
- virtual void ReleaseContext() = 0;
+ void ReleaseContext();
/// Push GPU command entries to be processed
- virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
+ void PushGPUEntries(Tegra::CommandList&& entries);
+
+ /// Push GPU command buffer entries to be processed
+ void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);
/// Swap buffers (render frame)
- virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
- virtual void FlushRegion(VAddr addr, u64 size) = 0;
+ void FlushRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
- virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+ void InvalidateRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
- virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+ void FlushAndInvalidateRegion(VAddr addr, u64 size);
protected:
- virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
+ void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;
private:
void ProcessBindMethod(const MethodCall& method_call);
+ void ProcessFenceActionMethod();
+ void ProcessWaitForInterruptMethod();
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();
@@ -343,16 +358,18 @@ private:
u32 methods_pending);
/// Determines where the method should be executed.
- bool ExecuteMethodOnEngine(u32 method);
+ [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
protected:
Core::System& system;
+ std::unique_ptr<Tegra::MemoryManager> memory_manager;
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+ std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
std::unique_ptr<VideoCore::RendererBase> renderer;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+ const bool use_nvdec;
private:
- std::unique_ptr<Tegra::MemoryManager> memory_manager;
-
/// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines = {};
/// 3D engine
@@ -373,12 +390,13 @@ private:
std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
std::mutex sync_mutex;
+ std::mutex device_mutex;
std::condition_variable sync_cv;
struct FlushRequest {
- FlushRequest(u64 fence, VAddr addr, std::size_t size)
- : fence{fence}, addr{addr}, size{size} {}
+ explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
+ : fence{fence_}, addr{addr_}, size{size_} {}
u64 fence;
VAddr addr;
std::size_t size;
@@ -390,6 +408,9 @@ private:
std::mutex flush_request_mutex;
const bool is_async;
+
+ VideoCommon::GPUThread::ThreadManager gpu_thread;
+ std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
deleted file mode 100644
index 70a3d5738..000000000
--- a/src/video_core/gpu_asynch.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "core/core.h"
-#include "core/hardware_interrupt_manager.h"
-#include "video_core/gpu_asynch.h"
-#include "video_core/gpu_thread.h"
-#include "video_core/renderer_base.h"
-
-namespace VideoCommon {
-
-GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {}
-
-GPUAsynch::~GPUAsynch() = default;
-
-void GPUAsynch::Start() {
- gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
- cpu_context = renderer->GetRenderWindow().CreateSharedContext();
- cpu_context->MakeCurrent();
-}
-
-void GPUAsynch::ObtainContext() {
- cpu_context->MakeCurrent();
-}
-
-void GPUAsynch::ReleaseContext() {
- cpu_context->DoneCurrent();
-}
-
-void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
- gpu_thread.SubmitList(std::move(entries));
-}
-
-void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
- gpu_thread.SwapBuffers(framebuffer);
-}
-
-void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
- gpu_thread.FlushRegion(addr, size);
-}
-
-void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
- gpu_thread.InvalidateRegion(addr, size);
-}
-
-void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
- gpu_thread.FlushAndInvalidateRegion(addr, size);
-}
-
-void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
- auto& interrupt_manager = system.InterruptManager();
- interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
-}
-
-void GPUAsynch::WaitIdle() const {
- gpu_thread.WaitIdle();
-}
-
-void GPUAsynch::OnCommandListEnd() {
- gpu_thread.OnCommandListEnd();
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
deleted file mode 100644
index f89c855a5..000000000
--- a/src/video_core/gpu_asynch.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "video_core/gpu.h"
-#include "video_core/gpu_thread.h"
-
-namespace Core::Frontend {
-class GraphicsContext;
-}
-
-namespace VideoCore {
-class RendererBase;
-} // namespace VideoCore
-
-namespace VideoCommon {
-
-/// Implementation of GPU interface that runs the GPU asynchronously
-class GPUAsynch final : public Tegra::GPU {
-public:
- explicit GPUAsynch(Core::System& system);
- ~GPUAsynch() override;
-
- void Start() override;
- void ObtainContext() override;
- void ReleaseContext() override;
- void PushGPUEntries(Tegra::CommandList&& entries) override;
- void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- void FlushRegion(VAddr addr, u64 size) override;
- void InvalidateRegion(VAddr addr, u64 size) override;
- void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
- void WaitIdle() const override;
-
- void OnCommandListEnd() override;
-
-protected:
- void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
-
-private:
- GPUThread::ThreadManager gpu_thread;
- std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
deleted file mode 100644
index 1ca47ddef..000000000
--- a/src/video_core/gpu_synch.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "video_core/gpu_synch.h"
-#include "video_core/renderer_base.h"
-
-namespace VideoCommon {
-
-GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {}
-
-GPUSynch::~GPUSynch() = default;
-
-void GPUSynch::Start() {}
-
-void GPUSynch::ObtainContext() {
- renderer->Context().MakeCurrent();
-}
-
-void GPUSynch::ReleaseContext() {
- renderer->Context().DoneCurrent();
-}
-
-void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
- dma_pusher->Push(std::move(entries));
- dma_pusher->DispatchCalls();
-}
-
-void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
- renderer->SwapBuffers(framebuffer);
-}
-
-void GPUSynch::FlushRegion(VAddr addr, u64 size) {
- renderer->Rasterizer().FlushRegion(addr, size);
-}
-
-void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
- renderer->Rasterizer().InvalidateRegion(addr, size);
-}
-
-void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
- renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
deleted file mode 100644
index 297258cb1..000000000
--- a/src/video_core/gpu_synch.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "video_core/gpu.h"
-
-namespace Core::Frontend {
-class GraphicsContext;
-}
-
-namespace VideoCore {
-class RendererBase;
-} // namespace VideoCore
-
-namespace VideoCommon {
-
-/// Implementation of GPU interface that runs the GPU synchronously
-class GPUSynch final : public Tegra::GPU {
-public:
- explicit GPUSynch(Core::System& system);
- ~GPUSynch() override;
-
- void Start() override;
- void ObtainContext() override;
- void ReleaseContext() override;
- void PushGPUEntries(Tegra::CommandList&& entries) override;
- void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- void FlushRegion(VAddr addr, u64 size) override;
- void InvalidateRegion(VAddr addr, u64 size) override;
- void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
- void WaitIdle() const override {}
-
-protected:
- void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
- [[maybe_unused]] u32 value) const override {}
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index bf761abf2..50319f1d5 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "common/scope_exit.h"
#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
@@ -18,9 +19,11 @@ namespace VideoCommon::GPUThread {
/// Runs the GPU thread
static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
- SynchState& state) {
+ SynchState& state, Tegra::CDmaPusher& cdma_pusher) {
std::string name = "yuzu:GPU";
MicroProfileOnThreadCreate(name.c_str());
+ SCOPE_EXIT({ MicroProfileOnThreadExit(); });
+
Common::SetCurrentThreadName(name.c_str());
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
system.RegisterHostThread();
@@ -35,23 +38,28 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
}
auto current_context = context.Acquire();
+ VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
CommandDataContainer next;
while (state.is_running) {
next = state.queue.PopWait();
- if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
+ if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
- } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
+ } else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) {
+ // NVDEC
+ cdma_pusher.Push(std::move(command_list->entries));
+ cdma_pusher.DispatchCalls();
+ } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
- renderer.Rasterizer().ReleaseFences();
+ rasterizer->ReleaseFences();
} else if (std::holds_alternative<GPUTickCommand>(next.data)) {
system.GPU().TickWork();
- } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
- renderer.Rasterizer().FlushRegion(data->addr, data->size);
- } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
- renderer.Rasterizer().OnCPUWrite(data->addr, data->size);
+ } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
+ rasterizer->FlushRegion(flush->addr, flush->size);
+ } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
+ rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
return;
} else {
@@ -61,7 +69,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
}
}
-ThreadManager::ThreadManager(Core::System& system) : system{system} {}
+ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
+ : system{system_}, is_async{is_async_} {}
ThreadManager::~ThreadManager() {
if (!thread.joinable()) {
@@ -75,47 +84,64 @@ ThreadManager::~ThreadManager() {
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context,
- Tegra::DmaPusher& dma_pusher) {
- thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
- std::ref(context), std::ref(dma_pusher), std::ref(state)};
+ Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
+ rasterizer = renderer.ReadRasterizer();
+ thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
+ std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
PushCommand(SubmitListCommand(std::move(entries)));
}
+void ThreadManager::SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries) {
+ PushCommand(SubmitChCommandEntries(std::move(entries)));
+}
+
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
}
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
- if (!Settings::IsGPULevelHigh()) {
+ if (!is_async) {
+ // Always flush with synchronous GPU mode
PushCommand(FlushRegionCommand(addr, size));
return;
}
- if (!Settings::IsGPULevelExtreme()) {
- return;
- }
- if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
+
+ // Asynchronous GPU mode
+ switch (Settings::values.gpu_accuracy.GetValue()) {
+ case Settings::GPUAccuracy::Normal:
+ PushCommand(FlushRegionCommand(addr, size));
+ break;
+ case Settings::GPUAccuracy::High:
+ // TODO(bunnei): Is this right? Preserving existing behavior for now
+ break;
+ case Settings::GPUAccuracy::Extreme: {
auto& gpu = system.GPU();
u64 fence = gpu.RequestFlush(addr, size);
PushCommand(GPUTickCommand());
while (fence > gpu.CurrentFlushRequestFence()) {
}
+ break;
+ }
+ default:
+ UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue());
}
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
- system.Renderer().Rasterizer().OnCPUWrite(addr, size);
+ rasterizer->OnCPUWrite(addr, size);
}
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
- system.Renderer().Rasterizer().OnCPUWrite(addr, size);
+ rasterizer->OnCPUWrite(addr, size);
}
void ThreadManager::WaitIdle() const {
- while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) {
+ while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
+ system.IsPoweredOn()) {
}
}
@@ -126,6 +152,12 @@ void ThreadManager::OnCommandListEnd() {
u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
+
+ if (!is_async) {
+ // In synchronous GPU mode, block the caller until the command has executed
+ WaitIdle();
+ }
+
return fence;
}
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 5a28335d6..4cd951169 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,8 +10,9 @@
#include <optional>
#include <thread>
#include <variant>
+
#include "common/threadsafe_queue.h"
-#include "video_core/gpu.h"
+#include "video_core/framebuffer_config.h"
namespace Tegra {
struct FramebufferConfig;
@@ -25,6 +26,11 @@ class GraphicsContext;
class System;
} // namespace Core
+namespace VideoCore {
+class RasterizerInterface;
+class RendererBase;
+} // namespace VideoCore
+
namespace VideoCommon::GPUThread {
/// Command to signal to the GPU thread that processing has ended
@@ -32,22 +38,30 @@ struct EndProcessingCommand final {};
/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final {
- explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
+ explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {}
Tegra::CommandList entries;
};
+/// Command to signal to the GPU thread that a cdma command list is ready for processing
+struct SubmitChCommandEntries final {
+ explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries_)
+ : entries{std::move(entries_)} {}
+
+ Tegra::ChCommandHeaderList entries;
+};
+
/// Command to signal to the GPU thread that a swap buffers is pending
struct SwapBuffersCommand final {
- explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
- : framebuffer{std::move(framebuffer)} {}
+ explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer_)
+ : framebuffer{std::move(framebuffer_)} {}
std::optional<Tegra::FramebufferConfig> framebuffer;
};
/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
- explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+ explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
VAddr addr;
u64 size;
@@ -55,7 +69,7 @@ struct FlushRegionCommand final {
/// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final {
- explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+ explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
VAddr addr;
u64 size;
@@ -63,8 +77,8 @@ struct InvalidateRegionCommand final {
/// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final {
- explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
- : addr{addr}, size{size} {}
+ explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_)
+ : addr{addr_}, size{size_} {}
VAddr addr;
u64 size;
@@ -77,15 +91,15 @@ struct OnCommandListEndCommand final {};
struct GPUTickCommand final {};
using CommandData =
- std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
- InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
- GPUTickCommand>;
+ std::variant<EndProcessingCommand, SubmitListCommand, SubmitChCommandEntries,
+ SwapBuffersCommand, FlushRegionCommand, InvalidateRegionCommand,
+ FlushAndInvalidateRegionCommand, OnCommandListEndCommand, GPUTickCommand>;
struct CommandDataContainer {
CommandDataContainer() = default;
- CommandDataContainer(CommandData&& data, u64 next_fence)
- : data{std::move(data)}, fence{next_fence} {}
+ explicit CommandDataContainer(CommandData&& data_, u64 next_fence_)
+ : data{std::move(data_)}, fence{next_fence_} {}
CommandData data;
u64 fence{};
@@ -104,16 +118,19 @@ struct SynchState final {
/// Class used to manage the GPU thread
class ThreadManager final {
public:
- explicit ThreadManager(Core::System& system);
+ explicit ThreadManager(Core::System& system_, bool is_async_);
~ThreadManager();
/// Creates and starts the GPU thread.
void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
- Tegra::DmaPusher& dma_pusher);
+ Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher);
/// Push GPU command entries to be processed
void SubmitList(Tegra::CommandList&& entries);
+ /// Push GPU CDMA command buffer entries to be processed
+ void SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries);
+
/// Swap buffers (render frame)
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
@@ -135,11 +152,12 @@ private:
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data);
-private:
- SynchState state;
Core::System& system;
+ const bool is_async;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+ SynchState state;
std::thread thread;
- std::thread::id thread_id;
};
} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index 99450777e..21e569ba1 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -19,8 +19,8 @@ namespace VideoCore {
class GuestDriverProfile {
public:
explicit GuestDriverProfile() = default;
- explicit GuestDriverProfile(std::optional<u32> texture_handler_size)
- : texture_handler_size{texture_handler_size} {}
+ explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
+ : texture_handler_size{texture_handler_size_} {}
void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index aa62363a7..970120acc 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -1,38 +1,83 @@
set(SHADER_FILES
+ block_linear_unswizzle_2d.comp
+ block_linear_unswizzle_3d.comp
+ convert_depth_to_float.frag
+ convert_float_to_depth.frag
+ full_screen_triangle.vert
+ opengl_copy_bc4.comp
opengl_present.frag
opengl_present.vert
+ pitch_unswizzle.comp
+ vulkan_blit_color_float.frag
+ vulkan_blit_depth_stencil.frag
+ vulkan_present.frag
+ vulkan_present.vert
+ vulkan_quad_indexed.comp
+ vulkan_uint8.comp
)
+find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED)
+
+set(GLSL_FLAGS "")
+set(QUIET_FLAG "--quiet")
+
set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
+set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
-set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
-add_custom_command(
- OUTPUT
- ${SHADER_DIR}
+set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
+set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
+
+# Check if `--quiet` is available on host's glslangValidator version
+# glslangValidator prints to STDERR iff an unrecognized flag is passed to it
+execute_process(
COMMAND
- ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR}
+ ${GLSLANGVALIDATOR} ${QUIET_FLAG}
+ ERROR_VARIABLE
+ GLSLANG_ERROR
+ # STDOUT variable defined to silence unnecessary output during CMake configuration
+ OUTPUT_VARIABLE
+ GLSLANG_OUTPUT
)
-set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
-set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
+if (NOT GLSLANG_ERROR STREQUAL "")
+ message(WARNING "Refusing to use unavailable flag `${QUIET_FLAG}` on `${GLSLANGVALIDATOR}`")
+ set(QUIET_FLAG "")
+endif()
foreach(FILENAME IN ITEMS ${SHADER_FILES})
string(REPLACE "." "_" SHADER_NAME ${FILENAME})
set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
- set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
- add_custom_command(
- OUTPUT
- ${HEADER_FILE}
- COMMAND
- ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE}
- MAIN_DEPENDENCY
- ${SOURCE_FILE}
- DEPENDS
- ${HEADER_GENERATOR}
- ${INPUT_FILE}
- )
- set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE})
+ # Skip generating source headers on Vulkan exclusive files
+ if (NOT ${FILENAME} MATCHES "vulkan.*")
+ set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
+ add_custom_command(
+ OUTPUT
+ ${SOURCE_HEADER_FILE}
+ COMMAND
+ ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE}
+ MAIN_DEPENDENCY
+ ${SOURCE_FILE}
+ DEPENDS
+ ${INPUT_FILE}
+ # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified
+ )
+ set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE})
+ endif()
+ # Skip compiling to SPIR-V OpenGL exclusive files
+ if (NOT ${FILENAME} MATCHES "opengl.*")
+ string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME)
+ set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h)
+ add_custom_command(
+ OUTPUT
+ ${SPIRV_HEADER_FILE}
+ COMMAND
+ ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
+ MAIN_DEPENDENCY
+ ${SOURCE_FILE}
+ )
+ set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE})
+ endif()
endforeach()
add_custom_target(host_shaders
diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake
index 368bce0ed..c0fc49768 100644
--- a/src/video_core/host_shaders/StringShaderHeader.cmake
+++ b/src/video_core/host_shaders/StringShaderHeader.cmake
@@ -8,4 +8,6 @@ string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME)
file(READ ${SOURCE_FILE} CONTENTS)
+get_filename_component(OUTPUT_DIR ${HEADER_FILE} DIRECTORY)
+make_directory(${OUTPUT_DIR})
configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY)
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp
new file mode 100644
index 000000000..a131be79e
--- /dev/null
+++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp
@@ -0,0 +1,122 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 430
+
+#ifdef VULKAN
+
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_EXT_shader_8bit_storage : require
+#define HAS_EXTENDED_TYPES 1
+#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
+#define END_PUSH_CONSTANTS };
+#define UNIFORM(n)
+#define BINDING_SWIZZLE_BUFFER 0
+#define BINDING_INPUT_BUFFER 1
+#define BINDING_OUTPUT_IMAGE 2
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#extension GL_NV_gpu_shader5 : enable
+#ifdef GL_NV_gpu_shader5
+#define HAS_EXTENDED_TYPES 1
+#else
+#define HAS_EXTENDED_TYPES 0
+#endif
+#define BEGIN_PUSH_CONSTANTS
+#define END_PUSH_CONSTANTS
+#define UNIFORM(n) layout (location = n) uniform
+#define BINDING_SWIZZLE_BUFFER 0
+#define BINDING_INPUT_BUFFER 1
+#define BINDING_OUTPUT_IMAGE 0
+
+#endif
+
+BEGIN_PUSH_CONSTANTS
+UNIFORM(0) uvec3 origin;
+UNIFORM(1) ivec3 destination;
+UNIFORM(2) uint bytes_per_block_log2;
+UNIFORM(3) uint layer_stride;
+UNIFORM(4) uint block_size;
+UNIFORM(5) uint x_shift;
+UNIFORM(6) uint block_height;
+UNIFORM(7) uint block_height_mask;
+END_PUSH_CONSTANTS
+
+layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
+ uint swizzle_table[];
+};
+
+#if HAS_EXTENDED_TYPES
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
+#endif
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };
+
+layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image;
+
+layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
+
+const uint GOB_SIZE_X = 64;
+const uint GOB_SIZE_Y = 8;
+const uint GOB_SIZE_Z = 1;
+const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
+
+const uint GOB_SIZE_X_SHIFT = 6;
+const uint GOB_SIZE_Y_SHIFT = 3;
+const uint GOB_SIZE_Z_SHIFT = 0;
+const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
+
+const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
+
+uint SwizzleOffset(uvec2 pos) {
+ pos = pos & SWIZZLE_MASK;
+ return swizzle_table[pos.y * 64 + pos.x];
+}
+
+uvec4 ReadTexel(uint offset) {
+ switch (bytes_per_block_log2) {
+#if HAS_EXTENDED_TYPES
+ case 0:
+ return uvec4(u8data[offset], 0, 0, 0);
+ case 1:
+ return uvec4(u16data[offset / 2], 0, 0, 0);
+#else
+ case 0:
+ return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
+ case 1:
+ return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
+#endif
+ case 2:
+ return uvec4(u32data[offset / 4], 0, 0, 0);
+ case 3:
+ return uvec4(u64data[offset / 8], 0, 0);
+ case 4:
+ return u128data[offset / 16];
+ }
+ return uvec4(0);
+}
+
+void main() {
+ uvec3 pos = gl_GlobalInvocationID + origin;
+ pos.x <<= bytes_per_block_log2;
+
+ // Read as soon as possible due to its latency
+ const uint swizzle = SwizzleOffset(pos.xy);
+
+ const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
+
+ uint offset = 0;
+ offset += pos.z * layer_stride;
+ offset += (block_y >> block_height) * block_size;
+ offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
+ offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
+ offset += swizzle;
+
+ const uvec4 texel = ReadTexel(offset);
+ const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination;
+ imageStore(output_image, coord, texel);
+}
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp
new file mode 100644
index 000000000..bb6872e6b
--- /dev/null
+++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp
@@ -0,0 +1,125 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 430
+
+#ifdef VULKAN
+
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_EXT_shader_8bit_storage : require
+#define HAS_EXTENDED_TYPES 1
+#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
+#define END_PUSH_CONSTANTS };
+#define UNIFORM(n)
+#define BINDING_SWIZZLE_BUFFER 0
+#define BINDING_INPUT_BUFFER 1
+#define BINDING_OUTPUT_IMAGE 2
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#extension GL_NV_gpu_shader5 : enable
+#ifdef GL_NV_gpu_shader5
+#define HAS_EXTENDED_TYPES 1
+#else
+#define HAS_EXTENDED_TYPES 0
+#endif
+#define BEGIN_PUSH_CONSTANTS
+#define END_PUSH_CONSTANTS
+#define UNIFORM(n) layout (location = n) uniform
+#define BINDING_SWIZZLE_BUFFER 0
+#define BINDING_INPUT_BUFFER 1
+#define BINDING_OUTPUT_IMAGE 0
+
+#endif
+
+BEGIN_PUSH_CONSTANTS
+UNIFORM(0) uvec3 origin;
+UNIFORM(1) ivec3 destination;
+UNIFORM(2) uint bytes_per_block_log2;
+UNIFORM(3) uint slice_size;
+UNIFORM(4) uint block_size;
+UNIFORM(5) uint x_shift;
+UNIFORM(6) uint block_height;
+UNIFORM(7) uint block_height_mask;
+UNIFORM(8) uint block_depth;
+UNIFORM(9) uint block_depth_mask;
+END_PUSH_CONSTANTS
+
+layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
+ uint swizzle_table[];
+};
+
+#if HAS_EXTENDED_TYPES
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
+#endif
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };
+
+layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image;
+
+layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in;
+
+const uint GOB_SIZE_X = 64;
+const uint GOB_SIZE_Y = 8;
+const uint GOB_SIZE_Z = 1;
+const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
+
+const uint GOB_SIZE_X_SHIFT = 6;
+const uint GOB_SIZE_Y_SHIFT = 3;
+const uint GOB_SIZE_Z_SHIFT = 0;
+const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
+
+const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
+
+uint SwizzleOffset(uvec2 pos) {
+ pos = pos & SWIZZLE_MASK;
+ return swizzle_table[pos.y * 64 + pos.x];
+}
+
+uvec4 ReadTexel(uint offset) {
+ switch (bytes_per_block_log2) {
+#if HAS_EXTENDED_TYPES
+ case 0:
+ return uvec4(u8data[offset], 0, 0, 0);
+ case 1:
+ return uvec4(u16data[offset / 2], 0, 0, 0);
+#else
+ case 0:
+ return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
+ case 1:
+ return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
+#endif
+ case 2:
+ return uvec4(u32data[offset / 4], 0, 0, 0);
+ case 3:
+ return uvec4(u64data[offset / 8], 0, 0);
+ case 4:
+ return u128data[offset / 16];
+ }
+ return uvec4(0);
+}
+
+void main() {
+ uvec3 pos = gl_GlobalInvocationID + origin;
+ pos.x <<= bytes_per_block_log2;
+
+ // Read as soon as possible due to its latency
+ const uint swizzle = SwizzleOffset(pos.xy);
+
+ const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
+
+ uint offset = 0;
+ offset += (pos.z >> block_depth) * slice_size;
+ offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height);
+ offset += (block_y >> block_height) * block_size;
+ offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
+ offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
+ offset += swizzle;
+
+ const uvec4 texel = ReadTexel(offset);
+ const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination;
+ imageStore(output_image, coord, texel);
+}
diff --git a/src/video_core/host_shaders/convert_depth_to_float.frag b/src/video_core/host_shaders/convert_depth_to_float.frag
new file mode 100644
index 000000000..624c58509
--- /dev/null
+++ b/src/video_core/host_shaders/convert_depth_to_float.frag
@@ -0,0 +1,13 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+
+layout(binding = 0) uniform sampler2D depth_texture;
+layout(location = 0) out float output_color;
+
+void main() {
+ ivec2 coord = ivec2(gl_FragCoord.xy);
+ output_color = texelFetch(depth_texture, coord, 0).r;
+}
diff --git a/src/video_core/host_shaders/convert_float_to_depth.frag b/src/video_core/host_shaders/convert_float_to_depth.frag
new file mode 100644
index 000000000..d86c795f4
--- /dev/null
+++ b/src/video_core/host_shaders/convert_float_to_depth.frag
@@ -0,0 +1,13 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+
+layout(binding = 0) uniform sampler2D color_texture;
+
+void main() {
+ ivec2 coord = ivec2(gl_FragCoord.xy);
+ float color = texelFetch(color_texture, coord, 0).r;
+ gl_FragDepth = color;
+}
diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert
new file mode 100644
index 000000000..452ad6502
--- /dev/null
+++ b/src/video_core/host_shaders/full_screen_triangle.vert
@@ -0,0 +1,29 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+
+#ifdef VULKAN
+#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
+#define END_PUSH_CONSTANTS };
+#define UNIFORM(n)
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+#define BEGIN_PUSH_CONSTANTS
+#define END_PUSH_CONSTANTS
+#define UNIFORM(n) layout (location = n) uniform
+#endif
+
+BEGIN_PUSH_CONSTANTS
+UNIFORM(0) vec2 tex_scale;
+UNIFORM(1) vec2 tex_offset;
+END_PUSH_CONSTANTS
+
+layout(location = 0) out vec2 texcoord;
+
+void main() {
+ float x = float((gl_VertexIndex & 1) << 2);
+ float y = float((gl_VertexIndex & 2) << 1);
+ gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
+ texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
+}
diff --git a/src/video_core/host_shaders/opengl_copy_bc4.comp b/src/video_core/host_shaders/opengl_copy_bc4.comp
new file mode 100644
index 000000000..7b8e20fbe
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_copy_bc4.comp
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 430 core
+#extension GL_ARB_gpu_shader_int64 : require
+
+layout (local_size_x = 4, local_size_y = 4) in;
+
+layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input;
+layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output;
+
+layout(location = 0) uniform uvec3 src_offset;
+layout(location = 1) uniform uvec3 dst_offset;
+
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
+uint DecompressBlock(uint64_t bits, uvec2 coord) {
+ const uint code_offset = 16 + 3 * (4 * coord.y + coord.x);
+ const uint code = uint(bits >> code_offset) & 7;
+ const uint red0 = uint(bits >> 0) & 0xff;
+ const uint red1 = uint(bits >> 8) & 0xff;
+ if (red0 > red1) {
+ switch (code) {
+ case 0:
+ return red0;
+ case 1:
+ return red1;
+ case 2:
+ return (6 * red0 + 1 * red1) / 7;
+ case 3:
+ return (5 * red0 + 2 * red1) / 7;
+ case 4:
+ return (4 * red0 + 3 * red1) / 7;
+ case 5:
+ return (3 * red0 + 4 * red1) / 7;
+ case 6:
+ return (2 * red0 + 5 * red1) / 7;
+ case 7:
+ return (1 * red0 + 6 * red1) / 7;
+ }
+ } else {
+ switch (code) {
+ case 0:
+ return red0;
+ case 1:
+ return red1;
+ case 2:
+ return (4 * red0 + 1 * red1) / 5;
+ case 3:
+ return (3 * red0 + 2 * red1) / 5;
+ case 4:
+ return (2 * red0 + 3 * red1) / 5;
+ case 5:
+ return (1 * red0 + 4 * red1) / 5;
+ case 6:
+ return 0;
+ case 7:
+ return 0xff;
+ }
+ }
+ return 0;
+}
+
+void main() {
+ uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg;
+ uint64_t bits = packUint2x32(packed_bits);
+ uint red = DecompressBlock(bits, gl_LocalInvocationID.xy);
+ uvec4 color = uvec4(red & 0xff, 0, 0, 0xff);
+ imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color);
+}
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag
index 8a4cb024b..84b818227 100644
--- a/src/video_core/host_shaders/opengl_present.frag
+++ b/src/video_core/host_shaders/opengl_present.frag
@@ -1,3 +1,7 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
#version 430 core
layout (location = 0) in vec2 frag_tex_coord;
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert
index 2235d31a4..c3b5adbba 100644
--- a/src/video_core/host_shaders/opengl_present.vert
+++ b/src/video_core/host_shaders/opengl_present.vert
@@ -1,3 +1,7 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
#version 430 core
out gl_PerVertex {
diff --git a/src/video_core/host_shaders/pitch_unswizzle.comp b/src/video_core/host_shaders/pitch_unswizzle.comp
new file mode 100644
index 000000000..cb48ec170
--- /dev/null
+++ b/src/video_core/host_shaders/pitch_unswizzle.comp
@@ -0,0 +1,86 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 430
+
+#ifdef VULKAN
+
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_EXT_shader_8bit_storage : require
+#define HAS_EXTENDED_TYPES 1
+#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
+#define END_PUSH_CONSTANTS };
+#define UNIFORM(n)
+#define BINDING_INPUT_BUFFER 0
+#define BINDING_OUTPUT_IMAGE 1
+
+#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+
+#extension GL_NV_gpu_shader5 : enable
+#ifdef GL_NV_gpu_shader5
+#define HAS_EXTENDED_TYPES 1
+#else
+#define HAS_EXTENDED_TYPES 0
+#endif
+#define BEGIN_PUSH_CONSTANTS
+#define END_PUSH_CONSTANTS
+#define UNIFORM(n) layout (location = n) uniform
+#define BINDING_INPUT_BUFFER 0
+#define BINDING_OUTPUT_IMAGE 0
+
+#endif
+
+BEGIN_PUSH_CONSTANTS
+UNIFORM(0) uvec2 origin;
+UNIFORM(1) ivec2 destination;
+UNIFORM(2) uint bytes_per_block;
+UNIFORM(3) uint pitch;
+END_PUSH_CONSTANTS
+
+#if HAS_EXTENDED_TYPES
+layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; };
+#endif
+layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; };
+layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; };
+
+layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image;
+
+layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
+
+uvec4 ReadTexel(uint offset) {
+ switch (bytes_per_block) {
+#if HAS_EXTENDED_TYPES
+ case 1:
+ return uvec4(u8data[offset], 0, 0, 0);
+ case 2:
+ return uvec4(u16data[offset / 2], 0, 0, 0);
+#else
+ case 1:
+ return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
+ case 2:
+ return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
+#endif
+ case 4:
+ return uvec4(u32data[offset / 4], 0, 0, 0);
+ case 8:
+ return uvec4(u64data[offset / 8], 0, 0);
+ case 16:
+ return u128data[offset / 16];
+ }
+ return uvec4(0);
+}
+
+void main() {
+ uvec2 pos = gl_GlobalInvocationID.xy + origin;
+
+ uint offset = 0;
+ offset += pos.x * bytes_per_block;
+ offset += pos.y * pitch;
+
+ const uvec4 texel = ReadTexel(offset);
+ const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination;
+ imageStore(output_image, coord, texel);
+}
diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag
new file mode 100644
index 000000000..4a6aae410
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_blit_color_float.frag
@@ -0,0 +1,14 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+
+layout(binding = 0) uniform sampler2D tex;
+
+layout(location = 0) in vec2 texcoord;
+layout(location = 0) out vec4 color;
+
+void main() {
+ color = textureLod(tex, texcoord, 0);
+}
diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
new file mode 100644
index 000000000..19bb23a5a
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
@@ -0,0 +1,16 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+#extension GL_ARB_shader_stencil_export : require
+
+layout(binding = 0) uniform sampler2D depth_tex;
+layout(binding = 1) uniform isampler2D stencil_tex;
+
+layout(location = 0) in vec2 texcoord;
+
+void main() {
+ gl_FragDepth = textureLod(depth_tex, texcoord, 0).r;
+ gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r;
+}
diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/host_shaders/vulkan_present.frag
index a06ecd24a..0979ff3e6 100644
--- a/src/video_core/renderer_vulkan/shaders/blit.frag
+++ b/src/video_core/host_shaders/vulkan_present.frag
@@ -2,15 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-/*
- * Build instructions:
- * $ glslangValidator -V $THIS_FILE -o output.spv
- * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
- * $ xxd -i optimized.spv
- *
- * Then copy that bytecode to the C++ file
- */
-
#version 460 core
layout (location = 0) in vec2 frag_tex_coord;
diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/host_shaders/vulkan_present.vert
index c64d9235a..00b868958 100644
--- a/src/video_core/renderer_vulkan/shaders/blit.vert
+++ b/src/video_core/host_shaders/vulkan_present.vert
@@ -2,15 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-/*
- * Build instructions:
- * $ glslangValidator -V $THIS_FILE -o output.spv
- * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
- * $ xxd -i optimized.spv
- *
- * Then copy that bytecode to the C++ file
- */
-
#version 460 core
layout (location = 0) in vec2 vert_position;
diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp
index 5a472ba9b..8655591d0 100644
--- a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp
+++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp
@@ -2,15 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-/*
- * Build instructions:
- * $ glslangValidator -V quad_indexed.comp -o output.spv
- * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
- * $ xxd -i optimized.spv
- *
- * Then copy that bytecode to the C++ file
- */
-
#version 460 core
layout (local_size_x = 1024) in;
diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp
index a320f3ae0..872291670 100644
--- a/src/video_core/renderer_vulkan/shaders/uint8.comp
+++ b/src/video_core/host_shaders/vulkan_uint8.comp
@@ -2,15 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-/*
- * Build instructions:
- * $ glslangValidator -V $THIS_FILE -o output.spv
- * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
- * $ xxd -i optimized.spv
- *
- * Then copy that bytecode to the C++ file
- */
-
#version 460 core
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
@@ -25,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
uint16_t output_indexes[];
};
+uint AssembleIndex(uint id) {
+ // Most primitive restart indices are 0xFF
+ // Hardcode this to 0xFF for now
+ uint index = uint(input_indexes[id]);
+ return index == 0xFF ? 0xFFFF : index;
+}
+
void main() {
uint id = gl_GlobalInvocationID.x;
if (id < input_indexes.length()) {
- output_indexes[id] = uint16_t(input_indexes[id]);
+ output_indexes[id] = uint16_t(AssembleIndex(id));
}
}
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index a50e7b4e0..cd21a2112 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -36,7 +36,7 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
}
} else {
// Macro not compiled, check if it's uploaded and if so, compile it
- std::optional<u32> mid_method = std::nullopt;
+ std::optional<u32> mid_method;
const auto macro_code = uploaded_macro_code.find(method);
if (macro_code == uploaded_macro_code.end()) {
for (const auto& [method_base, code] : uploaded_macro_code) {
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index df00b57df..70ac7c620 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -85,7 +85,7 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
{0x0217920100488FF7, &HLE_0217920100488FF7},
}};
-HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
HLEMacro::~HLEMacro() = default;
std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
@@ -99,8 +99,8 @@ std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) co
HLEMacroImpl::~HLEMacroImpl() = default;
-HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
- : maxwell3d(maxwell3d), func(func) {}
+HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
+ : maxwell3d{maxwell3d_}, func{func_} {}
void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
func(maxwell3d, parameters);
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
index 37af875a0..cb3bd1600 100644
--- a/src/video_core/macro/macro_hle.h
+++ b/src/video_core/macro/macro_hle.h
@@ -20,7 +20,7 @@ using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u3
class HLEMacro {
public:
- explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
+ explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
~HLEMacro();
std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index bd01fd1f2..8da26fd59 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -11,29 +11,29 @@
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
namespace Tegra {
-MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
- : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
+MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
+ : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
}
-MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d,
- const std::vector<u32>& code)
- : maxwell3d(maxwell3d), code(code) {}
+MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_,
+ const std::vector<u32>& code_)
+ : maxwell3d{maxwell3d_}, code{code_} {}
-void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) {
+void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) {
MICROPROFILE_SCOPE(MacroInterp);
Reset();
- registers[1] = parameters[0];
- num_parameters = parameters.size();
+ registers[1] = params[0];
+ num_parameters = params.size();
if (num_parameters > parameters_capacity) {
parameters_capacity = num_parameters;
- this->parameters = std::make_unique<u32[]>(num_parameters);
+ parameters = std::make_unique<u32[]>(num_parameters);
}
- std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
+ std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32));
// Execute the code until we hit an exit condition.
bool keep_executing = true;
@@ -133,8 +133,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) {
break;
}
default:
- UNIMPLEMENTED_MSG("Unimplemented macro operation {}",
- static_cast<u32>(opcode.operation.Value()));
+ UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value());
}
// An instruction with the Exit flag will not actually
@@ -182,7 +181,7 @@ u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a,
return ~(src_a & src_b);
default:
- UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", static_cast<u32>(operation));
+ UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation);
return 0;
}
}
@@ -230,7 +229,7 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r
Send((result >> 12) & 0b111111);
break;
default:
- UNIMPLEMENTED_MSG("Unimplemented result operation {}", static_cast<u32>(operation));
+ UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation);
}
}
diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h
index 90217fc89..d50c619ce 100644
--- a/src/video_core/macro/macro_interpreter.h
+++ b/src/video_core/macro/macro_interpreter.h
@@ -17,7 +17,7 @@ class Maxwell3D;
class MacroInterpreter final : public MacroEngine {
public:
- explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d);
+ explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d_);
protected:
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
@@ -28,8 +28,8 @@ private:
class MacroInterpreterImpl : public CachedMacro {
public:
- MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
- void Execute(const std::vector<u32>& parameters, u32 method) override;
+ explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
+ void Execute(const std::vector<u32>& params, u32 method) override;
private:
/// Resets the execution engine state, zeroing registers, etc.
@@ -38,9 +38,9 @@ private:
/**
* Executes a single macro instruction located at the current program counter. Returns whether
* the interpreter should keep running.
- * @param offset Offset to start execution at.
+ *
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
- * previous instruction.
+ * previous instruction.
*/
bool Step(bool is_delay_slot);
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index c1b9e4ad9..c6b2b2109 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -14,11 +14,11 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
namespace Tegra {
-static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
-static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
-static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
-static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
-static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
+constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
+constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
+constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
+constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
+constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
STATE,
@@ -28,15 +28,15 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
BRANCH_HOLDER,
});
-MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
- : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
+MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
+ : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
}
-MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code)
- : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) {
+MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
+ : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
Compile();
}
@@ -165,8 +165,7 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
}
break;
default:
- UNIMPLEMENTED_MSG("Unimplemented ALU operation {}",
- static_cast<std::size_t>(opcode.alu_operation.Value()));
+ UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value());
break;
}
Compile_ProcessResult(opcode.result_operation, opcode.dst);
@@ -553,15 +552,15 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
}
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
- const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) {
+ const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) {
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
// register.
- if (reg == 0) {
+ if (reg_index == 0) {
return;
}
- mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
+ mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], result);
};
- const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); };
+ const auto SetMethodAddress = [this](const Xbyak::Reg32& reg32) { mov(METHOD_ADDRESS, reg32); };
switch (operation) {
case Macro::ResultOperation::IgnoreAndFetch:
@@ -604,7 +603,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
Compile_Send(RESULT);
break;
default:
- UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation));
+ UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation);
}
}
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index a180e7428..7f50ac2f8 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -23,7 +23,7 @@ constexpr size_t MAX_CODE_SIZE = 0x10000;
class MacroJITx64 final : public MacroEngine {
public:
- explicit MacroJITx64(Engines::Maxwell3D& maxwell3d);
+ explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_);
protected:
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
@@ -34,7 +34,7 @@ private:
class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
public:
- MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
+ explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
~MacroJITx64Impl();
void Execute(const std::vector<u32>& parameters, u32 method) override;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 16b2aaa27..44240a9c4 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -4,6 +4,7 @@
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/process.h"
@@ -11,6 +12,7 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
namespace Tegra {
@@ -19,8 +21,8 @@ MemoryManager::MemoryManager(Core::System& system_)
MemoryManager::~MemoryManager() = default;
-void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
- rasterizer = &rasterizer_;
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
}
GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
@@ -37,6 +39,12 @@ GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std
}
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
+ const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
+ if (it != map_ranges.end() && it->first == gpu_addr) {
+ it->second = size;
+ } else {
+ map_ranges.insert(it, MapRange{gpu_addr, size});
+ }
return UpdateRange(gpu_addr, cpu_addr, size);
}
@@ -44,13 +52,28 @@ GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_
return Map(cpu_addr, *FindFreeRange(size, align), size);
}
+GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
+ const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
+ ASSERT(gpu_addr);
+ return Map(cpu_addr, *gpu_addr, size);
+}
+
void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
- if (!size) {
+ if (size == 0) {
return;
}
-
+ const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
+ if (it != map_ranges.end()) {
+ ASSERT(it->first == gpu_addr);
+ map_ranges.erase(it);
+ } else {
+ UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
+ }
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
- system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size);
+ const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
+ ASSERT(cpu_addr);
+
+ rasterizer->UnmapMemory(*cpu_addr, size);
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}
@@ -58,7 +81,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) {
for (u64 offset{}; offset < size; offset += page_size) {
if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) {
- return {};
+ return std::nullopt;
}
}
@@ -108,7 +131,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
page_table[PageEntryIndex(gpu_addr)] = page_entry;
}
-std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const {
+std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
+ bool start_32bit_address) const {
if (!align) {
align = page_size;
} else {
@@ -116,7 +140,7 @@ std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size
}
u64 available_size{};
- GPUVAddr gpu_addr{address_space_start};
+ GPUVAddr gpu_addr{start_32bit_address ? address_space_start_low : address_space_start};
while (gpu_addr + available_size < address_space_size) {
if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) {
available_size += page_size;
@@ -135,13 +159,13 @@ std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size
}
}
- return {};
+ return std::nullopt;
}
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
const auto page_entry{GetPageEntry(gpu_addr)};
if (!page_entry.IsValid()) {
- return {};
+ return std::nullopt;
}
return page_entry.ToAddress() + (gpu_addr & page_mask);
@@ -207,6 +231,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
return system.Memory().GetPointer(*address);
}
+size_t MemoryManager::BytesToMapEnd(GPUVAddr gpu_addr) const noexcept {
+ auto it = std::ranges::upper_bound(map_ranges, gpu_addr, {}, &MapRange::first);
+ --it;
+ return it->second - (gpu_addr - it->first);
+}
+
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
std::size_t remaining_size{size};
std::size_t page_index{gpu_src_addr >> page_bits};
@@ -303,17 +333,29 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
}
}
+void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
+ size_t remaining_size{size};
+ size_t page_index{gpu_addr >> page_bits};
+ size_t page_offset{gpu_addr & page_mask};
+ while (remaining_size > 0) {
+ const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
+ if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
+ rasterizer->FlushRegion(*page_addr + page_offset, num_bytes);
+ }
+ ++page_index;
+ page_offset = 0;
+ remaining_size -= num_bytes;
+ }
+}
+
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
std::vector<u8> tmp_buffer(size);
ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
- WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
-}
-void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
- std::size_t size) {
- std::vector<u8> tmp_buffer(size);
- ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
- WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
+ // The output block must be flushed in case it has data modified from the GPU.
+ // Fixes NPC geometry in Zombie Panic in Wonderland DX
+ FlushRegion(gpu_dest_addr, size);
+ WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
}
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 53c8d122a..b3538d503 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -28,7 +28,7 @@ public:
};
constexpr PageEntry() = default;
- constexpr PageEntry(State state) : state{state} {}
+ constexpr PageEntry(State state_) : state{state_} {}
constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {}
[[nodiscard]] constexpr bool IsUnmapped() const {
@@ -68,11 +68,11 @@ static_assert(sizeof(PageEntry) == 4, "PageEntry is too large");
class MemoryManager final {
public:
- explicit MemoryManager(Core::System& system);
+ explicit MemoryManager(Core::System& system_);
~MemoryManager();
/// Binds a renderer to the memory manager.
- void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
@@ -85,6 +85,9 @@ public:
[[nodiscard]] u8* GetPointer(GPUVAddr addr);
[[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
+ /// Returns the number of bytes until the end of the memory map containing the given GPU address
+ [[nodiscard]] size_t BytesToMapEnd(GPUVAddr gpu_addr) const noexcept;
+
/**
* ReadBlock and WriteBlock are full read and write operations over virtual
* GPU Memory. It's important to use these when GPU memory may not be continuous
@@ -107,7 +110,6 @@ public:
*/
void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
- void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
/**
* IsGranularRange checks if a gpu region can be simply read with a pointer.
@@ -116,6 +118,7 @@ public:
[[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
[[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
+ [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
[[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
[[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
void Unmap(GPUVAddr gpu_addr, std::size_t size);
@@ -124,17 +127,21 @@ private:
[[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
- [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const;
+ [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
+ bool start_32bit_address = false) const;
void TryLockPage(PageEntry page_entry, std::size_t size);
void TryUnlockPage(PageEntry page_entry, std::size_t size);
+ void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
+
[[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) {
return (gpu_addr >> page_bits) & page_table_mask;
}
static constexpr u64 address_space_size = 1ULL << 40;
static constexpr u64 address_space_start = 1ULL << 32;
+ static constexpr u64 address_space_start_low = 1ULL << 16;
static constexpr u64 page_bits{16};
static constexpr u64 page_size{1 << page_bits};
static constexpr u64 page_mask{page_size - 1};
@@ -147,6 +154,11 @@ private:
VideoCore::RasterizerInterface* rasterizer = nullptr;
std::vector<PageEntry> page_table;
+
+ using MapRange = std::pair<GPUVAddr, size_t>;
+ std::vector<MapRange> map_ranges;
+
+ std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue;
};
} // namespace Tegra
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
deleted file mode 100644
index 9da9fb4ff..000000000
--- a/src/video_core/morton.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-#include <cstring>
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/morton.h"
-#include "video_core/surface.h"
-#include "video_core/textures/decoders.h"
-
-namespace VideoCore {
-
-using Surface::GetBytesPerPixel;
-using Surface::PixelFormat;
-
-using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
-using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
-
-template <bool morton_to_linear, PixelFormat format>
-static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
- u32 tile_width_spacing, u8* buffer, u8* addr) {
- constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
-
- // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
- // pixel values.
- constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
- constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};
-
- if constexpr (morton_to_linear) {
- Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
- stride, height, depth, block_height, block_depth,
- tile_width_spacing);
- } else {
- Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
- (height + tile_size_y - 1) / tile_size_y, depth,
- bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
- block_height, block_depth, tile_width_spacing);
- }
-}
-
-static constexpr ConversionArray morton_to_linear_fns = {
- MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
- MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>,
- MortonCopy<true, PixelFormat::A8B8G8R8_SINT>,
- MortonCopy<true, PixelFormat::A8B8G8R8_UINT>,
- MortonCopy<true, PixelFormat::R5G6B5_UNORM>,
- MortonCopy<true, PixelFormat::B5G6R5_UNORM>,
- MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>,
- MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>,
- MortonCopy<true, PixelFormat::A2B10G10R10_UINT>,
- MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>,
- MortonCopy<true, PixelFormat::R8_UNORM>,
- MortonCopy<true, PixelFormat::R8_SNORM>,
- MortonCopy<true, PixelFormat::R8_SINT>,
- MortonCopy<true, PixelFormat::R8_UINT>,
- MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>,
- MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>,
- MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>,
- MortonCopy<true, PixelFormat::R16G16B16A16_SINT>,
- MortonCopy<true, PixelFormat::R16G16B16A16_UINT>,
- MortonCopy<true, PixelFormat::B10G11R11_FLOAT>,
- MortonCopy<true, PixelFormat::R32G32B32A32_UINT>,
- MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>,
- MortonCopy<true, PixelFormat::BC2_UNORM>,
- MortonCopy<true, PixelFormat::BC3_UNORM>,
- MortonCopy<true, PixelFormat::BC4_UNORM>,
- MortonCopy<true, PixelFormat::BC4_SNORM>,
- MortonCopy<true, PixelFormat::BC5_UNORM>,
- MortonCopy<true, PixelFormat::BC5_SNORM>,
- MortonCopy<true, PixelFormat::BC7_UNORM>,
- MortonCopy<true, PixelFormat::BC6H_UFLOAT>,
- MortonCopy<true, PixelFormat::BC6H_SFLOAT>,
- MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>,
- MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>,
- MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>,
- MortonCopy<true, PixelFormat::R32G32B32A32_SINT>,
- MortonCopy<true, PixelFormat::R32G32_FLOAT>,
- MortonCopy<true, PixelFormat::R32G32_SINT>,
- MortonCopy<true, PixelFormat::R32_FLOAT>,
- MortonCopy<true, PixelFormat::R16_FLOAT>,
- MortonCopy<true, PixelFormat::R16_UNORM>,
- MortonCopy<true, PixelFormat::R16_SNORM>,
- MortonCopy<true, PixelFormat::R16_UINT>,
- MortonCopy<true, PixelFormat::R16_SINT>,
- MortonCopy<true, PixelFormat::R16G16_UNORM>,
- MortonCopy<true, PixelFormat::R16G16_FLOAT>,
- MortonCopy<true, PixelFormat::R16G16_UINT>,
- MortonCopy<true, PixelFormat::R16G16_SINT>,
- MortonCopy<true, PixelFormat::R16G16_SNORM>,
- MortonCopy<true, PixelFormat::R32G32B32_FLOAT>,
- MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>,
- MortonCopy<true, PixelFormat::R8G8_UNORM>,
- MortonCopy<true, PixelFormat::R8G8_SNORM>,
- MortonCopy<true, PixelFormat::R8G8_SINT>,
- MortonCopy<true, PixelFormat::R8G8_UINT>,
- MortonCopy<true, PixelFormat::R32G32_UINT>,
- MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>,
- MortonCopy<true, PixelFormat::R32_UINT>,
- MortonCopy<true, PixelFormat::R32_SINT>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>,
- MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>,
- MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>,
- MortonCopy<true, PixelFormat::BC2_SRGB>,
- MortonCopy<true, PixelFormat::BC3_SRGB>,
- MortonCopy<true, PixelFormat::BC7_SRGB>,
- MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>,
- MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>,
- MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>,
- MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>,
- MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>,
- MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>,
- MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
- MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>,
- MortonCopy<true, PixelFormat::D32_FLOAT>,
- MortonCopy<true, PixelFormat::D16_UNORM>,
- MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>,
- MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>,
- MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>,
-};
-
-static constexpr ConversionArray linear_to_morton_fns = {
- MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>,
- MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>,
- MortonCopy<false, PixelFormat::A8B8G8R8_SINT>,
- MortonCopy<false, PixelFormat::A8B8G8R8_UINT>,
- MortonCopy<false, PixelFormat::R5G6B5_UNORM>,
- MortonCopy<false, PixelFormat::B5G6R5_UNORM>,
- MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>,
- MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>,
- MortonCopy<false, PixelFormat::A2B10G10R10_UINT>,
- MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>,
- MortonCopy<false, PixelFormat::R8_UNORM>,
- MortonCopy<false, PixelFormat::R8_SNORM>,
- MortonCopy<false, PixelFormat::R8_SINT>,
- MortonCopy<false, PixelFormat::R8_UINT>,
- MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>,
- MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>,
- MortonCopy<false, PixelFormat::R16G16B16A16_SINT>,
- MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>,
- MortonCopy<false, PixelFormat::R16G16B16A16_UINT>,
- MortonCopy<false, PixelFormat::B10G11R11_FLOAT>,
- MortonCopy<false, PixelFormat::R32G32B32A32_UINT>,
- MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>,
- MortonCopy<false, PixelFormat::BC2_UNORM>,
- MortonCopy<false, PixelFormat::BC3_UNORM>,
- MortonCopy<false, PixelFormat::BC4_UNORM>,
- MortonCopy<false, PixelFormat::BC4_SNORM>,
- MortonCopy<false, PixelFormat::BC5_UNORM>,
- MortonCopy<false, PixelFormat::BC5_SNORM>,
- MortonCopy<false, PixelFormat::BC7_UNORM>,
- MortonCopy<false, PixelFormat::BC6H_UFLOAT>,
- MortonCopy<false, PixelFormat::BC6H_SFLOAT>,
- // TODO(Subv): Swizzling ASTC formats are not supported
- nullptr,
- MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>,
- MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>,
- MortonCopy<false, PixelFormat::R32G32B32A32_SINT>,
- MortonCopy<false, PixelFormat::R32G32_FLOAT>,
- MortonCopy<false, PixelFormat::R32G32_SINT>,
- MortonCopy<false, PixelFormat::R32_FLOAT>,
- MortonCopy<false, PixelFormat::R16_FLOAT>,
- MortonCopy<false, PixelFormat::R16_UNORM>,
- MortonCopy<false, PixelFormat::R16_SNORM>,
- MortonCopy<false, PixelFormat::R16_UINT>,
- MortonCopy<false, PixelFormat::R16_SINT>,
- MortonCopy<false, PixelFormat::R16G16_UNORM>,
- MortonCopy<false, PixelFormat::R16G16_FLOAT>,
- MortonCopy<false, PixelFormat::R16G16_UINT>,
- MortonCopy<false, PixelFormat::R16G16_SINT>,
- MortonCopy<false, PixelFormat::R16G16_SNORM>,
- MortonCopy<false, PixelFormat::R32G32B32_FLOAT>,
- MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>,
- MortonCopy<false, PixelFormat::R8G8_UNORM>,
- MortonCopy<false, PixelFormat::R8G8_SNORM>,
- MortonCopy<false, PixelFormat::R8G8_SINT>,
- MortonCopy<false, PixelFormat::R8G8_UINT>,
- MortonCopy<false, PixelFormat::R32G32_UINT>,
- MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>,
- MortonCopy<false, PixelFormat::R32_UINT>,
- MortonCopy<false, PixelFormat::R32_SINT>,
- nullptr,
- nullptr,
- nullptr,
- MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>,
- MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>,
- MortonCopy<false, PixelFormat::BC2_SRGB>,
- MortonCopy<false, PixelFormat::BC3_SRGB>,
- MortonCopy<false, PixelFormat::BC7_SRGB>,
- MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>,
- MortonCopy<false, PixelFormat::D32_FLOAT>,
- MortonCopy<false, PixelFormat::D16_UNORM>,
- MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>,
- MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>,
- MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>,
-};
-
-static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
- switch (mode) {
- case MortonSwizzleMode::MortonToLinear:
- return morton_to_linear_fns[static_cast<std::size_t>(format)];
- case MortonSwizzleMode::LinearToMorton:
- return linear_to_morton_fns[static_cast<std::size_t>(format)];
- }
- UNREACHABLE();
- return morton_to_linear_fns[static_cast<std::size_t>(format)];
-}
-
-void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
- u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
- u8* buffer, u8* addr) {
- GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
- tile_width_spacing, buffer, addr);
-}
-
-} // namespace VideoCore
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
deleted file mode 100644
index b714a7e3f..000000000
--- a/src/video_core/morton.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-#include "video_core/surface.h"
-
-namespace VideoCore {
-
-enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
-
-void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
- u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
- u8* buffer, u8* addr);
-
-} // namespace VideoCore
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 0d3a88765..203f2af05 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -28,8 +28,8 @@ namespace VideoCommon {
template <class QueryCache, class HostCounter>
class CounterStreamBase {
public:
- explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
- : cache{cache}, type{type} {}
+ explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_)
+ : cache{cache_}, type{type_} {}
/// Updates the state of the stream, enabling or disabling as needed.
void Update(bool enabled) {
@@ -91,14 +91,15 @@ private:
std::shared_ptr<HostCounter> last;
};
-template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
- class QueryPool>
+template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
class QueryCacheBase {
public:
- explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
- : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
- static_cast<QueryCache&>(*this),
- VideoCore::QueryType::SamplesPassed}}} {}
+ explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_)
+ : rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+ VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
@@ -118,29 +119,27 @@ public:
*/
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex};
- auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr_opt);
- VAddr cpu_addr = *cpu_addr_opt;
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ ASSERT(cpu_addr);
- CachedQuery* query = TryGet(cpu_addr);
+ CachedQuery* query = TryGet(*cpu_addr);
if (!query) {
- ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+ ASSERT_OR_EXECUTE(cpu_addr, return;);
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
- query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
+ query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
query->BindCounter(Stream(type).Current(), timestamp);
if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- AsyncFlushQuery(cpu_addr);
+ AsyncFlushQuery(*cpu_addr);
}
}
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
void UpdateCounters() {
std::unique_lock lock{mutex};
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
}
@@ -206,9 +205,6 @@ public:
committed_flushes.pop_front();
}
-protected:
- std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
-
private:
/// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
@@ -270,8 +266,9 @@ private:
static constexpr std::uintptr_t PAGE_SIZE = 4096;
static constexpr unsigned PAGE_BITS = 12;
- Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::MemoryManager& gpu_memory;
std::recursive_mutex mutex;
@@ -337,8 +334,8 @@ private:
template <class HostCounter>
class CachedQueryBase {
public:
- explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
- : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+ explicit CachedQueryBase(VAddr cpu_addr_, u8* host_ptr_)
+ : cpu_addr{cpu_addr_}, host_ptr{host_ptr_} {}
virtual ~CachedQueryBase() = default;
CachedQueryBase(CachedQueryBase&&) noexcept = default;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 3cbdac8e7..50491b758 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -7,6 +7,7 @@
#include <atomic>
#include <functional>
#include <optional>
+#include <span>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
@@ -32,7 +33,7 @@ using DiskResourceLoadCallback = std::function<void(LoadCallbackStage, std::size
class RasterizerInterface {
public:
- virtual ~RasterizerInterface() {}
+ virtual ~RasterizerInterface() = default;
/// Dispatches a draw invocation
virtual void Draw(bool is_indexed, bool is_instanced) = 0;
@@ -49,6 +50,10 @@ public:
/// Records a GPU query and caches it
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
+ /// Signal an uniform buffer binding
+ virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) = 0;
+
/// Signal a GPU based semaphore as a fence
virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
@@ -76,6 +81,9 @@ public:
/// Sync memory between guest and host.
virtual void SyncGuestHost() = 0;
+ /// Unmap memory range
+ virtual void UnmapMemory(VAddr addr, u64 size) = 0;
+
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
@@ -83,6 +91,12 @@ public:
/// Notify the host renderer to wait for previous primitive and compute operations.
virtual void WaitForIdle() = 0;
+ /// Notify the host renderer to wait for reads and writes to render targets and flush caches.
+ virtual void FragmentBarrier() = 0;
+
+ /// Notify the host renderer to make available previous render target writes.
+ virtual void TiledCacheBarrier() = 0;
+
/// Notify the rasterizer to send all written commands to the host GPU.
virtual void FlushCommands() = 0;
@@ -90,15 +104,15 @@ public:
virtual void TickFrame() = 0;
/// Attempt to use a faster method to perform a surface copy
- virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst,
- const Tegra::Engines::Fermi2D::Config& copy_config) {
+ [[nodiscard]] virtual bool AccelerateSurfaceCopy(
+ const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,
+ const Tegra::Engines::Fermi2D::Config& copy_config) {
return false;
}
/// Attempt to use a faster method to display the framebuffer to screen
- virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
- u32 pixel_stride) {
+ [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
+ VAddr framebuffer_addr, u32 pixel_stride) {
return false;
}
@@ -106,19 +120,16 @@ public:
virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
/// Initialize disk cached resources for the game being emulated
- virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
- const DiskResourceLoadCallback& callback = {}) {}
-
- /// Initializes renderer dirty flags
- virtual void SetupDirtyFlags() {}
+ virtual void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+ const DiskResourceLoadCallback& callback) {}
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
- GuestDriverProfile& AccessGuestDriverProfile() {
+ [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() {
return guest_driver_profile;
}
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
- const GuestDriverProfile& AccessGuestDriverProfile() const {
+ [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const {
return guest_driver_profile;
}
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 649074acd..320ee8d30 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -37,60 +37,43 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context);
virtual ~RendererBase();
- /// Initialize the renderer
- virtual bool Init() = 0;
-
- /// Shutdown the renderer
- virtual void ShutDown() = 0;
-
/// Finalize rendering the guest frame and draw into the presentation texture
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
- /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer
- /// specific implementation)
- /// Returns true if a frame was drawn
- virtual bool TryPresent(int timeout_ms) = 0;
+ [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
// Getter/setter functions:
// ------------------------
- f32 GetCurrentFPS() const {
+ [[nodiscard]] f32 GetCurrentFPS() const {
return m_current_fps;
}
- int GetCurrentFrame() const {
+ [[nodiscard]] int GetCurrentFrame() const {
return m_current_frame;
}
- RasterizerInterface& Rasterizer() {
- return *rasterizer;
- }
-
- const RasterizerInterface& Rasterizer() const {
- return *rasterizer;
- }
-
- Core::Frontend::GraphicsContext& Context() {
+ [[nodiscard]] Core::Frontend::GraphicsContext& Context() {
return *context;
}
- const Core::Frontend::GraphicsContext& Context() const {
+ [[nodiscard]] const Core::Frontend::GraphicsContext& Context() const {
return *context;
}
- Core::Frontend::EmuWindow& GetRenderWindow() {
+ [[nodiscard]] Core::Frontend::EmuWindow& GetRenderWindow() {
return render_window;
}
- const Core::Frontend::EmuWindow& GetRenderWindow() const {
+ [[nodiscard]] const Core::Frontend::EmuWindow& GetRenderWindow() const {
return render_window;
}
- RendererSettings& Settings() {
+ [[nodiscard]] RendererSettings& Settings() {
return renderer_settings;
}
- const RendererSettings& Settings() const {
+ [[nodiscard]] const RendererSettings& Settings() const {
return renderer_settings;
}
@@ -103,7 +86,6 @@ public:
protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
- std::unique_ptr<RasterizerInterface> rasterizer;
std::unique_ptr<Core::Frontend::GraphicsContext> context;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index b7e9ed2e9..3e4d88c30 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -39,8 +39,8 @@ using Operation = const OperationNode&;
constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
char Swizzle(std::size_t component) {
- ASSERT(component < 4);
- return component["xyzw"];
+ static constexpr std::string_view SWIZZLE{"xyzw"};
+ return SWIZZLE.at(component);
}
constexpr bool IsGenericAttribute(Attribute::Index index) {
@@ -71,7 +71,7 @@ std::string_view GetInputFlags(PixelImap attribute) {
case PixelImap::Unused:
break;
}
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
+ UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
return {};
}
@@ -123,7 +123,7 @@ std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::Primitive
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
return "TRIANGLES_ADJACENCY";
default:
- UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
+ UNIMPLEMENTED_MSG("topology={}", topology);
return "POINTS";
}
}
@@ -137,7 +137,7 @@ std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
case Tegra::Shader::OutputTopology::TriangleStrip:
return "TRIANGLE_STRIP";
default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
+ UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
return "points";
}
}
@@ -187,8 +187,8 @@ std::string TextureType(const MetaTexture& meta) {
class ARBDecompiler final {
public:
- explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
- ShaderType stage, std::string_view identifier);
+ explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
+ ShaderType stage_, std::string_view identifier);
std::string Code() const {
return shader_source;
@@ -224,7 +224,7 @@ private:
std::string Visit(const Node& node);
- std::pair<std::string, std::size_t> BuildCoords(Operation);
+ std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
std::string BuildAoffi(Operation);
std::string GlobalMemoryPointer(const GmemNode& gmem);
void Exit();
@@ -376,9 +376,11 @@ private:
std::string temporary = AllocTemporary();
std::string address;
std::string_view opname;
+ bool robust = false;
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
address = GlobalMemoryPointer(*gmem);
opname = "ATOM";
+ robust = true;
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
opname = "ATOMS";
@@ -386,7 +388,15 @@ private:
UNREACHABLE();
return "{0, 0, 0, 0}";
}
+ if (robust) {
+ AddLine("IF NE.x;");
+ }
AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
+ if (robust) {
+ AddLine("ELSE;");
+ AddLine("MOV.S {}, 0;", temporary);
+ AddLine("ENDIF;");
+ }
return temporary;
}
@@ -792,9 +802,9 @@ private:
};
};
-ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
- ShaderType stage, std::string_view identifier)
- : device{device}, ir{ir}, registry{registry}, stage{stage} {
+ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
+ ShaderType stage_, std::string_view identifier)
+ : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
DefineGlobalMemory();
AddLine("TEMP RC;");
@@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() {
}
void ARBDecompiler::DeclareGlobalMemory() {
- const std::size_t num_entries = ir.GetGlobalMemory().size();
+ const size_t num_entries = ir.GetGlobalMemory().size();
if (num_entries > 0) {
- const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
- AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
+ AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
}
}
@@ -1125,44 +1134,44 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
VisitAST(current);
}
- } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(ast->condition);
+ } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
+ const std::string condition = VisitExpression(if_then->condition);
ResetTemporaries();
AddLine("MOVC.U RC.x, {};", condition);
AddLine("IF NE.x;");
- for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
VisitAST(current);
}
AddLine("ENDIF;");
- } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
+ } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
AddLine("ELSE;");
- for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
VisitAST(current);
}
- } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
- VisitBlock(ast->nodes);
- } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
- AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
+ } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
+ VisitBlock(decoded->nodes);
+ } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
+ AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
ResetTemporaries();
- } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(ast->condition);
+ } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
+ const std::string condition = VisitExpression(do_while->condition);
ResetTemporaries();
AddLine("REP;");
- for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
VisitAST(current);
}
AddLine("MOVC.U RC.x, {};", condition);
AddLine("BRK (NE.x);");
AddLine("ENDREP;");
- } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
- const bool is_true = ExprIsTrue(ast->condition);
+ } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
+ const bool is_true = ExprIsTrue(ast_return->condition);
if (!is_true) {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+ AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
AddLine("IF NE.x;");
ResetTemporaries();
}
- if (ast->kills) {
+ if (ast_return->kills) {
AddLine("KIL TR;");
} else {
Exit();
@@ -1170,11 +1179,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
if (!is_true) {
AddLine("ENDIF;");
}
- } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
- if (ExprIsTrue(ast->condition)) {
+ } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
+ if (ExprIsTrue(ast_break->condition)) {
AddLine("BRK;");
} else {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+ AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
AddLine("BRK (NE.x);");
ResetTemporaries();
}
@@ -1342,7 +1351,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
GetGenericAttributeIndex(index), swizzle);
}
}
- UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
+ UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
break;
}
return "{0, 0, 0, 0}.x";
@@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) {
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
std::string temporary = AllocTemporary();
- AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
+ AddLine("MOV {}, 0;", temporary);
+ AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
return temporary;
}
@@ -1406,12 +1416,12 @@ std::string ARBDecompiler::Visit(const Node& node) {
return {};
}
-std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
+std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
UNIMPLEMENTED_IF(meta.sampler.is_indexed);
- UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
- meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
+ const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
+ meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
const std::size_t count = operation.GetOperandsCount();
std::string temporary = AllocVectorTemporary();
std::size_t i = 0;
@@ -1419,12 +1429,21 @@ std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operati
AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
}
if (meta.sampler.is_array) {
- AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
+ AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
+ ++i;
}
if (meta.sampler.is_shadow) {
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
+ std::string compare = Visit(meta.depth_compare);
+ if (is_extended) {
+ ASSERT(i == 4);
+ std::string extra_coord = AllocVectorTemporary();
+ AddLine("MOV.F {}.x, {};", extra_coord, compare);
+ return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
+ }
+ AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
+ ++i;
}
- return {std::move(temporary), i};
+ return {temporary, temporary, i};
}
std::string ARBDecompiler::BuildAoffi(Operation operation) {
@@ -1441,18 +1460,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
}
std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
+ // Read a bindless SSBO, return its address and set CC accordingly
+ // address = c[binding].xy
+ // length = c[binding].z
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
- const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
const std::string pointer = AllocLongVectorTemporary();
std::string temporary = AllocTemporary();
- const u32 local_index = binding / 2;
- AddLine("PK64.U {}, c[{}];", pointer, local_index);
+ AddLine("PK64.U {}, c[{}];", pointer, binding);
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
Visit(gmem.GetBaseAddress()));
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
- AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
+ AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
+ // Compare offset to length and set CC
+ AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
return fmt::format("{}.x", pointer);
}
@@ -1463,9 +1485,7 @@ void ARBDecompiler::Exit() {
}
const auto safe_get_register = [this](u32 reg) -> std::string {
- // TODO(Rodrigo): Replace with contains once C++20 releases
- const auto& used_registers = ir.GetRegisters();
- if (used_registers.find(reg) != used_registers.end()) {
+ if (ir.GetRegisters().contains(reg)) {
return fmt::format("R{}.x", reg);
}
return "{0, 0, 0, 0}.x";
@@ -1552,7 +1572,9 @@ std::string ARBDecompiler::Assign(Operation operation) {
ResetTemporaries();
return {};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
+ AddLine("IF NE.x;");
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
+ AddLine("ENDIF;");
ResetTemporaries();
return {};
} else {
@@ -1844,7 +1866,7 @@ std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
std::string ARBDecompiler::Texture(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [temporary, swizzle] = BuildCoords(operation);
+ const auto [coords, temporary, swizzle] = BuildCoords(operation);
std::string_view opcode = "TEX";
std::string extra;
@@ -1873,7 +1895,7 @@ std::string ARBDecompiler::Texture(Operation operation) {
}
}
- AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
+ AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
TextureType(meta), BuildAoffi(operation));
AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
@@ -1882,7 +1904,7 @@ std::string ARBDecompiler::Texture(Operation operation) {
std::string ARBDecompiler::TextureGather(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [temporary, swizzle] = BuildCoords(operation);
+ const auto [coords, temporary, swizzle] = BuildCoords(operation);
std::string comp;
if (!meta.sampler.is_shadow) {
@@ -1892,7 +1914,7 @@ std::string ARBDecompiler::TextureGather(Operation operation) {
AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
}
@@ -1930,13 +1952,13 @@ std::string ARBDecompiler::TextureQueryLod(Operation operation) {
std::string ARBDecompiler::TexelFetch(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [temporary, swizzle] = BuildCoords(operation);
+ const auto [coords, temporary, swizzle] = BuildCoords(operation);
if (!meta.sampler.is_buffer) {
ASSERT(swizzle < 4);
AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
}
- AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
+ AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
BuildAoffi(operation));
AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
@@ -1947,7 +1969,7 @@ std::string ARBDecompiler::TextureGradient(Operation operation) {
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
const std::string ddx = AllocVectorTemporary();
const std::string ddy = AllocVectorTemporary();
- const std::string coord = BuildCoords(operation).first;
+ const std::string coord = std::get<1>(BuildCoords(operation));
const std::size_t num_components = meta.derivates.size() / 2;
for (std::size_t index = 0; index < num_components; ++index) {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index e866d8f2f..6da3906a4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,98 +2,208 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <memory>
+#include <span>
-#include <glad/glad.h>
-
-#include "common/assert.h"
-#include "common/microprofile.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
+namespace {
+struct BindlessSSBO {
+ GLuint64EXT address;
+ GLsizei length;
+ GLsizei padding;
+};
+static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
+
+constexpr std::array PROGRAM_LUT{
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+};
+} // Anonymous namespace
+
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
+ buffer.Create();
+ const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
+ glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
+ glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
+
+ if (runtime.has_unified_vertex_buffers) {
+ glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
+ }
+}
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
+ glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+ static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+}
-MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
+void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
+ glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+ static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+}
-Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
- : VideoCommon::BufferBlock{cpu_addr, size} {
- gl_buffer.Create();
- glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
- if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
- glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
- glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+void Buffer::MakeResident(GLenum access) noexcept {
+ // Abuse GLenum's order to exit early
+ // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
+ if (access <= current_residency_access || buffer.handle == 0) {
+ return;
+ }
+ if (std::exchange(current_residency_access, access) != GL_NONE) {
+ // If the buffer is already resident, remove its residency before promoting it
+ glMakeNamedBufferNonResidentNV(buffer.handle);
}
+ glMakeNamedBufferResidentNV(buffer.handle, access);
}
-Buffer::~Buffer() = default;
-
-void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
- glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
- data);
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
+ : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
+ use_assembly_shaders{device.UseAssemblyShaders()},
+ has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
+ stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
+ GLint gl_max_attributes;
+ glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
+ max_attributes = static_cast<u32>(gl_max_attributes);
+ for (auto& stage_uniforms : fast_uniforms) {
+ for (OGLBuffer& buffer : stage_uniforms) {
+ buffer.Create();
+ glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
+ }
+ }
+ for (auto& stage_uniforms : copy_uniforms) {
+ for (OGLBuffer& buffer : stage_uniforms) {
+ buffer.Create();
+ glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
+ }
+ }
+ for (OGLBuffer& buffer : copy_compute_uniforms) {
+ buffer.Create();
+ glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
+ }
}
-void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
- MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
- const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
- const GLintptr gl_offset = static_cast<GLintptr>(offset);
- if (read_buffer.handle == 0) {
- read_buffer.Create();
- glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
- GL_STREAM_READ);
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ for (const VideoCommon::BufferCopy& copy : copies) {
+ glCopyNamedBufferSubData(
+ src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
+ static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
}
- glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
- glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
- glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
}
-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t size) {
- glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
- static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
+void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
+ if (has_unified_vertex_buffers) {
+ buffer.MakeResident(GL_READ_ONLY);
+ glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
+ static_cast<GLsizeiptr>(size));
+ } else {
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
+ index_buffer_offset = offset;
+ }
}
-OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
- const Device& device_, std::size_t stream_size)
- : GenericBufferCache{rasterizer, system,
- std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
- device{device_} {
- if (!device.HasFastBufferSubData()) {
+void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
+ u32 stride) {
+ if (index >= max_attributes) {
return;
}
-
- static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
- glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
- for (const GLuint cbuf : cbufs) {
- glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
+ if (has_unified_vertex_buffers) {
+ buffer.MakeResident(GL_READ_ONLY);
+ glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
+ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
+ buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
+ } else {
+ glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
+ static_cast<GLsizei>(stride));
}
}
-OGLBufferCache::~OGLBufferCache() {
- glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
+ u32 offset, u32 size) {
+ if (use_assembly_shaders) {
+ GLuint handle;
+ if (offset != 0) {
+ handle = copy_uniforms[stage][binding_index].handle;
+ glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
+ } else {
+ handle = buffer.Handle();
+ }
+ glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
+ static_cast<GLsizeiptr>(size));
+ } else {
+ const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
}
-std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<Buffer>(device, cpu_addr, size);
+void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
+ u32 size) {
+ if (use_assembly_shaders) {
+ GLuint handle;
+ if (offset != 0) {
+ handle = copy_compute_uniforms[binding_index].handle;
+ glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
+ } else {
+ handle = buffer.Handle();
+ }
+ glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
+ static_cast<GLsizeiptr>(size));
+ } else {
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
}
-OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
- return {0, 0, 0};
+void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
+ u32 offset, u32 size, bool is_written) {
+ if (use_assembly_shaders) {
+ const BindlessSSBO ssbo{
+ .address = buffer.HostGpuAddr() + offset,
+ .length = static_cast<GLsizei>(size),
+ .padding = 0,
+ };
+ buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
+ glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
+ reinterpret_cast<const GLuint*>(&ssbo));
+ } else {
+ const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
}
-OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
- std::size_t size) {
- DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
- const GLuint cbuf = cbufs[cbuf_cursor++];
+void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
+ u32 size, bool is_written) {
+ if (use_assembly_shaders) {
+ const BindlessSSBO ssbo{
+ .address = buffer.HostGpuAddr() + offset,
+ .length = static_cast<GLsizei>(size),
+ .padding = 0,
+ };
+ buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
+ glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
+ reinterpret_cast<const GLuint*>(&ssbo));
+ } else if (size == 0) {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+ } else {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
+}
- glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
- return {cbuf, 0, 0};
+void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
+ u32 size) {
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 88fdc0536..d8b20a9af 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -5,76 +5,157 @@
#pragma once
#include <array>
-#include <memory>
+#include <span>
+#include "common/alignment.h"
#include "common/common_types.h"
+#include "common/dynamic_library.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
-namespace Core {
-class System;
-}
-
namespace OpenGL {
-class Device;
-class OGLStreamBuffer;
-class RasterizerOpenGL;
+class BufferCacheRuntime;
-class Buffer : public VideoCommon::BufferBlock {
+class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
- explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
- ~Buffer();
+ explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
+ u64 size_bytes);
+ explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
- void Upload(std::size_t offset, std::size_t size, const u8* data);
+ void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
- void Download(std::size_t offset, std::size_t size, u8* data);
+ void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
- void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t size);
+ void MakeResident(GLenum access) noexcept;
- GLuint Handle() const noexcept {
- return gl_buffer.handle;
+ [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
+ return address;
}
- u64 Address() const noexcept {
- return gpu_address;
+ [[nodiscard]] GLuint Handle() const noexcept {
+ return buffer.handle;
}
private:
- OGLBuffer gl_buffer;
- OGLBuffer read_buffer;
- u64 gpu_address = 0;
+ GLuint64EXT address = 0;
+ OGLBuffer buffer;
+ GLenum current_residency_access = GL_NONE;
};
-using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
-class OGLBufferCache final : public GenericBufferCache {
+class BufferCacheRuntime {
+ friend Buffer;
+
public:
- explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
- const Device& device, std::size_t stream_size);
- ~OGLBufferCache();
+ static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
+
+ explicit BufferCacheRuntime(const Device& device_);
+
+ void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies);
+
+ void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
+
+ void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
+
+ void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
+
+ void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
+
+ void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
+ bool is_written);
+
+ void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
+ bool is_written);
+
+ void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+
+ void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
+ if (use_assembly_shaders) {
+ const GLuint handle = fast_uniforms[stage][binding_index].handle;
+ const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
+ glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
+ } else {
+ const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding,
+ fast_uniforms[stage][binding_index].handle, 0,
+ static_cast<GLsizeiptr>(size));
+ }
+ }
- BufferInfo GetEmptyBuffer(std::size_t) override;
+ void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
+ if (use_assembly_shaders) {
+ glProgramBufferParametersIuivNV(
+ PABO_LUT[stage], binding_index, 0,
+ static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
+ reinterpret_cast<const GLuint*>(data.data()));
+ } else {
+ glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
+ static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+ }
+ }
- void Acquire() noexcept {
- cbuf_cursor = 0;
+ std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
+ const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
+ const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ return mapped_span;
}
-protected:
- std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+ [[nodiscard]] const GLvoid* IndexOffset() const noexcept {
+ return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
+ }
- BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
+ [[nodiscard]] bool HasFastBufferSubData() const noexcept {
+ return has_fast_buffer_sub_data;
+ }
private:
- static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
- Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
+ static constexpr std::array PABO_LUT{
+ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
+ GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
+ GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
+ };
const Device& device;
- std::size_t cbuf_cursor = 0;
- std::array<GLuint, NUM_CBUFS> cbufs{};
+ bool has_fast_buffer_sub_data = false;
+ bool use_assembly_shaders = false;
+ bool has_unified_vertex_buffers = false;
+
+ u32 max_attributes = 0;
+
+ std::optional<StreamBuffer> stream_buffer;
+
+ std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
+ VideoCommon::NUM_STAGES>
+ fast_uniforms;
+ std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
+ VideoCommon::NUM_STAGES>
+ copy_uniforms;
+ std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
+
+ u32 index_buffer_offset = 0;
+};
+
+struct BufferCacheParams {
+ using Runtime = OpenGL::BufferCacheRuntime;
+ using Buffer = OpenGL::Buffer;
+
+ static constexpr bool IS_OPENGL = true;
+ static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
+ static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
+ static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
+ static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
+ static constexpr bool USE_MEMORY_MAPS = false;
};
+using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index e7d95149f..48d5c4a5e 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,9 +5,11 @@
#include <algorithm>
#include <array>
#include <cstddef>
+#include <cstdlib>
#include <cstring>
#include <limits>
#include <optional>
+#include <span>
#include <vector>
#include <glad/glad.h>
@@ -19,35 +21,35 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
-
namespace {
-
// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;
constexpr u32 NumStages = 5;
-constexpr std::array LimitUBOs = {
+constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
- GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
-
-constexpr std::array LimitSSBOs = {
+ GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
+};
+constexpr std::array LIMIT_SSBOS = {
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
- GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
-
-constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
- GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
- GL_MAX_TEXTURE_IMAGE_UNITS,
- GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
-
-constexpr std::array LimitImages = {
+ GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
+};
+constexpr std::array LIMIT_SAMPLERS = {
+ GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
+ GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TEXTURE_IMAGE_UNITS,
+ GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
+};
+constexpr std::array LIMIT_IMAGES = {
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
- GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
+ GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
+};
template <typename T>
T GetInteger(GLenum pname) {
@@ -76,8 +78,8 @@ std::vector<std::string_view> GetExtensions() {
return extensions;
}
-bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) {
- return std::find(images.begin(), images.end(), extension) != images.end();
+bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
+ return std::ranges::find(extensions, extension) != extensions.end();
}
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
@@ -91,8 +93,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
- std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
- [](GLenum pname) { return GetInteger<u32>(pname); });
+ std::ranges::transform(LIMIT_UBOS, max.begin(),
+ [](GLenum pname) { return GetInteger<u32>(pname); });
return max;
}
@@ -115,9 +117,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
bindings[stage] = {
- Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
- Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
- Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
+ Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
+ Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
+ Extract(base_samplers, num_samplers, total_samplers / NumStages,
+ LIMIT_SAMPLERS[stage])};
}
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
@@ -130,7 +133,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
// Reserve at least 4 image bindings on the fragment stage.
bindings[4].image =
- Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
+ Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
// This is guaranteed to be at least 1.
const u32 total_extracted_images = num_images / (NumStages - 1);
@@ -142,7 +145,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
continue;
}
bindings[stage].image =
- Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
+ Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
}
// Compute doesn't care about any of this.
@@ -188,17 +191,24 @@ bool IsASTCSupported() {
return true;
}
+[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
+ const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
+ return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
+}
} // Anonymous namespace
-Device::Device()
- : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
+Device::Device() {
+ if (!GLAD_GL_VERSION_4_6) {
+ LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
+ throw std::runtime_error{"Insufficient version"};
+ }
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
- const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
+ const bool is_intel = vendor == "Intel";
bool disable_fast_buffer_sub_data = false;
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
@@ -208,8 +218,10 @@ Device::Device()
disable_fast_buffer_sub_data = true;
}
- uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
- shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
+ max_uniform_buffers = BuildMaxUniformBuffers();
+ base_bindings = BuildBaseBindings();
+ uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
+ shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
@@ -223,8 +235,10 @@ Device::Device()
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
+ has_broken_texture_view_formats = is_amd || is_intel;
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
+ has_debugging_tool_attached = IsDebugToolAttached(extensions);
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
@@ -235,10 +249,13 @@ Device::Device()
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
+ use_driver_cache = is_nvidia;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
+ LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
+ has_broken_texture_view_formats);
if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8a4b6b9fc..ee053776d 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -10,11 +10,9 @@
namespace OpenGL {
-static constexpr u32 EmulationUniformBlockBinding = 0;
-
-class Device final {
+class Device {
public:
- struct BaseBindings final {
+ struct BaseBindings {
u32 uniform_buffer{};
u32 shader_storage_buffer{};
u32 sampler{};
@@ -36,11 +34,11 @@ public:
return GetBaseBindings(static_cast<std::size_t>(shader_type));
}
- std::size_t GetUniformBufferAlignment() const {
+ size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
- std::size_t GetShaderStorageBufferAlignment() const {
+ size_t GetShaderStorageBufferAlignment() const {
return shader_storage_alignment;
}
@@ -96,6 +94,10 @@ public:
return has_precise_bug;
}
+ bool HasBrokenTextureViewFormats() const {
+ return has_broken_texture_view_formats;
+ }
+
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
@@ -104,6 +106,10 @@ public:
return has_nv_viewport_array2;
}
+ bool HasDebuggingToolAttached() const {
+ return has_debugging_tool_attached;
+ }
+
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
@@ -112,14 +118,18 @@ public:
return use_asynchronous_shaders;
}
+ bool UseDriverCache() const {
+ return use_driver_cache;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
- std::size_t uniform_buffer_alignment{};
- std::size_t shader_storage_alignment{};
+ size_t uniform_buffer_alignment{};
+ size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
@@ -133,10 +143,13 @@ private:
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
+ bool has_broken_texture_view_formats{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
+ bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
+ bool use_driver_cache{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 3d2588dd2..151290101 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -11,10 +11,10 @@
namespace OpenGL {
-GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {}
+GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
-GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
- : FenceBase(address, payload, is_stubbed) {}
+GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
+ : FenceBase{address_, payload_, is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default;
@@ -45,11 +45,10 @@ void GLInnerFence::Wait() {
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}
-FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
- VideoCore::RasterizerInterface& rasterizer,
- TextureCacheOpenGL& texture_cache,
- OGLBufferCache& buffer_cache, QueryCache& query_cache)
- : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
+FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::GPU& gpu_, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, QueryCache& query_cache_)
+ : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 1686cf5c8..e714aa115 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -17,8 +17,8 @@ namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
- GLInnerFence(u32 payload, bool is_stubbed);
- GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
+ explicit GLInnerFence(u32 payload_, bool is_stubbed_);
+ explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
~GLInnerFence();
void Queue();
@@ -32,14 +32,13 @@ private:
};
using Fence = std::shared_ptr<GLInnerFence>;
-using GenericFenceManager =
- VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
+using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
- FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
- QueryCache& query_cache);
+ explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ TextureCache& texture_cache, BufferCache& buffer_cache,
+ QueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
deleted file mode 100644
index b8a512cb6..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <tuple>
-#include <unordered_map>
-#include <utility>
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
-
-namespace OpenGL {
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using VideoCore::Surface::SurfaceType;
-
-FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
-
-FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
-
-GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
- const auto [entry, is_cache_miss] = cache.try_emplace(key);
- auto& framebuffer{entry->second};
- if (is_cache_miss) {
- framebuffer = CreateFramebuffer(key);
- }
- return framebuffer.handle;
-}
-
-OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
- OGLFramebuffer framebuffer;
- framebuffer.Create();
-
- // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
-
- if (key.zeta) {
- const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
- const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
- key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
- }
-
- std::size_t num_buffers = 0;
- std::array<GLenum, Maxwell::NumRenderTargets> targets;
-
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- if (!key.colors[index]) {
- targets[index] = GL_NONE;
- continue;
- }
- const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
- key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
-
- const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
- targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
- num_buffers = index + 1;
- }
-
- if (num_buffers > 0) {
- glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
- } else {
- glDrawBuffer(GL_NONE);
- }
-
- return framebuffer;
-}
-
-std::size_t FramebufferCacheKey::Hash() const noexcept {
- std::size_t hash = std::hash<View>{}(zeta);
- for (const auto& color : colors) {
- hash ^= std::hash<View>{}(color);
- }
- hash ^= static_cast<std::size_t>(color_attachments) << 16;
- return hash;
-}
-
-bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
- return std::tie(colors, zeta, color_attachments) ==
- std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
deleted file mode 100644
index 8f698fee0..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <unordered_map>
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_texture_cache.h"
-
-namespace OpenGL {
-
-constexpr std::size_t BitsPerAttachment = 4;
-
-struct FramebufferCacheKey {
- View zeta;
- std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
- u32 color_attachments = 0;
-
- std::size_t Hash() const noexcept;
-
- bool operator==(const FramebufferCacheKey& rhs) const noexcept;
-
- bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
- return !operator==(rhs);
- }
-
- void SetAttachment(std::size_t index, u32 attachment) {
- color_attachments |= attachment << (BitsPerAttachment * index);
- }
-};
-
-} // namespace OpenGL
-
-namespace std {
-
-template <>
-struct hash<OpenGL::FramebufferCacheKey> {
- std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
-
-namespace OpenGL {
-
-class FramebufferCacheOpenGL {
-public:
- FramebufferCacheOpenGL();
- ~FramebufferCacheOpenGL();
-
- GLuint GetFramebuffer(const FramebufferCacheKey& key);
-
-private:
- OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
-
- std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
-};
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index d7ba57aca..acebbf5f4 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -30,12 +30,9 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
-QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
- : VideoCommon::QueryCacheBase<
- QueryCache, CachedQuery, CounterStream, HostCounter,
- std::vector<OGLQuery>>{system,
- static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
- gl_rasterizer{gl_rasterizer} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_)
+ : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
@@ -60,10 +57,11 @@ bool QueryCache::AnyCommandQueued() const noexcept {
return gl_rasterizer.AnyCommandQueued();
}
-HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
- VideoCore::QueryType type)
- : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
- type{type}, query{cache.AllocateQuery(type)} {
+HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
+ VideoCore::QueryType type_)
+ : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, query{
+ cache.AllocateQuery(
+ type)} {
glBeginQuery(GetTarget(type), query.handle);
}
@@ -87,11 +85,14 @@ u64 HostCounter::BlockingQuery() const {
return static_cast<u64>(value);
}
-CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
- : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
+CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
+ u8* host_ptr_)
+ : CachedQueryBase{cpu_addr_, host_ptr_}, cache{&cache_}, type{type_} {}
+
+CachedQuery::~CachedQuery() = default;
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
- : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
+ : CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
cache = rhs.cache;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index d8e7052a1..7bbe5cfe9 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -26,10 +26,11 @@ class RasterizerOpenGL;
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
-class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
- HostCounter, std::vector<OGLQuery>> {
+class QueryCache final
+ : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
+ explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -40,12 +41,13 @@ public:
private:
RasterizerOpenGL& gl_rasterizer;
+ std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
- explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
- VideoCore::QueryType type);
+ explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
+ VideoCore::QueryType type_);
~HostCounter();
void EndQuery();
@@ -60,12 +62,14 @@ private:
class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
- explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
- u8* host_ptr);
- CachedQuery(CachedQuery&& rhs) noexcept;
- CachedQuery(const CachedQuery&) = delete;
+ explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
+ u8* host_ptr_);
+ ~CachedQuery() override;
+ CachedQuery(CachedQuery&& rhs) noexcept;
CachedQuery& operator=(CachedQuery&& rhs) noexcept;
+
+ CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
void Flush() override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4af5824cd..418644108 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -25,48 +25,49 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/shader_cache.h"
+#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using GLvec4 = std::array<GLfloat, 4>;
using Tegra::Engines::ShaderType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
-MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
-MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
+MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
namespace {
-constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
-constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
- NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
-constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
- NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
+constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
+struct TextureHandle {
+ constexpr TextureHandle(u32 data, bool via_header_index) {
+ const Tegra::Texture::TextureHandle handle{data};
+ image = handle.tic_id;
+ sampler = via_header_index ? image : handle.tsc_id.Value();
+ }
+
+ u32 image;
+ u32 sampler;
+};
template <typename Engine, typename Entry>
-Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
- ShaderType shader_type, std::size_t index = 0) {
+TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
+ ShaderType shader_type, size_t index = 0) {
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
if (entry.is_separated) {
const u32 buffer_1 = entry.buffer;
@@ -75,36 +76,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
const u32 offset_2 = entry.secondary_offset;
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
- return engine.GetTextureInfo(handle_1 | handle_2);
+ return TextureHandle(handle_1 | handle_2, via_header_index);
}
}
if (entry.is_bindless) {
- const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return engine.GetTextureInfo(handle);
- }
-
- const auto& gpu_profile = engine.AccessGuestDriverProfile();
- const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
- if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
- return engine.GetStageTexture(shader_type, offset);
- } else {
- return engine.GetTexture(offset);
- }
-}
-
-std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry) {
- if (!entry.IsIndirect()) {
- return entry.GetSize();
+ const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
+ return TextureHandle(raw, via_header_index);
}
-
- if (buffer.size > Maxwell::MaxConstBufferSize) {
- LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
- Maxwell::MaxConstBufferSize);
- return Maxwell::MaxConstBufferSize;
- }
-
- return buffer.size;
+ const u32 buffer = engine.GetBoundBuffer();
+ const u64 offset = (entry.offset + index) * sizeof(u32);
+ return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
/// Translates hardware transform feedback indices
@@ -131,7 +112,7 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
case 43:
return {GL_BACK_SECONDARY_COLOR_NV, 0};
}
- UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
+ UNIMPLEMENTED_MSG("index={}", index);
return {GL_POSITION, 0};
}
@@ -139,72 +120,74 @@ void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
-void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
- if (num_entries == 0) {
- return;
+ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
+ if (entry.is_buffer) {
+ return ImageViewType::Buffer;
}
- if (num_entries % 2 == 1) {
- pointers[num_entries] = 0;
+ switch (entry.type) {
+ case Tegra::Shader::TextureType::Texture1D:
+ return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
+ case Tegra::Shader::TextureType::Texture2D:
+ return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
+ case Tegra::Shader::TextureType::Texture3D:
+ return ImageViewType::e3D;
+ case Tegra::Shader::TextureType::TextureCube:
+ return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
}
- const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
- glProgramLocalParametersI4uivNV(target, 0, num_vectors,
- reinterpret_cast<const GLuint*>(pointers));
+ UNREACHABLE();
+ return ImageViewType::e2D;
}
-} // Anonymous namespace
-
-RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- const Device& device, ScreenInfo& info,
- ProgramManager& program_manager, StateTracker& state_tracker)
- : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
- state_tracker},
- shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
- buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
- fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
- screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
- async_shaders{emu_window} {
- CheckExtensions();
-
- unified_uniform_buffer.Create();
- glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
-
- if (device.UseAssemblyShaders()) {
- glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
- for (const GLuint cbuf : staging_cbufs) {
- glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
- nullptr, 0);
- }
+ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
+ switch (entry.type) {
+ case Tegra::Shader::ImageType::Texture1D:
+ return ImageViewType::e1D;
+ case Tegra::Shader::ImageType::Texture1DArray:
+ return ImageViewType::e1DArray;
+ case Tegra::Shader::ImageType::Texture2D:
+ return ImageViewType::e2D;
+ case Tegra::Shader::ImageType::Texture2DArray:
+ return ImageViewType::e2DArray;
+ case Tegra::Shader::ImageType::Texture3D:
+ return ImageViewType::e3D;
+ case Tegra::Shader::ImageType::TextureBuffer:
+ return ImageViewType::Buffer;
}
+ UNREACHABLE();
+ return ImageViewType::e2D;
+}
+
+} // Anonymous namespace
+RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+ Core::Memory::Memory& cpu_memory_, const Device& device_,
+ ScreenInfo& screen_info_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_)
+ : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
+ kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
+ screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
+ texture_cache_runtime(device, program_manager, state_tracker),
+ texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+ buffer_cache_runtime(device),
+ buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
+ shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
+ query_cache(*this, maxwell3d, gpu_memory),
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
+ async_shaders(emu_window_) {
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
}
-RasterizerOpenGL::~RasterizerOpenGL() {
- if (device.UseAssemblyShaders()) {
- glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
- }
-}
-
-void RasterizerOpenGL::CheckExtensions() {
- if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
- LOG_WARNING(
- Render_OpenGL,
- "Anisotropic filter is not supported! This can cause graphical issues in some games.");
- }
-}
+RasterizerOpenGL::~RasterizerOpenGL() = default;
-void RasterizerOpenGL::SetupVertexFormat() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+void RasterizerOpenGL::SyncVertexFormats() {
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
return;
}
flags[Dirty::VertexFormats] = false;
- MICROPROFILE_SCOPE(OpenGL_VAO);
-
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
// the first 16 vertex attributes always, as we don't know which ones are actually used until
// shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
@@ -217,7 +200,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
flags[Dirty::VertexFormat0 + index] = false;
- const auto attrib = gpu.regs.vertex_attrib_format[index];
+ const auto attrib = maxwell3d.regs.vertex_attrib_format[index];
const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes.
@@ -240,64 +223,14 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
}
-void RasterizerOpenGL::SetupVertexBuffer() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- if (!flags[Dirty::VertexBuffers]) {
- return;
- }
- flags[Dirty::VertexBuffers] = false;
-
- MICROPROFILE_SCOPE(OpenGL_VB);
-
- const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
-
- // Upload all guest vertex arrays sequentially to our buffer
- const auto& regs = gpu.regs;
- for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
- if (!flags[Dirty::VertexBuffer0 + index]) {
- continue;
- }
- flags[Dirty::VertexBuffer0 + index] = false;
-
- const auto& vertex_array = regs.vertex_array[index];
- if (!vertex_array.IsEnabled()) {
- continue;
- }
-
- const GPUVAddr start = vertex_array.StartAddress();
- const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
- ASSERT(end >= start);
-
- const GLuint gl_index = static_cast<GLuint>(index);
- const u64 size = end - start;
- if (size == 0) {
- glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
- if (use_unified_memory) {
- glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
- }
- continue;
- }
- const auto info = buffer_cache.UploadMemory(start, size);
- if (use_unified_memory) {
- glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
- glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
- info.address + info.offset, size);
- } else {
- glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
- }
- }
-}
-
-void RasterizerOpenGL::SetupVertexInstances() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+void RasterizerOpenGL::SyncVertexInstances() {
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexInstances]) {
return;
}
flags[Dirty::VertexInstances] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexInstance0 + index]) {
continue;
@@ -311,26 +244,21 @@ void RasterizerOpenGL::SetupVertexInstances() {
}
}
-GLintptr RasterizerOpenGL::SetupIndexBuffer() {
- MICROPROFILE_SCOPE(OpenGL_Index);
- const auto& regs = system.GPU().Maxwell3D().regs;
- const std::size_t size = CalculateIndexBufferSize();
- const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
- return info.offset;
-}
-
-void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
- MICROPROFILE_SCOPE(OpenGL_Shader);
- auto& gpu = system.GPU().Maxwell3D();
+void RasterizerOpenGL::SetupShaders(bool is_indexed) {
u32 clip_distances = 0;
+ std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
+ image_view_indices.clear();
+ sampler_handles.clear();
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto& shader_config = gpu.regs.shader_config[index];
+ const auto& shader_config = maxwell3d.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
switch (program) {
case Maxwell::ShaderProgram::Geometry:
program_manager.UseGeometryShader(0);
@@ -343,7 +271,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
continue;
}
-
// Currently this stages are not supported in the OpenGL backend.
// TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
if (program == Maxwell::ShaderProgram::TesselationControl ||
@@ -352,7 +279,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
-
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -368,14 +294,25 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
default:
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
shader_config.enable.Value(), shader_config.offset);
+ break;
}
// Stage indices are 0 - 5
- const std::size_t stage = index == 0 ? 0 : index - 1;
- SetupDrawConstBuffers(stage, shader);
- SetupDrawGlobalMemory(stage, shader);
- SetupDrawTextures(stage, shader);
- SetupDrawImages(stage, shader);
+ const size_t stage = index == 0 ? 0 : index - 1;
+ shaders[stage] = shader;
+
+ SetupDrawTextures(shader, stage);
+ SetupDrawImages(shader, stage);
+
+ buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
+
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ u32 ssbo_index = 0;
+ for (const auto& buffer : shader->GetEntries().global_memory_entries) {
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
+ buffer.cbuf_offset, buffer.is_written);
+ ++ssbo_index;
+ }
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -389,146 +326,43 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
++index;
}
}
-
SyncClipEnabled(clip_distances);
- gpu.dirty.flags[Dirty::Shaders] = false;
-}
-
-std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- std::size_t size = 0;
- for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (!regs.vertex_array[index].IsEnabled())
- continue;
-
- const GPUVAddr start = regs.vertex_array[index].StartAddress();
- const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+ maxwell3d.dirty.flags[Dirty::Shaders] = false;
- size += end - start;
- ASSERT(end >= start);
- }
-
- return size;
-}
-
-std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- return static_cast<std::size_t>(regs.index_array.count) *
- static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
-}
-
-void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback) {
- shader_cache.LoadDiskCache(stop_loading, callback);
-}
-
-void RasterizerOpenGL::SetupDirtyFlags() {
- state_tracker.Initialize();
-}
-
-void RasterizerOpenGL::ConfigureFramebuffers() {
- MICROPROFILE_SCOPE(OpenGL_Framebuffer);
- auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
- return;
- }
- gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
- texture_cache.GuardRenderTargets(true);
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
- View depth_surface = texture_cache.GetDepthBufferSurface(true);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
- const auto& regs = gpu.regs;
- UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
-
- // Bind the framebuffer surfaces
- FramebufferCacheKey key;
- const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
- for (std::size_t index = 0; index < colors_count; ++index) {
- View color_surface{texture_cache.GetColorBufferSurface(index, true)};
- if (!color_surface) {
+ size_t image_view_index = 0;
+ size_t texture_index = 0;
+ size_t image_index = 0;
+ for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+ const Shader* const shader = shaders[stage];
+ if (!shader) {
continue;
}
- // Assume that a surface will be written to if it is used as a framebuffer, even
- // if the shader doesn't actually write to it.
- texture_cache.MarkColorBufferInUse(index);
-
- key.SetAttachment(index, regs.rt_control.GetMap(index));
- key.colors[index] = std::move(color_surface);
+ buffer_cache.BindHostStageBuffers(stage);
+ const auto& base = device.GetBaseBindings(stage);
+ BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
+ texture_index, image_index);
}
-
- if (depth_surface) {
- // Assume that a surface will be written to if it is used as a framebuffer, even if
- // the shader doesn't actually write to it.
- texture_cache.MarkDepthBufferInUse();
- key.zeta = std::move(depth_surface);
- }
-
- texture_cache.GuardRenderTargets(false);
-
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}
-void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
- auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
-
- texture_cache.GuardRenderTargets(true);
- View color_surface;
-
- if (using_color) {
- // Determine if we have to preserve the contents.
- // First we have to make sure all clear masks are enabled.
- bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
- !regs.clear_buffers.B || !regs.clear_buffers.A;
- const std::size_t index = regs.clear_buffers.RT;
- if (regs.clear_flags.scissor) {
- // Then we have to confirm scissor testing clears the whole image.
- const auto& scissor = regs.scissor_test[0];
- preserve_contents |= scissor.min_x > 0;
- preserve_contents |= scissor.min_y > 0;
- preserve_contents |= scissor.max_x < regs.rt[index].width;
- preserve_contents |= scissor.max_y < regs.rt[index].height;
- }
-
- color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
- texture_cache.MarkColorBufferInUse(index);
- }
-
- View depth_surface;
- if (using_depth_stencil) {
- bool preserve_contents = false;
- if (regs.clear_flags.scissor) {
- // For depth stencil clears we only have to confirm scissor test covers the whole image.
- const auto& scissor = regs.scissor_test[0];
- preserve_contents |= scissor.min_x > 0;
- preserve_contents |= scissor.min_y > 0;
- preserve_contents |= scissor.max_x < regs.zeta_width;
- preserve_contents |= scissor.max_y < regs.zeta_height;
- }
-
- depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
- texture_cache.MarkDepthBufferInUse();
- }
- texture_cache.GuardRenderTargets(false);
-
- FramebufferCacheKey key;
- key.colors[0] = std::move(color_surface);
- key.zeta = std::move(depth_surface);
-
- state_tracker.NotifyFramebuffer();
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
+void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) {
+ shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}
void RasterizerOpenGL::Clear() {
- const auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.ShouldExecute()) {
+ MICROPROFILE_SCOPE(OpenGL_Clears);
+ if (!maxwell3d.ShouldExecute()) {
return;
}
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
@@ -537,8 +371,9 @@ void RasterizerOpenGL::Clear() {
regs.clear_buffers.A) {
use_color = true;
- state_tracker.NotifyColorMask0();
- glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
+ const GLuint index = regs.clear_buffers.RT;
+ state_tracker.NotifyColorMask(index);
+ glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
// TODO(Rodrigo): Determine if clamping is used on clears
@@ -571,15 +406,15 @@ void RasterizerOpenGL::Clear() {
state_tracker.NotifyScissor0();
glDisablei(GL_SCISSOR_TEST, 0);
}
-
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
- ConfigureClearFramebuffer(use_color, use_depth || use_stencil);
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UpdateRenderTargets(true);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
if (use_color) {
- glClearBufferfv(GL_COLOR, 0, regs.clear_color);
+ glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
}
-
if (use_depth && use_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
} else if (use_depth) {
@@ -587,115 +422,35 @@ void RasterizerOpenGL::Clear() {
} else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
-
++num_queued_commands;
}
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
- auto& gpu = system.GPU().Maxwell3D();
query_cache.UpdateCounters();
- SyncViewport();
- SyncRasterizeEnable();
- SyncPolygonModes();
- SyncColorMask();
- SyncFragmentColorClampState();
- SyncMultiSampleState();
- SyncDepthTestState();
- SyncDepthClamp();
- SyncStencilTestState();
- SyncBlendState();
- SyncLogicOpState();
- SyncCullMode();
- SyncPrimitiveRestart();
- SyncScissorTest();
- SyncPointState();
- SyncLineState();
- SyncPolygonOffset();
- SyncAlphaTest();
- SyncFramebufferSRGB();
-
- buffer_cache.Acquire();
- current_cbuf = 0;
-
- std::size_t buffer_size = CalculateVertexArraysSize();
-
- // Add space for index buffer
- if (is_indexed) {
- buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
- }
-
- // Uniform space for the 5 shader stages
- buffer_size =
- Common::AlignUp<std::size_t>(buffer_size, 4) +
- (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
-
- // Add space for at least 18 constant buffers
- buffer_size += Maxwell::MaxConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-
- // Prepare the vertex array.
- const bool invalidated = buffer_cache.Map(buffer_size);
-
- if (invalidated) {
- // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
- auto& dirty = gpu.dirty.flags;
- dirty[Dirty::VertexBuffers] = true;
- for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
- dirty[index] = true;
- }
- }
-
- // Prepare vertex array format.
- SetupVertexFormat();
-
- // Upload vertex and index data.
- SetupVertexBuffer();
- SetupVertexInstances();
- GLintptr index_buffer_offset = 0;
- if (is_indexed) {
- index_buffer_offset = SetupIndexBuffer();
- }
-
- // Setup emulation uniform buffer.
- if (!device.UseAssemblyShaders()) {
- MaxwellUniformData ubo;
- ubo.SetFromRegs(gpu);
- const auto info =
- buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
- glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
- static_cast<GLsizeiptr>(sizeof(ubo)));
- }
+ SyncState();
// Setup shaders and their used resources.
- texture_cache.GuardSamplers(true);
- const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology);
- SetupShaders(primitive_mode);
- texture_cache.GuardSamplers(false);
-
- ConfigureFramebuffers();
-
- // Signal the buffer cache that we are not going to upload more things.
- buffer_cache.Unmap();
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ SetupShaders(is_indexed);
+ texture_cache.UpdateRenderTargets(false);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindGraphicsPipeline();
- if (texture_cache.TextureBarrier()) {
- glTextureBarrier();
- }
-
+ const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
BeginTransformFeedback(primitive_mode);
- const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
+ const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
const GLsizei num_instances =
- static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
+ static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1);
if (is_indexed) {
- const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
- const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
- const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
- const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
+ const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
+ const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
+ const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
+ const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
} else if (num_instances == 1 && base_instance == 0) {
@@ -714,8 +469,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
base_instance);
}
} else {
- const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
- const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
+ const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first);
+ const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count);
if (num_instances == 1 && base_instance == 0) {
glDrawArrays(primitive_mode, base_vertex, num_vertices);
} else if (base_instance == 0) {
@@ -730,30 +485,28 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
++num_queued_commands;
- system.GPU().TickWork();
+ gpu.TickWork();
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- buffer_cache.Acquire();
- current_cbuf = 0;
-
- auto kernel = shader_cache.GetComputeKernel(code_addr);
- program_manager.BindCompute(kernel->GetHandle());
-
- SetupComputeTextures(kernel);
- SetupComputeImages(kernel);
+ Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
- const std::size_t buffer_size =
- Tegra::Engines::KeplerCompute::NumConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
- buffer_cache.Map(buffer_size);
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ BindComputeTextures(kernel);
- SetupComputeConstBuffers(kernel);
- SetupComputeGlobalMemory(kernel);
-
- buffer_cache.Unmap();
-
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& entries = kernel->GetEntries();
+ buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
+ buffer_cache.UnbindComputeStorageBuffers();
+ u32 ssbo_index = 0;
+ for (const auto& buffer : entries.global_memory_entries) {
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
+ buffer.is_written);
+ ++ssbo_index;
+ }
+ buffer_cache.UpdateComputeBuffers();
+ buffer_cache.BindHostComputeBuffers();
+
+ const auto& launch_desc = kepler_compute.launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@@ -767,6 +520,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
query_cache.Query(gpu_addr, type, timestamp);
}
+void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
+}
+
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
@@ -774,16 +533,24 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.FlushRegion(addr, size);
- buffer_cache.FlushRegion(addr, size);
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.DownloadMemory(addr, size);
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.DownloadMemory(addr, size);
+ }
query_cache.FlushRegion(addr, size);
}
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
- return buffer_cache.MustFlushRegion(addr, size);
+ return buffer_cache.IsRegionGpuModified(addr, size);
}
- return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+ return texture_cache.IsRegionGpuModified(addr, size) ||
+ buffer_cache.IsRegionGpuModified(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -791,9 +558,15 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.InvalidateRegion(addr, size);
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.WriteMemory(addr, size);
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
shader_cache.InvalidateRegion(addr, size);
- buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}
@@ -802,30 +575,47 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.OnCPUWrite(addr, size);
shader_cache.OnCPUWrite(addr, size);
- buffer_cache.OnCPUWrite(addr, size);
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.WriteMemory(addr, size);
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.CachedWriteMemory(addr, size);
+ }
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- texture_cache.SyncGuestHost();
- buffer_cache.SyncGuestHost();
shader_cache.SyncGuestHost();
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.FlushCachedWrites();
+ }
+}
+
+void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UnmapMemory(addr, size);
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
+ shader_cache.OnCPUWrite(addr, size);
}
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
- auto& memory_manager{gpu.MemoryManager()};
- memory_manager.Write<u32>(addr, value);
+ gpu_memory.Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;
@@ -834,7 +624,6 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
}
void RasterizerOpenGL::ReleaseFences() {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
return;
}
@@ -849,14 +638,15 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
}
void RasterizerOpenGL::WaitForIdle() {
- // Place a barrier on everything that is not framebuffer related.
- // This is related to another flag that is not currently implemented.
- glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
- GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
- GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
- GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
- GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
- GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
+ glMemoryBarrier(GL_ALL_BARRIER_BITS);
+}
+
+void RasterizerOpenGL::FragmentBarrier() {
+ glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
+}
+
+void RasterizerOpenGL::TiledCacheBarrier() {
+ glTextureBarrier();
}
void RasterizerOpenGL::FlushCommands() {
@@ -872,288 +662,174 @@ void RasterizerOpenGL::TickFrame() {
// Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
num_queued_commands = 0;
- buffer_cache.TickFrame();
+ fence_manager.TickFrame();
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.TickFrame();
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.TickFrame();
+ }
}
-bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
- texture_cache.DoFermiCopy(src, dst, copy_config);
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.BlitImage(dst, src, copy_config);
return true;
}
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
- if (!framebuffer_addr) {
- return {};
+ if (framebuffer_addr == 0) {
+ return false;
}
-
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
- if (!surface) {
- return {};
+ std::scoped_lock lock{texture_cache.mutex};
+ ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
+ if (!image_view) {
+ return false;
}
-
// Verify that the cached surface is the same size and format as the requested framebuffer
- const auto& params{surface->GetSurfaceParams()};
- const auto& pixel_format{
- VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
- ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
- ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
-
- if (params.pixel_format != pixel_format) {
- LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
- }
-
- screen_info.display_texture = surface->GetTexture();
- screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
+ // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
+ // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
+ screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
+ screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}
-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
- static constexpr std::array PARAMETER_LUT = {
- GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
- GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
- GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
-
- MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
- const auto& shader_stage = stages[stage_index];
- const auto& entries = shader->GetEntries();
- const bool use_unified = entries.use_unified_uniforms;
- const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
-
- const auto base_bindings = device.GetBaseBindings(stage_index);
- u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
- for (const auto& entry : entries.const_buffers) {
- const u32 index = entry.GetIndex();
- const auto& buffer = shader_stage.const_buffers[index];
- SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
- base_unified_offset + index * Maxwell::MaxConstBufferSize);
- ++binding;
- }
- if (use_unified) {
- const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
- entries.global_memory_entries.size());
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
- base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
- }
-}
-
-void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
- MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
- const auto& entries = kernel->GetEntries();
- const bool use_unified = entries.use_unified_uniforms;
-
- u32 binding = 0;
- for (const auto& entry : entries.const_buffers) {
- const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
- const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
- Tegra::Engines::ConstBufferInfo buffer;
- buffer.address = config.Address();
- buffer.size = config.size;
- buffer.enabled = mask[entry.GetIndex()];
- SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
- use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
- ++binding;
- }
- if (use_unified) {
- const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
- NUM_CONST_BUFFERS_BYTES_PER_STAGE);
- }
-}
-
-void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
- const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry, bool use_unified,
- std::size_t unified_offset) {
- if (!buffer.enabled) {
- // Set values to zero to unbind buffers
- if (device.UseAssemblyShaders()) {
- glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
- } else {
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
- }
- return;
- }
+void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
+ image_view_indices.clear();
+ sampler_handles.clear();
- // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
- // UBO alignment requirements.
- const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
+ texture_cache.SynchronizeComputeDescriptors();
- const bool fast_upload = !use_unified && device.HasFastBufferSubData();
+ SetupComputeTextures(kernel);
+ SetupComputeImages(kernel);
- const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
- const GPUVAddr gpu_addr = buffer.address;
- auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillComputeImageViews(indices_span, image_view_ids);
- if (device.UseAssemblyShaders()) {
- UNIMPLEMENTED_IF(use_unified);
- if (info.offset != 0) {
- const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
- glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
- info.handle = staging_cbuf;
- info.offset = 0;
+ program_manager.BindCompute(kernel->GetHandle());
+ size_t image_view_index = 0;
+ size_t texture_index = 0;
+ size_t image_index = 0;
+ BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
+}
+
+void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
+ GLuint base_image, size_t& image_view_index,
+ size_t& texture_index, size_t& image_index) {
+ const GLuint* const samplers = sampler_handles.data() + texture_index;
+ const GLuint* const textures = texture_handles.data() + texture_index;
+ const GLuint* const images = image_handles.data() + image_index;
+
+ const size_t num_samplers = entries.samplers.size();
+ for (const auto& sampler : entries.samplers) {
+ for (size_t i = 0; i < sampler.size; ++i) {
+ const ImageViewId image_view_id = image_view_ids[image_view_index++];
+ const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+ const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
+ texture_handles[texture_index++] = handle;
}
- glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
- return;
}
-
- if (use_unified) {
- glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
- unified_offset, size);
- } else {
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
+ const size_t num_images = entries.images.size();
+ for (size_t unit = 0; unit < num_images; ++unit) {
+ // TODO: Mark as modified
+ const ImageViewId image_view_id = image_view_ids[image_view_index++];
+ const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+ const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
+ image_handles[image_index] = handle;
+ ++image_index;
}
-}
-
-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
- static constexpr std::array TARGET_LUT = {
- GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
- GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
- };
-
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
- const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
- const auto& entries{shader->GetEntries().global_memory_entries};
-
- std::array<GLuint64EXT, 32> pointers;
- ASSERT(entries.size() < pointers.size());
-
- const bool assembly_shaders = device.UseAssemblyShaders();
- u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
- for (const auto& entry : entries) {
- const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
- const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
- const u32 size{memory_manager.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
- ++binding;
- }
- if (assembly_shaders) {
- UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
- }
-}
-
-void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
- const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
- const auto& entries{kernel->GetEntries().global_memory_entries};
-
- std::array<GLuint64EXT, 32> pointers;
- ASSERT(entries.size() < pointers.size());
-
- u32 binding = 0;
- for (const auto& entry : entries) {
- const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
- const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
- const u32 size{memory_manager.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
- ++binding;
- }
- if (device.UseAssemblyShaders()) {
- UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
+ if (num_samplers > 0) {
+ glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
+ glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
}
-}
-
-void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
- GPUVAddr gpu_addr, std::size_t size,
- GLuint64EXT* pointer) {
- const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
- const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
- if (device.UseAssemblyShaders()) {
- *pointer = info.address + info.offset;
- } else {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
- static_cast<GLsizeiptr>(size));
+ if (num_images > 0) {
+ glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
}
}
-void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
- MICROPROFILE_SCOPE(OpenGL_Texture);
- const auto& maxwell3d = system.GPU().Maxwell3D();
- u32 binding = device.GetBaseBindings(stage_index).sampler;
+void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
+ const bool via_header_index =
+ maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index);
- for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
- SetupTexture(binding++, texture, entry);
+ for (size_t index = 0; index < entry.size; ++index) {
+ const auto handle =
+ GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
+ const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
+ sampler_handles.push_back(sampler->Handle());
+ image_view_indices.push_back(handle.image);
}
}
}
-void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
- MICROPROFILE_SCOPE(OpenGL_Texture);
- const auto& compute = system.GPU().KeplerCompute();
- u32 binding = 0;
+void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
+ const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : kernel->GetEntries().samplers) {
- for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
- SetupTexture(binding++, texture, entry);
+ for (size_t i = 0; i < entry.size; ++i) {
+ const auto handle =
+ GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
+ const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
+ sampler_handles.push_back(sampler->Handle());
+ image_view_indices.push_back(handle.image);
}
}
}
-void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
- const SamplerEntry& entry) {
- const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
- if (!view) {
- // Can occur when texture addr is null or its memory is unmapped/invalid
- glBindSampler(binding, 0);
- glBindTextureUnit(binding, 0);
- return;
- }
- const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
- texture.tic.z_source, texture.tic.w_source);
- glBindTextureUnit(binding, handle);
- if (!view->GetSurfaceParams().IsBuffer()) {
- glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
- }
-}
-
-void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
- const auto& maxwell3d = system.GPU().Maxwell3D();
- u32 binding = device.GetBaseBindings(stage_index).image;
+void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
+ const bool via_header_index =
+ maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : shader->GetEntries().images) {
- const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
- const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
- SetupImage(binding++, tic, entry);
+ const auto shader_type = static_cast<ShaderType>(stage_index);
+ const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
+ image_view_indices.push_back(handle.image);
}
}
-void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
- const auto& compute = system.GPU().KeplerCompute();
- u32 binding = 0;
+void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
+ const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : shader->GetEntries().images) {
- const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
- SetupImage(binding++, tic, entry);
+ const auto handle =
+ GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
+ image_view_indices.push_back(handle.image);
}
}
-void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
- const ImageEntry& entry) {
- const auto view = texture_cache.GetImageSurface(tic, entry);
- if (!view) {
- glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
- return;
- }
- if (entry.is_written) {
- view->MarkAsModified(texture_cache.Tick());
- }
- const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
- glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
+void RasterizerOpenGL::SyncState() {
+ SyncViewport();
+ SyncRasterizeEnable();
+ SyncPolygonModes();
+ SyncColorMask();
+ SyncFragmentColorClampState();
+ SyncMultiSampleState();
+ SyncDepthTestState();
+ SyncDepthClamp();
+ SyncStencilTestState();
+ SyncBlendState();
+ SyncLogicOpState();
+ SyncCullMode();
+ SyncPrimitiveRestart();
+ SyncScissorTest();
+ SyncPointState();
+ SyncLineState();
+ SyncPolygonOffset();
+ SyncAlphaTest();
+ SyncFramebufferSRGB();
+ SyncVertexFormats();
+ SyncVertexInstances();
}
void RasterizerOpenGL::SyncViewport() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
const bool dirty_viewport = flags[Dirty::Viewports];
const bool dirty_clip_control = flags[Dirty::ClipControl];
@@ -1180,15 +856,17 @@ void RasterizerOpenGL::SyncViewport() {
flags[Dirty::ClipControl] = false;
bool flip_y = false;
- if (regs.viewport_transform[0].scale_y < 0.0) {
+ if (regs.viewport_transform[0].scale_y < 0.0f) {
flip_y = !flip_y;
}
if (regs.screen_y_control.y_negate != 0) {
flip_y = !flip_y;
}
- glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
- regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
- : GL_NEGATIVE_ONE_TO_ONE);
+ const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
+ const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+ const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
+ state_tracker.ClipControl(origin, depth);
+ state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
}
if (dirty_viewport) {
@@ -1225,25 +903,23 @@ void RasterizerOpenGL::SyncViewport() {
}
void RasterizerOpenGL::SyncDepthClamp() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::DepthClampEnabled]) {
return;
}
flags[Dirty::DepthClampEnabled] = false;
- oglEnable(GL_DEPTH_CLAMP, gpu.regs.view_volume_clip_control.depth_clamp_disabled == 0);
+ oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0);
}
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;
- clip_mask &= gpu.regs.clip_distance_enabled;
+ clip_mask &= maxwell3d.regs.clip_distance_enabled;
if (clip_mask == last_clip_distance_mask) {
return;
}
@@ -1259,9 +935,8 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
if (flags[Dirty::CullTest]) {
flags[Dirty::CullTest] = false;
@@ -1276,26 +951,24 @@ void RasterizerOpenGL::SyncCullMode() {
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PrimitiveRestart]) {
return;
}
flags[Dirty::PrimitiveRestart] = false;
- if (gpu.regs.primitive_restart.enabled) {
+ if (maxwell3d.regs.primitive_restart.enabled) {
glEnable(GL_PRIMITIVE_RESTART);
- glPrimitiveRestartIndex(gpu.regs.primitive_restart.index);
+ glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index);
} else {
glDisable(GL_PRIMITIVE_RESTART);
}
}
void RasterizerOpenGL::SyncDepthTestState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
- const auto& regs = gpu.regs;
if (flags[Dirty::DepthMask]) {
flags[Dirty::DepthMask] = false;
glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
@@ -1313,14 +986,13 @@ void RasterizerOpenGL::SyncDepthTestState() {
}
void RasterizerOpenGL::SyncStencilTestState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::StencilTest]) {
return;
}
flags[Dirty::StencilTest] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
@@ -1345,25 +1017,24 @@ void RasterizerOpenGL::SyncStencilTestState() {
}
void RasterizerOpenGL::SyncRasterizeEnable() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::RasterizeEnable]) {
return;
}
flags[Dirty::RasterizeEnable] = false;
- oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
+ oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
}
void RasterizerOpenGL::SyncPolygonModes() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PolygonModes]) {
return;
}
flags[Dirty::PolygonModes] = false;
- if (gpu.regs.fill_rectangle) {
+ const auto& regs = maxwell3d.regs;
+ if (regs.fill_rectangle) {
if (!GLAD_GL_NV_fill_rectangle) {
LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
@@ -1376,27 +1047,26 @@ void RasterizerOpenGL::SyncPolygonModes() {
return;
}
- if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
+ if (regs.polygon_mode_front == regs.polygon_mode_back) {
flags[Dirty::PolygonModeFront] = false;
flags[Dirty::PolygonModeBack] = false;
- glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
return;
}
if (flags[Dirty::PolygonModeFront]) {
flags[Dirty::PolygonModeFront] = false;
- glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
}
if (flags[Dirty::PolygonModeBack]) {
flags[Dirty::PolygonModeBack] = false;
- glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
+ glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back));
}
}
void RasterizerOpenGL::SyncColorMask() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::ColorMasks]) {
return;
}
@@ -1405,7 +1075,7 @@ void RasterizerOpenGL::SyncColorMask() {
const bool force = flags[Dirty::ColorMaskCommon];
flags[Dirty::ColorMaskCommon] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.color_mask_common) {
if (!force && !flags[Dirty::ColorMask0]) {
return;
@@ -1430,33 +1100,30 @@ void RasterizerOpenGL::SyncColorMask() {
}
void RasterizerOpenGL::SyncMultiSampleState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::MultisampleControl]) {
return;
}
flags[Dirty::MultisampleControl] = false;
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
}
void RasterizerOpenGL::SyncFragmentColorClampState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::FragmentClampColor]) {
return;
}
flags[Dirty::FragmentClampColor] = false;
- glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
+ glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
}
void RasterizerOpenGL::SyncBlendState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
if (flags[Dirty::BlendColor]) {
flags[Dirty::BlendColor] = false;
@@ -1513,14 +1180,13 @@ void RasterizerOpenGL::SyncBlendState() {
}
void RasterizerOpenGL::SyncLogicOpState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::LogicOp]) {
return;
}
flags[Dirty::LogicOp] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.logic_op.enable) {
glEnable(GL_COLOR_LOGIC_OP);
glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
@@ -1530,14 +1196,13 @@ void RasterizerOpenGL::SyncLogicOpState() {
}
void RasterizerOpenGL::SyncScissorTest() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::Scissors]) {
return;
}
flags[Dirty::Scissors] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
if (!flags[Dirty::Scissor0 + index]) {
continue;
@@ -1556,49 +1221,38 @@ void RasterizerOpenGL::SyncScissorTest() {
}
void RasterizerOpenGL::SyncPointState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PointSize]) {
return;
}
flags[Dirty::PointSize] = false;
- oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable);
-
- if (gpu.regs.vp_point_size.enable) {
- // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
- glEnable(GL_PROGRAM_POINT_SIZE);
- return;
- }
+ oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
+ oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
- // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
- // in OpenGL).
- glPointSize(std::max(1.0f, gpu.regs.point_size));
- glDisable(GL_PROGRAM_POINT_SIZE);
+ glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
}
void RasterizerOpenGL::SyncLineState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::LineWidth]) {
return;
}
flags[Dirty::LineWidth] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
}
void RasterizerOpenGL::SyncPolygonOffset() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PolygonOffset]) {
return;
}
flags[Dirty::PolygonOffset] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
@@ -1612,18 +1266,13 @@ void RasterizerOpenGL::SyncPolygonOffset() {
}
void RasterizerOpenGL::SyncAlphaTest() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::AlphaTest]) {
return;
}
flags[Dirty::AlphaTest] = false;
- const auto& regs = gpu.regs;
- if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
- LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
- }
-
+ const auto& regs = maxwell3d.regs;
if (regs.alpha_test_enabled) {
glEnable(GL_ALPHA_TEST);
glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
@@ -1633,20 +1282,19 @@ void RasterizerOpenGL::SyncAlphaTest() {
}
void RasterizerOpenGL::SyncFramebufferSRGB() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::FramebufferSRGB]) {
return;
}
flags[Dirty::FramebufferSRGB] = false;
- oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
+ oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
}
void RasterizerOpenGL::SyncTransformFeedback() {
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
// when this is required.
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
static constexpr std::size_t STRIDE = 3;
std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
@@ -1698,40 +1346,17 @@ void RasterizerOpenGL::SyncTransformFeedback() {
}
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
-
if (device.UseAssemblyShaders()) {
SyncTransformFeedback();
}
-
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
-
- for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
- const auto& binding = regs.tfb_bindings[index];
- if (!binding.buffer_enable) {
- if (enabled_transform_feedback_buffers[index]) {
- glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
- 0);
- }
- enabled_transform_feedback_buffers[index] = false;
- continue;
- }
- enabled_transform_feedback_buffers[index] = true;
-
- auto& tfb_buffer = transform_feedback_buffers[index];
- tfb_buffer.Create();
-
- const GLuint handle = tfb_buffer.handle;
- const std::size_t size = binding.buffer_size;
- glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
- glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
- static_cast<GLsizeiptr>(size));
- }
+ UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
// We may have to call BeginTransformFeedbackNV here since they seem to call different
// implementations on Nvidia's driver (the pointer is different) but we are using
@@ -1741,27 +1366,11 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
}
void RasterizerOpenGL::EndTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
-
glEndTransformFeedback();
-
- for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
- const auto& binding = regs.tfb_bindings[index];
- if (!binding.buffer_enable) {
- continue;
- }
- UNIMPLEMENTED_IF(binding.buffer_offset != 0);
-
- const GLuint handle = transform_feedback_buffers[index].handle;
- const GPUVAddr gpu_addr = binding.Address();
- const std::size_t size = binding.buffer_size;
- const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
- glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
- static_cast<GLsizeiptr>(size));
- }
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ccc6f50f6..3745cf637 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,12 +7,13 @@
#include <array>
#include <atomic>
#include <cstddef>
-#include <map>
#include <memory>
#include <optional>
#include <tuple>
#include <utility>
+#include <boost/container/static_vector.hpp>
+
#include <glad/glad.h>
#include "common/common_types.h"
@@ -23,21 +24,18 @@
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
-#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
-namespace Core {
-class System;
+namespace Core::Memory {
+class Memory;
}
namespace Core::Frontend {
@@ -51,13 +49,21 @@ class MemoryManager;
namespace OpenGL {
struct ScreenInfo;
-struct DrawParameters;
+struct ShaderEntries;
+
+struct BindlessSSBO {
+ GLuint64EXT address;
+ GLsizei length;
+ GLsizei padding;
+};
+static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
- explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- const Device& device, ScreenInfo& info,
- ProgramManager& program_manager, StateTracker& state_tracker);
+ explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+ Core::Memory::Memory& cpu_memory_, const Device& device_,
+ ScreenInfo& screen_info_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -65,27 +71,30 @@ public:
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
+ void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
+ void UnmapMemory(VAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
+ void FragmentBarrier() override;
+ void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
- bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+ bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void LoadDiskResources(const std::atomic_bool& stop_loading,
+ void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
- void SetupDirtyFlags() override;
/// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const {
@@ -101,51 +110,29 @@ public:
}
private:
- /// Configures the color and depth framebuffer states.
- void ConfigureFramebuffers();
-
- /// Configures the color and depth framebuffer for clearing.
- void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
-
- /// Configures the current constbuffers to use for the draw command.
- void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
-
- /// Configures the current constbuffers to use for the kernel invocation.
- void SetupComputeConstBuffers(Shader* kernel);
-
- /// Configures a constant buffer.
- void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry, bool use_unified,
- std::size_t unified_offset);
+ static constexpr size_t MAX_TEXTURES = 192;
+ static constexpr size_t MAX_IMAGES = 48;
+ static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
- /// Configures the current global memory entries to use for the draw command.
- void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
+ void BindComputeTextures(Shader* kernel);
- /// Configures the current global memory entries to use for the kernel invocation.
- void SetupComputeGlobalMemory(Shader* kernel);
-
- /// Configures a global memory buffer.
- void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
- std::size_t size, GLuint64EXT* pointer);
+ void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
+ size_t& image_view_index, size_t& texture_index, size_t& image_index);
/// Configures the current textures to use for the draw command.
- void SetupDrawTextures(std::size_t stage_index, Shader* shader);
+ void SetupDrawTextures(const Shader* shader, size_t stage_index);
/// Configures the textures used in a compute shader.
- void SetupComputeTextures(Shader* kernel);
-
- /// Configures a texture.
- void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
- const SamplerEntry& entry);
+ void SetupComputeTextures(const Shader* kernel);
/// Configures images in a graphics shader.
- void SetupDrawImages(std::size_t stage_index, Shader* shader);
+ void SetupDrawImages(const Shader* shader, size_t stage_index);
/// Configures images in a compute shader.
- void SetupComputeImages(Shader* shader);
+ void SetupComputeImages(const Shader* shader);
- /// Configures an image.
- void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
+ /// Syncs state to match guest's
+ void SyncState();
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
@@ -210,6 +197,12 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
+ /// Syncs vertex formats to match the guest state
+ void SyncVertexFormats();
+
+ /// Syncs vertex instances to match the guest state
+ void SyncVertexInstances();
+
/// Syncs transform feedback state to match guest state
/// @note Only valid on assembly shaders
void SyncTransformFeedback();
@@ -220,56 +213,35 @@ private:
/// End a transform feedback
void EndTransformFeedback();
- /// Check for extension that are not strictly required but are needed for correct emulation
- void CheckExtensions();
-
- std::size_t CalculateVertexArraysSize() const;
-
- std::size_t CalculateIndexBufferSize() const;
+ void SetupShaders(bool is_indexed);
- /// Updates the current vertex format
- void SetupVertexFormat();
-
- void SetupVertexBuffer();
- void SetupVertexInstances();
-
- GLintptr SetupIndexBuffer();
-
- void SetupShaders(GLenum primitive_mode);
+ Tegra::GPU& gpu;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
const Device& device;
+ ScreenInfo& screen_info;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
- TextureCacheOpenGL texture_cache;
+ TextureCacheRuntime texture_cache_runtime;
+ TextureCache texture_cache;
+ BufferCacheRuntime buffer_cache_runtime;
+ BufferCache buffer_cache;
ShaderCacheOpenGL shader_cache;
- SamplerCacheOpenGL sampler_cache;
- FramebufferCacheOpenGL framebuffer_cache;
QueryCache query_cache;
- OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
- Core::System& system;
- ScreenInfo& screen_info;
- ProgramManager& program_manager;
- StateTracker& state_tracker;
VideoCommon::Shader::AsyncShaders async_shaders;
- static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
-
- GLint vertex_binding = 0;
-
- std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
- transform_feedback_buffers;
- std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
- enabled_transform_feedback_buffers;
-
- static constexpr std::size_t NUM_CONSTANT_BUFFERS =
- Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
- Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
- std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
- std::size_t current_cbuf = 0;
- OGLBuffer unified_uniform_buffer;
+ boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
+ std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
+ boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
+ std::array<GLuint, MAX_TEXTURES> texture_handles{};
+ std::array<GLuint, MAX_IMAGES> image_handles{};
- /// Number of commands queued to the OpenGL driver. Reseted on flush.
+ /// Number of commands queued to the OpenGL driver. Resetted on flush.
std::size_t num_queued_commands = 0;
u32 last_clip_distance_mask = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0ebcec427..3428e5e21 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -71,7 +71,7 @@ void OGLSampler::Create() {
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- glGenSamplers(1, &handle);
+ glCreateSamplers(1, &handle);
}
void OGLSampler::Release() {
@@ -171,12 +171,6 @@ void OGLBuffer::Release() {
handle = 0;
}
-void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
- ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
-
- glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
-}
-
void OGLSync::Create() {
if (handle != 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index f48398669..552d79db4 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -234,9 +234,6 @@ public:
/// Deletes the internal OpenGL resource
void Release();
- // Converts the buffer into a stream copy buffer with a fixed size
- void MakeStreamCopy(std::size_t buffer_size);
-
GLuint handle = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
deleted file mode 100644
index 5c174879a..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/logging/log.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_sampler_cache.h"
-#include "video_core/renderer_opengl/maxwell_to_gl.h"
-
-namespace OpenGL {
-
-SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
-
-SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
-
-OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
- OGLSampler sampler;
- sampler.Create();
-
- const GLuint sampler_id{sampler.handle};
- glSamplerParameteri(
- sampler_id, GL_TEXTURE_MAG_FILTER,
- MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
- glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
- MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
- glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
- tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
- glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
- MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
- glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
- glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
- glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
- if (GLAD_GL_ARB_texture_filter_anisotropic) {
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
- } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
- } else {
- LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
- }
-
- return sampler;
-}
-
-GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
- return sampler.handle;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
deleted file mode 100644
index 34ee37f00..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <glad/glad.h>
-
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/sampler_cache.h"
-
-namespace OpenGL {
-
-class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
-public:
- explicit SamplerCacheOpenGL();
- ~SamplerCacheOpenGL();
-
-protected:
- OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
-
- GLuint ToSamplerType(const OGLSampler& sampler) const override;
-};
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a07d56ef0..529570ff0 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -27,7 +27,6 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
@@ -160,6 +159,10 @@ std::unordered_set<GLenum> GetSupportedFormats() {
ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
+ if (device.UseDriverCache()) {
+ // Ignore hint retrievable if we are using the driver cache
+ hint_retrievable = false;
+ }
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
LOG_INFO(Render_OpenGL, "{}", shader_id);
@@ -198,10 +201,10 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
return program;
}
-Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
- ProgramSharedPtr program_, bool is_built)
+Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
+ ProgramSharedPtr program_, bool is_built_)
: registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
- is_built(is_built) {
+ is_built{is_built_} {
handle = program->assembly_program.handle;
if (handle == 0) {
handle = program->source_program.handle;
@@ -239,12 +242,11 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory(
ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
const auto shader_type = GetShaderType(program_type);
- auto& gpu = params.system.GPU();
+ auto& gpu = params.gpu;
gpu.ShaderNotify().MarkSharderBuilding();
auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
- if (!async_shaders.IsShaderAsync(params.system.GPU()) ||
- !params.device.UseAsynchronousShaders()) {
+ if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
@@ -287,11 +289,10 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory(
std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
ProgramCode code) {
- auto& gpu = params.system.GPU();
+ auto& gpu = params.gpu;
gpu.ShaderNotify().MarkSharderBuilding();
- auto& engine = gpu.KeplerCompute();
- auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
+ auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
const u64 uid = params.unique_identifier;
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
@@ -320,22 +321,26 @@ std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
}
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
- Core::Frontend::EmuWindow& emu_window, const Device& device)
- : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
- emu_window{emu_window}, device{device}, disk_cache{system} {}
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
+ Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_)
+ : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
+ maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
-void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
+void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
+ disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
}
std::vector<ShaderDiskCachePrecompiled> gl_cache;
- if (!device.UseAssemblyShaders()) {
+ if (!device.UseAssemblyShaders() && !device.UseDriverCache()) {
// Only load precompiled cache when we are not using assembly shaders
gl_cache = disk_cache.LoadPrecompiled();
}
@@ -355,8 +360,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
std::atomic_bool gl_cache_failed = false;
const auto find_precompiled = [&gl_cache](u64 id) {
- return std::find_if(gl_cache.begin(), gl_cache.end(),
- [id](const auto& entry) { return entry.unique_identifier == id; });
+ return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier);
};
const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
@@ -431,8 +435,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
- if (device.UseAssemblyShaders()) {
- // Don't store precompiled binaries for assembly shaders.
+ if (device.UseAssemblyShaders() || device.UseDriverCache()) {
+ // Don't store precompiled binaries for assembly shaders or when using the driver cache
return;
}
@@ -457,7 +461,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats) {
- if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
+ if (!supported_formats.contains(precompiled_entry.binary_format)) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
return {};
}
@@ -481,21 +485,19 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
VideoCommon::Shader::AsyncShaders& async_shaders) {
- if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
+ if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
if (last_shader->IsBuilt()) {
return last_shader;
}
}
- auto& memory_manager{system.GPU().MemoryManager()};
- const GPUVAddr address{GetShaderAddress(system, program)};
+ const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
auto completed_work = async_shaders.GetCompletedWork();
for (auto& work : completed_work) {
Shader* shader = TryGet(work.cpu_address);
- auto& gpu = system.GPU();
gpu.ShaderNotify().MarkShaderComplete();
if (shader == nullptr) {
continue;
@@ -507,14 +509,13 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
shader->AsyncGLASMBuilt(std::move(work.program.glasm));
}
+ auto& registry = shader->GetRegistry();
+
ShaderDiskCacheEntry entry;
entry.type = work.shader_type;
entry.code = std::move(work.code);
entry.code_b = std::move(work.code_b);
entry.unique_identifier = work.uid;
-
- auto& registry = shader->GetRegistry();
-
entry.bound_buffer = registry.GetBoundBuffer();
entry.graphics_info = registry.GetGraphicsInfo();
entry.keys = registry.GetKeys();
@@ -525,28 +526,28 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
}
// Look up shader in the cache based on address
- const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
+ const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
- const auto host_ptr{memory_manager.GetPointer(address)};
+ const u8* const host_ptr{gpu_memory.GetPointer(address)};
// No shader found - create a new one
- ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)};
+ ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
ProgramCode code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
- const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
- const u8* host_ptr_b = memory_manager.GetPointer(address_b);
- code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
+ const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
+ const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
+ code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
}
const std::size_t code_size = code.size() * sizeof(u64);
const u64 unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
- const ShaderParameters params{system, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+ const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
+ *cpu_addr, host_ptr, unique_identifier};
std::unique_ptr<Shader> shader;
const auto found = runtime_cache.find(unique_identifier);
@@ -568,21 +569,20 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
}
Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
- auto& memory_manager{system.GPU().MemoryManager()};
- const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
+ const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
return kernel;
}
- const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one
- ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
+ const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
+ ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
const std::size_t code_size{code.size() * sizeof(u64)};
const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
- const ShaderParameters params{system, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+ const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
+ *cpu_addr, host_ptr, unique_identifier};
std::unique_ptr<Shader> kernel;
const auto found = runtime_cache.find(unique_identifier);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 7528ac686..2aed0697e 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -25,8 +25,8 @@
#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
-namespace Core {
-class System;
+namespace Tegra {
+class MemoryManager;
}
namespace Core::Frontend {
@@ -57,11 +57,12 @@ struct PrecompiledShader {
};
struct ShaderParameters {
- Core::System& system;
+ Tegra::GPU& gpu;
+ Tegra::Engines::ConstBufferEngineInterface& engine;
ShaderDiskCacheOpenGL& disk_cache;
const Device& device;
VAddr cpu_addr;
- u8* host_ptr;
+ const u8* host_ptr;
u64 unique_identifier;
};
@@ -107,7 +108,7 @@ public:
private:
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
- ProgramSharedPtr program, bool is_built = true);
+ ProgramSharedPtr program, bool is_built_ = true);
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
@@ -118,12 +119,15 @@ private:
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public:
- explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
- Core::Frontend::EmuWindow& emu_window, const Device& device);
+ explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
+ Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_);
~ShaderCacheOpenGL() override;
/// Loads disk cache for the current game
- void LoadDiskCache(const std::atomic_bool& stop_loading,
+ void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
/// Gets the current specified shader stage program
@@ -138,9 +142,13 @@ private:
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats);
- Core::System& system;
Core::Frontend::EmuWindow& emu_window;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device;
+
ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3f75fcd2b..ac78d344c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -14,6 +14,7 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/div_ceil.h"
#include "common/logging/log.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
@@ -38,11 +39,9 @@ using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using Tegra::Shader::TextureType;
-using VideoCommon::Shader::BuildTransformFeedback;
-using VideoCommon::Shader::Registry;
-using namespace std::string_literals;
using namespace VideoCommon::Shader;
+using namespace std::string_literals;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Operation = const OperationNode&;
@@ -65,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
-constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
+constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat
@@ -78,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
-
-layout (std140, binding = {}) uniform vs_config {{
- float y_direction;
-}};
)";
class ShaderWriter final {
@@ -131,7 +126,7 @@ private:
class Expression final {
public:
- Expression(std::string code, Type type) : code{std::move(code)}, type{type} {
+ Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
ASSERT(type != Type::Void);
}
Expression() : type{Type::Void} {}
@@ -148,8 +143,8 @@ public:
ASSERT(type == Type::Void);
}
- std::string As(Type type) const {
- switch (type) {
+ std::string As(Type type_) const {
+ switch (type_) {
case Type::Bool:
return AsBool();
case Type::Bool2:
@@ -316,7 +311,7 @@ std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology t
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return {"triangles_adjacency", 6};
default:
- UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
+ UNIMPLEMENTED_MSG("topology={}", topology);
return {"points", 1};
}
}
@@ -342,7 +337,7 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
case Tegra::Shader::OutputTopology::TriangleStrip:
return "triangle_strip";
default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
+ UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
return "points";
}
}
@@ -403,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
-bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
- const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
- // We waste one UBO for emulation
- const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
- return num_ubos > num_available_ubos;
-}
-
struct GenericVaryingDescription {
std::string name;
u8 first_element = 0;
@@ -418,11 +406,11 @@ struct GenericVaryingDescription {
class GLSLDecompiler final {
public:
- explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
- ShaderType stage, std::string_view identifier, std::string_view suffix)
- : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier},
- suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{
- UseUnifiedUniforms(device, ir, stage)} {
+ explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
+ ShaderType stage_, std::string_view identifier_,
+ std::string_view suffix_)
+ : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
+ identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
@@ -516,7 +504,8 @@ private:
if (!identifier.empty()) {
code.AddLine("// {}", identifier);
}
- code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
+ const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
+ code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
if (device.HasShaderBallot()) {
code.AddLine("#extension GL_ARB_shader_ballot : require");
@@ -542,7 +531,7 @@ private:
code.AddNewLine();
- code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
+ code.AddLine(COMMON_DECLARATIONS);
}
void DeclareVertex() {
@@ -744,7 +733,7 @@ private:
case PixelImap::Unused:
break;
}
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
+ UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
return {};
}
@@ -777,16 +766,16 @@ private:
name = "gs_" + name + "[]";
}
- std::string suffix;
+ std::string suffix_;
if (stage == ShaderType::Fragment) {
const auto input_mode{header.ps.GetPixelImap(location)};
if (input_mode == PixelImap::Unused) {
return;
}
- suffix = GetInputFlags(input_mode);
+ suffix_ = GetInputFlags(input_mode);
}
- code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name);
+ code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
}
void DeclareOutputAttributes() {
@@ -813,7 +802,7 @@ private:
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
const auto it = transform_feedback.find(location);
if (it == transform_feedback.end()) {
- return {};
+ return std::nullopt;
}
return it->second.components;
}
@@ -865,20 +854,9 @@ private:
}
void DeclareConstantBuffers() {
- if (use_unified_uniforms) {
- const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
- static_cast<u32>(ir.GetGlobalMemory().size());
- code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
- binding);
- code.AddLine(" uint cbufs[];");
- code.AddLine("}};");
- code.AddNewLine();
- return;
- }
-
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
- for (const auto [index, info] : ir.GetConstantBuffers()) {
- const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
+ for (const auto& [index, info] : ir.GetConstantBuffers()) {
+ const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
GetConstBufferBlock(index));
@@ -1081,29 +1059,17 @@ private:
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
const Node offset = cbuf->GetOffset();
- const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- if (use_unified_uniforms) {
- return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
- Type::Uint};
- } else {
- return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4),
- Type::Uint};
- }
- }
-
- // Indirect access
- if (use_unified_uniforms) {
- return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
- Visit(offset).AsUint()),
+ return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+ offset_imm / (4 * 4), (offset_imm / 4) % 4),
Type::Uint};
}
+ // Indirect access
const std::string final_offset = code.GenerateTemporary();
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
@@ -1251,7 +1217,7 @@ private:
}
break;
}
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+ UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
return {"0", Type::Int};
}
@@ -1295,21 +1261,21 @@ private:
switch (element) {
case 0:
UNIMPLEMENTED();
- return {};
+ return std::nullopt;
case 1:
if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return {};
+ return std::nullopt;
}
return {{"gl_Layer", Type::Int}};
case 2:
if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return {};
+ return std::nullopt;
}
return {{"gl_ViewportIndex", Type::Int}};
case 3:
return {{"gl_PointSize", Type::Float}};
}
- return {};
+ return std::nullopt;
case Attribute::Index::FrontColor:
return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
case Attribute::Index::FrontSecondaryColor:
@@ -1331,8 +1297,8 @@ private:
GetSwizzle(element)),
Type::Float}};
}
- UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
- return {};
+ UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
+ return std::nullopt;
}
}
@@ -1443,8 +1409,10 @@ private:
return expr + ", vec2(0.0), vec2(0.0))";
case TextureType::TextureCube:
return expr + ", vec3(0.0), vec3(0.0))";
+ default:
+ UNREACHABLE();
+ break;
}
- UNREACHABLE();
}
for (const auto& variant : extras) {
@@ -2054,15 +2022,19 @@ private:
}
Expression Texture(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- std::string expr = GenerateTexture(
- operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});
- if (meta->sampler.is_shadow) {
- expr = "vec4(" + expr + ')';
+ const auto meta = std::get<MetaTexture>(operation.GetMeta());
+ const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
+ meta.sampler.is_array && meta.sampler.is_shadow;
+ // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
+ const std::vector<TextureIR> extras{
+ TextureOffset{},
+ TextureArgument{Type::Float, meta.bias},
+ };
+ std::string expr = GenerateTexture(operation, "", extras, separate_dc);
+ if (meta.sampler.is_shadow) {
+ expr = fmt::format("vec4({})", expr);
}
- return {expr + GetSwizzle(meta->element), Type::Float};
+ return {expr + GetSwizzle(meta.element), Type::Float};
}
Expression TextureLod(Operation operation) {
@@ -2094,13 +2066,13 @@ private:
const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
const bool separate_dc = meta.sampler.is_shadow;
- std::vector<TextureIR> ir;
+ std::vector<TextureIR> ir_;
if (meta.sampler.is_shadow) {
- ir = {TextureOffset{}};
+ ir_ = {TextureOffset{}};
} else {
- ir = {TextureOffset{}, TextureArgument{type, meta.component}};
+ ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
}
- return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element),
+ return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
Type::Float};
}
@@ -2287,7 +2259,6 @@ private:
}
}
}
-
if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
@@ -2331,7 +2302,8 @@ private:
}
Expression YNegate(Operation operation) {
- return {"y_direction", Type::Float};
+ // Y_NEGATE is mapped to this uniform value
+ return {"gl_FrontMaterial.ambient.a", Type::Float};
}
template <u32 element>
@@ -2746,11 +2718,11 @@ private:
}
}
- std::string GetSampler(const Sampler& sampler) const {
+ std::string GetSampler(const SamplerEntry& sampler) const {
return AppendSuffix(sampler.index, "sampler");
}
- std::string GetImage(const Image& image) const {
+ std::string GetImage(const ImageEntry& image) const {
return AppendSuffix(image.index, "image");
}
@@ -2781,7 +2753,6 @@ private:
const std::string_view identifier;
const std::string_view suffix;
const Header header;
- const bool use_unified_uniforms;
std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
@@ -2795,7 +2766,7 @@ std::string GetFlowVariable(u32 index) {
class ExprDecompiler {
public:
- explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
+ explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
void operator()(const ExprAnd& expr) {
inner += '(';
@@ -2850,7 +2821,7 @@ private:
class ASTDecompiler {
public:
- explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
+ explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
void operator()(const ASTProgram& ast) {
ASTNode current = ast.nodes.GetFirst();
@@ -2997,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
}
+ for (const auto& buffer : entries.const_buffers) {
+ entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
+ }
entries.shader_length = ir.GetLength();
- entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
return entries;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 451c9689a..0397a000c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -20,13 +20,13 @@ namespace OpenGL {
class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using SamplerEntry = VideoCommon::Shader::Sampler;
-using ImageEntry = VideoCommon::Shader::Image;
+using SamplerEntry = VideoCommon::Shader::SamplerEntry;
+using ImageEntry = VideoCommon::Shader::ImageEntry;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
- explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index)
- : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {}
+ explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
+ : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
u32 GetIndex() const {
return index;
@@ -37,10 +37,10 @@ private:
};
struct GlobalMemoryEntry {
- constexpr explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read,
- bool is_written)
- : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{
- is_written} {}
+ constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
+ bool is_written_)
+ : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
+ is_written_} {}
u32 cbuf_index = 0;
u32 cbuf_offset = 0;
@@ -55,7 +55,7 @@ struct ShaderEntries {
std::vector<ImageEntry> images;
std::size_t shader_length{};
u32 clip_distances{};
- bool use_unified_uniforms{};
+ u32 enabled_uniform_buffers{};
};
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 40c0877c1..955b2abc4 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -206,13 +206,17 @@ bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
flat_bindless_samplers.size();
}
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
+void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
+ title_id = title_id_;
+}
+
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
- const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
+ const bool has_title_id = title_id != 0;
if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
return std::nullopt;
}
@@ -313,8 +317,7 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
return std::nullopt;
}
}
-
- return std::move(entries);
+ return entries;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -340,7 +343,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
}
const u64 id = entry.unique_identifier;
- if (stored_transferable.find(id) != stored_transferable.end()) {
+ if (stored_transferable.contains(id)) {
// The shader already exists
return;
}
@@ -474,7 +477,7 @@ std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
}
std::string ShaderDiskCacheOpenGL::GetTitleID() const {
- return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
+ return fmt::format("{:016X}", title_id);
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index db2bb73bc..aef841c1d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -21,10 +21,6 @@
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
-namespace Core {
-class System;
-}
-
namespace Common::FS {
class IOFile;
}
@@ -70,9 +66,12 @@ struct ShaderDiskCachePrecompiled {
class ShaderDiskCacheOpenGL {
public:
- explicit ShaderDiskCacheOpenGL(Core::System& system);
+ explicit ShaderDiskCacheOpenGL();
~ShaderDiskCacheOpenGL();
+ /// Binds a title ID for all future operations.
+ void BindTitleID(u64 title_id);
+
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
@@ -157,8 +156,6 @@ private:
return LoadArrayFromPrecompiled(&object, 1);
}
- Core::System& system;
-
// Stores whole precompiled cache which will be read from or saved to the precompiled chache
// file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
@@ -168,8 +165,11 @@ private:
// Stored transferable shaders
std::unordered_set<u64> stored_transferable;
+ /// Title ID to operate on
+ u64 title_id = 0;
+
// The cache has been loaded at boot
- bool is_usable{};
+ bool is_usable = false;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 691c6c79b..553e6e8d6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() {
}
}
+void ProgramManager::BindHostCompute(GLuint program) {
+ if (use_assembly_programs) {
+ glDisable(GL_COMPUTE_PROGRAM_NV);
+ }
+ glUseProgram(program);
+ is_graphics_bound = false;
+}
+
+void ProgramManager::RestoreGuestCompute() {
+ if (use_assembly_programs) {
+ glEnable(GL_COMPUTE_PROGRAM_NV);
+ glUseProgram(0);
+ }
+}
+
void ProgramManager::UseVertexShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 950e0dfcb..ad42cce74 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,6 +45,12 @@ public:
/// Rewinds BindHostPipeline state changes.
void RestoreGuestPipeline();
+ /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
+ void BindHostCompute(GLuint program);
+
+ /// Rewinds BindHostCompute state changes.
+ void RestoreGuestCompute();
+
void UseVertexShader(GLuint program);
void UseGeometryShader(GLuint program);
void UseFragmentShader(GLuint program);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index d24fad3de..dbdf5230f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -13,7 +13,7 @@
#include "video_core/renderer_opengl/gl_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
-#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
+#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace OpenGL {
@@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
}
-void SetupDirtyVertexArrays(Tables& tables) {
- static constexpr std::size_t num_array = 3;
+void SetupDirtyVertexInstances(Tables& tables) {
static constexpr std::size_t instance_base_offset = 3;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
- const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
-
- FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
- FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
-
const std::size_t instance_array_offset = array_offset + instance_base_offset;
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_array_offset] = VertexInstances;
@@ -214,16 +208,14 @@ void SetupDirtyMisc(Tables& tables) {
} // Anonymous namespace
-StateTracker::StateTracker(Core::System& system) : system{system} {}
-
-void StateTracker::Initialize() {
- auto& dirty = system.GPU().Maxwell3D().dirty;
+StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
+ auto& dirty = gpu.Maxwell3D().dirty;
auto& tables = dirty.tables;
- SetupDirtyRenderTargets(tables);
+ SetupDirtyFlags(tables);
SetupDirtyColorMasks(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
- SetupDirtyVertexArrays(tables);
+ SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
@@ -243,12 +235,6 @@ void StateTracker::Initialize() {
SetupDirtyClipControl(tables);
SetupDirtyDepthClampEnabled(tables);
SetupDirtyMisc(tables);
-
- auto& store = dirty.on_write_stores;
- store[VertexBuffers] = true;
- for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
- store[VertexBuffer0 + i] = true;
- }
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 0f823288e..94c905116 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -13,8 +13,8 @@
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
-namespace Core {
-class System;
+namespace Tegra {
+class GPU;
}
namespace OpenGL {
@@ -28,10 +28,6 @@ enum : u8 {
VertexFormat0,
VertexFormat31 = VertexFormat0 + 31,
- VertexBuffers,
- VertexBuffer0,
- VertexBuffer31 = VertexBuffer0 + 31,
-
VertexInstances,
VertexInstance0,
VertexInstance31 = VertexInstance0 + 31,
@@ -90,9 +86,7 @@ static_assert(Last <= std::numeric_limits<u8>::max());
class StateTracker {
public:
- explicit StateTracker(Core::System& system);
-
- void Initialize();
+ explicit StateTracker(Tegra::GPU& gpu);
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
@@ -102,14 +96,40 @@ public:
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
}
+ void BindFramebuffer(GLuint new_framebuffer) {
+ if (framebuffer == new_framebuffer) {
+ return;
+ }
+ framebuffer = new_framebuffer;
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
+ }
+
+ void ClipControl(GLenum new_origin, GLenum new_depth) {
+ if (new_origin == origin && new_depth == depth) {
+ return;
+ }
+ origin = new_origin;
+ depth = new_depth;
+ glClipControl(origin, depth);
+ }
+
+ void SetYNegate(bool new_y_negate) {
+ if (new_y_negate == y_negate) {
+ return;
+ }
+ // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
+ y_negate = new_y_negate;
+ const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
+ glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
+ }
+
void NotifyScreenDrawVertexArray() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
- flags[OpenGL::Dirty::VertexBuffers] = true;
- flags[OpenGL::Dirty::VertexBuffer0] = true;
+ flags[VideoCommon::Dirty::VertexBuffers] = true;
+ flags[VideoCommon::Dirty::VertexBuffer0] = true;
flags[OpenGL::Dirty::VertexInstances] = true;
flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
@@ -117,100 +137,87 @@ public:
}
void NotifyPolygonModes() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonModes] = true;
flags[OpenGL::Dirty::PolygonModeFront] = true;
flags[OpenGL::Dirty::PolygonModeBack] = true;
}
void NotifyViewport0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Viewports] = true;
flags[OpenGL::Dirty::Viewport0] = true;
}
void NotifyScissor0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Scissors] = true;
flags[OpenGL::Dirty::Scissor0] = true;
}
- void NotifyColorMask0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ void NotifyColorMask(size_t index) {
flags[OpenGL::Dirty::ColorMasks] = true;
- flags[OpenGL::Dirty::ColorMask0] = true;
+ flags[OpenGL::Dirty::ColorMask0 + index] = true;
}
void NotifyBlend0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::BlendStates] = true;
flags[OpenGL::Dirty::BlendState0] = true;
}
void NotifyFramebuffer() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[VideoCommon::Dirty::RenderTargets] = true;
}
void NotifyFrontFace() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FrontFace] = true;
}
void NotifyCullTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::CullTest] = true;
}
void NotifyDepthMask() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthMask] = true;
}
void NotifyDepthTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthTest] = true;
}
void NotifyStencilTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::StencilTest] = true;
}
void NotifyPolygonOffset() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonOffset] = true;
}
void NotifyRasterizeEnable() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::RasterizeEnable] = true;
}
void NotifyFramebufferSRGB() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FramebufferSRGB] = true;
}
void NotifyLogicOp() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::LogicOp] = true;
}
void NotifyClipControl() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ClipControl] = true;
}
void NotifyAlphaTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::AlphaTest] = true;
}
private:
- Core::System& system;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
+ GLuint framebuffer = 0;
GLuint index_buffer = 0;
+ GLenum origin = GL_LOWER_LEFT;
+ GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
+ bool y_negate = false;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 887995cf4..77b3ee0fe 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -1,80 +1,64 @@
-// Copyright 2018 Citra Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <tuple>
-#include <vector>
+#include <array>
+#include <memory>
+#include <span>
+
+#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
-#include "common/microprofile.h"
-#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
-MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
- MP_RGB(128, 128, 192));
-
namespace OpenGL {
-OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
- : buffer_size(size) {
- gl_buffer.Create();
-
- GLsizeiptr allocate_size = size;
- if (vertex_data_usage) {
- // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
- // read position is near the end and is an out-of-bound access to the vertex buffer. This is
- // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
- // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
- // crash.
- allocate_size *= 2;
- }
-
- static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
- glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
- mapped_ptr = static_cast<u8*>(
- glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
-
- if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
- glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
- glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+StreamBuffer::StreamBuffer() {
+ static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
+ buffer.Create();
+ glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
+ glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
+ mapped_pointer =
+ static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
+ for (OGLSync& sync : fences) {
+ sync.Create();
}
}
-OGLStreamBuffer::~OGLStreamBuffer() {
- glUnmapNamedBuffer(gl_buffer.handle);
- gl_buffer.Release();
-}
-
-std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
- ASSERT(size <= buffer_size);
- ASSERT(alignment <= buffer_size);
- mapped_size = size;
-
- if (alignment > 0) {
- buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
+std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
+ ASSERT(size < REGION_SIZE);
+ for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
+ ++region) {
+ fences[region].Create();
}
+ used_iterator = iterator;
- bool invalidate = false;
- if (buffer_pos + size > buffer_size) {
- MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
- glInvalidateBufferData(gl_buffer.handle);
-
- buffer_pos = 0;
- invalidate = true;
+ for (size_t region = Region(free_iterator) + 1,
+ region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
+ region < region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
}
-
- return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
-}
-
-void OGLStreamBuffer::Unmap(GLsizeiptr size) {
- ASSERT(size <= mapped_size);
-
- if (size > 0) {
- glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
+ if (iterator + size >= free_iterator) {
+ free_iterator = iterator + size;
}
-
- buffer_pos += size;
+ if (iterator + size > STREAM_BUFFER_SIZE) {
+ for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
+ fences[region].Create();
+ }
+ used_iterator = 0;
+ iterator = 0;
+ free_iterator = size;
+
+ for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
+ }
+ }
+ const size_t offset = iterator;
+ iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+ return {std::span(mapped_pointer + offset, size), offset};
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 307a67113..6dbb6bfba 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -1,55 +1,50 @@
-// Copyright 2018 Citra Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
-#include <tuple>
+#include <array>
+#include <memory>
+#include <span>
+#include <utility>
+
#include <glad/glad.h>
+
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
-class Device;
+class StreamBuffer {
+ static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
+ static constexpr size_t NUM_SYNCS = 16;
+ static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
+ static constexpr size_t MAX_ALIGNMENT = 256;
+ static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
+ static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
+ static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
-class OGLStreamBuffer : private NonCopyable {
public:
- explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
- ~OGLStreamBuffer();
-
- /*
- * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
- * and the optional alignment requirement.
- * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
- * The return values are the pointer to the new chunk, the offset within the buffer,
- * and the invalidation flag for previous chunks.
- * The actual used size must be specified on unmapping the chunk.
- */
- std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
-
- void Unmap(GLsizeiptr size);
-
- GLuint Handle() const {
- return gl_buffer.handle;
- }
+ explicit StreamBuffer();
- u64 Address() const {
- return gpu_address;
- }
+ [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
- GLsizeiptr Size() const noexcept {
- return buffer_size;
+ [[nodiscard]] GLuint Handle() const noexcept {
+ return buffer.handle;
}
private:
- OGLBuffer gl_buffer;
+ [[nodiscard]] static size_t Region(size_t offset) noexcept {
+ return offset / REGION_SIZE;
+ }
- GLuint64EXT gpu_address = 0;
- GLintptr buffer_pos = 0;
- GLsizeiptr buffer_size = 0;
- GLsizeiptr mapped_size = 0;
- u8* mapped_ptr = nullptr;
+ size_t iterator = 0;
+ size_t used_iterator = 0;
+ size_t free_iterator = 0;
+ u8* mapped_pointer = nullptr;
+ OGLBuffer buffer;
+ std::array<OGLSync, NUM_SYNCS> fences;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index f403f388a..12434db67 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -2,37 +2,60 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "common/assert.h"
-#include "common/bit_util.h"
-#include "common/common_types.h"
-#include "common/microprofile.h"
-#include "common/scope_exit.h"
-#include "core/core.h"
-#include "video_core/morton.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include <algorithm>
+#include <array>
+#include <bit>
+#include <string>
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/renderer_opengl/utils.h"
-#include "video_core/texture_cache/surface_base.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
+#include "video_core/renderer_opengl/util_shaders.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/texture_cache.h"
-#include "video_core/textures/convert.h"
-#include "video_core/textures/texture.h"
+#include "video_core/textures/decoders.h"
namespace OpenGL {
-using Tegra::Texture::SwizzleSource;
-using VideoCore::MortonSwizzleMode;
+namespace {
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TextureMipmapFilter;
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+using VideoCommon::CalculateLevelStrideAlignment;
+using VideoCommon::ImageCopy;
+using VideoCommon::ImageFlagBits;
+using VideoCommon::ImageType;
+using VideoCommon::NUM_RT;
+using VideoCommon::SamplesLog2;
+using VideoCommon::SwizzleParameters;
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::IsPixelFormatASTC;
+using VideoCore::Surface::IsPixelFormatSRGB;
+using VideoCore::Surface::MaxPixelFormat;
using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
-MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
- MP_RGB(128, 192, 128));
+struct CopyOrigin {
+ GLint level;
+ GLint x;
+ GLint y;
+ GLint z;
+};
-namespace {
+struct CopyRegion {
+ GLsizei width;
+ GLsizei height;
+ GLsizei depth;
+};
struct FormatTuple {
GLenum internal_format;
@@ -40,7 +63,7 @@ struct FormatTuple {
GLenum type = GL_NONE;
};
-constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
+constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
{GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
@@ -103,72 +126,113 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
- // Compressed sRGB formats
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
- {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
- {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
- {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
- {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
- {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
-
- // Depth formats
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
-
- // DepthStencil formats
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
}};
+constexpr std::array ACCELERATED_FORMATS{
+ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
+ GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
+ GL_RG16UI, GL_RG8UI, GL_R32UI, GL_R16UI, GL_R8UI, GL_RGBA32I,
+ GL_RGBA16I, GL_RGBA8I, GL_RG32I, GL_RG16I, GL_RG8I, GL_R32I,
+ GL_R16I, GL_R8I, GL_RGBA16, GL_RGB10_A2, GL_RGBA8, GL_RG16,
+ GL_RG8, GL_R16, GL_R8, GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM,
+ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
+};
+
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
- ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
- return tex_format_tuples[static_cast<std::size_t>(pixel_format)];
+ ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
+ return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
}
-GLenum GetTextureTarget(const SurfaceTarget& target) {
- switch (target) {
- case SurfaceTarget::TextureBuffer:
+GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
+ switch (info.type) {
+ case ImageType::e1D:
+ return GL_TEXTURE_1D_ARRAY;
+ case ImageType::e2D:
+ if (info.num_samples > 1) {
+ return GL_TEXTURE_2D_MULTISAMPLE_ARRAY;
+ }
+ return GL_TEXTURE_2D_ARRAY;
+ case ImageType::e3D:
+ return GL_TEXTURE_3D;
+ case ImageType::Linear:
+ return GL_TEXTURE_2D_ARRAY;
+ case ImageType::Buffer:
return GL_TEXTURE_BUFFER;
- case SurfaceTarget::Texture1D:
+ }
+ UNREACHABLE_MSG("Invalid image type={}", info.type);
+ return GL_NONE;
+}
+
+GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
+ const bool is_multisampled = num_samples > 1;
+ switch (type) {
+ case ImageViewType::e1D:
return GL_TEXTURE_1D;
- case SurfaceTarget::Texture2D:
- return GL_TEXTURE_2D;
- case SurfaceTarget::Texture3D:
+ case ImageViewType::e2D:
+ return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
+ case ImageViewType::Cube:
+ return GL_TEXTURE_CUBE_MAP;
+ case ImageViewType::e3D:
return GL_TEXTURE_3D;
- case SurfaceTarget::Texture1DArray:
+ case ImageViewType::e1DArray:
return GL_TEXTURE_1D_ARRAY;
- case SurfaceTarget::Texture2DArray:
- return GL_TEXTURE_2D_ARRAY;
- case SurfaceTarget::TextureCubemap:
- return GL_TEXTURE_CUBE_MAP;
- case SurfaceTarget::TextureCubeArray:
+ case ImageViewType::e2DArray:
+ return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
+ case ImageViewType::CubeArray:
return GL_TEXTURE_CUBE_MAP_ARRAY;
+ case ImageViewType::Rect:
+ return GL_TEXTURE_RECTANGLE;
+ case ImageViewType::Buffer:
+ return GL_TEXTURE_BUFFER;
}
- UNREACHABLE();
- return {};
+ UNREACHABLE_MSG("Invalid image view type={}", type);
+ return GL_NONE;
}
-GLint GetSwizzleSource(SwizzleSource source) {
+GLenum TextureMode(PixelFormat format, bool is_first) {
+ switch (format) {
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
+ case PixelFormat::S8_UINT_D24_UNORM:
+ return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+ default:
+ UNREACHABLE();
+ return GL_DEPTH_COMPONENT;
+ }
+}
+
+GLint Swizzle(SwizzleSource source) {
switch (source) {
case SwizzleSource::Zero:
return GL_ZERO;
@@ -184,529 +248,844 @@ GLint GetSwizzleSource(SwizzleSource source) {
case SwizzleSource::OneFloat:
return GL_ONE;
}
- UNREACHABLE();
+ UNREACHABLE_MSG("Invalid swizzle source={}", source);
return GL_NONE;
}
-GLenum GetComponent(PixelFormat format, bool is_first) {
- switch (format) {
- case PixelFormat::D24_UNORM_S8_UINT:
- case PixelFormat::D32_FLOAT_S8_UINT:
- return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
- case PixelFormat::S8_UINT_D24_UNORM:
- return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+GLenum AttachmentType(PixelFormat format) {
+ switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) {
+ case SurfaceType::Depth:
+ return GL_DEPTH_ATTACHMENT;
+ case SurfaceType::DepthStencil:
+ return GL_DEPTH_STENCIL_ATTACHMENT;
default:
- UNREACHABLE();
- return GL_DEPTH_COMPONENT;
+ UNIMPLEMENTED_MSG("Unimplemented type={}", type);
+ return GL_NONE;
}
}
-void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
- if (params.IsBuffer()) {
- return;
+[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) {
+ if (!device.HasASTC() && IsPixelFormatASTC(format)) {
+ return true;
}
- glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1));
- if (params.num_levels == 1) {
- glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
+ switch (format) {
+ case PixelFormat::BC4_UNORM:
+ case PixelFormat::BC5_UNORM:
+ return type == ImageType::e3D;
+ default:
+ break;
}
+ return false;
}
-OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format,
- OGLBuffer& texture_buffer) {
- OGLTexture texture;
- texture.Create(target);
+[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) {
+ switch (value) {
+ case SwizzleSource::G:
+ return SwizzleSource::R;
+ default:
+ return value;
+ }
+}
- switch (params.target) {
- case SurfaceTarget::Texture1D:
- glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width);
- break;
- case SurfaceTarget::TextureBuffer:
- texture_buffer.Create();
- glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
- nullptr, GL_DYNAMIC_STORAGE_BIT);
- glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
+void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
+ switch (format) {
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ case PixelFormat::S8_UINT_D24_UNORM:
+ UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G);
+ glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
+ TextureMode(format, swizzle[0] == SwizzleSource::R));
+ std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
break;
- case SurfaceTarget::Texture2D:
- case SurfaceTarget::TextureCubemap:
- glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
- params.height);
+ default:
break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width,
- params.height, params.depth);
+ }
+ std::array<GLint, 4> gl_swizzle;
+ std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle);
+ glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+}
+
+[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
+ const VideoCommon::ImageInfo& info) {
+ // Disable accelerated uploads for now as they don't implement swizzled uploads
+ return false;
+ switch (info.type) {
+ case ImageType::e2D:
+ case ImageType::e3D:
+ case ImageType::Linear:
break;
default:
- UNREACHABLE();
+ return false;
+ }
+ const GLenum internal_format = GetFormatTuple(info.format).internal_format;
+ const auto& format_info = runtime.FormatInfo(info.type, internal_format);
+ if (format_info.is_compressed) {
+ return false;
+ }
+ if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) {
+ return false;
}
+ if (format_info.compatibility_by_size) {
+ return true;
+ }
+ const GLenum store_format = StoreFormat(BytesPerBlock(info.format));
+ const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class;
+ return format_info.compatibility_class == store_class;
+}
- ApplyTextureDefaults(params, texture.handle);
+[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
+ VideoCommon::SubresourceLayers subresource, GLenum target) {
+ switch (target) {
+ case GL_TEXTURE_2D_ARRAY:
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+ return CopyOrigin{
+ .level = static_cast<GLint>(subresource.base_level),
+ .x = static_cast<GLint>(offset.x),
+ .y = static_cast<GLint>(offset.y),
+ .z = static_cast<GLint>(subresource.base_layer),
+ };
+ case GL_TEXTURE_3D:
+ return CopyOrigin{
+ .level = static_cast<GLint>(subresource.base_level),
+ .x = static_cast<GLint>(offset.x),
+ .y = static_cast<GLint>(offset.y),
+ .z = static_cast<GLint>(offset.z),
+ };
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
+ return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0};
+ }
+}
- return texture;
+[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent,
+ VideoCommon::SubresourceLayers dst_subresource,
+ GLenum target) {
+ switch (target) {
+ case GL_TEXTURE_2D_ARRAY:
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+ return CopyRegion{
+ .width = static_cast<GLsizei>(extent.width),
+ .height = static_cast<GLsizei>(extent.height),
+ .depth = static_cast<GLsizei>(dst_subresource.num_layers),
+ };
+ case GL_TEXTURE_3D:
+ return CopyRegion{
+ .width = static_cast<GLsizei>(extent.width),
+ .height = static_cast<GLsizei>(extent.height),
+ .depth = static_cast<GLsizei>(extent.depth),
+ };
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
+ return CopyRegion{.width = 0, .height = 0, .depth = 0};
+ }
}
-constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source,
- SwizzleSource w_source) {
- return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
- (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
+ if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
+ const GLuint texture = image_view->DefaultHandle();
+ glNamedFramebufferTexture(fbo, attachment, texture, 0);
+ return;
+ }
+ const GLuint texture = image_view->Handle(ImageViewType::e3D);
+ if (image_view->range.extent.layers > 1) {
+ // TODO: OpenGL doesn't support rendering to a fixed number of slices
+ glNamedFramebufferTexture(fbo, attachment, texture, 0);
+ } else {
+ const u32 slice = image_view->range.base.layer;
+ glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice);
+ }
}
} // Anonymous namespace
-CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
- bool is_astc_supported)
- : VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) {
- if (is_converted) {
- internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
- format = GL_RGBA;
- type = GL_UNSIGNED_BYTE;
- } else {
- const auto& tuple{GetFormatTuple(params.pixel_format)};
- internal_format = tuple.internal_format;
- format = tuple.format;
- type = tuple.type;
- is_compressed = params.IsCompressed();
+ImageBufferMap::~ImageBufferMap() {
+ if (sync) {
+ sync->Create();
}
- target = GetTextureTarget(params.target);
- texture = CreateTexture(params, target, internal_format, texture_buffer);
- DecorateSurfaceName();
+}
- u32 num_layers = 1;
- if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
- num_layers = params.depth;
+TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
+ StateTracker& state_tracker_)
+ : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) {
+ static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
+ for (size_t i = 0; i < TARGETS.size(); ++i) {
+ const GLenum target = TARGETS[i];
+ for (const FormatTuple& tuple : FORMAT_TABLE) {
+ const GLenum format = tuple.internal_format;
+ GLint compat_class;
+ GLint compat_type;
+ GLint is_compressed;
+ glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class);
+ glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1,
+ &compat_type);
+ glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed);
+ const FormatProperties properties{
+ .compatibility_class = static_cast<GLenum>(compat_class),
+ .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE,
+ .is_compressed = is_compressed == GL_TRUE,
+ };
+ format_properties[i].emplace(format, properties);
+ }
}
-
- main_view =
- CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
+ has_broken_texture_view_formats = device.HasBrokenTextureViewFormats();
+
+ null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
+ null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
+ null_image_3d.Create(GL_TEXTURE_3D);
+ null_image_rect.Create(GL_TEXTURE_RECTANGLE);
+ glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
+ glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
+ glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
+ glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
+
+ std::array<GLuint, 4> new_handles;
+ glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
+ null_image_view_1d.handle = new_handles[0];
+ null_image_view_2d.handle = new_handles[1];
+ null_image_view_2d_array.handle = new_handles[2];
+ null_image_view_cube.handle = new_handles[3];
+ glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1,
+ 0, 1);
+ glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0,
+ 1, 0, 1);
+ glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY,
+ null_image_cube_array.handle, GL_R8, 0, 1, 0, 1);
+ glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
+ GL_R8, 0, 1, 0, 6);
+ const std::array texture_handles{
+ null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
+ null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
+ null_image_view_2d_array.handle, null_image_view_cube.handle,
+ };
+ for (const GLuint handle : texture_handles) {
+ static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
+ glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
+ }
+ const auto set_view = [this](ImageViewType type, GLuint handle) {
+ if (device.HasDebuggingToolAttached()) {
+ const std::string name = fmt::format("NullImage {}", type);
+ glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
+ }
+ null_image_views[static_cast<size_t>(type)] = handle;
+ };
+ set_view(ImageViewType::e1D, null_image_view_1d.handle);
+ set_view(ImageViewType::e2D, null_image_view_2d.handle);
+ set_view(ImageViewType::Cube, null_image_view_cube.handle);
+ set_view(ImageViewType::e3D, null_image_3d.handle);
+ set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
+ set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
+ set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
+ set_view(ImageViewType::Rect, null_image_rect.handle);
}
-CachedSurface::~CachedSurface() = default;
-
-void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
- MICROPROFILE_SCOPE(OpenGL_Texture_Download);
+TextureCacheRuntime::~TextureCacheRuntime() = default;
- if (params.IsBuffer()) {
- glGetNamedBufferSubData(texture_buffer.handle, 0,
- static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)),
- staging_buffer.data());
- return;
- }
+void TextureCacheRuntime::Finish() {
+ glFinish();
+}
- SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
+ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
+ return upload_buffers.RequestMap(size, true);
+}
- for (u32 level = 0; level < params.emulated_levels; ++level) {
- glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
- glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
- const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
+ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return download_buffers.RequestMap(size, false);
+}
- u8* const mip_data = staging_buffer.data() + mip_offset;
- const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
- if (is_compressed) {
- glGetCompressedTextureImage(texture.handle, level, size, mip_data);
- } else {
- glGetTextureImage(texture.handle, level, format, type, size, mip_data);
- }
+void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
+ std::span<const ImageCopy> copies) {
+ const GLuint dst_name = dst_image.Handle();
+ const GLuint src_name = src_image.Handle();
+ const GLenum dst_target = ImageTarget(dst_image.info);
+ const GLenum src_target = ImageTarget(src_image.info);
+ for (const ImageCopy& copy : copies) {
+ const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target);
+ const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target);
+ const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target);
+ glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y,
+ src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x,
+ dst_origin.y, dst_origin.z, region.width, region.height, region.depth);
}
}
-void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
- MICROPROFILE_SCOPE(OpenGL_Texture_Upload);
- SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); });
- for (u32 level = 0; level < params.emulated_levels; ++level) {
- UploadTextureMipmap(level, staging_buffer);
+bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
+ if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
+ return false;
}
+ return true;
}
-void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
- glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
- glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
-
- const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
- const u8* buffer{staging_buffer.data() + mip_offset};
- if (is_compressed) {
- const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
- switch (params.target) {
- case SurfaceTarget::Texture2D:
- glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
- static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)),
- internal_format, image_size, buffer);
- break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
- static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)),
- static_cast<GLsizei>(params.GetMipDepth(level)),
- internal_format, image_size, buffer);
- break;
- case SurfaceTarget::TextureCubemap: {
- const std::size_t layer_size{params.GetHostLayerSize(level)};
- for (std::size_t face = 0; face < params.depth; ++face) {
- glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
- static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)), 1,
- internal_format, static_cast<GLsizei>(layer_size),
- buffer);
- buffer += layer_size;
- }
- break;
- }
- default:
- UNREACHABLE();
- }
+void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
+ std::span<const ImageCopy> copies) {
+ if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
+ ASSERT(src.info.type == ImageType::e3D);
+ util_shaders.CopyBC4(dst, src, copies);
} else {
- switch (params.target) {
- case SurfaceTarget::Texture1D:
- glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
- buffer);
- break;
- case SurfaceTarget::TextureBuffer:
- ASSERT(level == 0);
- glNamedBufferSubData(texture_buffer.handle, 0,
- params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
- break;
- case SurfaceTarget::Texture1DArray:
- case SurfaceTarget::Texture2D:
- glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
- params.GetMipHeight(level), format, type, buffer);
- break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glTextureSubImage3D(
- texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
- static_cast<GLsizei>(params.GetMipHeight(level)),
- static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
- break;
- case SurfaceTarget::TextureCubemap:
- for (std::size_t face = 0; face < params.depth; ++face) {
- glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
- params.GetMipWidth(level), params.GetMipHeight(level), 1,
- format, type, buffer);
- buffer += params.GetHostLayerSize(level);
- }
- break;
- default:
- UNREACHABLE();
- }
+ UNREACHABLE();
}
}
-void CachedSurface::DecorateSurfaceName() {
- LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
-}
+void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation) {
+ state_tracker.NotifyScissor0();
+ state_tracker.NotifyRasterizeEnable();
+ state_tracker.NotifyFramebufferSRGB();
-void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) {
- LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
+ ASSERT(dst->BufferBits() == src->BufferBits());
+
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisablei(GL_SCISSOR_TEST, 0);
+
+ const GLbitfield buffer_bits = dst->BufferBits();
+ const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0;
+ const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
+ glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y,
+ src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y,
+ dst_region[1].x, dst_region[1].y, buffer_bits,
+ is_linear ? GL_LINEAR : GL_NEAREST);
}
-View CachedSurface::CreateView(const ViewParams& view_key) {
- return CreateViewInner(view_key, false);
+void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+ std::span<const SwizzleParameters> swizzles) {
+ switch (image.info.type) {
+ case ImageType::e2D:
+ return util_shaders.BlockLinearUpload2D(image, map, swizzles);
+ case ImageType::e3D:
+ return util_shaders.BlockLinearUpload3D(image, map, swizzles);
+ case ImageType::Linear:
+ return util_shaders.PitchUpload(image, map, swizzles);
+ default:
+ UNREACHABLE();
+ break;
+ }
}
-View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) {
- auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy);
- views[view_key] = view;
- if (!is_proxy)
- view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++));
- return view;
+void TextureCacheRuntime::InsertUploadMemoryBarrier() {
+ glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
-CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
- bool is_proxy)
- : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format},
- target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
- if (!is_proxy) {
- main_view = CreateTextureView();
+FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const {
+ switch (type) {
+ case ImageType::e1D:
+ return format_properties[0].at(internal_format);
+ case ImageType::e2D:
+ case ImageType::Linear:
+ return format_properties[1].at(internal_format);
+ case ImageType::e3D:
+ return format_properties[2].at(internal_format);
+ default:
+ UNREACHABLE();
+ return FormatProperties{};
}
}
-CachedSurfaceView::~CachedSurfaceView() = default;
+TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
+ : storage_flags{storage_flags_}, map_flags{map_flags_} {}
-void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
- ASSERT(params.num_levels == 1);
+TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
- if (params.target == SurfaceTarget::Texture3D) {
- if (params.num_layers > 1) {
- ASSERT(params.base_layer == 0);
- glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
- } else {
- glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
- params.base_level, params.base_layer);
- }
- return;
+ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
+ bool insert_fence) {
+ const size_t index = RequestBuffer(requested_size);
+ OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
+ return ImageBufferMap{
+ .mapped_span = std::span(maps[index], requested_size),
+ .sync = sync,
+ .buffer = buffers[index].handle,
+ };
+}
+
+size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
+ if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
+ return *index;
}
- if (params.num_layers > 1) {
- UNIMPLEMENTED_IF(params.base_layer != 0);
- glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
- return;
+ OGLBuffer& buffer = buffers.emplace_back();
+ buffer.Create();
+ glNamedBufferStorage(buffer.handle, requested_size, nullptr,
+ storage_flags | GL_MAP_PERSISTENT_BIT);
+ maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
+ map_flags | GL_MAP_PERSISTENT_BIT)));
+
+ syncs.emplace_back();
+ sizes.push_back(requested_size);
+
+ ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
+ maps.size() == sizes.size());
+
+ return buffers.size() - 1;
+}
+
+std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
+ size_t smallest_buffer = std::numeric_limits<size_t>::max();
+ std::optional<size_t> found;
+ const size_t num_buffers = sizes.size();
+ for (size_t index = 0; index < num_buffers; ++index) {
+ const size_t buffer_size = sizes[index];
+ if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
+ continue;
+ }
+ if (syncs[index].handle != 0) {
+ GLint status;
+ glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status);
+ if (status != GL_SIGNALED) {
+ continue;
+ }
+ syncs[index].Release();
+ }
+ smallest_buffer = buffer_size;
+ found = index;
}
+ return found;
+}
- const GLenum view_target = surface.GetTarget();
- const GLuint texture = surface.GetTexture();
- switch (surface.GetSurfaceParams().target) {
- case SurfaceTarget::Texture1D:
- glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
+Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
+ VAddr cpu_addr_)
+ : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) {
+ if (CanBeAccelerated(runtime, info)) {
+ flags |= ImageFlagBits::AcceleratedUpload;
+ }
+ if (IsConverted(runtime.device, info.format, info.type)) {
+ flags |= ImageFlagBits::Converted;
+ gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+ gl_format = GL_RGBA;
+ gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+ } else {
+ const auto& tuple = GetFormatTuple(info.format);
+ gl_internal_format = tuple.internal_format;
+ gl_format = tuple.format;
+ gl_type = tuple.type;
+ }
+ const GLenum target = ImageTarget(info);
+ const GLsizei width = info.size.width;
+ const GLsizei height = info.size.height;
+ const GLsizei depth = info.size.depth;
+ const int max_host_mip_levels = std::bit_width(info.size.width);
+ const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
+ const GLsizei num_layers = info.resources.layers;
+ const GLsizei num_samples = info.num_samples;
+
+ GLuint handle = 0;
+ if (target != GL_TEXTURE_BUFFER) {
+ texture.Create(target);
+ handle = texture.handle;
+ }
+ switch (target) {
+ case GL_TEXTURE_1D_ARRAY:
+ glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
+ break;
+ case GL_TEXTURE_2D_ARRAY:
+ glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
break;
- case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
+ // TODO: Where should 'fixedsamplelocations' come from?
+ const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
+ glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
+ height >> samples_y, num_layers, GL_FALSE);
+ break;
+ }
+ case GL_TEXTURE_RECTANGLE:
+ glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
break;
- case SurfaceTarget::Texture1DArray:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubemap:
- case SurfaceTarget::TextureCubeArray:
- glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
- params.base_layer);
+ case GL_TEXTURE_3D:
+ glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
+ break;
+ case GL_TEXTURE_BUFFER:
+ buffer.Create();
+ glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
break;
default:
- UNIMPLEMENTED();
+ UNREACHABLE_MSG("Invalid target=0x{:x}", target);
+ break;
+ }
+ if (runtime.device.HasDebuggingToolAttached()) {
+ const std::string name = VideoCommon::Name(*this);
+ glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle,
+ static_cast<GLsizei>(name.size()), name.data());
}
}
-GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source,
- SwizzleSource z_source, SwizzleSource w_source) {
- if (GetSurfaceParams().IsBuffer()) {
- return GetTexture();
- }
- const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
- if (current_swizzle == new_swizzle) {
- return current_view;
- }
- current_swizzle = new_swizzle;
+void Image::UploadMemory(const ImageBufferMap& map,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
+ glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
- const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
- OGLTextureView& view = entry->second;
- if (!is_cache_miss) {
- current_view = view.handle;
- return view.handle;
- }
- view = CreateTextureView();
- current_view = view.handle;
+ glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
- std::array swizzle{x_source, y_source, z_source, w_source};
+ u32 current_row_length = std::numeric_limits<u32>::max();
+ u32 current_image_height = std::numeric_limits<u32>::max();
- switch (const PixelFormat format = GetSurfaceParams().pixel_format) {
- case PixelFormat::D24_UNORM_S8_UINT:
- case PixelFormat::D32_FLOAT_S8_UINT:
- case PixelFormat::S8_UINT_D24_UNORM:
- UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
- glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
- GetComponent(format, x_source == SwizzleSource::R));
-
- // Make sure we sample the first component
- std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) {
- return value == SwizzleSource::G ? SwizzleSource::R : value;
- });
- [[fallthrough]];
- default: {
- const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
- GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
- glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
- break;
- }
+ for (const VideoCommon::BufferImageCopy& copy : copies) {
+ if (current_row_length != copy.buffer_row_length) {
+ current_row_length = copy.buffer_row_length;
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length);
+ }
+ if (current_image_height != copy.buffer_image_height) {
+ current_image_height = copy.buffer_image_height;
+ glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
+ }
+ CopyBufferToImage(copy, map.offset);
}
- return view.handle;
}
-OGLTextureView CachedSurfaceView::CreateTextureView() const {
- OGLTextureView texture_view;
- texture_view.Create();
-
- if (target == GL_TEXTURE_3D) {
- glTextureView(texture_view.handle, target, surface.texture.handle, format,
- params.base_level, params.num_levels, 0, 1);
- } else {
- glTextureView(texture_view.handle, target, surface.texture.handle, format,
- params.base_level, params.num_levels, params.base_layer, params.num_layers);
+void Image::UploadMemory(const ImageBufferMap& map,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ for (const VideoCommon::BufferCopy& copy : copies) {
+ glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
+ copy.dst_offset, copy.size);
}
- ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
-
- return texture_view;
}
-TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
- VideoCore::RasterizerInterface& rasterizer,
- const Device& device, StateTracker& state_tracker)
- : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} {
- src_framebuffer.Create();
- dst_framebuffer.Create();
-}
+void Image::DownloadMemory(ImageBufferMap& map,
+ std::span<const VideoCommon::BufferImageCopy> copies) {
+ glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
-TextureCacheOpenGL::~TextureCacheOpenGL() = default;
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
+ glPixelStorei(GL_PACK_ALIGNMENT, 1);
-Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
- return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported);
-}
+ u32 current_row_length = std::numeric_limits<u32>::max();
+ u32 current_image_height = std::numeric_limits<u32>::max();
-void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
- const VideoCommon::CopyParams& copy_params) {
- const auto& src_params = src_surface->GetSurfaceParams();
- const auto& dst_params = dst_surface->GetSurfaceParams();
- if (src_params.type != dst_params.type) {
- // A fallback is needed
- return;
+ for (const VideoCommon::BufferImageCopy& copy : copies) {
+ if (current_row_length != copy.buffer_row_length) {
+ current_row_length = copy.buffer_row_length;
+ glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
+ }
+ if (current_image_height != copy.buffer_image_height) {
+ current_image_height = copy.buffer_image_height;
+ glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
+ }
+ CopyImageToBuffer(copy, map.offset);
}
- const auto src_handle = src_surface->GetTexture();
- const auto src_target = src_surface->GetTarget();
- const auto dst_handle = dst_surface->GetTexture();
- const auto dst_target = dst_surface->GetTarget();
- glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
- copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
- copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
- copy_params.dest_z, copy_params.width, copy_params.height,
- copy_params.depth);
}
-void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
- const Tegra::Engines::Fermi2D::Config& copy_config) {
- const auto& src_params{src_view->GetSurfaceParams()};
- const auto& dst_params{dst_view->GetSurfaceParams()};
- UNIMPLEMENTED_IF(src_params.depth != 1);
- UNIMPLEMENTED_IF(dst_params.depth != 1);
-
- state_tracker.NotifyScissor0();
- state_tracker.NotifyFramebuffer();
- state_tracker.NotifyRasterizeEnable();
- state_tracker.NotifyFramebufferSRGB();
-
- if (dst_params.srgb_conversion) {
- glEnable(GL_FRAMEBUFFER_SRGB);
- } else {
- glDisable(GL_FRAMEBUFFER_SRGB);
+GLuint Image::StorageHandle() noexcept {
+ switch (info.format) {
+ case PixelFormat::A8B8G8R8_SRGB:
+ case PixelFormat::B8G8R8A8_SRGB:
+ case PixelFormat::BC1_RGBA_SRGB:
+ case PixelFormat::BC2_SRGB:
+ case PixelFormat::BC3_SRGB:
+ case PixelFormat::BC7_SRGB:
+ case PixelFormat::ASTC_2D_4X4_SRGB:
+ case PixelFormat::ASTC_2D_8X8_SRGB:
+ case PixelFormat::ASTC_2D_8X5_SRGB:
+ case PixelFormat::ASTC_2D_5X4_SRGB:
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ case PixelFormat::ASTC_2D_10X8_SRGB:
+ case PixelFormat::ASTC_2D_6X6_SRGB:
+ case PixelFormat::ASTC_2D_10X10_SRGB:
+ case PixelFormat::ASTC_2D_12X12_SRGB:
+ case PixelFormat::ASTC_2D_8X6_SRGB:
+ case PixelFormat::ASTC_2D_6X5_SRGB:
+ if (store_view.handle != 0) {
+ return store_view.handle;
+ }
+ store_view.Create();
+ glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0,
+ info.resources.levels, 0, info.resources.layers);
+ return store_view.handle;
+ default:
+ return texture.handle;
}
- glDisable(GL_RASTERIZER_DISCARD);
- glDisablei(GL_SCISSOR_TEST, 0);
-
- glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
-
- GLenum buffers = 0;
- if (src_params.type == SurfaceType::ColorTexture) {
- src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
+}
- dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
+void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
+ // Compressed formats don't have a pixel format or type
+ const bool is_compressed = gl_format == GL_NONE;
+ const void* const offset = reinterpret_cast<const void*>(copy.buffer_offset + buffer_offset);
- buffers = GL_COLOR_BUFFER_BIT;
- } else if (src_params.type == SurfaceType::Depth) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+ switch (info.type) {
+ case ImageType::e1D:
+ if (is_compressed) {
+ glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_subresource.base_layer,
+ copy.image_extent.width,
+ copy.image_subresource.num_layers, gl_internal_format,
+ static_cast<GLsizei>(copy.buffer_size), offset);
+ } else {
+ glTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_subresource.base_layer,
+ copy.image_extent.width, copy.image_subresource.num_layers,
+ gl_format, gl_type, offset);
+ }
+ break;
+ case ImageType::e2D:
+ case ImageType::Linear:
+ if (is_compressed) {
+ glCompressedTextureSubImage3D(
+ texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
+ copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width,
+ copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format,
+ static_cast<GLsizei>(copy.buffer_size), offset);
+ } else {
+ glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_offset.y,
+ copy.image_subresource.base_layer, copy.image_extent.width,
+ copy.image_extent.height, copy.image_subresource.num_layers,
+ gl_format, gl_type, offset);
+ }
+ break;
+ case ImageType::e3D:
+ if (is_compressed) {
+ glCompressedTextureSubImage3D(
+ texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
+ copy.image_offset.y, copy.image_offset.z, copy.image_extent.width,
+ copy.image_extent.height, copy.image_extent.depth, gl_internal_format,
+ static_cast<GLsizei>(copy.buffer_size), offset);
+ } else {
+ glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
+ copy.image_offset.x, copy.image_offset.y, copy.image_offset.z,
+ copy.image_extent.width, copy.image_extent.height,
+ copy.image_extent.depth, gl_format, gl_type, offset);
+ }
+ break;
+ default:
+ UNREACHABLE();
+ }
+}
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
+ const GLint x_offset = copy.image_offset.x;
+ const GLsizei width = copy.image_extent.width;
- buffers = GL_DEPTH_BUFFER_BIT;
- } else if (src_params.type == SurfaceType::DepthStencil) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);
+ const GLint level = copy.image_subresource.base_level;
+ const GLsizei buffer_size = static_cast<GLsizei>(copy.buffer_size);
+ void* const offset = reinterpret_cast<void*>(copy.buffer_offset + buffer_offset);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+ GLint y_offset = 0;
+ GLint z_offset = 0;
+ GLsizei height = 1;
+ GLsizei depth = 1;
- buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+ switch (info.type) {
+ case ImageType::e1D:
+ y_offset = copy.image_subresource.base_layer;
+ height = copy.image_subresource.num_layers;
+ break;
+ case ImageType::e2D:
+ case ImageType::Linear:
+ y_offset = copy.image_offset.y;
+ z_offset = copy.image_subresource.base_layer;
+ height = copy.image_extent.height;
+ depth = copy.image_subresource.num_layers;
+ break;
+ case ImageType::e3D:
+ y_offset = copy.image_offset.y;
+ z_offset = copy.image_offset.z;
+ height = copy.image_extent.height;
+ depth = copy.image_extent.depth;
+ break;
+ default:
+ UNREACHABLE();
+ }
+ // Compressed formats don't have a pixel format or type
+ const bool is_compressed = gl_format == GL_NONE;
+ if (is_compressed) {
+ glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width,
+ height, depth, buffer_size, offset);
+ } else {
+ glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height,
+ depth, gl_format, gl_type, buffer_size, offset);
}
-
- const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
- const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
- const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
-
- glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top),
- static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom),
- static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top),
- static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom),
- buffers,
- is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
}
-void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
- MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
- const auto& src_params = src_surface->GetSurfaceParams();
- const auto& dst_params = dst_surface->GetSurfaceParams();
- UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
+ ImageId image_id_, Image& image)
+ : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
+ const Device& device = runtime.device;
+ if (True(image.flags & ImageFlagBits::Converted)) {
+ internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+ } else {
+ internal_format = GetFormatTuple(format).internal_format;
+ }
+ VideoCommon::SubresourceRange flatten_range = info.range;
+ std::array<GLuint, 2> handles;
+ stored_views.reserve(2);
- const auto source_format = GetFormatTuple(src_params.pixel_format);
- const auto dest_format = GetFormatTuple(dst_params.pixel_format);
+ switch (info.type) {
+ case ImageViewType::e1DArray:
+ flatten_range.extent.layers = 1;
+ [[fallthrough]];
+ case ImageViewType::e1D:
+ glGenTextures(2, handles.data());
+ SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
+ SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
+ break;
+ case ImageViewType::e2DArray:
+ flatten_range.extent.layers = 1;
+ [[fallthrough]];
+ case ImageViewType::e2D:
+ if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
+ // 2D and 2D array views on a 3D textures are used exclusively for render targets
+ ASSERT(info.range.extent.levels == 1);
+ const VideoCommon::SubresourceRange slice_range{
+ .base = {.level = info.range.base.level, .layer = 0},
+ .extent = {.levels = 1, .layers = 1},
+ };
+ glGenTextures(1, handles.data());
+ SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
+ break;
+ }
+ glGenTextures(2, handles.data());
+ SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
+ SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
+ break;
+ case ImageViewType::e3D:
+ glGenTextures(1, handles.data());
+ SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
+ break;
+ case ImageViewType::CubeArray:
+ flatten_range.extent.layers = 6;
+ [[fallthrough]];
+ case ImageViewType::Cube:
+ glGenTextures(2, handles.data());
+ SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
+ SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
+ break;
+ case ImageViewType::Rect:
+ glGenTextures(1, handles.data());
+ SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
+ break;
+ case ImageViewType::Buffer:
+ glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
+ SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
+ break;
+ }
+ default_handle = Handle(info.type);
+}
- const std::size_t source_size = src_surface->GetHostSizeInBytes();
- const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
+ : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
- const std::size_t buffer_size = std::max(source_size, dest_size);
+void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
+ GLuint handle, const VideoCommon::ImageViewInfo& info,
+ VideoCommon::SubresourceRange view_range) {
+ if (info.type == ImageViewType::Buffer) {
+ // TODO: Take offset from buffer cache
+ glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
+ image.guest_size_bytes);
+ } else {
+ const GLuint parent = image.texture.handle;
+ const GLenum target = ImageTarget(view_type, image.info.num_samples);
+ glTextureView(handle, target, parent, internal_format, view_range.base.level,
+ view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
+ if (!info.IsRenderTarget()) {
+ ApplySwizzle(handle, format, info.Swizzle());
+ }
+ }
+ if (device.HasDebuggingToolAttached()) {
+ const std::string name = VideoCommon::Name(*this, view_type);
+ glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
+ }
+ stored_views.emplace_back().handle = handle;
+ views[static_cast<size_t>(view_type)] = handle;
+}
- GLuint copy_pbo_handle = FetchPBO(buffer_size);
+Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
+ const GLenum compare_mode = config.depth_compare_enabled ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE;
+ const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func);
+ const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None);
+ const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter);
+ const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter);
+ const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE;
+
+ UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
+ UNIMPLEMENTED_IF(config.float_coord_normalization != 0);
+
+ sampler.Create();
+ const GLuint handle = sampler.handle;
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
+ glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
+ glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
+ glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
+ glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
+ glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
+ glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
+ glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
+
+ if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy());
+ } else {
+ LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
+ }
+ if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
+ glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
+ } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
+ LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
+ }
+ if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
+ } else if (seamless == GL_FALSE) {
+ // We default to false because it's more common
+ LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
+ }
+}
- glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
+Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
+ ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
+ // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
+ // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
+ // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
+ // mismatching size, this is why core framebuffers are preferred.
+ GLuint handle;
+ glGenFramebuffers(1, &handle);
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
+
+ GLsizei num_buffers = 0;
+ std::array<GLenum, NUM_RT> gl_draw_buffers;
+ gl_draw_buffers.fill(GL_NONE);
+
+ for (size_t index = 0; index < color_buffers.size(); ++index) {
+ const ImageView* const image_view = color_buffers[index];
+ if (!image_view) {
+ continue;
+ }
+ buffer_bits |= GL_COLOR_BUFFER_BIT;
+ gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index];
+ num_buffers = static_cast<GLsizei>(index + 1);
- if (src_surface->IsCompressed()) {
- glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
- nullptr);
- } else {
- glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
- static_cast<GLsizei>(source_size), nullptr);
+ const GLenum attachment = static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + index);
+ AttachTexture(handle, attachment, image_view);
}
- glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
+ if (const ImageView* const image_view = depth_buffer; image_view) {
+ if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) {
+ buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+ } else {
+ buffer_bits |= GL_DEPTH_BUFFER_BIT;
+ }
+ const GLenum attachment = AttachmentType(image_view->format);
+ AttachTexture(handle, attachment, image_view);
+ }
- const GLsizei width = static_cast<GLsizei>(dst_params.width);
- const GLsizei height = static_cast<GLsizei>(dst_params.height);
- const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
- if (dst_surface->IsCompressed()) {
- LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
- UNREACHABLE();
+ if (num_buffers > 1) {
+ glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data());
+ } else if (num_buffers > 0) {
+ glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]);
} else {
- switch (dst_params.target) {
- case SurfaceTarget::Texture1D:
- glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
- dest_format.type, nullptr);
- break;
- case SurfaceTarget::Texture2D:
- glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
- dest_format.format, dest_format.type, nullptr);
- break;
- case SurfaceTarget::Texture3D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubeArray:
- glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
- dest_format.format, dest_format.type, nullptr);
- break;
- case SurfaceTarget::TextureCubemap:
- glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
- dest_format.format, dest_format.type, nullptr);
- break;
- default:
- LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
- static_cast<u32>(dst_params.target));
- UNREACHABLE();
- }
+ glNamedFramebufferDrawBuffer(handle, GL_NONE);
}
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
- glTextureBarrier();
-}
+ glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width);
+ glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height);
+ // TODO
+ // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...);
+ // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...);
+ // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...);
-GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
- ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; });
- const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
- OGLBuffer& cp = copy_pbo_cache[l2];
- if (cp.handle == 0) {
- const std::size_t ceil_size = 1ULL << l2;
- cp.Create();
- cp.MakeStreamCopy(ceil_size);
+ if (runtime.device.HasDebuggingToolAttached()) {
+ const std::string name = VideoCommon::Name(key);
+ glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data());
}
- return cp.handle;
+ framebuffer.handle = handle;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index de8f18489..a6172f009 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -4,154 +4,241 @@
#pragma once
-#include <array>
-#include <functional>
#include <memory>
-#include <unordered_map>
-#include <utility>
-#include <vector>
+#include <span>
#include <glad/glad.h>
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
-using VideoCommon::SurfaceParams;
-using VideoCommon::ViewParams;
-
-class CachedSurfaceView;
-class CachedSurface;
-class TextureCacheOpenGL;
+class Device;
+class ProgramManager;
class StateTracker;
-using Surface = std::shared_ptr<CachedSurface>;
-using View = std::shared_ptr<CachedSurfaceView>;
-using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
+class Framebuffer;
+class Image;
+class ImageView;
+class Sampler;
+
+using VideoCommon::ImageId;
+using VideoCommon::ImageViewId;
+using VideoCommon::ImageViewType;
+using VideoCommon::NUM_RT;
+using VideoCommon::Offset2D;
+using VideoCommon::RenderTargets;
+
+struct ImageBufferMap {
+ ~ImageBufferMap();
+
+ std::span<u8> mapped_span;
+ size_t offset = 0;
+ OGLSync* sync;
+ GLuint buffer;
+};
+
+struct FormatProperties {
+ GLenum compatibility_class;
+ bool compatibility_by_size;
+ bool is_compressed;
+};
-class CachedSurface final : public VideoCommon::SurfaceBase<View> {
- friend CachedSurfaceView;
+class TextureCacheRuntime {
+ friend Framebuffer;
+ friend Image;
+ friend ImageView;
+ friend Sampler;
public:
- explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported);
- ~CachedSurface();
+ explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
+ StateTracker& state_tracker);
+ ~TextureCacheRuntime();
- void UploadTexture(const std::vector<u8>& staging_buffer) override;
- void DownloadTexture(std::vector<u8>& staging_buffer) override;
+ void Finish();
- GLenum GetTarget() const {
- return target;
- }
+ ImageBufferMap UploadStagingBuffer(size_t size);
- GLuint GetTexture() const {
- return texture.handle;
- }
+ ImageBufferMap DownloadStagingBuffer(size_t size);
+
+ void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
- bool IsCompressed() const {
- return is_compressed;
+ void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
+ UNIMPLEMENTED();
}
-protected:
- void DecorateSurfaceName() override;
+ bool CanImageBeCopied(const Image& dst, const Image& src);
+
+ void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+
+ void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation);
+
+ void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ void InsertUploadMemoryBarrier();
- View CreateView(const ViewParams& view_key) override;
- View CreateViewInner(const ViewParams& view_key, bool is_proxy);
+ FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
+
+ bool HasBrokenTextureViewFormats() const noexcept {
+ return has_broken_texture_view_formats;
+ }
private:
- void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
+ struct StagingBuffers {
+ explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
+ ~StagingBuffers();
- GLenum internal_format{};
- GLenum format{};
- GLenum type{};
- bool is_compressed{};
- GLenum target{};
- u32 view_count{};
+ ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
- OGLTexture texture;
- OGLBuffer texture_buffer;
+ size_t RequestBuffer(size_t requested_size);
+
+ std::optional<size_t> FindBuffer(size_t requested_size);
+
+ std::vector<OGLSync> syncs;
+ std::vector<OGLBuffer> buffers;
+ std::vector<u8*> maps;
+ std::vector<size_t> sizes;
+ GLenum storage_flags;
+ GLenum map_flags;
+ };
+
+ const Device& device;
+ StateTracker& state_tracker;
+ UtilShaders util_shaders;
+
+ std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
+ bool has_broken_texture_view_formats = false;
+
+ StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
+ StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
+
+ OGLTexture null_image_1d_array;
+ OGLTexture null_image_cube_array;
+ OGLTexture null_image_3d;
+ OGLTexture null_image_rect;
+ OGLTextureView null_image_view_1d;
+ OGLTextureView null_image_view_2d;
+ OGLTextureView null_image_view_2d_array;
+ OGLTextureView null_image_view_cube;
+
+ std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
};
-class CachedSurfaceView final : public VideoCommon::ViewBase {
+class Image : public VideoCommon::ImageBase {
+ friend ImageView;
+
public:
- explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
- ~CachedSurfaceView();
+ explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
+ VAddr cpu_addr);
- /// @brief Attaches this texture view to the currently bound fb_target framebuffer
- /// @param attachment Attachment to bind textures to
- /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
- void Attach(GLenum attachment, GLenum fb_target) const;
+ void UploadMemory(const ImageBufferMap& map,
+ std::span<const VideoCommon::BufferImageCopy> copies);
- GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source);
+ void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
- void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
+ void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
- void MarkAsModified(u64 tick) {
- surface.MarkAsModified(true, tick);
+ GLuint StorageHandle() noexcept;
+
+ GLuint Handle() const noexcept {
+ return texture.handle;
}
- GLuint GetTexture() const {
- if (is_proxy) {
- return surface.GetTexture();
- }
- return main_view.handle;
+private:
+ void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
+
+ void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
+
+ OGLTexture texture;
+ OGLBuffer buffer;
+ OGLTextureView store_view;
+ GLenum gl_internal_format = GL_NONE;
+ GLenum gl_format = GL_NONE;
+ GLenum gl_type = GL_NONE;
+};
+
+class ImageView : public VideoCommon::ImageViewBase {
+ friend Image;
+
+public:
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
+
+ [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
+ return views[static_cast<size_t>(query_type)];
}
- GLenum GetFormat() const {
- return format;
+ [[nodiscard]] GLuint DefaultHandle() const noexcept {
+ return default_handle;
}
- const SurfaceParams& GetSurfaceParams() const {
- return surface.GetSurfaceParams();
+ [[nodiscard]] GLenum Format() const noexcept {
+ return internal_format;
}
private:
- OGLTextureView CreateTextureView() const;
+ void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
+ const VideoCommon::ImageViewInfo& info,
+ VideoCommon::SubresourceRange view_range);
+
+ std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
+ std::vector<OGLTextureView> stored_views;
+ GLuint default_handle = 0;
+ GLenum internal_format = GL_NONE;
+};
- CachedSurface& surface;
- const GLenum format;
- const GLenum target;
- const bool is_proxy;
+class ImageAlloc : public VideoCommon::ImageAllocBase {};
- std::unordered_map<u32, OGLTextureView> view_cache;
- OGLTextureView main_view;
+class Sampler {
+public:
+ explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
+
+ GLuint Handle() const noexcept {
+ return sampler.handle;
+ }
- // Use an invalid default so it always fails the comparison test
- u32 current_swizzle = 0xffffffff;
- GLuint current_view = 0;
+private:
+ OGLSampler sampler;
};
-class TextureCacheOpenGL final : public TextureCacheBase {
+class Framebuffer {
public:
- explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const Device& device, StateTracker& state_tracker);
- ~TextureCacheOpenGL();
-
-protected:
- Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
-
- void ImageCopy(Surface& src_surface, Surface& dst_surface,
- const VideoCommon::CopyParams& copy_params) override;
+ explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
+ ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
- void ImageBlit(View& src_view, View& dst_view,
- const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ [[nodiscard]] GLuint Handle() const noexcept {
+ return framebuffer.handle;
+ }
- void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
+ [[nodiscard]] GLbitfield BufferBits() const noexcept {
+ return buffer_bits;
+ }
private:
- GLuint FetchPBO(std::size_t buffer_size);
-
- StateTracker& state_tracker;
+ OGLFramebuffer framebuffer;
+ GLbitfield buffer_bits = GL_NONE;
+};
- OGLFramebuffer src_framebuffer;
- OGLFramebuffer dst_framebuffer;
- std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
+struct TextureCacheParams {
+ static constexpr bool ENABLE_VALIDATION = true;
+ static constexpr bool FRAMEBUFFER_BLITS = true;
+ static constexpr bool HAS_EMULATED_COPIES = true;
+
+ using Runtime = OpenGL::TextureCacheRuntime;
+ using Image = OpenGL::Image;
+ using ImageAlloc = OpenGL::ImageAlloc;
+ using ImageView = OpenGL::ImageView;
+ using Sampler = OpenGL::Sampler;
+ using Framebuffer = OpenGL::Framebuffer;
};
+using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index fe9bd4b5a..f7ad8f370 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -4,23 +4,10 @@
#pragma once
-#include <array>
#include <glad/glad.h>
-#include "common/common_types.h"
-#include "common/logging/log.h"
#include "video_core/engines/maxwell_3d.h"
-namespace OpenGL {
-
-using GLvec2 = std::array<GLfloat, 2>;
-using GLvec3 = std::array<GLfloat, 3>;
-using GLvec4 = std::array<GLfloat, 4>;
-
-using GLuvec2 = std::array<GLuint, 2>;
-using GLuvec3 = std::array<GLuint, 3>;
-using GLuvec4 = std::array<GLuint, 4>;
-
-namespace MaxwellToGL {
+namespace OpenGL::MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -47,6 +34,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
return GL_UNSIGNED_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::SignedNorm:
@@ -70,6 +59,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
return GL_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::Float:
@@ -84,6 +75,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
+ default:
+ break;
}
break;
}
@@ -101,7 +94,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
case Maxwell::IndexFormat::UnsignedInt:
return GL_UNSIGNED_INT;
}
- UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
+ UNREACHABLE_MSG("Invalid index_format={}", index_format);
return {};
}
@@ -138,7 +131,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
case Maxwell::PrimitiveTopology::Patches:
return GL_PATCHES;
}
- UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology));
+ UNREACHABLE_MSG("Invalid topology={}", topology);
return GL_POINTS;
}
@@ -166,8 +159,8 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
}
break;
}
- UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
- static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
+ UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", filter_mode,
+ mipmap_filter_mode);
return GL_NEAREST;
}
@@ -198,7 +191,7 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
return GL_MIRROR_CLAMP_TO_EDGE;
}
}
- UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
+ UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", wrap_mode);
return GL_REPEAT;
}
@@ -221,7 +214,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
case Tegra::Texture::DepthCompareFunc::Always:
return GL_ALWAYS;
}
- UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
+ UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", func);
return GL_GREATER;
}
@@ -243,7 +236,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
case Maxwell::Blend::Equation::MaxGL:
return GL_MAX;
}
- UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
+ UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation);
return GL_FUNC_ADD;
}
@@ -307,27 +300,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return GL_ONE_MINUS_CONSTANT_ALPHA;
}
- UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
- return GL_ZERO;
-}
-
-inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
- switch (source) {
- case Tegra::Texture::SwizzleSource::Zero:
- return GL_ZERO;
- case Tegra::Texture::SwizzleSource::R:
- return GL_RED;
- case Tegra::Texture::SwizzleSource::G:
- return GL_GREEN;
- case Tegra::Texture::SwizzleSource::B:
- return GL_BLUE;
- case Tegra::Texture::SwizzleSource::A:
- return GL_ALPHA;
- case Tegra::Texture::SwizzleSource::OneInt:
- case Tegra::Texture::SwizzleSource::OneFloat:
- return GL_ONE;
- }
- UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
+ UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor);
return GL_ZERO;
}
@@ -358,7 +331,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
case Maxwell::ComparisonOp::AlwaysOld:
return GL_ALWAYS;
}
- UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
+ UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
return GL_ALWAYS;
}
@@ -389,7 +362,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
case Maxwell::StencilOp::DecrWrapOGL:
return GL_DECR_WRAP;
}
- UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
+ UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil);
return GL_KEEP;
}
@@ -400,7 +373,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
case Maxwell::FrontFace::CounterClockWise:
return GL_CCW;
}
- UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
+ UNIMPLEMENTED_MSG("Unimplemented front face cull={}", front_face);
return GL_CCW;
}
@@ -413,7 +386,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
case Maxwell::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
- UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
+ UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face);
return GL_BACK;
}
@@ -452,7 +425,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
case Maxwell::LogicOperation::Set:
return GL_SET;
}
- UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
+ UNIMPLEMENTED_MSG("Unimplemented logic operation={}", operation);
return GL_COPY;
}
@@ -465,14 +438,26 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
case Maxwell::PolygonMode::Fill:
return GL_FILL;
}
- UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode));
+ UNREACHABLE_MSG("Invalid polygon mode={}", polygon_mode);
return GL_FILL;
}
+inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
+ switch (filter) {
+ case Tegra::Texture::SamplerReduction::WeightedAverage:
+ return GL_WEIGHTED_AVERAGE_ARB;
+ case Tegra::Texture::SamplerReduction::Min:
+ return GL_MIN;
+ case Tegra::Texture::SamplerReduction::Max:
+ return GL_MAX;
+ }
+ UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
+ return GL_WEIGHTED_AVERAGE_ARB;
+}
+
inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
// Enumeration order matches register order. We can convert it arithmetically.
return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
}
-} // namespace MaxwellToGL
-} // namespace OpenGL
+} // namespace OpenGL::MaxwellToGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b759c2dba..9d2acd4d9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -23,29 +23,13 @@
#include "core/telemetry_session.h"
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
-#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/textures/decoders.h"
namespace OpenGL {
-
namespace {
-
-constexpr std::size_t SWAP_CHAIN_SIZE = 3;
-
-struct Frame {
- u32 width{}; /// Width of the frame (to detect resize)
- u32 height{}; /// Height of the frame
- bool color_reloaded{}; /// Texture attachment was recreated (ie: resized)
- OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO
- OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread
- OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread
- GLsync render_fence{}; /// Fence created on the render thread
- GLsync present_fence{}; /// Fence created on the presentation thread
- bool is_srgb{}; /// Framebuffer is sRGB or RGB
-};
-
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
@@ -58,24 +42,6 @@ struct ScreenRectVertex {
std::array<GLfloat, 2> tex_coord;
};
-/// Returns true if any debug tool is attached
-bool HasDebugTool() {
- const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
- if (nsight) {
- return true;
- }
-
- GLint num_extensions;
- glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
- for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
- const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
- if (!std::strcmp(name, "GL_EXT_debug_tool")) {
- return true;
- }
- }
- return false;
-}
-
/**
* Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
* corner and (width, height) on the lower-bottom.
@@ -156,217 +122,62 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
break;
}
}
-
} // Anonymous namespace
-/**
- * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
- * but also make sure that rendering happens at the pace that the frontend dictates. This is a
- * helper class that the renderer uses to sync frames between the render thread and the presentation
- * thread
- */
-class FrameMailbox {
-public:
- std::mutex swap_chain_lock;
- std::condition_variable present_cv;
- std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
- std::queue<Frame*> free_queue;
- std::deque<Frame*> present_queue;
- Frame* previous_frame{};
-
- FrameMailbox() {
- for (auto& frame : swap_chain) {
- free_queue.push(&frame);
- }
- }
-
- ~FrameMailbox() {
- // lock the mutex and clear out the present and free_queues and notify any people who are
- // blocked to prevent deadlock on shutdown
- std::scoped_lock lock{swap_chain_lock};
- std::queue<Frame*>().swap(free_queue);
- present_queue.clear();
- present_cv.notify_all();
- }
-
- void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
- frame->present.Release();
- frame->present.Create();
- GLint previous_draw_fbo{};
- glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
- glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
- frame->color.handle);
- if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
- LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
- }
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
- frame->color_reloaded = false;
- }
-
- void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
- // Recreate the color texture attachment
- frame->color.Release();
- frame->color.Create();
- const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
- glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
-
- // Recreate the FBO for the render target
- frame->render.Release();
- frame->render.Create();
- glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
- frame->color.handle);
- if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
- LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
- }
-
- frame->width = width;
- frame->height = height;
- frame->color_reloaded = true;
- }
-
- Frame* GetRenderFrame() {
- std::unique_lock lock{swap_chain_lock};
-
- // If theres no free frames, we will reuse the oldest render frame
- if (free_queue.empty()) {
- auto frame = present_queue.back();
- present_queue.pop_back();
- return frame;
- }
-
- Frame* frame = free_queue.front();
- free_queue.pop();
- return frame;
- }
-
- void ReleaseRenderFrame(Frame* frame) {
- std::unique_lock lock{swap_chain_lock};
- present_queue.push_front(frame);
- present_cv.notify_one();
- }
-
- Frame* TryGetPresentFrame(int timeout_ms) {
- std::unique_lock lock{swap_chain_lock};
- // wait for new entries in the present_queue
- present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
- [&] { return !present_queue.empty(); });
- if (present_queue.empty()) {
- // timed out waiting for a frame to draw so return the previous frame
- return previous_frame;
- }
-
- // free the previous frame and add it back to the free queue
- if (previous_frame) {
- free_queue.push(previous_frame);
- }
-
- // the newest entries are pushed to the front of the queue
- Frame* frame = present_queue.front();
- present_queue.pop_front();
- // remove all old entries from the present queue and move them back to the free_queue
- for (auto f : present_queue) {
- free_queue.push(f);
- }
- present_queue.clear();
- previous_frame = frame;
- return frame;
- }
-};
-
-RendererOpenGL::RendererOpenGL(Core::System& system_, Core::Frontend::EmuWindow& emu_window_,
- Tegra::GPU& gpu_,
+RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
+ Core::Frontend::EmuWindow& emu_window_,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
- : RendererBase{emu_window_, std::move(context_)}, system{system_},
- emu_window{emu_window_}, gpu{gpu_}, program_manager{device}, has_debug_tool{HasDebugTool()} {}
+ : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
+ emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
+ program_manager{device},
+ rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
+ if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
+ glEnable(GL_DEBUG_OUTPUT);
+ glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
+ glDebugMessageCallback(DebugHandler, nullptr);
+ }
+ AddTelemetryFields();
+ InitOpenGLObjects();
+}
RendererOpenGL::~RendererOpenGL() = default;
-MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64));
-MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128));
-
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
}
-
PrepareRendertarget(framebuffer);
RenderScreenshot();
- Frame* frame;
- {
- MICROPROFILE_SCOPE(OpenGL_WaitPresent);
-
- frame = frame_mailbox->GetRenderFrame();
+ state_tracker.BindFramebuffer(0);
+ DrawScreen(emu_window.GetFramebufferLayout());
- // Clean up sync objects before drawing
+ ++m_current_frame;
- // INTEL driver workaround. We can't delete the previous render sync object until we are
- // sure that the presentation is done
- if (frame->present_fence) {
- glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
- }
-
- // delete the draw fence if the frame wasn't presented
- if (frame->render_fence) {
- glDeleteSync(frame->render_fence);
- frame->render_fence = 0;
- }
+ rasterizer.TickFrame();
- // wait for the presentation to be done
- if (frame->present_fence) {
- glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
- glDeleteSync(frame->present_fence);
- frame->present_fence = 0;
- }
- }
-
- {
- MICROPROFILE_SCOPE(OpenGL_RenderFrame);
- const auto& layout = render_window.GetFramebufferLayout();
-
- // Recreate the frame if the size of the window has changed
- if (layout.width != frame->width || layout.height != frame->height ||
- screen_info.display_srgb != frame->is_srgb) {
- LOG_DEBUG(Render_OpenGL, "Reloading render frame");
- frame->is_srgb = screen_info.display_srgb;
- frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height);
- }
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle);
- DrawScreen(layout);
- // Create a fence for the frontend to wait on and swap this frame to OffTex
- frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- glFlush();
- frame_mailbox->ReleaseRenderFrame(frame);
- m_current_frame++;
- rasterizer->TickFrame();
- }
-
- render_window.PollEvents();
- if (has_debug_tool) {
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
- Present(0);
- context->SwapBuffers();
- }
+ context->SwapBuffers();
+ render_window.OnFrameDisplayed();
}
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
- if (framebuffer) {
- // If framebuffer is provided, reload it from memory to a texture
- if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
- screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
- screen_info.texture.pixel_format != framebuffer->pixel_format ||
- gl_framebuffer_data.empty()) {
- // Reallocate texture if the framebuffer size has changed.
- // This is expected to not happen very often and hence should not be a
- // performance problem.
- ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
- }
-
- // Load the framebuffer from memory, draw it to the screen, and swap buffers
- LoadFBToScreenInfo(*framebuffer);
+ if (!framebuffer) {
+ return;
+ }
+ // If framebuffer is provided, reload it from memory to a texture
+ if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
+ screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
+ screen_info.texture.pixel_format != framebuffer->pixel_format ||
+ gl_framebuffer_data.empty()) {
+ // Reallocate texture if the framebuffer size has changed.
+ // This is expected to not happen very often and hence should not be a
+ // performance problem.
+ ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
}
+
+ // Load the framebuffer from memory, draw it to the screen, and swap buffers
+ LoadFBToScreenInfo(*framebuffer);
}
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
@@ -375,26 +186,27 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
framebuffer_crop_rect = framebuffer.crop_rect;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
- if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
+ if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
return;
}
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
- const auto pixel_format{
- VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
- const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
- const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
- u8* const host_ptr{system.Memory().GetPointer(framebuffer_addr)};
- rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
-
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
- VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
- framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
- gl_framebuffer_data.data(), host_ptr);
-
+ const auto pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
+ const u64 size_in_bytes{Tegra::Texture::CalculateSize(
+ true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
+ const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
+ const std::span<const u8> input_data(host_ptr, size_in_bytes);
+ Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
+ framebuffer.width, framebuffer.height, 1, block_height_log2,
+ 0);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
// Update existing texture
@@ -416,8 +228,6 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
}
void RendererOpenGL::InitOpenGLObjects() {
- frame_mailbox = std::make_unique<FrameMailbox>();
-
glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
Settings::values.bg_blue.GetValue(), 0.0f);
@@ -435,6 +245,10 @@ void RendererOpenGL::InitOpenGLObjects() {
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
+ // Generate presentation sampler
+ present_sampler.Create();
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -452,9 +266,15 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
+ // Enable seamless cubemaps when per texture parameters are not available
+ if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
+ }
+
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
+ glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
@@ -471,19 +291,10 @@ void RendererOpenGL::AddTelemetryFields() {
LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
- auto& telemetry_session = system.TelemetrySession();
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
- telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor);
- telemetry_session.AddField(user_system, "GPU_Model", gpu_model);
- telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version);
-}
-
-void RendererOpenGL::CreateRasterizer() {
- if (rasterizer) {
- return;
- }
- rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
- program_manager, state_tracker);
+ telemetry_session.AddField(user_system, "GPU_Vendor", std::string(gpu_vendor));
+ telemetry_session.AddField(user_system, "GPU_Model", std::string(gpu_model));
+ telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@@ -494,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const auto pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
- const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
GLint internal_format;
@@ -513,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
- UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
- static_cast<u32>(framebuffer.pixel_format));
+ // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
+ // static_cast<u32>(framebuffer.pixel_format));
}
texture.resource.Release();
@@ -546,7 +357,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
} else {
// Other transformations are unsupported
LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}",
- static_cast<u32>(framebuffer_transform_flags));
+ framebuffer_transform_flags);
UNIMPLEMENTED();
}
}
@@ -580,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
- state_tracker.NotifyColorMask0();
+ state_tracker.NotifyColorMask(0);
state_tracker.NotifyBlend0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyFrontFace();
@@ -596,6 +407,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
program_manager.BindHostPipeline(pipeline.handle);
+ state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
glEnable(GL_FRAMEBUFFER_SRGB);
@@ -614,7 +426,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
- glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
glDepthRangeIndexed(0, 0.0, 0.0);
@@ -638,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
}
glBindTextureUnit(0, screen_info.display_texture);
- glBindSampler(0, 0);
+ glBindSampler(0, present_sampler.handle);
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
@@ -646,51 +457,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
program_manager.RestoreGuestPipeline();
}
-bool RendererOpenGL::TryPresent(int timeout_ms) {
- if (has_debug_tool) {
- LOG_DEBUG(Render_OpenGL,
- "Skipping presentation because we are presenting on the main context");
- return false;
- }
- return Present(timeout_ms);
-}
-
-bool RendererOpenGL::Present(int timeout_ms) {
- const auto& layout = render_window.GetFramebufferLayout();
- auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms);
- if (!frame) {
- LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present");
- return false;
- }
-
- // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a
- // readback since we won't be doing any blending
- glClear(GL_COLOR_BUFFER_BIT);
-
- // Recreate the presentation FBO if the color attachment was changed
- if (frame->color_reloaded) {
- LOG_DEBUG(Render_OpenGL, "Reloading present frame");
- frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
- }
- glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED);
- // INTEL workaround.
- // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete
- // it on the emulation thread without too much penalty
- // glDeleteSync(frame.render_sync);
- // frame.render_sync = 0;
-
- glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle);
- glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height,
- GL_COLOR_BUFFER_BIT, GL_LINEAR);
-
- // Insert fence for the main thread to block on
- frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- glFlush();
-
- glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
- return true;
-}
-
void RendererOpenGL::RenderScreenshot() {
if (!renderer_settings.screenshot_requested) {
return;
@@ -705,7 +471,7 @@ void RendererOpenGL::RenderScreenshot() {
screenshot_framebuffer.Create();
glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
- Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
+ const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
GLuint renderbuffer;
glGenRenderbuffers(1, &renderbuffer);
@@ -716,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() {
DrawScreen(layout);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+ glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
renderer_settings.screenshot_bits);
@@ -729,25 +497,4 @@ void RendererOpenGL::RenderScreenshot() {
renderer_settings.screenshot_requested = false;
}
-bool RendererOpenGL::Init() {
- if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
- glEnable(GL_DEBUG_OUTPUT);
- glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
- glDebugMessageCallback(DebugHandler, nullptr);
- }
-
- AddTelemetryFields();
-
- if (!GLAD_GL_VERSION_4_3) {
- return false;
- }
-
- InitOpenGLObjects();
- CreateRasterizer();
-
- return true;
-}
-
-void RendererOpenGL::ShutDown() {}
-
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 52ea76b7d..cc19a110f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,22 +10,32 @@
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
class System;
-}
+class TelemetrySession;
+} // namespace Core
namespace Core::Frontend {
class EmuWindow;
}
+namespace Core::Memory {
+class Memory;
+}
+
namespace Layout {
struct FramebufferLayout;
}
+namespace Tegra {
+class GPU;
+}
+
namespace OpenGL {
/// Structure used for storing information about the textures for the Switch screen
@@ -46,25 +56,19 @@ struct ScreenInfo {
TextureInfo texture;
};
-struct PresentationTexture {
- u32 width = 0;
- u32 height = 0;
- OGLTexture texture;
-};
-
-class FrameMailbox;
-
class RendererOpenGL final : public VideoCore::RendererBase {
public:
- explicit RendererOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- Tegra::GPU& gpu,
- std::unique_ptr<Core::Frontend::GraphicsContext> context);
+ explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_,
+ Core::Frontend::EmuWindow& emu_window_,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererOpenGL() override;
- bool Init() override;
- void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- bool TryPresent(int timeout_ms) override;
+
+ VideoCore::RasterizerInterface* ReadRasterizer() override {
+ return &rasterizer;
+ }
private:
/// Initializes the OpenGL state and creates persistent objects.
@@ -72,8 +76,6 @@ private:
void AddTelemetryFields();
- void CreateRasterizer();
-
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
@@ -92,16 +94,18 @@ private:
void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
- bool Present(int timeout_ms);
-
- Core::System& system;
+ Core::TelemetrySession& telemetry_session;
Core::Frontend::EmuWindow& emu_window;
+ Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
- const Device device;
- StateTracker state_tracker{system};
+ Device device;
+ StateTracker state_tracker;
+ ProgramManager program_manager;
+ RasterizerOpenGL rasterizer;
// OpenGL object IDs
+ OGLSampler present_sampler;
OGLBuffer vertex_buffer;
OGLProgram vertex_program;
OGLProgram fragment_program;
@@ -114,20 +118,12 @@ private:
/// Display information for Switch screen
ScreenInfo screen_info;
- /// Global dummy shader pipeline
- ProgramManager program_manager;
-
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
-
- /// Frame presentation mailbox
- std::unique_ptr<FrameMailbox> frame_mailbox;
-
- bool has_debug_tool = false;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
new file mode 100644
index 000000000..31ec68505
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -0,0 +1,225 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <bit>
+#include <span>
+#include <string_view>
+
+#include <glad/glad.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
+#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
+#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
+#include "video_core/host_shaders/pitch_unswizzle_comp.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+#include "video_core/renderer_opengl/util_shaders.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/accelerated_swizzle.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/decoders.h"
+
+namespace OpenGL {
+
+using namespace HostShaders;
+
+using VideoCommon::Extent3D;
+using VideoCommon::ImageCopy;
+using VideoCommon::ImageType;
+using VideoCommon::SwizzleParameters;
+using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
+using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
+using VideoCore::Surface::BytesPerBlock;
+
+namespace {
+
+OGLProgram MakeProgram(std::string_view source) {
+ OGLShader shader;
+ shader.Create(source, GL_COMPUTE_SHADER);
+
+ OGLProgram program;
+ program.Create(true, false, shader.handle);
+ return program;
+}
+
+} // Anonymous namespace
+
+UtilShaders::UtilShaders(ProgramManager& program_manager_)
+ : program_manager{program_manager_},
+ block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
+ block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
+ pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
+ copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
+ const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
+ swizzle_table_buffer.Create();
+ glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
+}
+
+UtilShaders::~UtilShaders() = default;
+
+void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
+ std::span<const SwizzleParameters> swizzles) {
+ static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
+ static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
+ static constexpr GLuint BINDING_INPUT_BUFFER = 1;
+ static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
+
+ program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
+
+ const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
+ for (const SwizzleParameters& swizzle : swizzles) {
+ const Extent3D num_tiles = swizzle.num_tiles;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
+
+ const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
+ const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
+
+ const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
+ glUniform3uiv(0, 1, params.origin.data());
+ glUniform3iv(1, 1, params.destination.data());
+ glUniform1ui(2, params.bytes_per_block_log2);
+ glUniform1ui(3, params.layer_stride);
+ glUniform1ui(4, params.block_size);
+ glUniform1ui(5, params.x_shift);
+ glUniform1ui(6, params.block_height);
+ glUniform1ui(7, params.block_height_mask);
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
+ GL_WRITE_ONLY, store_format);
+ glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
+void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
+ std::span<const SwizzleParameters> swizzles) {
+ static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
+
+ static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
+ static constexpr GLuint BINDING_INPUT_BUFFER = 1;
+ static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
+
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
+ program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
+
+ const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
+ for (const SwizzleParameters& swizzle : swizzles) {
+ const Extent3D num_tiles = swizzle.num_tiles;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
+
+ const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
+ const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
+ const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
+
+ const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
+ glUniform3uiv(0, 1, params.origin.data());
+ glUniform3iv(1, 1, params.destination.data());
+ glUniform1ui(2, params.bytes_per_block_log2);
+ glUniform1ui(3, params.slice_size);
+ glUniform1ui(4, params.block_size);
+ glUniform1ui(5, params.x_shift);
+ glUniform1ui(6, params.block_height);
+ glUniform1ui(7, params.block_height_mask);
+ glUniform1ui(8, params.block_depth);
+ glUniform1ui(9, params.block_depth_mask);
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
+ GL_WRITE_ONLY, store_format);
+ glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
+void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
+ std::span<const SwizzleParameters> swizzles) {
+ static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
+ static constexpr GLuint BINDING_INPUT_BUFFER = 0;
+ static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
+ static constexpr GLuint LOC_ORIGIN = 0;
+ static constexpr GLuint LOC_DESTINATION = 1;
+ static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
+ static constexpr GLuint LOC_PITCH = 3;
+
+ const u32 bytes_per_block = BytesPerBlock(image.info.format);
+ const GLenum format = StoreFormat(bytes_per_block);
+ const u32 pitch = image.info.pitch;
+
+ UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
+ "Non-power of two images are not implemented");
+
+ program_manager.BindHostCompute(pitch_unswizzle_program.handle);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
+ glUniform2ui(LOC_ORIGIN, 0, 0);
+ glUniform2i(LOC_DESTINATION, 0, 0);
+ glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
+ glUniform1ui(LOC_PITCH, pitch);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY,
+ format);
+ for (const SwizzleParameters& swizzle : swizzles) {
+ const Extent3D num_tiles = swizzle.num_tiles;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
+
+ const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
+ const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
+
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
+ glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
+void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
+ static constexpr GLuint BINDING_INPUT_IMAGE = 0;
+ static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
+ static constexpr GLuint LOC_SRC_OFFSET = 0;
+ static constexpr GLuint LOC_DST_OFFSET = 1;
+
+ program_manager.BindHostCompute(copy_bc4_program.handle);
+
+ for (const ImageCopy& copy : copies) {
+ ASSERT(copy.src_subresource.base_layer == 0);
+ ASSERT(copy.src_subresource.num_layers == 1);
+ ASSERT(copy.dst_subresource.base_layer == 0);
+ ASSERT(copy.dst_subresource.num_layers == 1);
+
+ glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
+ glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
+ glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
+ copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
+ copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
+ glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
+GLenum StoreFormat(u32 bytes_per_block) {
+ switch (bytes_per_block) {
+ case 1:
+ return GL_R8UI;
+ case 2:
+ return GL_R16UI;
+ case 4:
+ return GL_R32UI;
+ case 8:
+ return GL_RG32UI;
+ case 16:
+ return GL_RGBA32UI;
+ }
+ UNREACHABLE();
+ return GL_R8UI;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
new file mode 100644
index 000000000..7b1d16b09
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -0,0 +1,52 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/texture_cache/types.h"
+
+namespace OpenGL {
+
+class Image;
+class ProgramManager;
+
+struct ImageBufferMap;
+
+class UtilShaders {
+public:
+ explicit UtilShaders(ProgramManager& program_manager);
+ ~UtilShaders();
+
+ void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ void PitchUpload(Image& image, const ImageBufferMap& map,
+ std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+ void CopyBC4(Image& dst_image, Image& src_image,
+ std::span<const VideoCommon::ImageCopy> copies);
+
+private:
+ ProgramManager& program_manager;
+
+ OGLBuffer swizzle_table_buffer;
+
+ OGLProgram block_linear_unswizzle_2d_program;
+ OGLProgram block_linear_unswizzle_3d_program;
+ OGLProgram pitch_unswizzle_program;
+ OGLProgram copy_bc4_program;
+};
+
+GLenum StoreFormat(u32 bytes_per_block);
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
deleted file mode 100644
index 6d7bb16b2..000000000
--- a/src/video_core/renderer_opengl/utils.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <string>
-#include <vector>
-
-#include <fmt/format.h>
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/renderer_opengl/utils.h"
-
-namespace OpenGL {
-
-void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
- if (!GLAD_GL_KHR_debug) {
- // We don't need to throw an error as this is just for debugging
- return;
- }
-
- std::string object_label;
- if (extra_info.empty()) {
- switch (identifier) {
- case GL_TEXTURE:
- object_label = fmt::format("Texture@0x{:016X}", addr);
- break;
- case GL_PROGRAM:
- object_label = fmt::format("Shader@0x{:016X}", addr);
- break;
- default:
- object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
- break;
- }
- } else {
- object_label = fmt::format("{}@0x{:016X}", extra_info, addr);
- }
- glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
deleted file mode 100644
index 9c09ee12c..000000000
--- a/src/video_core/renderer_opengl/utils.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <string_view>
-#include <vector>
-#include <glad/glad.h>
-#include "common/common_types.h"
-
-namespace OpenGL {
-
-void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
new file mode 100644
index 000000000..1f6a169ae
--- /dev/null
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -0,0 +1,624 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+
+#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
+#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
+#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
+#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
+#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
+#include "video_core/renderer_vulkan/blit_image.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+using VideoCommon::ImageViewType;
+
+namespace {
+struct PushConstants {
+ std::array<float, 2> tex_scale;
+ std::array<float, 2> tex_offset;
+};
+
+template <u32 binding>
+inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{
+ .binding = binding,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = nullptr,
+};
+constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{
+ TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
+ TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>,
+};
+constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = 1,
+ .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
+};
+constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()),
+ .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(),
+};
+constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
+ .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+ .offset = 0,
+ .size = sizeof(PushConstants),
+};
+constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .vertexBindingDescriptionCount = 0,
+ .pVertexBindingDescriptions = nullptr,
+ .vertexAttributeDescriptionCount = 0,
+ .pVertexAttributeDescriptions = nullptr,
+};
+constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+ .primitiveRestartEnable = VK_FALSE,
+};
+constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .viewportCount = 1,
+ .pViewports = nullptr,
+ .scissorCount = 1,
+ .pScissors = nullptr,
+};
+constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .depthClampEnable = VK_FALSE,
+ .rasterizerDiscardEnable = VK_FALSE,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_BACK_BIT,
+ .frontFace = VK_FRONT_FACE_CLOCKWISE,
+ .depthBiasEnable = VK_FALSE,
+ .depthBiasConstantFactor = 0.0f,
+ .depthBiasClamp = 0.0f,
+ .depthBiasSlopeFactor = 0.0f,
+ .lineWidth = 1.0f,
+};
+constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ .sampleShadingEnable = VK_FALSE,
+ .minSampleShading = 0.0f,
+ .pSampleMask = nullptr,
+ .alphaToCoverageEnable = VK_FALSE,
+ .alphaToOneEnable = VK_FALSE,
+};
+constexpr std::array DYNAMIC_STATES{
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+};
+constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
+ .pDynamicStates = DYNAMIC_STATES.data(),
+};
+constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_CLEAR,
+ .attachmentCount = 0,
+ .pAttachments = nullptr,
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
+};
+constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{
+ .blendEnable = VK_FALSE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+};
+constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_CLEAR,
+ .attachmentCount = 1,
+ .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE,
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
+};
+constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .depthTestEnable = VK_TRUE,
+ .depthWriteEnable = VK_TRUE,
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ .depthBoundsTestEnable = VK_FALSE,
+ .stencilTestEnable = VK_FALSE,
+ .front = VkStencilOpState{},
+ .back = VkStencilOpState{},
+ .minDepthBounds = 0.0f,
+ .maxDepthBounds = 0.0f,
+};
+
+template <VkFilter filter>
+inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .magFilter = filter,
+ .minFilter = filter,
+ .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .mipLodBias = 0.0f,
+ .anisotropyEnable = VK_FALSE,
+ .maxAnisotropy = 0.0f,
+ .compareEnable = VK_FALSE,
+ .compareOp = VK_COMPARE_OP_NEVER,
+ .minLod = 0.0f,
+ .maxLod = 0.0f,
+ .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
+ .unnormalizedCoordinates = VK_TRUE,
+};
+
+constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo(
+ const VkDescriptorSetLayout* set_layout) {
+ return VkPipelineLayoutCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = 1,
+ .pSetLayouts = set_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &PUSH_CONSTANT_RANGE,
+ };
+}
+
+constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage,
+ VkShaderModule shader) {
+ return VkPipelineShaderStageCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = stage,
+ .module = shader,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ };
+}
+
+constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages(
+ VkShaderModule vertex_shader, VkShaderModule fragment_shader) {
+ return std::array{
+ PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader),
+ PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader),
+ };
+}
+
+void UpdateOneTextureDescriptorSet(const Device& device, VkDescriptorSet descriptor_set,
+ VkSampler sampler, VkImageView image_view) {
+ const VkDescriptorImageInfo image_info{
+ .sampler = sampler,
+ .imageView = image_view,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ const VkWriteDescriptorSet write_descriptor_set{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = descriptor_set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = &image_info,
+ .pBufferInfo = nullptr,
+ .pTexelBufferView = nullptr,
+ };
+ device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr);
+}
+
+void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descriptor_set,
+ VkSampler sampler, VkImageView image_view_0,
+ VkImageView image_view_1) {
+ const VkDescriptorImageInfo image_info_0{
+ .sampler = sampler,
+ .imageView = image_view_0,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ const VkDescriptorImageInfo image_info_1{
+ .sampler = sampler,
+ .imageView = image_view_1,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ const std::array write_descriptor_sets{
+ VkWriteDescriptorSet{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = descriptor_set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = &image_info_0,
+ .pBufferInfo = nullptr,
+ .pTexelBufferView = nullptr,
+ },
+ VkWriteDescriptorSet{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .pNext = nullptr,
+ .dstSet = descriptor_set,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = &image_info_1,
+ .pBufferInfo = nullptr,
+ .pTexelBufferView = nullptr,
+ },
+ };
+ device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
+}
+
+void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region) {
+ const VkOffset2D offset{
+ .x = std::min(dst_region[0].x, dst_region[1].x),
+ .y = std::min(dst_region[0].y, dst_region[1].y),
+ };
+ const VkExtent2D extent{
+ .width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)),
+ .height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)),
+ };
+ const VkViewport viewport{
+ .x = static_cast<float>(offset.x),
+ .y = static_cast<float>(offset.y),
+ .width = static_cast<float>(extent.width),
+ .height = static_cast<float>(extent.height),
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ };
+ // TODO: Support scissored blits
+ const VkRect2D scissor{
+ .offset = offset,
+ .extent = extent,
+ };
+ const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x);
+ const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y);
+ const PushConstants push_constants{
+ .tex_scale = {scale_x, scale_y},
+ .tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)},
+ };
+ cmdbuf.SetViewport(0, viewport);
+ cmdbuf.SetScissor(0, scissor);
+ cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
+}
+
+} // Anonymous namespace
+
+BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
+ StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
+ : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
+ one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
+ ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
+ two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
+ TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
+ one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
+ two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
+ one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
+ PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
+ two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
+ PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
+ full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
+ blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
+ convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
+ convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
+ linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
+ nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
+ if (device.IsExtShaderStencilExportSupported()) {
+ blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV);
+ }
+}
+
+BlitImageHelper::~BlitImageHelper() = default;
+
+void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation) {
+ const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
+ const BlitImagePipelineKey key{
+ .renderpass = dst_framebuffer->RenderPass(),
+ .operation = operation,
+ };
+ const VkPipelineLayout layout = *one_texture_pipeline_layout;
+ const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
+ const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
+ const VkPipeline pipeline = FindOrEmplacePipeline(key);
+ const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
+ scheduler.RequestRenderpass(dst_framebuffer);
+ scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set,
+ &device = device](vk::CommandBuffer cmdbuf) {
+ // TODO: Barriers
+ UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
+ nullptr);
+ BindBlitState(cmdbuf, layout, dst_region, src_region);
+ cmdbuf.Draw(3, 1, 0, 0);
+ });
+ scheduler.InvalidateState();
+}
+
+void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
+ VkImageView src_depth_view, VkImageView src_stencil_view,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation) {
+ ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
+ ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
+
+ const VkPipelineLayout layout = *two_textures_pipeline_layout;
+ const VkSampler sampler = *nearest_sampler;
+ const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
+ const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
+ scheduler.RequestRenderpass(dst_framebuffer);
+ scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
+ src_stencil_view, descriptor_set,
+ &device = device](vk::CommandBuffer cmdbuf) {
+ // TODO: Barriers
+ UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
+ src_stencil_view);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
+ nullptr);
+ BindBlitState(cmdbuf, layout, dst_region, src_region);
+ cmdbuf.Draw(3, 1, 0, 0);
+ });
+ scheduler.InvalidateState();
+}
+
+void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
+ const ImageView& src_image_view) {
+ ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
+ Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
+}
+
+void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
+ const ImageView& src_image_view) {
+
+ ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
+ Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
+}
+
+void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
+ const ImageView& src_image_view) {
+ ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
+ Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
+}
+
+void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
+ const ImageView& src_image_view) {
+ ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
+ Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
+}
+
+void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
+ const ImageView& src_image_view) {
+ const VkPipelineLayout layout = *one_texture_pipeline_layout;
+ const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
+ const VkSampler sampler = *nearest_sampler;
+ const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
+ const VkExtent2D extent{
+ .width = src_image_view.size.width,
+ .height = src_image_view.size.height,
+ };
+ scheduler.RequestRenderpass(dst_framebuffer);
+ scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent,
+ &device = device](vk::CommandBuffer cmdbuf) {
+ const VkOffset2D offset{
+ .x = 0,
+ .y = 0,
+ };
+ const VkViewport viewport{
+ .x = 0.0f,
+ .y = 0.0f,
+ .width = static_cast<float>(extent.width),
+ .height = static_cast<float>(extent.height),
+ .minDepth = 0.0f,
+ .maxDepth = 0.0f,
+ };
+ const VkRect2D scissor{
+ .offset = offset,
+ .extent = extent,
+ };
+ const PushConstants push_constants{
+ .tex_scale = {viewport.width, viewport.height},
+ .tex_offset = {0.0f, 0.0f},
+ };
+ UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
+
+ // TODO: Barriers
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
+ nullptr);
+ cmdbuf.SetViewport(0, viewport);
+ cmdbuf.SetScissor(0, scissor);
+ cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
+ cmdbuf.Draw(3, 1, 0, 0);
+ });
+ scheduler.InvalidateState();
+}
+
+VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
+ const auto it = std::ranges::find(blit_color_keys, key);
+ if (it != blit_color_keys.end()) {
+ return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
+ }
+ blit_color_keys.push_back(key);
+
+ const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag);
+ const VkPipelineColorBlendAttachmentState blend_attachment{
+ .blendEnable = VK_FALSE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ };
+ // TODO: programmable blending
+ const VkPipelineColorBlendStateCreateInfo color_blend_create_info{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_CLEAR,
+ .attachmentCount = 1,
+ .pAttachments = &blend_attachment,
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
+ };
+ blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(stages.size()),
+ .pStages = stages.data(),
+ .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pTessellationState = nullptr,
+ .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pDepthStencilState = nullptr,
+ .pColorBlendState = &color_blend_create_info,
+ .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .layout = *one_texture_pipeline_layout,
+ .renderPass = key.renderpass,
+ .subpass = 0,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ .basePipelineIndex = 0,
+ }));
+ return *blit_color_pipelines.back();
+}
+
+VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
+ if (blit_depth_stencil_pipeline) {
+ return *blit_depth_stencil_pipeline;
+ }
+ const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
+ blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(stages.size()),
+ .pStages = stages.data(),
+ .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pTessellationState = nullptr,
+ .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
+ .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .layout = *two_textures_pipeline_layout,
+ .renderPass = renderpass,
+ .subpass = 0,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ .basePipelineIndex = 0,
+ });
+ return *blit_depth_stencil_pipeline;
+}
+
+void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
+ if (pipeline) {
+ return;
+ }
+ const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag);
+ pipeline = device.GetLogical().CreateGraphicsPipeline({
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(stages.size()),
+ .pStages = stages.data(),
+ .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pTessellationState = nullptr,
+ .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pDepthStencilState = nullptr,
+ .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
+ .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .layout = *one_texture_pipeline_layout,
+ .renderPass = renderpass,
+ .subpass = 0,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ .basePipelineIndex = 0,
+ });
+}
+
+void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
+ if (pipeline) {
+ return;
+ }
+ const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag);
+ pipeline = device.GetLogical().CreateGraphicsPipeline({
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(stages.size()),
+ .pStages = stages.data(),
+ .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pTessellationState = nullptr,
+ .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
+ .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .layout = *one_texture_pipeline_layout,
+ .renderPass = renderpass,
+ .subpass = 0,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ .basePipelineIndex = 0,
+ });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
new file mode 100644
index 000000000..43fd3d737
--- /dev/null
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -0,0 +1,96 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+using VideoCommon::Offset2D;
+
+class Device;
+class Framebuffer;
+class ImageView;
+class StateTracker;
+class VKScheduler;
+
+struct BlitImagePipelineKey {
+ constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
+
+ VkRenderPass renderpass;
+ Tegra::Engines::Fermi2D::Operation operation;
+};
+
+class BlitImageHelper {
+public:
+ explicit BlitImageHelper(const Device& device, VKScheduler& scheduler,
+ StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
+ ~BlitImageHelper();
+
+ void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation);
+
+ void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
+ VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation);
+
+ void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+
+ void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+
+ void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+
+ void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+
+private:
+ void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
+ const ImageView& src_image_view);
+
+ [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
+
+ [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
+
+ void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
+
+ void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
+
+ const Device& device;
+ VKScheduler& scheduler;
+ StateTracker& state_tracker;
+
+ vk::DescriptorSetLayout one_texture_set_layout;
+ vk::DescriptorSetLayout two_textures_set_layout;
+ DescriptorAllocator one_texture_descriptor_allocator;
+ DescriptorAllocator two_textures_descriptor_allocator;
+ vk::PipelineLayout one_texture_pipeline_layout;
+ vk::PipelineLayout two_textures_pipeline_layout;
+ vk::ShaderModule full_screen_vert;
+ vk::ShaderModule blit_color_to_color_frag;
+ vk::ShaderModule blit_depth_stencil_frag;
+ vk::ShaderModule convert_depth_to_float_frag;
+ vk::ShaderModule convert_float_to_depth_frag;
+ vk::Sampler linear_sampler;
+ vk::Sampler nearest_sampler;
+
+ std::vector<BlitImagePipelineKey> blit_color_keys;
+ std::vector<vk::Pipeline> blit_color_pipelines;
+ vk::Pipeline blit_depth_stencil_pipeline;
+ vk::Pipeline convert_d32_to_r32_pipeline;
+ vk::Pipeline convert_r32_to_d32_pipeline;
+ vk::Pipeline convert_d16_to_r16_pipeline;
+ vk::Pipeline convert_r16_to_d16_pipeline;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 81a39a3b8..362278f01 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -8,17 +8,19 @@
#include <boost/functional/hash.hpp>
+#include "common/bit_cast.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
namespace Vulkan {
namespace {
-constexpr std::size_t POINT = 0;
-constexpr std::size_t LINE = 1;
-constexpr std::size_t POLYGON = 2;
+constexpr size_t POINT = 0;
+constexpr size_t LINE = 1;
+constexpr size_t POLYGON = 2;
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POINT, // Points
LINE, // Lines
@@ -39,13 +41,17 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
} // Anonymous namespace
-void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {
- const std::array enabled_lut = {regs.polygon_offset_point_enable,
- regs.polygon_offset_line_enable,
- regs.polygon_offset_fill_enable};
+void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
+ bool has_extended_dynamic_state) {
+ const Maxwell& regs = maxwell3d.regs;
+ const std::array enabled_lut{
+ regs.polygon_offset_point_enable,
+ regs.polygon_offset_line_enable,
+ regs.polygon_offset_fill_enable,
+ };
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
- raw = 0;
+ raw1 = 0;
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
@@ -58,40 +64,58 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
+ topology.Assign(regs.draw.topology);
+ msaa_mode.Assign(regs.multisample_mode);
- std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast
-
- for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- binding_divisors[index] =
- regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0;
+ raw2 = 0;
+ const auto test_func =
+ regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
+ alpha_test_func.Assign(PackComparisonOp(test_func));
+ early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
+
+ alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
+ point_size = Common::BitCast<u32>(regs.point_size);
+
+ if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) {
+ maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false;
+ for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
+ binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
+ }
}
-
- for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
- const auto& input = regs.vertex_attrib_format[index];
- auto& attribute = attributes[index];
- attribute.raw = 0;
- attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
- attribute.buffer.Assign(input.buffer);
- attribute.offset.Assign(input.offset);
- attribute.type.Assign(static_cast<u32>(input.type.Value()));
- attribute.size.Assign(static_cast<u32>(input.size.Value()));
+ if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) {
+ maxwell3d.dirty.flags[Dirty::VertexAttributes] = false;
+ for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ const auto& input = regs.vertex_attrib_format[index];
+ auto& attribute = attributes[index];
+ attribute.raw = 0;
+ attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
+ attribute.buffer.Assign(input.buffer);
+ attribute.offset.Assign(input.offset);
+ attribute.type.Assign(static_cast<u32>(input.type.Value()));
+ attribute.size.Assign(static_cast<u32>(input.size.Value()));
+ }
}
-
- for (std::size_t index = 0; index < std::size(attachments); ++index) {
- attachments[index].Fill(regs, index);
+ if (maxwell3d.dirty.flags[Dirty::Blending]) {
+ maxwell3d.dirty.flags[Dirty::Blending] = false;
+ for (size_t index = 0; index < attachments.size(); ++index) {
+ attachments[index].Refresh(regs, index);
+ }
+ }
+ if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
+ maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
+ const auto& transform = regs.viewport_transform;
+ std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) {
+ return static_cast<u16>(viewport.swizzle.raw);
+ });
}
-
- const auto& transform = regs.viewport_transform;
- std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),
- [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
-
if (!has_extended_dynamic_state) {
no_extended_dynamic_state.Assign(1);
- dynamic_state.Fill(regs);
+ dynamic_state.Refresh(regs);
}
}
-void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
+void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
raw = 0;
@@ -130,8 +154,7 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
enable.Assign(1);
}
-void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
- const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
+void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
u32 packed_front_face = PackFrontFace(regs.front_face);
if (regs.screen_y_control.triangle_rast_flip != 0) {
// Flip front face
@@ -161,22 +184,16 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
depth_test_enable.Assign(regs.depth_test_enable);
front_face.Assign(packed_front_face);
depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
- topology.Assign(topology_index);
cull_face.Assign(PackCullFace(regs.cull_face));
cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
-
- for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- const auto& input = regs.vertex_array[index];
- VertexBinding& binding = vertex_bindings[index];
- binding.raw = 0;
- binding.enabled.Assign(input.IsEnabled() ? 1 : 0);
- binding.stride.Assign(static_cast<u16>(input.stride.Value()));
- }
+ std::ranges::transform(regs.vertex_array, vertex_strides.begin(), [](const auto& array) {
+ return static_cast<u16>(array.stride.Value());
+ });
}
-std::size_t FixedPipelineState::Hash() const noexcept {
+size_t FixedPipelineState::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
- return static_cast<std::size_t>(hash);
+ return static_cast<size_t>(hash);
}
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index cdcbb65f5..a0eb83a68 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -58,7 +58,7 @@ struct FixedPipelineState {
BitField<30, 1, u32> enable;
};
- void Fill(const Maxwell& regs, std::size_t index);
+ void Refresh(const Maxwell& regs, size_t index);
constexpr std::array<bool, 4> Mask() const noexcept {
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
@@ -106,7 +106,7 @@ struct FixedPipelineState {
}
};
- template <std::size_t Position>
+ template <size_t Position>
union StencilFace {
BitField<Position + 0, 3, u32> action_stencil_fail;
BitField<Position + 3, 3, u32> action_depth_fail;
@@ -130,12 +130,6 @@ struct FixedPipelineState {
}
};
- union VertexBinding {
- u16 raw;
- BitField<0, 12, u16> stride;
- BitField<12, 1, u16> enabled;
- };
-
struct DynamicState {
union {
u32 raw1;
@@ -150,13 +144,13 @@ struct FixedPipelineState {
};
union {
u32 raw2;
- BitField<0, 4, u32> topology;
- BitField<4, 2, u32> cull_face;
- BitField<6, 1, u32> cull_enable;
+ BitField<0, 2, u32> cull_face;
+ BitField<2, 1, u32> cull_enable;
};
- std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings;
+ // Vertex stride is a 12 bits value, we have 4 bits to spare per element
+ std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
- void Fill(const Maxwell& regs);
+ void Refresh(const Maxwell& regs);
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
return UnpackComparisonOp(depth_test_func);
@@ -169,14 +163,10 @@ struct FixedPipelineState {
Maxwell::FrontFace FrontFace() const noexcept {
return UnpackFrontFace(front_face.Value());
}
-
- constexpr Maxwell::PrimitiveTopology Topology() const noexcept {
- return static_cast<Maxwell::PrimitiveTopology>(topology.Value());
- }
};
union {
- u32 raw;
+ u32 raw1;
BitField<0, 1, u32> no_extended_dynamic_state;
BitField<2, 1, u32> primitive_restart_enable;
BitField<3, 1, u32> depth_bias_enable;
@@ -190,7 +180,16 @@ struct FixedPipelineState {
BitField<18, 1, u32> logic_op_enable;
BitField<19, 4, u32> logic_op;
BitField<23, 1, u32> rasterize_enable;
+ BitField<24, 4, Maxwell::PrimitiveTopology> topology;
+ BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
+ };
+ union {
+ u32 raw2;
+ BitField<0, 3, u32> alpha_test_func;
+ BitField<3, 1, u32> early_z;
};
+
+ u32 alpha_test_ref;
u32 point_size;
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
@@ -198,9 +197,9 @@ struct FixedPipelineState {
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
DynamicState dynamic_state;
- void Fill(const Maxwell& regs, bool has_extended_dynamic_state);
+ void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state);
- std::size_t Hash() const noexcept;
+ size_t Hash() const noexcept;
bool operator==(const FixedPipelineState& rhs) const noexcept;
@@ -208,8 +207,8 @@ struct FixedPipelineState {
return !operator==(rhs);
}
- std::size_t Size() const noexcept {
- const std::size_t total_size = sizeof *this;
+ size_t Size() const noexcept {
+ const size_t total_size = sizeof *this;
return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
}
};
@@ -223,7 +222,7 @@ namespace std {
template <>
struct hash<Vulkan::FixedPipelineState> {
- std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
+ size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
return k.Hash();
}
};
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index f8c77f4fa..19aaf034f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -9,9 +9,9 @@
#include "common/logging/log.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan::MaxwellToVK {
@@ -26,7 +26,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter) {
case Tegra::Texture::TextureFilter::Linear:
return VK_FILTER_LINEAR;
}
- UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
+ UNREACHABLE_MSG("Invalid sampler filter={}", filter);
return {};
}
@@ -43,11 +43,11 @@ VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter
case Tegra::Texture::TextureMipmapFilter::Linear:
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
}
- UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
+ UNREACHABLE_MSG("Invalid sampler mipmap mode={}", mipmap_filter);
return {};
}
-VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
+VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter) {
switch (wrap_mode) {
case Tegra::Texture::WrapMode::Wrap:
@@ -78,9 +78,10 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
case Tegra::Texture::WrapMode::MirrorOnceBorder:
UNIMPLEMENTED();
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", wrap_mode);
+ return {};
}
- UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
- return {};
}
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -102,16 +103,15 @@ VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_
case Tegra::Texture::DepthCompareFunc::Always:
return VK_COMPARE_OP_ALWAYS;
}
- UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
- static_cast<u32>(depth_compare_func));
+ UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", depth_compare_func);
return {};
}
} // namespace Sampler
namespace {
-
-enum : u32 { Attachable = 1, Storage = 2 };
+constexpr u32 Attachable = 1 << 0;
+constexpr u32 Storage = 1 << 1;
struct FormatTuple {
VkFormat format; ///< Vulkan format
@@ -122,7 +122,7 @@ struct FormatTuple {
{VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT
{VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT
{VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM
- {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM
+ {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
{VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
{VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
@@ -163,7 +163,7 @@ struct FormatTuple {
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
{VK_FORMAT_UNDEFINED}, // R16G16_UINT
- {VK_FORMAT_UNDEFINED}, // R16G16_SINT
+ {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
{VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
{VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB
@@ -222,30 +222,36 @@ constexpr bool IsZetaFormat(PixelFormat pixel_format) {
} // Anonymous namespace
-FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) {
- ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples));
-
- auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)];
+FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb,
+ PixelFormat pixel_format) {
+ ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples));
+ FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)];
if (tuple.format == VK_FORMAT_UNDEFINED) {
- UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}",
- static_cast<u32>(pixel_format));
- return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
+ UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", pixel_format);
+ return FormatInfo{VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
}
// Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
- tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
- ? VK_FORMAT_A8B8G8R8_SRGB_PACK32
- : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+ const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
+ if (is_srgb) {
+ tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
+ } else {
+ tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+ tuple.usage |= Storage;
+ }
}
- const bool attachable = tuple.usage & Attachable;
- const bool storage = tuple.usage & Storage;
+ const bool attachable = (tuple.usage & Attachable) != 0;
+ const bool storage = (tuple.usage & Storage) != 0;
- VkFormatFeatureFlags usage;
- if (format_type == FormatType::Buffer) {
+ VkFormatFeatureFlags usage{};
+ switch (format_type) {
+ case FormatType::Buffer:
usage =
VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
- } else {
+ break;
+ case FormatType::Linear:
+ case FormatType::Optimal:
usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT;
if (attachable) {
@@ -255,6 +261,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
if (storage) {
usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
}
+ break;
}
return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
}
@@ -274,11 +281,11 @@ VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
case Tegra::Engines::ShaderType::Compute:
return VK_SHADER_STAGE_COMPUTE_BIT;
}
- UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
+ UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage);
return {};
}
-VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
+VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
Maxwell::PrimitiveTopology topology) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
@@ -298,9 +305,10 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented topology={}", topology);
+ return {};
}
- UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
- return {};
}
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
@@ -325,6 +333,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_UNORM;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::SignedNorm:
@@ -347,6 +357,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_SNORM;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedScaled:
@@ -369,6 +381,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_USCALED;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::SignedScaled:
@@ -391,6 +405,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedInt:
@@ -421,6 +437,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32A32_UINT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_UINT_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::SignedInt:
@@ -451,6 +469,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32A32_SINT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SINT_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::Float:
@@ -471,11 +491,12 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_SFLOAT;
+ default:
+ break;
}
break;
}
- UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
- static_cast<u32>(size));
+ UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", type, size);
return {};
}
@@ -506,24 +527,20 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
case Maxwell::ComparisonOp::AlwaysOld:
return VK_COMPARE_OP_ALWAYS;
}
- UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
+ UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
return {};
}
-VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) {
+VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
switch (index_format) {
case Maxwell::IndexFormat::UnsignedByte:
- if (!device.IsExtIndexTypeUint8Supported()) {
- UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
- return VK_INDEX_TYPE_UINT16;
- }
return VK_INDEX_TYPE_UINT8_EXT;
case Maxwell::IndexFormat::UnsignedShort:
return VK_INDEX_TYPE_UINT16;
case Maxwell::IndexFormat::UnsignedInt:
return VK_INDEX_TYPE_UINT32;
}
- UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
+ UNIMPLEMENTED_MSG("Unimplemented index_format={}", index_format);
return {};
}
@@ -554,7 +571,7 @@ VkStencilOp StencilOp(Maxwell::StencilOp stencil_op) {
case Maxwell::StencilOp::DecrWrapOGL:
return VK_STENCIL_OP_DECREMENT_AND_WRAP;
}
- UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
+ UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil_op);
return {};
}
@@ -576,7 +593,7 @@ VkBlendOp BlendEquation(Maxwell::Blend::Equation equation) {
case Maxwell::Blend::Equation::MaxGL:
return VK_BLEND_OP_MAX;
}
- UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
+ UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation);
return {};
}
@@ -640,7 +657,7 @@ VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA;
}
- UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
+ UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor);
return {};
}
@@ -651,11 +668,11 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face) {
case Maxwell::FrontFace::CounterClockWise:
return VK_FRONT_FACE_COUNTER_CLOCKWISE;
}
- UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
+ UNIMPLEMENTED_MSG("Unimplemented front face={}", front_face);
return {};
}
-VkCullModeFlags CullFace(Maxwell::CullFace cull_face) {
+VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) {
switch (cull_face) {
case Maxwell::CullFace::Front:
return VK_CULL_MODE_FRONT_BIT;
@@ -664,7 +681,7 @@ VkCullModeFlags CullFace(Maxwell::CullFace cull_face) {
case Maxwell::CullFace::FrontAndBack:
return VK_CULL_MODE_FRONT_AND_BACK;
}
- UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
+ UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face);
return {};
}
@@ -684,7 +701,7 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
case Tegra::Texture::SwizzleSource::OneFloat:
return VK_COMPONENT_SWIZZLE_ONE;
}
- UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
+ UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", swizzle);
return {};
}
@@ -707,8 +724,21 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle)
case Maxwell::ViewportSwizzle::NegativeW:
return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV;
}
- UNREACHABLE_MSG("Invalid swizzle={}", static_cast<int>(swizzle));
+ UNREACHABLE_MSG("Invalid swizzle={}", swizzle);
return {};
}
+VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) {
+ switch (reduction) {
+ case Tegra::Texture::SamplerReduction::WeightedAverage:
+ return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
+ case Tegra::Texture::SamplerReduction::Min:
+ return VK_SAMPLER_REDUCTION_MODE_MIN_EXT;
+ case Tegra::Texture::SamplerReduction::Max:
+ return VK_SAMPLER_REDUCTION_MODE_MAX_EXT;
+ }
+ UNREACHABLE_MSG("Invalid sampler mode={}", static_cast<int>(reduction));
+ return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
+}
+
} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7e213452f..e3e06ba38 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -6,10 +6,10 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/surface.h"
#include "video_core/textures/texture.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan::MaxwellToVK {
@@ -22,7 +22,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter);
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
-VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
+VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter);
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
@@ -35,17 +35,25 @@ struct FormatInfo {
bool storage;
};
-FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format);
+/**
+ * Returns format properties supported in the host
+ * @param device Host device
+ * @param format_type Type of image the buffer will use
+ * @param with_srgb True when the format can be sRGB when converted to another format (ASTC)
+ * @param pixel_format Guest pixel format to describe
+ */
+[[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb,
+ PixelFormat pixel_format);
VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
-VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology);
+VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
-VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format);
+VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
@@ -55,10 +63,12 @@ VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor);
VkFrontFace FrontFace(Maxwell::FrontFace front_face);
-VkCullModeFlags CullFace(Maxwell::CullFace cull_face);
+VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face);
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
+VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
+
} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index ae46e0444..1cc720ddd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -12,8 +12,6 @@
#include <fmt/format.h>
-#include "common/dynamic_library.h"
-#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/telemetry.h"
#include "core/core.h"
@@ -24,179 +22,27 @@
#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-// Include these late to avoid polluting previous headers
-#ifdef _WIN32
-#include <windows.h>
-// ensure include order
-#include <vulkan/vulkan_win32.h>
-#endif
-
-#if !defined(_WIN32) && !defined(__APPLE__)
-#include <X11/Xlib.h>
-#include <vulkan/vulkan_wayland.h>
-#include <vulkan/vulkan_xlib.h>
-#endif
+#include "video_core/vulkan_common/vulkan_debug_callback.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_instance.h"
+#include "video_core/vulkan_common/vulkan_library.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_surface.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
namespace {
-
-using Core::Frontend::WindowSystemType;
-
-VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
- VkDebugUtilsMessageTypeFlagsEXT type,
- const VkDebugUtilsMessengerCallbackDataEXT* data,
- [[maybe_unused]] void* user_data) {
- const char* message{data->pMessage};
-
- if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
- LOG_CRITICAL(Render_Vulkan, "{}", message);
- } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
- LOG_WARNING(Render_Vulkan, "{}", message);
- } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
- LOG_INFO(Render_Vulkan, "{}", message);
- } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
- LOG_DEBUG(Render_Vulkan, "{}", message);
- }
- return VK_FALSE;
-}
-
-Common::DynamicLibrary OpenVulkanLibrary() {
- Common::DynamicLibrary library;
-#ifdef __APPLE__
- // Check if a path to a specific Vulkan library has been specified.
- char* libvulkan_env = getenv("LIBVULKAN_PATH");
- if (!libvulkan_env || !library.Open(libvulkan_env)) {
- // Use the libvulkan.dylib from the application bundle.
- const std::string filename =
- Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
- library.Open(filename.c_str());
- }
-#else
- std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
- if (!library.Open(filename.c_str())) {
- // Android devices may not have libvulkan.so.1, only libvulkan.so.
- filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
- library.Open(filename.c_str());
- }
-#endif
- return library;
-}
-
-vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
- WindowSystemType window_type = WindowSystemType::Headless,
- bool enable_layers = false) {
- if (!library.IsOpen()) {
- LOG_ERROR(Render_Vulkan, "Vulkan library not available");
- return {};
- }
- if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) {
- LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
- return {};
- }
- if (!vk::Load(dld)) {
- LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
- return {};
- }
-
- std::vector<const char*> extensions;
- extensions.reserve(6);
- switch (window_type) {
- case Core::Frontend::WindowSystemType::Headless:
- break;
-#ifdef _WIN32
- case Core::Frontend::WindowSystemType::Windows:
- extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
- break;
-#endif
-#if !defined(_WIN32) && !defined(__APPLE__)
- case Core::Frontend::WindowSystemType::X11:
- extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
- break;
- case Core::Frontend::WindowSystemType::Wayland:
- extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
- break;
-#endif
- default:
- LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
- break;
- }
- if (window_type != Core::Frontend::WindowSystemType::Headless) {
- extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
- }
- if (enable_layers) {
- extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
- }
- extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
-
- const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
- if (!properties) {
- LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
- return {};
- }
-
- for (const char* extension : extensions) {
- const auto it =
- std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) {
- return !std::strcmp(extension, prop.extensionName);
- });
- if (it == properties->end()) {
- LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
- return {};
- }
- }
-
- std::vector<const char*> layers;
- layers.reserve(1);
- if (enable_layers) {
- layers.push_back("VK_LAYER_KHRONOS_validation");
- }
-
- const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
- if (!layer_properties) {
- LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
- layers.clear();
- }
-
- for (auto layer_it = layers.begin(); layer_it != layers.end();) {
- const char* const layer = *layer_it;
- const auto it = std::find_if(
- layer_properties->begin(), layer_properties->end(),
- [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); });
- if (it == layer_properties->end()) {
- LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
- layer_it = layers.erase(layer_it);
- } else {
- ++layer_it;
- }
- }
-
- vk::Instance instance = vk::Instance::Create(layers, extensions, dld);
- if (!instance) {
- LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
- return {};
- }
- if (!vk::Load(*instance, dld)) {
- LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
- }
- return instance;
-}
-
std::string GetReadableVersion(u32 version) {
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
VK_VERSION_PATCH(version));
}
-std::string GetDriverVersion(const VKDevice& device) {
+std::string GetDriverVersion(const Device& device) {
// Extracted from
// https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
const u32 version = device.GetDriverVersion();
@@ -213,7 +59,6 @@ std::string GetDriverVersion(const VKDevice& device) {
const u32 minor = version & 0x3fff;
return fmt::format("{}.{}", major, minor);
}
-
return GetReadableVersion(version);
}
@@ -235,213 +80,98 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
return separated_extensions;
}
+Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
+ VkSurfaceKHR surface) {
+ const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
+ const s32 device_index = Settings::values.vulkan_device.GetValue();
+ if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
+ LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ const vk::PhysicalDevice physical_device(devices[device_index], dld);
+ return Device(*instance, physical_device, surface, dld);
+}
} // Anonymous namespace
-RendererVulkan::RendererVulkan(Core::System& system_, Core::Frontend::EmuWindow& emu_window,
- Tegra::GPU& gpu_,
- std::unique_ptr<Core::Frontend::GraphicsContext> context)
- : RendererBase{emu_window, std::move(context)}, system{system_}, gpu{gpu_} {}
+RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
+ Core::Frontend::EmuWindow& emu_window,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
+ : RendererBase(emu_window, std::move(context_)),
+ telemetry_session(telemetry_session_),
+ cpu_memory(cpu_memory_),
+ gpu(gpu_),
+ library(OpenLibrary()),
+ instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
+ true, Settings::values.renderer_debug)),
+ debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
+ surface(CreateSurface(instance, render_window)),
+ device(CreateDevice(instance, dld, *surface)),
+ memory_allocator(device, false),
+ state_tracker(gpu),
+ scheduler(device, state_tracker),
+ swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
+ render_window.GetFramebufferLayout().height, false),
+ blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
+ screen_info),
+ rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
+ memory_allocator, state_tracker, scheduler) {
+ Report();
+} catch (const vk::Exception& exception) {
+ LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
+ throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
+}
RendererVulkan::~RendererVulkan() {
- ShutDown();
+ void(device.GetLogical().WaitIdle());
}
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
- render_window.PollEvents();
-
if (!framebuffer) {
return;
}
-
const auto& layout = render_window.GetFramebufferLayout();
if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
- rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
+ rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
- if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
- swapchain->Create(layout.width, layout.height, is_srgb);
- blit_screen->Recreate();
- }
-
- scheduler->WaitWorker();
-
- swapchain->AcquireNextImage();
- const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated);
-
- scheduler->Flush(false, render_semaphore);
-
- if (swapchain->Present(render_semaphore, fence)) {
- blit_screen->Recreate();
+ if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
+ swapchain.Create(layout.width, layout.height, is_srgb);
+ blit_screen.Recreate();
}
- rasterizer->TickFrame();
- }
-
- render_window.PollEvents();
-}
-
-bool RendererVulkan::TryPresent(int /*timeout_ms*/) {
- // TODO (bunnei): ImplementMe
- return true;
-}
-
-bool RendererVulkan::Init() {
- library = OpenVulkanLibrary();
- instance = CreateInstance(library, dld, render_window.GetWindowInfo().type,
- Settings::values.renderer_debug);
- if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
- return false;
- }
-
- Report();
-
- memory_manager = std::make_unique<VKMemoryManager>(*device);
-
- resource_manager = std::make_unique<VKResourceManager>(*device);
-
- const auto& framebuffer = render_window.GetFramebufferLayout();
- swapchain = std::make_unique<VKSwapchain>(*surface, *device);
- swapchain->Create(framebuffer.width, framebuffer.height, false);
-
- state_tracker = std::make_unique<StateTracker>(system);
-
- scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker);
-
- rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
- *resource_manager, *memory_manager,
- *state_tracker, *scheduler);
-
- blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
- *resource_manager, *memory_manager, *swapchain,
- *scheduler, screen_info);
-
- return true;
-}
-
-void RendererVulkan::ShutDown() {
- if (!device) {
- return;
- }
- if (const auto& dev = device->GetLogical()) {
- dev.WaitIdle();
- }
-
- rasterizer.reset();
- blit_screen.reset();
- scheduler.reset();
- swapchain.reset();
- memory_manager.reset();
- resource_manager.reset();
- device.reset();
-}
+ scheduler.WaitWorker();
-bool RendererVulkan::CreateDebugCallback() {
- if (!Settings::values.renderer_debug) {
- return true;
- }
- debug_callback = instance.TryCreateDebugCallback(DebugCallback);
- if (!debug_callback) {
- LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
- return false;
- }
- return true;
-}
+ swapchain.AcquireNextImage();
+ const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
-bool RendererVulkan::CreateSurface() {
- [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo();
- VkSurfaceKHR unsafe_surface = nullptr;
+ scheduler.Flush(render_semaphore);
-#ifdef _WIN32
- if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
- const HWND hWnd = static_cast<HWND>(window_info.render_surface);
- const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
- nullptr, 0, nullptr, hWnd};
- const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
- dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
- if (!vkCreateWin32SurfaceKHR ||
- vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
- LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
- return false;
+ if (swapchain.Present(render_semaphore)) {
+ blit_screen.Recreate();
}
- }
-#endif
-#if !defined(_WIN32) && !defined(__APPLE__)
- if (window_info.type == Core::Frontend::WindowSystemType::X11) {
- const VkXlibSurfaceCreateInfoKHR xlib_ci{
- VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
- static_cast<Display*>(window_info.display_connection),
- reinterpret_cast<Window>(window_info.render_surface)};
- const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
- dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
- if (!vkCreateXlibSurfaceKHR ||
- vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
- LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
- return false;
- }
- }
- if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
- const VkWaylandSurfaceCreateInfoKHR wayland_ci{
- VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
- static_cast<wl_display*>(window_info.display_connection),
- static_cast<wl_surface*>(window_info.render_surface)};
- const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
- dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
- if (!vkCreateWaylandSurfaceKHR ||
- vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) !=
- VK_SUCCESS) {
- LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
- return false;
- }
- }
-#endif
- if (!unsafe_surface) {
- LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
- return false;
+ rasterizer.TickFrame();
}
- surface = vk::SurfaceKHR(unsafe_surface, *instance, dld);
- return true;
-}
-
-bool RendererVulkan::PickDevices() {
- const auto devices = instance.EnumeratePhysicalDevices();
- if (!devices) {
- LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices");
- return false;
- }
-
- const s32 device_index = Settings::values.vulkan_device.GetValue();
- if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) {
- LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
- return false;
- }
- const vk::PhysicalDevice physical_device((*devices)[static_cast<std::size_t>(device_index)],
- dld);
- if (!VKDevice::IsSuitable(physical_device, *surface)) {
- return false;
- }
-
- device = std::make_unique<VKDevice>(*instance, physical_device, *surface, dld);
- return device->Create();
+ render_window.OnFrameDisplayed();
}
void RendererVulkan::Report() const {
- const std::string vendor_name{device->GetVendorName()};
- const std::string model_name{device->GetModelName()};
- const std::string driver_version = GetDriverVersion(*device);
+ const std::string vendor_name{device.GetVendorName()};
+ const std::string model_name{device.GetModelName()};
+ const std::string driver_version = GetDriverVersion(device);
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
- const std::string api_version = GetReadableVersion(device->GetApiVersion());
+ const std::string api_version = GetReadableVersion(device.ApiVersion());
- const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
+ const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
- auto& telemetry_session = system.TelemetrySession();
- constexpr auto field = Common::Telemetry::FieldType::UserSystem;
+ static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
telemetry_session.AddField(field, "GPU_Model", model_name);
telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
@@ -449,25 +179,4 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
-std::vector<std::string> RendererVulkan::EnumerateDevices() {
- vk::InstanceDispatch dld;
- Common::DynamicLibrary library = OpenVulkanLibrary();
- vk::Instance instance = CreateInstance(library, dld);
- if (!instance) {
- return {};
- }
-
- const std::optional physical_devices = instance.EnumeratePhysicalDevices();
- if (!physical_devices) {
- return {};
- }
-
- std::vector<std::string> names;
- names.reserve(physical_devices->size());
- for (const auto& device : *physical_devices) {
- names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
- }
- return names;
-}
-
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 13debbbc0..72071316c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -9,75 +9,67 @@
#include <vector>
#include "common/dynamic_library.h"
-
#include "video_core/renderer_base.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/renderer_vulkan/vk_blit_screen.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
-class System;
+class TelemetrySession;
}
-namespace Vulkan {
+namespace Core::Memory {
+class Memory;
+}
-class StateTracker;
-class VKBlitScreen;
-class VKDevice;
-class VKFence;
-class VKMemoryManager;
-class VKResourceManager;
-class VKSwapchain;
-class VKScheduler;
-class VKImage;
-
-struct VKScreenInfo {
- VKImage* image{};
- u32 width{};
- u32 height{};
- bool is_srgb{};
-};
+namespace Tegra {
+class GPU;
+}
+
+namespace Vulkan {
class RendererVulkan final : public VideoCore::RendererBase {
public:
- explicit RendererVulkan(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- Tegra::GPU& gpu,
- std::unique_ptr<Core::Frontend::GraphicsContext> context);
+ explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
+ Core::Frontend::EmuWindow& emu_window,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererVulkan() override;
- bool Init() override;
- void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- bool TryPresent(int timeout_ms) override;
- static std::vector<std::string> EnumerateDevices();
+ VideoCore::RasterizerInterface* ReadRasterizer() override {
+ return &rasterizer;
+ }
private:
- bool CreateDebugCallback();
-
- bool CreateSurface();
-
- bool PickDevices();
-
void Report() const;
- Core::System& system;
+ Core::TelemetrySession& telemetry_session;
+ Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
Common::DynamicLibrary library;
vk::InstanceDispatch dld;
vk::Instance instance;
+ vk::DebugUtilsMessenger debug_callback;
vk::SurfaceKHR surface;
VKScreenInfo screen_info;
- vk::DebugCallback debug_callback;
- std::unique_ptr<VKDevice> device;
- std::unique_ptr<VKSwapchain> swapchain;
- std::unique_ptr<VKMemoryManager> memory_manager;
- std::unique_ptr<VKResourceManager> resource_manager;
- std::unique_ptr<StateTracker> state_tracker;
- std::unique_ptr<VKScheduler> scheduler;
- std::unique_ptr<VKBlitScreen> blit_screen;
+ Device device;
+ MemoryAllocator memory_allocator;
+ StateTracker state_tracker;
+ VKScheduler scheduler;
+ VKSwapchain swapchain;
+ VKBlitScreen blit_screen;
+ RasterizerVulkan rasterizer;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/renderer_vulkan/shaders/quad_array.comp
deleted file mode 100644
index 5a5703308..000000000
--- a/src/video_core/renderer_vulkan/shaders/quad_array.comp
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-/*
- * Build instructions:
- * $ glslangValidator -V $THIS_FILE -o output.spv
- * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
- * $ xxd -i optimized.spv
- *
- * Then copy that bytecode to the C++ file
- */
-
-#version 460 core
-
-layout (local_size_x = 1024) in;
-
-layout (std430, set = 0, binding = 0) buffer OutputBuffer {
- uint output_indexes[];
-};
-
-layout (push_constant) uniform PushConstants {
- uint first;
-};
-
-void main() {
- uint primitive = gl_GlobalInvocationID.x;
- if (primitive * 6 >= output_indexes.length()) {
- return;
- }
-
- const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
- for (uint vertex = 0; vertex < 6; ++vertex) {
- uint index = first + primitive * 4 + quad_map[vertex];
- output_indexes[primitive * 6 + vertex] = index;
- }
-}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index a551e3de8..a1a32aabe 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -12,127 +12,28 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
-
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
-
#include "video_core/gpu.h"
-#include "video_core/morton.h"
-#include "video_core/rasterizer_interface.h"
+#include "video_core/host_shaders/vulkan_present_frag_spv.h"
+#include "video_core/host_shaders/vulkan_present_vert_spv.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_image.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
-#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/surface.h"
+#include "video_core/textures/decoders.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
-// Generated from the "shaders/" directory, read the instructions there.
-constexpr u8 blit_vertex_code[] = {
- 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
- 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
- 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
- 0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
- 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
- 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
- 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
- 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
- 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
- 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
- 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
- 0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00,
- 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00,
- 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
- 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
- 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
- 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
- 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
- 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
- 0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
- 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
- 0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
- 0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00,
- 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
- 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
- 0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
- 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
- 0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00,
- 0x38, 0x00, 0x01, 0x00};
-
-constexpr u8 blit_fragment_code[] = {
- 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
- 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
- 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
- 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
- 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
- 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00,
- 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
- 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00,
- 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
- 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
- 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
- 0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
- 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
- 0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
- 0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00,
- 0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
-
struct ScreenRectVertex {
ScreenRectVertex() = default;
explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {}
@@ -175,9 +76,9 @@ constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
// clang-format on
}
-std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
+u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
using namespace VideoCore::Surface;
- return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
+ return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
}
std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
@@ -210,17 +111,14 @@ struct VKBlitScreen::BufferData {
// Unaligned image data goes here
};
-VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
- VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKSwapchain& swapchain, VKScheduler& scheduler,
- const VKScreenInfo& screen_info)
- : system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device},
- resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain},
- scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} {
- watches.resize(image_count);
- std::generate(watches.begin(), watches.end(),
- []() { return std::make_unique<VKFenceWatch>(); });
+VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
+ Core::Frontend::EmuWindow& render_window_, const Device& device_,
+ MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
+ VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
+ : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
+ memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
+ image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
+ resource_ticks.resize(image_count);
CreateStaticResources();
CreateDynamicResources();
@@ -232,44 +130,40 @@ void VKBlitScreen::Recreate() {
CreateDynamicResources();
}
-std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
- bool use_accelerated) {
+VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
const std::size_t image_index = swapchain.GetImageIndex();
- watches[image_index]->Watch(scheduler.GetFence());
- VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get();
+ scheduler.Wait(resource_ticks[image_index]);
+ resource_ticks[image_index] = scheduler.CurrentTick();
- UpdateDescriptorSet(image_index, blit_image->GetPresentView());
+ UpdateDescriptorSet(image_index,
+ use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
BufferData data;
SetUniformData(data, framebuffer);
SetVertexData(data, framebuffer);
- auto map = buffer_commit->Map();
- std::memcpy(map.GetAddress(), &data, sizeof(data));
+ const std::span<u8> mapped_span = buffer_commit.Map();
+ std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
- const auto pixel_format =
- VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
- const auto host_ptr = system.Memory().GetPointer(framebuffer_addr);
- rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
+ const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
+ const size_t size_bytes = GetSizeInBytes(framebuffer);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
- VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
- framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
- map.GetAddress() + image_offset, host_ptr);
-
- blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+ const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
+ Tegra::Texture::UnswizzleTexture(
+ mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
+ bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
const VkBufferImageCopy copy{
.bufferOffset = image_offset,
@@ -291,15 +185,42 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
},
};
scheduler.Record(
- [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
+ [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) {
+ const VkImageMemoryBarrier base_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
+ VkImageMemoryBarrier read_barrier = base_barrier;
+ read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+ read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+
+ VkImageMemoryBarrier write_barrier = base_barrier;
+ write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, read_barrier);
+ cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
});
}
- map.Release();
-
- blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
- VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
-
scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
descriptor_set = descriptor_sets[image_index], buffer = *buffer,
size = swapchain.GetSize(), pipeline = *pipeline,
@@ -307,31 +228,31 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
const VkClearValue clear_color{
.color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
};
-
- VkRenderPassBeginInfo renderpass_bi;
- renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
- renderpass_bi.pNext = nullptr;
- renderpass_bi.renderPass = renderpass;
- renderpass_bi.framebuffer = framebuffer;
- renderpass_bi.renderArea.offset.x = 0;
- renderpass_bi.renderArea.offset.y = 0;
- renderpass_bi.renderArea.extent = size;
- renderpass_bi.clearValueCount = 1;
- renderpass_bi.pClearValues = &clear_color;
-
- VkViewport viewport;
- viewport.x = 0.0f;
- viewport.y = 0.0f;
- viewport.width = static_cast<float>(size.width);
- viewport.height = static_cast<float>(size.height);
- viewport.minDepth = 0.0f;
- viewport.maxDepth = 1.0f;
-
- VkRect2D scissor;
- scissor.offset.x = 0;
- scissor.offset.y = 0;
- scissor.extent = size;
-
+ const VkRenderPassBeginInfo renderpass_bi{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .pNext = nullptr,
+ .renderPass = renderpass,
+ .framebuffer = framebuffer,
+ .renderArea =
+ {
+ .offset = {0, 0},
+ .extent = size,
+ },
+ .clearValueCount = 1,
+ .pClearValues = &clear_color,
+ };
+ const VkViewport viewport{
+ .x = 0.0f,
+ .y = 0.0f,
+ .width = static_cast<float>(size.width),
+ .height = static_cast<float>(size.height),
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ };
+ const VkRect2D scissor{
+ .offset = {0, 0},
+ .extent = size,
+ };
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.SetViewport(0, viewport);
@@ -342,8 +263,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
-
- return {scheduler.GetFence(), *semaphores[image_index]};
+ return *semaphores[image_index];
}
void VKBlitScreen::CreateStaticResources() {
@@ -375,8 +295,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
}
void VKBlitScreen::CreateShaders() {
- vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code);
- fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code);
+ vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
+ fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
}
void VKBlitScreen::CreateSemaphores() {
@@ -423,7 +343,7 @@ void VKBlitScreen::CreateRenderPass() {
const VkAttachmentReference color_attachment_ref{
.attachment = 0,
- .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkSubpassDescription subpass_description{
@@ -711,12 +631,12 @@ void VKBlitScreen::CreateFramebuffers() {
void VKBlitScreen::ReleaseRawImages() {
for (std::size_t i = 0; i < raw_images.size(); ++i) {
- watches[i]->Wait();
+ scheduler.Wait(resource_ticks.at(i));
}
raw_images.clear();
raw_buffer_commits.clear();
buffer.reset();
- buffer_commit.reset();
+ buffer_commit = MemoryCommit{};
}
void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
@@ -733,39 +653,61 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff
};
buffer = device.GetLogical().CreateBuffer(ci);
- buffer_commit = memory_manager.Commit(buffer, true);
+ buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload);
}
void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
raw_images.resize(image_count);
+ raw_image_views.resize(image_count);
raw_buffer_commits.resize(image_count);
- const VkImageCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .imageType = VK_IMAGE_TYPE_2D,
- .format = GetFormat(framebuffer),
- .extent =
- {
- .width = framebuffer.width,
- .height = framebuffer.height,
- .depth = 1,
- },
- .mipLevels = 1,
- .arrayLayers = 1,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .tiling = VK_IMAGE_TILING_LINEAR,
- .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 0,
- .pQueueFamilyIndices = nullptr,
- .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- };
-
- for (std::size_t i = 0; i < image_count; ++i) {
- raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT);
- raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false);
+ for (size_t i = 0; i < image_count; ++i) {
+ raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = GetFormat(framebuffer),
+ .extent =
+ {
+ .width = framebuffer.width,
+ .height = framebuffer.height,
+ .depth = 1,
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_LINEAR,
+ .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ });
+ raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal);
+ raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = *raw_images[i],
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = GetFormat(framebuffer),
+ .components =
+ {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ },
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ });
}
}
@@ -792,7 +734,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
const VkDescriptorImageInfo image_info{
.sampler = *sampler,
.imageView = image_view,
- .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkWriteDescriptorSet sampler_write{
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 243640fab..5e3177685 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -5,16 +5,18 @@
#pragma once
#include <memory>
-#include <tuple>
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class System;
}
+namespace Core::Memory {
+class Memory;
+}
+
namespace Core::Frontend {
class EmuWindow;
}
@@ -30,26 +32,31 @@ class RasterizerInterface;
namespace Vulkan {
struct ScreenInfo;
+
+class Device;
class RasterizerVulkan;
-class VKDevice;
-class VKFence;
-class VKImage;
class VKScheduler;
class VKSwapchain;
-class VKBlitScreen final {
+struct VKScreenInfo {
+ VkImageView image_view{};
+ u32 width{};
+ u32 height{};
+ bool is_srgb{};
+};
+
+class VKBlitScreen {
public:
- explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
- VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKSwapchain& swapchain, VKScheduler& scheduler,
- const VKScreenInfo& screen_info);
+ explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
+ Core::Frontend::EmuWindow& render_window, const Device& device,
+ MemoryAllocator& memory_manager, VKSwapchain& swapchain,
+ VKScheduler& scheduler, const VKScreenInfo& screen_info);
~VKBlitScreen();
void Recreate();
- std::tuple<VKFence&, VkSemaphore> Draw(const Tegra::FramebufferConfig& framebuffer,
- bool use_accelerated);
+ [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
+ bool use_accelerated);
private:
struct BufferData;
@@ -81,12 +88,10 @@ private:
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
- Core::System& system;
+ Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
- VideoCore::RasterizerInterface& rasterizer;
- const VKDevice& device;
- VKResourceManager& resource_manager;
- VKMemoryManager& memory_manager;
+ const Device& device;
+ MemoryAllocator& memory_allocator;
VKSwapchain& swapchain;
VKScheduler& scheduler;
const std::size_t image_count;
@@ -104,13 +109,14 @@ private:
vk::Sampler sampler;
vk::Buffer buffer;
- VKMemoryCommit buffer_commit;
+ MemoryCommit buffer_commit;
- std::vector<std::unique_ptr<VKFenceWatch>> watches;
+ std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> semaphores;
- std::vector<std::unique_ptr<VKImage>> raw_images;
- std::vector<VKMemoryCommit> raw_buffer_commits;
+ std::vector<vk::Image> raw_images;
+ std::vector<vk::ImageView> raw_image_views;
+ std::vector<MemoryCommit> raw_buffer_commits;
u32 raw_width = 0;
u32 raw_height = 0;
};
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1d2f8b557..848eedd66 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -3,172 +3,308 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <array>
#include <cstring>
-#include <memory>
+#include <span>
+#include <vector>
-#include "core/core.h"
#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
namespace {
+VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
+ return VkBufferCopy{
+ .srcOffset = copy.src_offset,
+ .dstOffset = copy.dst_offset,
+ .size = copy.size,
+ };
+}
-constexpr VkBufferUsageFlags BUFFER_USAGE =
- VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-
-constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE =
- VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
- VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-
-constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
- VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
- VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
+VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
+ if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
+ return VK_INDEX_TYPE_UINT8_EXT;
+ }
+ if (num_elements <= 0xffff) {
+ return VK_INDEX_TYPE_UINT16;
+ }
+ return VK_INDEX_TYPE_UINT32;
+}
-std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
- return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE);
+size_t BytesPerIndex(VkIndexType index_type) {
+ switch (index_type) {
+ case VK_INDEX_TYPE_UINT8_EXT:
+ return 1;
+ case VK_INDEX_TYPE_UINT16:
+ return 2;
+ case VK_INDEX_TYPE_UINT32:
+ return 4;
+ default:
+ UNREACHABLE_MSG("Invalid index type={}", index_type);
+ return 1;
+ }
}
+template <typename T>
+std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
+ std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
+ std::ranges::transform(indices, indices.begin(),
+ [quad, first](u32 index) { return first + index + quad * 4; });
+ return indices;
+}
} // Anonymous namespace
-Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
- VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
- : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
- const VkBufferCreateInfo ci{
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
+ buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .size = static_cast<VkDeviceSize>(size),
- .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .size = SizeBytes(),
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- };
+ });
+ if (runtime.device.HasDebuggingToolAttached()) {
+ buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
+ }
+ commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+}
- buffer.handle = device.GetLogical().CreateBuffer(ci);
- buffer.commit = memory_manager.Commit(buffer.handle, false);
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
+ VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ VKDescriptorPool& descriptor_pool)
+ : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+ staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
+ uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
+ quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
+
+StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_pool.Request(size, MemoryUsage::Upload);
}
-Buffer::~Buffer() = default;
+StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_pool.Request(size, MemoryUsage::Download);
+}
-void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
- const auto& staging = staging_pool.GetUnusedBuffer(size, true);
- std::memcpy(staging.commit->Map(size), data, size);
+void BufferCacheRuntime::Finish() {
+ scheduler.Finish();
+}
+void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ static constexpr VkMemoryBarrier READ_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ };
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
+ // Measuring a popular game, this number never exceeds the specified size once data is warmed up
+ boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
+ std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, READ_BARRIER);
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, WRITE_BARRIER);
+ });
+}
- const VkBuffer handle = Handle();
- scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
-
- const VkBufferMemoryBarrier barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = handle,
- .offset = offset,
- .size = size,
- };
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
- barrier, {});
+void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
+ u32 base_vertex, u32 num_indices, VkBuffer buffer,
+ u32 offset, [[maybe_unused]] u32 size) {
+ VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
+ VkDeviceSize vk_offset = offset;
+ VkBuffer vk_buffer = buffer;
+ if (topology == PrimitiveTopology::Quads) {
+ vk_index_type = VK_INDEX_TYPE_UINT32;
+ std::tie(vk_buffer, vk_offset) =
+ quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
+ } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
+ vk_index_type = VK_INDEX_TYPE_UINT16;
+ std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
+ }
+ if (vk_buffer == VK_NULL_HANDLE) {
+ // Vulkan doesn't support null index buffers. Replace it with our own null buffer.
+ ReserveNullIndexBuffer();
+ vk_buffer = *null_index_buffer;
+ }
+ scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
});
}
-void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
- const auto& staging = staging_pool.GetUnusedBuffer(size, true);
- scheduler.RequestOutsideRenderPassOperationContext();
+void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
+ ReserveQuadArrayLUT(first + count, true);
- const VkBuffer handle = Handle();
- scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
- const VkBufferMemoryBarrier barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = handle,
- .offset = offset,
- .size = size,
- };
-
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
- cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
+ // The LUT has the indices 0, 1, 2, and 3 copied as an array
+ // To apply these 'first' offsets we can apply an offset based on the modulus.
+ const VkIndexType index_type = quad_array_lut_index_type;
+ const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
+ const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
+ scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindIndexBuffer(buffer, offset, index_type);
});
- scheduler.Finish();
-
- std::memcpy(data, staging.commit->Map(size), size);
}
-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t size) {
- scheduler.RequestOutsideRenderPassOperationContext();
+void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
+ u32 stride) {
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
+ const VkDeviceSize vk_offset = offset;
+ const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
+ const VkDeviceSize vk_stride = stride;
+ cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
+ });
+ } else {
+ scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffer(index, buffer, offset);
+ });
+ }
+}
- const VkBuffer dst_buffer = Handle();
- scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
- size](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
-
- std::array<VkBufferMemoryBarrier, 2> barriers;
- barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barriers[0].pNext = nullptr;
- barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
- barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[0].buffer = src_buffer;
- barriers[0].offset = src_offset;
- barriers[0].size = size;
- barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barriers[1].pNext = nullptr;
- barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
- barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[1].buffer = dst_buffer;
- barriers[1].offset = dst_offset;
- barriers[1].size = size;
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
- barriers, {});
+void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
+ u32 size) {
+ if (!device.IsExtTransformFeedbackSupported()) {
+ // Already logged in the rasterizer
+ return;
+ }
+ scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
+ const VkDeviceSize vk_offset = offset;
+ const VkDeviceSize vk_size = size;
+ cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
});
}
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
- : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
- CreateStreamBuffer(device,
- scheduler)},
- device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
- staging_pool} {}
+void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
+ update_descriptor_queue.AddBuffer(buffer, offset, size);
+}
+
+void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
+ if (num_indices <= current_num_indices) {
+ return;
+ }
+ if (wait_for_idle) {
+ scheduler.Finish();
+ }
+ current_num_indices = num_indices;
+ quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
-VKBufferCache::~VKBufferCache() = default;
+ const u32 num_quads = num_indices / 4;
+ const u32 num_triangle_indices = num_quads * 6;
+ const u32 num_first_offset_copies = 4;
+ const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
+ const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
+ quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = size_bytes,
+ .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
+ if (device.HasDebuggingToolAttached()) {
+ quad_array_lut.SetObjectNameEXT("Quad LUT");
+ }
+ quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
-std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
- size);
+ const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
+ u8* staging_data = staging.mapped_span.data();
+ const size_t quad_size = bytes_per_index * 6;
+ for (u32 first = 0; first < num_first_offset_copies; ++first) {
+ for (u32 quad = 0; quad < num_quads; ++quad) {
+ switch (quad_array_lut_index_type) {
+ case VK_INDEX_TYPE_UINT8_EXT:
+ std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
+ break;
+ case VK_INDEX_TYPE_UINT16:
+ std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
+ break;
+ case VK_INDEX_TYPE_UINT32:
+ std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
+ break;
+ default:
+ UNREACHABLE();
+ break;
+ }
+ staging_data += quad_size;
+ }
+ }
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+ dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
+ const VkBufferCopy copy{
+ .srcOffset = src_offset,
+ .dstOffset = 0,
+ .size = size_bytes,
+ };
+ const VkBufferMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = dst_buffer,
+ .offset = 0,
+ .size = size_bytes,
+ };
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+ 0, write_barrier);
+ });
}
-VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
- size = std::max(size, std::size_t(4));
- const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+void BufferCacheRuntime::ReserveNullIndexBuffer() {
+ if (null_index_buffer) {
+ return;
+ }
+ null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = 4,
+ .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
+ if (device.HasDebuggingToolAttached()) {
+ null_index_buffer.SetObjectNameEXT("Null index buffer");
+ }
+ null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal);
+
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
- cmdbuf.FillBuffer(buffer, 0, size, 0);
+ scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) {
+ cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
});
- return {*empty.handle, 0, 0};
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 991ee451c..041e6515c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -4,70 +4,124 @@
#pragma once
-#include <memory>
-
-#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Core {
-class System;
-}
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-class VKDevice;
-class VKMemoryManager;
+class Device;
+class VKDescriptorPool;
class VKScheduler;
+class VKUpdateDescriptorQueue;
-class Buffer final : public VideoCommon::BufferBlock {
+class BufferCacheRuntime;
+
+class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
- explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
- VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
- ~Buffer();
+ explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
+ explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_);
+
+ [[nodiscard]] VkBuffer Handle() const noexcept {
+ return *buffer;
+ }
+
+ operator VkBuffer() const noexcept {
+ return *buffer;
+ }
+
+private:
+ vk::Buffer buffer;
+ MemoryCommit commit;
+};
+
+class BufferCacheRuntime {
+ friend Buffer;
+
+ using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
+ using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
+
+public:
+ explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
+ VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ VKDescriptorPool& descriptor_pool);
+
+ void Finish();
- void Upload(std::size_t offset, std::size_t size, const u8* data);
+ [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
- void Download(std::size_t offset, std::size_t size, u8* data);
+ [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
- void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t size);
+ void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
+ std::span<const VideoCommon::BufferCopy> copies);
- VkBuffer Handle() const {
- return *buffer.handle;
+ void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
+ u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
+
+ void BindQuadArrayIndexBuffer(u32 first, u32 count);
+
+ void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
+
+ void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
+
+ std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
+ [[maybe_unused]] u32 binding_index, u32 size) {
+ const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
+ BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
+ return ref.mapped_span;
}
- u64 Address() const {
- return 0;
+ void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
+ BindBuffer(buffer, offset, size);
+ }
+
+ void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
+ [[maybe_unused]] bool is_written) {
+ BindBuffer(buffer, offset, size);
}
private:
+ void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
+
+ void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
+
+ void ReserveNullIndexBuffer();
+
+ const Device& device;
+ MemoryAllocator& memory_allocator;
VKScheduler& scheduler;
- VKStagingBufferPool& staging_pool;
+ StagingBufferPool& staging_pool;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
- VKBuffer buffer;
-};
+ vk::Buffer quad_array_lut;
+ MemoryCommit quad_array_lut_commit;
+ VkIndexType quad_array_lut_index_type{};
+ u32 current_num_indices = 0;
-class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
-public:
- explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
- ~VKBufferCache();
+ vk::Buffer null_index_buffer;
+ MemoryCommit null_index_buffer_commit;
- BufferInfo GetEmptyBuffer(std::size_t size) override;
+ Uint8Pass uint8_pass;
+ QuadIndexedPass quad_index_pass;
+};
-protected:
- std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+struct BufferCacheParams {
+ using Runtime = Vulkan::BufferCacheRuntime;
+ using Buffer = Vulkan::Buffer;
-private:
- const VKDevice& device;
- VKMemoryManager& memory_manager;
- VKScheduler& scheduler;
- VKStagingBufferPool& staging_pool;
+ static constexpr bool IS_OPENGL = false;
+ static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
+ static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
+ static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
+ static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
+ static constexpr bool USE_MEMORY_MAPS = true;
};
+using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp
new file mode 100644
index 000000000..a99df9323
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp
@@ -0,0 +1,46 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstddef>
+
+#include "video_core/renderer_vulkan/vk_command_pool.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+constexpr size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
+
+struct CommandPool::Pool {
+ vk::CommandPool handle;
+ vk::CommandBuffers cmdbufs;
+};
+
+CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& device_)
+ : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {}
+
+CommandPool::~CommandPool() = default;
+
+void CommandPool::Allocate(size_t begin, size_t end) {
+ // Command buffers are going to be commited, recorded, executed every single usage cycle.
+ // They are also going to be reseted when commited.
+ Pool& pool = pools.emplace_back();
+ pool.handle = device.GetLogical().CreateCommandPool({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags =
+ VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = device.GetGraphicsFamily(),
+ });
+ pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
+}
+
+VkCommandBuffer CommandPool::Commit() {
+ const size_t index = CommitResource();
+ const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
+ const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
+ return pools[pool_index].cmdbufs[sub_index];
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h
new file mode 100644
index 000000000..61c26a22a
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_pool.h
@@ -0,0 +1,34 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Device;
+class MasterSemaphore;
+
+class CommandPool final : public ResourcePool {
+public:
+ explicit CommandPool(MasterSemaphore& master_semaphore_, const Device& device_);
+ ~CommandPool() override;
+
+ void Allocate(size_t begin, size_t end) override;
+
+ VkCommandBuffer Commit();
+
+private:
+ struct Pool;
+
+ const Device& device;
+ std::vector<Pool> pools;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 182461ed9..2f9a7b028 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,131 +10,19 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
+#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
namespace {
-
-// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there.
-constexpr u8 quad_array[] = {
- 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
- 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
- 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
- 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
- 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
- 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
- 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
- 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
- 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00,
- 0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
- 0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
- 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
- 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
- 0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
- 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
- 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
- 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
- 0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
- 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00,
- 0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00,
- 0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00,
- 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00,
- 0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
- 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
- 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
- 0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
- 0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
- 0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
- 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
- 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
- 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
- 0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
- 0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00,
- 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
- 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
- 0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
- 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
- 0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
- 0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00,
- 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
- 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
- 0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
- 0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00,
- 0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
- 0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
- 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
- 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
- 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
-
-VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
- return {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- };
-}
-
-VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
- return {
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = 0,
- .stride = sizeof(DescriptorUpdateEntry),
- };
-}
-
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
return {
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -143,206 +31,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
};
}
-// Uint8 SPIR-V module. Generated from the "shaders/" directory.
-constexpr u8 uint8_pass[] = {
- 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
- 0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00,
- 0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74,
- 0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f,
- 0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
- 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c,
- 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
- 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
- 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
- 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
- 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
- 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
- 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
- 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
- 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00,
- 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
- 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
- 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
- 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
- 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
- 0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
- 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00,
- 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
- 0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
- 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
- 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
- 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
- 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
- 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
- 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
- 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
- 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
- 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
- 0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
- 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
- 0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
- 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
- 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
- 0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
- 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00,
- 0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
- 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
- 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
- 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
-
-// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
-constexpr u8 QUAD_INDEXED_SPV[] = {
- 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
- 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
- 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
- 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
- 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
- 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
- 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
- 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
- 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
- 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
- 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
- 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
- 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
- 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
- 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
- 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
- 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
- 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00,
- 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
- 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
- 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
- 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
- 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
- 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
- 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
- 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00,
- 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00,
- 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00,
- 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00,
- 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00,
- 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
- 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
- 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
- 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
- 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
- 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
- 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
- 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
- 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
- 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
- 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
- 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00,
- 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
- 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
- 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00,
- 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
- 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
- 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00,
- 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
- 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
- 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
- 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00,
- 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
- 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
- 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
- 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00,
- 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
- 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
- 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
- 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
- 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
- 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00,
- 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
- 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
- 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
- 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
- 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
- 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
- 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
-
std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
return {{
{
@@ -375,11 +63,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
} // Anonymous namespace
-VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
+VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
- vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
- const u8* code) {
+ vk::Span<VkPushConstantRange> push_constants,
+ std::span<const u32> code) {
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@@ -387,7 +75,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
.bindingCount = bindings.size(),
.pBindings = bindings.data(),
});
-
layout = device.GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@@ -397,7 +84,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
.pushConstantRangeCount = push_constants.size(),
.pPushConstantRanges = push_constants.data(),
});
-
if (!templates.empty()) {
descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
@@ -414,18 +100,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
}
-
- auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
- std::memcpy(code_copy.get(), code, code_size);
-
module = device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .codeSize = code_size,
- .pCode = code_copy.get(),
+ .codeSize = static_cast<u32>(code.size_bytes()),
+ .pCode = code.data(),
});
-
pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
@@ -448,126 +129,70 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
VKComputePass::~VKComputePass() = default;
-VkDescriptorSet VKComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
- VKFence& fence) {
+VkDescriptorSet VKComputePass::CommitDescriptorSet(
+ VKUpdateDescriptorQueue& update_descriptor_queue) {
if (!descriptor_template) {
return nullptr;
}
- const auto set = descriptor_allocator->Commit(fence);
+ const VkDescriptorSet set = descriptor_allocator->Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
-QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKStagingBufferPool& staging_buffer_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue)
- : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(),
- BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
- BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array),
- scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
- update_descriptor_queue{update_descriptor_queue} {}
-
-QuadArrayPass::~QuadArrayPass() = default;
-
-std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
- const u32 num_triangle_vertices = (num_vertices / 4) * 6;
- const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
- auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
-
- update_descriptor_queue.Acquire();
- update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
- const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
-
- scheduler.RequestOutsideRenderPassOperationContext();
-
- ASSERT(num_vertices % 4 == 0);
- const u32 num_quads = num_vertices / 4;
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads,
- first, set](vk::CommandBuffer cmdbuf) {
- constexpr u32 dispatch_size = 1024;
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
- cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
- cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
-
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
- barrier.offset = 0;
- barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
- });
- return {*buffer.handle, 0};
-}
-
-Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue)
+Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
+ VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
- BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass),
- uint8_pass),
- scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
- update_descriptor_queue{update_descriptor_queue} {}
+ BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
+ scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
+ update_descriptor_queue{update_descriptor_queue_} {}
Uint8Pass::~Uint8Pass() = default;
-std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
- u64 src_offset) {
- const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
- auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
+std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
+ u32 src_offset) {
+ const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
+ const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
- update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
- const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+ update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
+ const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
+ scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
num_vertices](vk::CommandBuffer cmdbuf) {
- constexpr u32 dispatch_size = 1024;
+ static constexpr u32 DISPATCH_SIZE = 1024;
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+ };
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
- cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1);
-
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
- barrier.offset = 0;
- barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
+ cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
});
- return {*buffer.handle, 0};
+ return {staging.buffer, staging.offset};
}
-QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKStagingBufferPool& staging_buffer_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue)
- : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
+QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
+ VKDescriptorPool& descriptor_pool_,
+ StagingBufferPool& staging_buffer_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_)
+ : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
BuildInputOutputDescriptorUpdateTemplate(),
- BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV),
- QUAD_INDEXED_SPV),
- scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
- update_descriptor_queue{update_descriptor_queue} {}
+ BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),
+ scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
+ update_descriptor_queue{update_descriptor_queue_} {}
QuadIndexedPass::~QuadIndexedPass() = default;
-std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
+std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
- VkBuffer src_buffer, u64 src_offset) {
+ VkBuffer src_buffer, u32 src_offset) {
const u32 index_shift = [index_format] {
switch (index_format) {
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -584,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
const u32 num_tri_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
- auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
+ const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
- update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
- const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+ update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
+ const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
+ scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
- static constexpr u32 dispatch_size = 1024;
+ static constexpr u32 DISPATCH_SIZE = 1024;
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+ };
const std::array push_constants = {base_vertex, index_shift};
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
&push_constants);
- cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
-
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
- barrier.offset = 0;
- barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
+ cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
});
- return {*buffer.handle, 0};
+ return {staging.buffer, staging.offset};
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 230b526bc..17d781d99 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -5,33 +5,31 @@
#pragma once
#include <optional>
+#include <span>
#include <utility>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-class VKDevice;
-class VKFence;
+class Device;
+class StagingBufferPool;
class VKScheduler;
-class VKStagingBufferPool;
class VKUpdateDescriptorQueue;
class VKComputePass {
public:
- explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
+ explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
- vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
- const u8* code);
+ vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
~VKComputePass();
protected:
- VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
- VKFence& fence);
+ VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
vk::DescriptorUpdateTemplateKHR descriptor_template;
vk::PipelineLayout layout;
@@ -43,52 +41,39 @@ private:
vk::ShaderModule module;
};
-class QuadArrayPass final : public VKComputePass {
-public:
- explicit QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKStagingBufferPool& staging_buffer_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue);
- ~QuadArrayPass();
-
- std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
-
-private:
- VKScheduler& scheduler;
- VKStagingBufferPool& staging_buffer_pool;
- VKUpdateDescriptorQueue& update_descriptor_queue;
-};
-
class Uint8Pass final : public VKComputePass {
public:
- explicit Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue);
+ explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
+ VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
- std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
+ /// Assemble uint8 indices into an uint16 index buffer
+ /// Returns a pair with the staging buffer, and the offset where the assembled data is
+ std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
+ u32 src_offset);
private:
VKScheduler& scheduler;
- VKStagingBufferPool& staging_buffer_pool;
+ StagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
class QuadIndexedPass final : public VKComputePass {
public:
- explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKStagingBufferPool& staging_buffer_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue);
+ explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
+ VKDescriptorPool& descriptor_pool_,
+ StagingBufferPool& staging_buffer_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
- std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
- u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
- u64 src_offset);
+ std::pair<VkBuffer, VkDeviceSize> Assemble(
+ Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
+ u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
private:
VKScheduler& scheduler;
- VKStagingBufferPool& staging_buffer_pool;
+ StagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index ed9d2991c..3a48219b7 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -6,25 +6,25 @@
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- const SPIRVShader& shader)
- : device{device}, scheduler{scheduler}, entries{shader.entries},
+VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
+ VKDescriptorPool& descriptor_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ const SPIRVShader& shader_)
+ : device{device_}, scheduler{scheduler_}, entries{shader_.entries},
descriptor_set_layout{CreateDescriptorSetLayout()},
- descriptor_allocator{descriptor_pool, *descriptor_set_layout},
- update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
+ descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
+ update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate()},
- shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {}
+ shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {}
VKComputePipeline::~VKComputePipeline() = default;
@@ -32,7 +32,7 @@ VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
- const auto set = descriptor_allocator.Commit(scheduler.GetFence());
+ const VkDescriptorSet set = descriptor_allocator.Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 6e2f22a4a..7e16575ac 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -7,20 +7,20 @@
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-class VKDevice;
+class Device;
class VKScheduler;
class VKUpdateDescriptorQueue;
class VKComputePipeline final {
public:
- explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- const SPIRVShader& shader);
+ explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
+ VKDescriptorPool& descriptor_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ const SPIRVShader& shader_);
~VKComputePipeline();
VkDescriptorSet CommitDescriptorSet();
@@ -48,7 +48,7 @@ private:
vk::Pipeline CreatePipeline() const;
- const VKDevice& device;
+ const Device& device;
VKScheduler& scheduler;
ShaderEntries entries;
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index ac4a0884e..ef9fb5910 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -6,23 +6,25 @@
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
constexpr std::size_t SETS_GROW_RATE = 0x20;
-DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
- VkDescriptorSetLayout layout)
- : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
+DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
+ VkDescriptorSetLayout layout_)
+ : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
+ descriptor_pool{descriptor_pool_}, layout{layout_} {}
DescriptorAllocator::~DescriptorAllocator() = default;
-VkDescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
- const std::size_t index = CommitResource(fence);
+VkDescriptorSet DescriptorAllocator::Commit() {
+ const std::size_t index = CommitResource();
return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
}
@@ -30,8 +32,9 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
}
-VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
- : device{device}, active_pool{AllocateNewPool()} {}
+VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler)
+ : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{
+ AllocateNewPool()} {}
VKDescriptorPool::~VKDescriptorPool() = default;
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index 9efa66bef..f892be7be 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -6,21 +6,24 @@
#include <vector>
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
+class Device;
class VKDescriptorPool;
+class VKScheduler;
-class DescriptorAllocator final : public VKFencedPool {
+class DescriptorAllocator final : public ResourcePool {
public:
explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
~DescriptorAllocator() override;
+ DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
DescriptorAllocator(const DescriptorAllocator&) = delete;
- VkDescriptorSet Commit(VKFence& fence);
+ VkDescriptorSet Commit();
protected:
void Allocate(std::size_t begin, std::size_t end) override;
@@ -36,15 +39,19 @@ class VKDescriptorPool final {
friend DescriptorAllocator;
public:
- explicit VKDescriptorPool(const VKDevice& device);
+ explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler);
~VKDescriptorPool();
+ VKDescriptorPool(const VKDescriptorPool&) = delete;
+ VKDescriptorPool& operator=(const VKDescriptorPool&) = delete;
+
private:
vk::DescriptorPool* AllocateNewPool();
vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
- const VKDevice& device;
+ const Device& device;
+ MasterSemaphore& master_semaphore;
std::vector<vk::DescriptorPool> pools;
vk::DescriptorPool* active_pool;
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index d7f65d435..3bec48d14 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -3,23 +3,21 @@
// Refer to the license.txt file included.
#include <memory>
-#include <thread>
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed)
- : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {}
+InnerFence::InnerFence(VKScheduler& scheduler_, u32 payload_, bool is_stubbed_)
+ : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
-InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
- u32 payload, bool is_stubbed)
- : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {}
+InnerFence::InnerFence(VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_)
+ : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
InnerFence::~InnerFence() = default;
@@ -27,63 +25,38 @@ void InnerFence::Queue() {
if (is_stubbed) {
return;
}
- ASSERT(!event);
-
- event = device.GetLogical().CreateNewEvent();
- ticks = scheduler.Ticks();
-
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
- cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
- });
+ // Get the current tick so we can wait for it
+ wait_tick = scheduler.CurrentTick();
+ scheduler.Flush();
}
bool InnerFence::IsSignaled() const {
if (is_stubbed) {
return true;
}
- ASSERT(event);
- return IsEventSignalled();
+ return scheduler.IsFree(wait_tick);
}
void InnerFence::Wait() {
if (is_stubbed) {
return;
}
- ASSERT(event);
-
- if (ticks >= scheduler.Ticks()) {
- scheduler.Flush();
- }
- while (!IsEventSignalled()) {
- std::this_thread::yield();
- }
-}
-
-bool InnerFence::IsEventSignalled() const {
- switch (const VkResult result = event.GetStatus()) {
- case VK_EVENT_SET:
- return true;
- case VK_EVENT_RESET:
- return false;
- default:
- throw vk::Exception(result);
- }
+ scheduler.Wait(wait_tick);
}
-VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
- VKQueryCache& query_cache)
- : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
- device{device}, scheduler{scheduler} {}
+VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ VKQueryCache& query_cache_, const Device& device_,
+ VKScheduler& scheduler_)
+ : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
+ scheduler{scheduler_} {}
Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
- return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
+ return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
}
Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
- return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
+ return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
}
void VKFenceManager::QueueFence(Fence& fence) {
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 043fe7947..2f8322d29 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -8,7 +8,8 @@
#include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class System;
@@ -20,18 +21,14 @@ class RasterizerInterface;
namespace Vulkan {
-class VKBufferCache;
-class VKDevice;
+class Device;
class VKQueryCache;
class VKScheduler;
-class VKTextureCache;
class InnerFence : public VideoCommon::FenceBase {
public:
- explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
- bool is_stubbed);
- explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
- u32 payload, bool is_stubbed);
+ explicit InnerFence(VKScheduler& scheduler_, u32 payload_, bool is_stubbed_);
+ explicit InnerFence(VKScheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_);
~InnerFence();
void Queue();
@@ -41,24 +38,20 @@ public:
void Wait();
private:
- bool IsEventSignalled() const;
-
- const VKDevice& device;
VKScheduler& scheduler;
- vk::Event event;
- u64 ticks = 0;
+ u64 wait_tick = 0;
};
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager =
- VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
+ VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
class VKFenceManager final : public GenericFenceManager {
public:
- explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
- VKQueryCache& query_cache);
+ explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ TextureCache& texture_cache, BufferCache& buffer_cache,
+ VKQueryCache& query_cache, const Device& device,
+ VKScheduler& scheduler);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
@@ -68,7 +61,6 @@ protected:
void WaitFence(Fence& fence) override;
private:
- const VKDevice& device;
VKScheduler& scheduler;
};
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 2e46c6278..fc6dd83eb 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -12,13 +12,12 @@
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
-#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
@@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
};
}
+VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
+ switch (msaa_mode) {
+ case Tegra::Texture::MsaaMode::Msaa1x1:
+ return VK_SAMPLE_COUNT_1_BIT;
+ case Tegra::Texture::MsaaMode::Msaa2x1:
+ case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
+ return VK_SAMPLE_COUNT_2_BIT;
+ case Tegra::Texture::MsaaMode::Msaa2x2:
+ case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
+ case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
+ return VK_SAMPLE_COUNT_4_BIT;
+ case Tegra::Texture::MsaaMode::Msaa4x2:
+ case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
+ case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
+ case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
+ return VK_SAMPLE_COUNT_8_BIT;
+ case Tegra::Texture::MsaaMode::Msaa4x4:
+ return VK_SAMPLE_COUNT_16_BIT;
+ default:
+ UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
+ return VK_SAMPLE_COUNT_1_BIT;
+ }
+}
+
} // Anonymous namespace
-VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- VKRenderPassCache& renderpass_cache,
+VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
+ VKDescriptorPool& descriptor_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
const GraphicsPipelineCacheKey& key,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
- const SPIRVProgram& program)
- : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()},
+ const SPIRVProgram& program, u32 num_color_buffers)
+ : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
- descriptor_allocator{descriptor_pool, *descriptor_set_layout},
- update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
- descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
- program)},
- renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)},
- pipeline{CreatePipeline(cache_key.renderpass_params, program)} {}
+ descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
+ update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
+ descriptor_template{CreateDescriptorUpdateTemplate(program)},
+ modules(CreateShaderModules(program)),
+ pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
@@ -93,7 +114,7 @@ VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
- const auto set = descriptor_allocator.Commit(scheduler.GetFence());
+ const VkDescriptorSet set = descriptor_allocator.Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
@@ -159,10 +180,12 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
+ .codeSize = 0,
+ .pCode = nullptr,
};
- std::vector<vk::ShaderModule> modules;
- modules.reserve(Maxwell::MaxShaderStage);
+ std::vector<vk::ShaderModule> shader_modules;
+ shader_modules.reserve(Maxwell::MaxShaderStage);
for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
const auto& stage = program[i];
if (!stage) {
@@ -173,13 +196,14 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
ci.codeSize = stage->code.size() * sizeof(u32);
ci.pCode = stage->code.data();
- modules.push_back(device.GetLogical().CreateShaderModule(ci));
+ shader_modules.push_back(device.GetLogical().CreateShaderModule(ci));
}
- return modules;
+ return shader_modules;
}
-vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
- const SPIRVProgram& program) const {
+vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
+ VkRenderPass renderpass,
+ u32 num_color_buffers) const {
const auto& state = cache_key.fixed_state;
const auto& viewport_swizzles = state.viewport_swizzles;
@@ -189,11 +213,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
// state is ignored
dynamic.raw1 = 0;
dynamic.raw2 = 0;
- for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) {
- // Enable all vertex bindings
- binding.raw = 0;
- binding.enabled.Assign(1);
- }
+ dynamic.vertex_strides.fill(0);
} else {
dynamic = state.dynamic_state;
}
@@ -201,19 +221,13 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
std::vector<VkVertexInputBindingDescription> vertex_bindings;
std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- const auto& binding = dynamic.vertex_bindings[index];
- if (!binding.enabled) {
- continue;
- }
const bool instanced = state.binding_divisors[index] != 0;
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
-
vertex_bindings.push_back({
.binding = static_cast<u32>(index),
- .stride = binding.stride,
+ .stride = dynamic.vertex_strides[index],
.inputRate = rate,
});
-
if (instanced) {
vertex_binding_divisors.push_back({
.binding = static_cast<u32>(index),
@@ -229,7 +243,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
if (!attribute.enabled) {
continue;
}
- if (input_attributes.find(static_cast<u32>(index)) == input_attributes.end()) {
+ if (!input_attributes.contains(static_cast<u32>(index))) {
// Skip attributes not used by the vertex shaders.
continue;
}
@@ -261,12 +275,12 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
vertex_input_ci.pNext = &input_divisor_ci;
}
- const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology());
+ const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()),
+ .topology = MaxwellToVK::PrimitiveTopology(device, state.topology),
.primitiveRestartEnable = state.primitive_restart_enable != 0 &&
SupportsPrimitiveRestart(input_assembly_topology),
};
@@ -289,8 +303,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
};
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
- std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(),
- UnpackViewportSwizzle);
+ std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
.pNext = nullptr,
@@ -311,8 +324,8 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
.rasterizerDiscardEnable =
static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
.polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode =
- dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE,
+ .cullMode = static_cast<VkCullModeFlags>(
+ dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
.depthBiasEnable = state.depth_bias_enable,
.depthBiasConstantFactor = 0.0f,
@@ -325,7 +338,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ .rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
@@ -351,8 +364,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
};
std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
- const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments);
- for (std::size_t index = 0; index < num_attachments; ++index) {
+ for (std::size_t index = 0; index < num_color_buffers; ++index) {
static constexpr std::array COMPONENT_TABLE{
VK_COLOR_COMPONENT_R_BIT,
VK_COLOR_COMPONENT_G_BIT,
@@ -386,8 +398,9 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
- .attachmentCount = static_cast<u32>(num_attachments),
+ .attachmentCount = num_color_buffers,
.pAttachments = cb_attachments.data(),
+ .blendConstants = {},
};
std::vector dynamic_states{
@@ -400,7 +413,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
static constexpr std::array extended{
VK_DYNAMIC_STATE_CULL_MODE_EXT,
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
- VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT,
VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
@@ -446,8 +458,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
stage_ci.pNext = &subgroup_size_ci;
}
}
-
- const VkGraphicsPipelineCreateInfo ci{
+ return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -467,8 +478,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
.subpass = 0,
.basePipelineHandle = nullptr,
.basePipelineIndex = 0,
- };
- return device.GetLogical().CreateGraphicsPipeline(ci);
+ });
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 58aa35efd..8b6a98fe0 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -8,20 +8,19 @@
#include <optional>
#include <vector>
+#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey {
- RenderPassParams renderpass_params;
- u32 padding;
+ VkRenderPass renderpass;
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
FixedPipelineState fixed_state;
@@ -34,16 +33,15 @@ struct GraphicsPipelineCacheKey {
}
std::size_t Size() const noexcept {
- return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
+ return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
+class Device;
class VKDescriptorPool;
-class VKDevice;
-class VKRenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;
@@ -51,13 +49,12 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt
class VKGraphicsPipeline final {
public:
- explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
+ explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- VKRenderPassCache& renderpass_cache,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
const GraphicsPipelineCacheKey& key,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
- const SPIRVProgram& program);
+ const SPIRVProgram& program, u32 num_color_buffers);
~VKGraphicsPipeline();
VkDescriptorSet CommitDescriptorSet();
@@ -70,10 +67,6 @@ public:
return *layout;
}
- VkRenderPass GetRenderPass() const {
- return renderpass;
- }
-
GraphicsPipelineCacheKey GetCacheKey() const {
return cache_key;
}
@@ -89,10 +82,10 @@ private:
std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
- vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params,
- const SPIRVProgram& program) const;
+ vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
+ u32 num_color_buffers) const;
- const VKDevice& device;
+ const Device& device;
VKScheduler& scheduler;
const GraphicsPipelineCacheKey cache_key;
const u64 hash;
@@ -104,7 +97,6 @@ private:
vk::DescriptorUpdateTemplateKHR descriptor_template;
std::vector<vk::ShaderModule> modules;
- VkRenderPass renderpass;
vk::Pipeline pipeline;
};
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp
deleted file mode 100644
index 1c418ea17..000000000
--- a/src/video_core/renderer_vulkan/vk_image.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <memory>
-#include <vector>
-
-#include "common/assert.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_image.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, const VkImageCreateInfo& image_ci,
- VkImageAspectFlags aspect_mask)
- : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask},
- image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} {
- UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0,
- "Queue family tracking is not implemented");
-
- image = device.GetLogical().CreateImage(image_ci);
-
- const u32 num_ranges = image_num_layers * image_num_levels;
- barriers.resize(num_ranges);
- subrange_states.resize(num_ranges, {{}, image_ci.initialLayout});
-}
-
-VKImage::~VKImage() = default;
-
-void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
- VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
- VkImageLayout new_layout) {
- if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
- return;
- }
-
- std::size_t cursor = 0;
- for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
- for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) {
- const u32 layer = base_layer + layer_it;
- const u32 level = base_level + level_it;
- auto& state = GetSubrangeState(layer, level);
- auto& barrier = barriers[cursor];
- barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = state.access;
- barrier.dstAccessMask = new_access;
- barrier.oldLayout = state.layout;
- barrier.newLayout = new_layout;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.image = *image;
- barrier.subresourceRange.aspectMask = aspect_mask;
- barrier.subresourceRange.baseMipLevel = level;
- barrier.subresourceRange.levelCount = 1;
- barrier.subresourceRange.baseArrayLayer = layer;
- barrier.subresourceRange.layerCount = 1;
- state.access = new_access;
- state.layout = new_layout;
- }
- }
-
- scheduler.RequestOutsideRenderPassOperationContext();
-
- scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) {
- // TODO(Rodrigo): Implement a way to use the latest stage across subresources.
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {},
- vk::Span(barriers.data(), cursor));
- });
-}
-
-bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
- VkAccessFlags new_access, VkImageLayout new_layout) noexcept {
- const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
- base_level == 0 && num_levels == image_num_levels;
- if (!is_full_range) {
- state_diverged = true;
- }
-
- if (!state_diverged) {
- auto& state = GetSubrangeState(0, 0);
- if (state.access != new_access || state.layout != new_layout) {
- return true;
- }
- }
-
- for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
- for (u32 level_it = 0; level_it < num_levels; ++level_it) {
- const u32 layer = base_layer + layer_it;
- const u32 level = base_level + level_it;
- auto& state = GetSubrangeState(layer, level);
- if (state.access != new_access || state.layout != new_layout) {
- return true;
- }
- }
- }
- return false;
-}
-
-void VKImage::CreatePresentView() {
- // Image type has to be 2D to be presented.
- present_view = device.GetLogical().CreateImageView({
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .image = *image,
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = format,
- .components =
- {
- .r = VK_COMPONENT_SWIZZLE_IDENTITY,
- .g = VK_COMPONENT_SWIZZLE_IDENTITY,
- .b = VK_COMPONENT_SWIZZLE_IDENTITY,
- .a = VK_COMPONENT_SWIZZLE_IDENTITY,
- },
- .subresourceRange =
- {
- .aspectMask = aspect_mask,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = 1,
- },
- });
-}
-
-VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
- return subrange_states[static_cast<std::size_t>(layer * image_num_levels) +
- static_cast<std::size_t>(level)];
-}
-
-} // namespace Vulkan \ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h
deleted file mode 100644
index b4d7229e5..000000000
--- a/src/video_core/renderer_vulkan/vk_image.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-class VKDevice;
-class VKScheduler;
-
-class VKImage {
-public:
- explicit VKImage(const VKDevice& device, VKScheduler& scheduler,
- const VkImageCreateInfo& image_ci, VkImageAspectFlags aspect_mask);
- ~VKImage();
-
- /// Records in the passed command buffer an image transition and updates the state of the image.
- void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
- VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
- VkImageLayout new_layout);
-
- /// Returns a view compatible with presentation, the image has to be 2D.
- VkImageView GetPresentView() {
- if (!present_view) {
- CreatePresentView();
- }
- return *present_view;
- }
-
- /// Returns the Vulkan image handler.
- const vk::Image& GetHandle() const {
- return image;
- }
-
- /// Returns the Vulkan format for this image.
- VkFormat GetFormat() const {
- return format;
- }
-
- /// Returns the Vulkan aspect mask.
- VkImageAspectFlags GetAspectMask() const {
- return aspect_mask;
- }
-
-private:
- struct SubrangeState final {
- VkAccessFlags access = 0; ///< Current access bits.
- VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout.
- };
-
- bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
- VkAccessFlags new_access, VkImageLayout new_layout) noexcept;
-
- /// Creates a presentation view.
- void CreatePresentView();
-
- /// Returns the subrange state for a layer and layer.
- SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;
-
- const VKDevice& device; ///< Device handler.
- VKScheduler& scheduler; ///< Device scheduler.
-
- const VkFormat format; ///< Vulkan format.
- const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
- const u32 image_num_layers; ///< Number of layers.
- const u32 image_num_levels; ///< Number of mipmap levels.
-
- vk::Image image; ///< Image handle.
- vk::ImageView present_view; ///< Image view compatible with presentation.
-
- std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers.
- std::vector<SubrangeState> subrange_states; ///< Current subrange state.
-
- bool state_diverged = false; ///< True when subresources mismatch in layout.
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
new file mode 100644
index 000000000..56ec5e380
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -0,0 +1,56 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <atomic>
+#include <chrono>
+
+#include "core/settings.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+using namespace std::chrono_literals;
+
+MasterSemaphore::MasterSemaphore(const Device& device) {
+ static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
+ .initialValue = 0,
+ };
+ static constexpr VkSemaphoreCreateInfo semaphore_ci{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ .pNext = &semaphore_type_ci,
+ .flags = 0,
+ };
+ semaphore = device.GetLogical().CreateSemaphore(semaphore_ci);
+
+ if (!Settings::values.renderer_debug) {
+ return;
+ }
+ // Validation layers have a bug where they fail to track resource usage when using timeline
+ // semaphores and synchronizing with GetSemaphoreCounterValueKHR. To workaround this issue, have
+ // a separate thread waiting for each timeline semaphore value.
+ debug_thread = std::thread([this] {
+ u64 counter = 0;
+ while (!shutdown) {
+ if (semaphore.Wait(counter, 10'000'000)) {
+ ++counter;
+ }
+ }
+ });
+}
+
+MasterSemaphore::~MasterSemaphore() {
+ shutdown = true;
+
+ // This thread might not be started
+ if (debug_thread.joinable()) {
+ debug_thread.join();
+ }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
new file mode 100644
index 000000000..2c7ed654d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -0,0 +1,75 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <thread>
+
+#include "common/common_types.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Device;
+
+class MasterSemaphore {
+public:
+ explicit MasterSemaphore(const Device& device);
+ ~MasterSemaphore();
+
+ /// Returns the current logical tick.
+ [[nodiscard]] u64 CurrentTick() const noexcept {
+ return current_tick.load(std::memory_order_relaxed);
+ }
+
+ /// Returns the last known GPU tick.
+ [[nodiscard]] u64 KnownGpuTick() const noexcept {
+ return gpu_tick.load(std::memory_order_relaxed);
+ }
+
+ /// Returns the timeline semaphore handle.
+ [[nodiscard]] VkSemaphore Handle() const noexcept {
+ return *semaphore;
+ }
+
+ /// Returns true when a tick has been hit by the GPU.
+ [[nodiscard]] bool IsFree(u64 tick) {
+ return gpu_tick.load(std::memory_order_relaxed) >= tick;
+ }
+
+ /// Advance to the logical tick.
+ void NextTick() noexcept {
+ ++current_tick;
+ }
+
+ /// Refresh the known GPU tick
+ void Refresh() {
+ gpu_tick.store(semaphore.GetCounter(), std::memory_order_relaxed);
+ }
+
+ /// Waits for a tick to be hit on the GPU
+ void Wait(u64 tick) {
+ // No need to wait if the GPU is ahead of the tick
+ if (IsFree(tick)) {
+ return;
+ }
+ // Update the GPU tick and try again
+ Refresh();
+ if (IsFree(tick)) {
+ return;
+ }
+ // If none of the above is hit, fallback to a regular wait
+ semaphore.Wait(tick);
+ }
+
+private:
+ vk::Semaphore semaphore; ///< Timeline semaphore.
+ std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
+ std::atomic<u64> current_tick{1}; ///< Current logical tick.
+ std::atomic<bool> shutdown{false}; ///< True when the object is being destroyed.
+ std::thread debug_thread; ///< Debug thread to workaround validation layer bugs.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
deleted file mode 100644
index 24c8960ac..000000000
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ /dev/null
@@ -1,230 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <optional>
-#include <tuple>
-#include <vector>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-namespace {
-
-u64 GetAllocationChunkSize(u64 required_size) {
- static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
- auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
- return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
-}
-
-} // Anonymous namespace
-
-class VKMemoryAllocation final {
-public:
- explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
- VkMemoryPropertyFlags properties, u64 allocation_size, u32 type)
- : device{device}, memory{std::move(memory)}, properties{properties},
- allocation_size{allocation_size}, shifted_type{ShiftType(type)} {}
-
- VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) {
- auto found = TryFindFreeSection(free_iterator, allocation_size,
- static_cast<u64>(commit_size), static_cast<u64>(alignment));
- if (!found) {
- found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
- static_cast<u64>(alignment));
- if (!found) {
- // Signal out of memory, it'll try to do more allocations.
- return nullptr;
- }
- }
- auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
- *found + commit_size);
- commits.push_back(commit.get());
-
- // Last commit's address is highly probable to be free.
- free_iterator = *found + commit_size;
-
- return commit;
- }
-
- void Free(const VKMemoryCommitImpl* commit) {
- ASSERT(commit);
-
- const auto it = std::find(std::begin(commits), std::end(commits), commit);
- if (it == commits.end()) {
- UNREACHABLE_MSG("Freeing unallocated commit!");
- return;
- }
- commits.erase(it);
- }
-
- /// Returns whether this allocation is compatible with the arguments.
- bool IsCompatible(VkMemoryPropertyFlags wanted_properties, u32 type_mask) const {
- return (wanted_properties & properties) && (type_mask & shifted_type) != 0;
- }
-
-private:
- static constexpr u32 ShiftType(u32 type) {
- return 1U << type;
- }
-
- /// A memory allocator, it may return a free region between "start" and "end" with the solicited
- /// requirements.
- std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
- u64 iterator = Common::AlignUp(start, alignment);
- while (iterator + size <= end) {
- const u64 try_left = iterator;
- const u64 try_right = try_left + size;
-
- bool overlap = false;
- for (const auto& commit : commits) {
- const auto [commit_left, commit_right] = commit->interval;
- if (try_left < commit_right && commit_left < try_right) {
- // There's an overlap, continue the search where the overlapping commit ends.
- iterator = Common::AlignUp(commit_right, alignment);
- overlap = true;
- break;
- }
- }
- if (!overlap) {
- // A free address has been found.
- return try_left;
- }
- }
-
- // No free regions where found, return an empty optional.
- return std::nullopt;
- }
-
- const VKDevice& device; ///< Vulkan device.
- const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
- const VkMemoryPropertyFlags properties; ///< Vulkan properties.
- const u64 allocation_size; ///< Size of this allocation.
- const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
-
- /// Hints where the next free region is likely going to be.
- u64 free_iterator{};
-
- /// Stores all commits done from this allocation.
- std::vector<const VKMemoryCommitImpl*> commits;
-};
-
-VKMemoryManager::VKMemoryManager(const VKDevice& device)
- : device{device}, properties{device.GetPhysical().GetMemoryProperties()} {}
-
-VKMemoryManager::~VKMemoryManager() = default;
-
-VKMemoryCommit VKMemoryManager::Commit(const VkMemoryRequirements& requirements,
- bool host_visible) {
- const u64 chunk_size = GetAllocationChunkSize(requirements.size);
-
- // When a host visible commit is asked, search for host visible and coherent, otherwise search
- // for a fast device local type.
- const VkMemoryPropertyFlags wanted_properties =
- host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
- : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
-
- if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
- return commit;
- }
-
- // Commit has failed, allocate more memory.
- if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
- // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
- // Allocation has failed, panic.
- UNREACHABLE_MSG("Ran out of VRAM!");
- return {};
- }
-
- // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
- // there's a bug.
- auto commit = TryAllocCommit(requirements, wanted_properties);
- ASSERT(commit);
- return commit;
-}
-
-VKMemoryCommit VKMemoryManager::Commit(const vk::Buffer& buffer, bool host_visible) {
- auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), host_visible);
- buffer.BindMemory(commit->GetMemory(), commit->GetOffset());
- return commit;
-}
-
-VKMemoryCommit VKMemoryManager::Commit(const vk::Image& image, bool host_visible) {
- auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), host_visible);
- image.BindMemory(commit->GetMemory(), commit->GetOffset());
- return commit;
-}
-
-bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask,
- u64 size) {
- const u32 type = [&] {
- for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
- const auto flags = properties.memoryTypes[type_index].propertyFlags;
- if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
- // The type matches in type and in the wanted properties.
- return type_index;
- }
- }
- UNREACHABLE_MSG("Couldn't find a compatible memory type!");
- return 0U;
- }();
-
- // Try to allocate found type.
- vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = nullptr,
- .allocationSize = size,
- .memoryTypeIndex = type,
- });
- if (!memory) {
- LOG_CRITICAL(Render_Vulkan, "Device allocation failed!");
- return false;
- }
-
- allocations.push_back(std::make_unique<VKMemoryAllocation>(device, std::move(memory),
- wanted_properties, size, type));
- return true;
-}
-
-VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requirements,
- VkMemoryPropertyFlags wanted_properties) {
- for (auto& allocation : allocations) {
- if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
- continue;
- }
- if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
- return commit;
- }
- }
- return {};
-}
-
-VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
- const vk::DeviceMemory& memory, u64 begin, u64 end)
- : device{device}, memory{memory}, interval{begin, end}, allocation{allocation} {}
-
-VKMemoryCommitImpl::~VKMemoryCommitImpl() {
- allocation->Free(this);
-}
-
-MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
- return MemoryMap{this, memory.Map(interval.first + offset_, size)};
-}
-
-void VKMemoryCommitImpl::Unmap() const {
- memory.Unmap();
-}
-
-MemoryMap VKMemoryCommitImpl::Map() const {
- return Map(interval.second - interval.first);
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
deleted file mode 100644
index 1af88e3d4..000000000
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <utility>
-#include <vector>
-#include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-class MemoryMap;
-class VKDevice;
-class VKMemoryAllocation;
-class VKMemoryCommitImpl;
-
-using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
-
-class VKMemoryManager final {
-public:
- explicit VKMemoryManager(const VKDevice& device);
- VKMemoryManager(const VKMemoryManager&) = delete;
- ~VKMemoryManager();
-
- /**
- * Commits a memory with the specified requeriments.
- * @param requirements Requirements returned from a Vulkan call.
- * @param host_visible Signals the allocator that it *must* use host visible and coherent
- * memory. When passing false, it will try to allocate device local memory.
- * @returns A memory commit.
- */
- VKMemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible);
-
- /// Commits memory required by the buffer and binds it.
- VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible);
-
- /// Commits memory required by the image and binds it.
- VKMemoryCommit Commit(const vk::Image& image, bool host_visible);
-
-private:
- /// Allocates a chunk of memory.
- bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
-
- /// Tries to allocate a memory commit.
- VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements,
- VkMemoryPropertyFlags wanted_properties);
-
- const VKDevice& device; ///< Device handler.
- const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
- std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
-};
-
-class VKMemoryCommitImpl final {
- friend VKMemoryAllocation;
- friend MemoryMap;
-
-public:
- explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
- const vk::DeviceMemory& memory, u64 begin, u64 end);
- ~VKMemoryCommitImpl();
-
- /// Maps a memory region and returns a pointer to it.
- /// It's illegal to have more than one memory map at the same time.
- MemoryMap Map(u64 size, u64 offset = 0) const;
-
- /// Maps the whole commit and returns a pointer to it.
- /// It's illegal to have more than one memory map at the same time.
- MemoryMap Map() const;
-
- /// Returns the Vulkan memory handler.
- VkDeviceMemory GetMemory() const {
- return *memory;
- }
-
- /// Returns the start position of the commit relative to the allocation.
- VkDeviceSize GetOffset() const {
- return static_cast<VkDeviceSize>(interval.first);
- }
-
-private:
- /// Unmaps memory.
- void Unmap() const;
-
- const VKDevice& device; ///< Vulkan device.
- const vk::DeviceMemory& memory; ///< Vulkan device memory handler.
- std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
- VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
-};
-
-/// Holds ownership of a memory map.
-class MemoryMap final {
-public:
- explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
- : commit{commit}, address{address} {}
-
- ~MemoryMap() {
- if (commit) {
- commit->Unmap();
- }
- }
-
- /// Prematurely releases the memory map.
- void Release() {
- commit->Unmap();
- commit = nullptr;
- }
-
- /// Returns the address of the memory map.
- u8* GetAddress() const {
- return address;
- }
-
- /// Returns the address of the memory map;
- operator u8*() const {
- return address;
- }
-
-private:
- const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
- u8* address{}; ///< Address to the mapped memory.
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index cfdcdd6ab..8991505ca 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -7,6 +7,8 @@
#include <memory>
#include <vector>
+#include "common/bit_cast.h"
+#include "common/cityhash.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
@@ -17,18 +19,17 @@
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
-#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader_cache.h"
#include "video_core/shader_notify.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
@@ -51,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
- VideoCommon::Shader::CompileDepth::FullDecompile};
+ .depth = VideoCommon::Shader::CompileDepth::FullDecompile,
+ .disable_else_derivation = true,
+};
constexpr std::size_t GetStageFromProgram(std::size_t program) {
return program == 0 ? 0 : program - 1;
@@ -74,7 +77,7 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) {
case Maxwell::ShaderProgram::Fragment:
return ShaderType::Fragment;
default:
- UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
+ UNIMPLEMENTED_MSG("program={}", program);
return ShaderType::Vertex;
}
}
@@ -135,64 +138,54 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
return std::memcmp(&rhs, this, sizeof *this) == 0;
}
-Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
- VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
- : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
- registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
- compiler_settings, registry},
- entries{GenerateShaderEntries(shader_ir)} {}
+Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_,
+ GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
+ : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
+ shader_ir(program_code, main_offset_, compiler_settings, registry),
+ entries(GenerateShaderEntries(shader_ir)) {}
Shader::~Shader() = default;
-Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
- Tegra::Engines::ShaderType stage) {
- if (stage == ShaderType::Compute) {
- return system.GPU().KeplerCompute();
- } else {
- return system.GPU().Maxwell3D();
- }
-}
-
-VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- VKRenderPassCache& renderpass_cache)
- : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
- scheduler{scheduler}, descriptor_pool{descriptor_pool},
- update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
+VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_)
+ : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
+ scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
+ update_descriptor_queue_} {}
VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
- const auto& gpu = system.GPU().Maxwell3D();
-
std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
+
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue;
}
- auto& memory_manager{system.GPU().MemoryManager()};
- const GPUVAddr program_addr{GetShaderAddress(system, program)};
- const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+ const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
if (!result) {
- const auto host_ptr{memory_manager.GetPointer(program_addr)};
+ const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
// No shader found - create a new one
- constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
+ static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
- ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+ ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
- auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
- stage_offset);
+ auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
+ std::move(code), stage_offset);
result = shader.get();
if (cpu_addr) {
@@ -207,7 +200,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
}
VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
- const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
+ const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
+ VideoCommon::Shader::AsyncShaders& async_shaders) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) {
@@ -215,16 +209,16 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
}
last_graphics_key = key;
- if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) {
+ if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
std::unique_lock lock{pipeline_cache};
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
if (is_cache_miss) {
- system.GPU().ShaderNotify().MarkSharderBuilding();
+ gpu.ShaderNotify().MarkSharderBuilding();
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state);
async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
- update_descriptor_queue, renderpass_cache, bindings,
- program, key);
+ update_descriptor_queue, bindings, program, key,
+ num_color_buffers);
}
last_graphics_pipeline = pair->second.get();
return last_graphics_pipeline;
@@ -233,13 +227,13 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second;
if (is_cache_miss) {
- system.GPU().ShaderNotify().MarkSharderBuilding();
+ gpu.ShaderNotify().MarkSharderBuilding();
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
- update_descriptor_queue, renderpass_cache, key,
- bindings, program);
- system.GPU().ShaderNotify().MarkShaderComplete();
+ update_descriptor_queue, key, bindings,
+ program, num_color_buffers);
+ gpu.ShaderNotify().MarkShaderComplete();
}
last_graphics_pipeline = entry.get();
return last_graphics_pipeline;
@@ -255,22 +249,21 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
}
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
- auto& memory_manager = system.GPU().MemoryManager();
- const auto program_addr = key.shader;
+ const GPUVAddr gpu_addr = key.shader;
- const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
if (!shader) {
// No shader found - create a new one
- const auto host_ptr = memory_manager.GetPointer(program_addr);
+ const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
- ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
+ ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
- auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
- std::move(code), KERNEL_MAIN_OFFSET);
+ auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
+ *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
shader = shader_info.get();
if (cpu_addr) {
@@ -298,7 +291,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
}
void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
- system.GPU().ShaderNotify().MarkShaderComplete();
+ gpu.ShaderNotify().MarkShaderComplete();
std::unique_lock lock{pipeline_cache};
graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
}
@@ -339,12 +332,8 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
- auto& memory_manager = system.GPU().MemoryManager();
- const auto& gpu = system.GPU().Maxwell3D();
-
Specialization specialization;
- if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points ||
- device.IsExtExtendedDynamicStateSupported()) {
+ if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
float point_size;
std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
specialization.point_size = point_size;
@@ -356,20 +345,24 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
specialization.attribute_types[i] = attribute.Type();
}
specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
+ specialization.early_fragment_tests = fixed_state.early_z;
+
+ // Alpha test
+ specialization.alpha_test_func =
+ FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
+ specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
SPIRVProgram program;
std::vector<VkDescriptorSetLayoutBinding> bindings;
- for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) {
const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
-
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue;
}
-
- const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
- const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+ const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
@@ -377,12 +370,8 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
const auto& entries = shader->GetEntries();
program[stage] = {
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
- entries};
-
- if (program_enum == Maxwell::ShaderProgram::VertexA) {
- // VertexB was combined with VertexA, so we skip the VertexB iteration
- ++index;
- }
+ entries,
+ };
const u32 old_binding = specialization.base_binding;
specialization.base_binding =
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index c04829e77..89d635a3d 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -19,14 +19,13 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
-#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class System;
@@ -34,11 +33,10 @@ class System;
namespace Vulkan {
+class Device;
class RasterizerVulkan;
class VKComputePipeline;
class VKDescriptorPool;
-class VKDevice;
-class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;
@@ -85,8 +83,9 @@ namespace Vulkan {
class Shader {
public:
- explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
- VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
+ explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_,
+ Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_,
+ VideoCommon::Shader::ProgramCode program_code, u32 main_offset_);
~Shader();
GPUVAddr GetGpuAddr() const {
@@ -97,22 +96,19 @@ public:
return shader_ir;
}
- const VideoCommon::Shader::Registry& GetRegistry() const {
- return registry;
- }
-
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
+ const VideoCommon::Shader::Registry& GetRegistry() const {
+ return registry;
+ }
+
const ShaderEntries& GetEntries() const {
return entries;
}
private:
- static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
- Tegra::Engines::ShaderType stage);
-
GPUVAddr gpu_addr{};
VideoCommon::Shader::ProgramCode program_code;
VideoCommon::Shader::Registry registry;
@@ -122,16 +118,18 @@ private:
class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
public:
- explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- VKRenderPassCache& renderpass_cache);
+ explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, const Device& device,
+ VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue);
~VKPipelineCache() override;
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
+ u32 num_color_buffers,
VideoCommon::Shader::AsyncShaders& async_shaders);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
@@ -145,12 +143,15 @@ private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
const FixedPipelineState& fixed_state);
- Core::System& system;
- const VKDevice& device;
+ Tegra::GPU& gpu;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+
+ const Device& device;
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
- VKRenderPassCache& renderpass_cache;
std::unique_ptr<Shader> null_shader;
std::unique_ptr<Shader> null_kernel;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 6cd63d090..7cadd5147 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -7,37 +7,35 @@
#include <utility>
#include <vector>
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
+using VideoCore::QueryType;
+
namespace {
constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION};
-constexpr VkQueryType GetTarget(VideoCore::QueryType type) {
+constexpr VkQueryType GetTarget(QueryType type) {
return QUERY_TARGETS[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
-QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
+QueryPool::QueryPool(const Device& device_, VKScheduler& scheduler, QueryType type_)
+ : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
QueryPool::~QueryPool() = default;
-void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
- device = &device_;
- type = type_;
-}
-
-std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) {
+std::pair<VkQueryPool, u32> QueryPool::Commit() {
std::size_t index;
do {
- index = CommitResource(fence);
+ index = CommitResource();
} while (usage[index]);
usage[index] = true;
@@ -47,7 +45,7 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) {
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
usage.resize(end);
- pools.push_back(device->GetLogical().CreateQueryPool({
+ pools.push_back(device.GetLogical().CreateQueryPool({
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -68,32 +66,39 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
}
-VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKScheduler& scheduler)
- : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
- QueryPool>{system, rasterizer},
- device{device}, scheduler{scheduler} {
- for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
- query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));
+VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
+ const Device& device_, VKScheduler& scheduler_)
+ : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
+ query_pools{
+ QueryPool{device_, scheduler_, QueryType::SamplesPassed},
+ } {}
+
+VKQueryCache::~VKQueryCache() {
+ // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class
+ // destructor is called. The query cache should be redesigned to have a proper ownership model
+ // instead of using shared pointers.
+ for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) {
+ auto& stream = Stream(static_cast<QueryType>(query_type));
+ stream.Update(false);
+ stream.Reset();
}
}
-VKQueryCache::~VKQueryCache() = default;
-
-std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
- return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence());
+std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(QueryType type) {
+ return query_pools[static_cast<std::size_t>(type)].Commit();
}
-void VKQueryCache::Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query) {
+void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
query_pools[static_cast<std::size_t>(type)].Reserve(query);
}
-HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
- VideoCore::QueryType type)
- : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
- type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
- const vk::Device* logical = &cache.Device().GetLogical();
- cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
+HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
+ QueryType type_)
+ : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_},
+ query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
+ const vk::Device* logical = &cache.GetDevice().GetLogical();
+ cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
logical->ResetQueryPoolEXT(query.first, query.second, 1);
cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
});
@@ -104,26 +109,28 @@ HostCounter::~HostCounter() {
}
void HostCounter::EndQuery() {
- cache.Scheduler().Record(
+ cache.GetScheduler().Record(
[query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
}
u64 HostCounter::BlockingQuery() const {
- if (ticks >= cache.Scheduler().Ticks()) {
- cache.Scheduler().Flush();
+ if (tick >= cache.GetScheduler().CurrentTick()) {
+ cache.GetScheduler().Flush();
}
+
u64 data;
- const VkResult result = cache.Device().GetLogical().GetQueryResults(
+ const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
query.first, query.second, 1, sizeof(data), &data, sizeof(data),
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
- switch (result) {
+
+ switch (query_result) {
case VK_SUCCESS:
return data;
case VK_ERROR_DEVICE_LOST:
- cache.Device().ReportLoss();
+ cache.GetDevice().ReportLoss();
[[fallthrough]];
default:
- throw vk::Exception(result);
+ throw vk::Exception(query_result);
}
}
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 40119e6d3..7190946b9 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -11,8 +11,8 @@
#include "common/common_types.h"
#include "video_core/query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace VideoCore {
class RasterizerInterface;
@@ -21,21 +21,19 @@ class RasterizerInterface;
namespace Vulkan {
class CachedQuery;
+class Device;
class HostCounter;
-class VKDevice;
class VKQueryCache;
class VKScheduler;
using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
-class QueryPool final : public VKFencedPool {
+class QueryPool final : public ResourcePool {
public:
- explicit QueryPool();
+ explicit QueryPool(const Device& device, VKScheduler& scheduler, VideoCore::QueryType type);
~QueryPool() override;
- void Initialize(const VKDevice& device, VideoCore::QueryType type);
-
- std::pair<VkQueryPool, u32> Commit(VKFence& fence);
+ std::pair<VkQueryPool, u32> Commit();
void Reserve(std::pair<VkQueryPool, u32> query);
@@ -45,42 +43,43 @@ protected:
private:
static constexpr std::size_t GROW_STEP = 512;
- const VKDevice* device = nullptr;
- VideoCore::QueryType type = {};
+ const Device& device;
+ const VideoCore::QueryType type;
std::vector<vk::QueryPool> pools;
std::vector<bool> usage;
};
class VKQueryCache final
- : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
- QueryPool> {
+ : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKScheduler& scheduler);
+ explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
+ const Device& device_, VKScheduler& scheduler_);
~VKQueryCache();
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query);
- const VKDevice& Device() const noexcept {
+ const Device& GetDevice() const noexcept {
return device;
}
- VKScheduler& Scheduler() const noexcept {
+ VKScheduler& GetScheduler() const noexcept {
return scheduler;
}
private:
- const VKDevice& device;
+ const Device& device;
VKScheduler& scheduler;
+ std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
public:
- explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
- VideoCore::QueryType type);
+ explicit HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
+ VideoCore::QueryType type_);
~HostCounter();
void EndQuery();
@@ -91,13 +90,13 @@ private:
VKQueryCache& cache;
const VideoCore::QueryType type;
const std::pair<VkQueryPool, u32> query;
- const u64 ticks;
+ const u64 tick;
};
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
public:
- explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr)
- : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {}
+ explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_)
+ : CachedQueryBase{cpu_addr_, host_ptr_} {}
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index ff1b52eab..394a1c4e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -8,8 +8,6 @@
#include <mutex>
#include <vector>
-#include <boost/container/static_vector.hpp>
-
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
@@ -19,54 +17,54 @@
#include "core/settings.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
-#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader_cache.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using VideoCommon::ImageViewId;
+using VideoCommon::ImageViewType;
MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
namespace {
+struct DrawParams {
+ u32 base_instance;
+ u32 num_instances;
+ u32 base_vertex;
+ u32 num_vertices;
+ bool is_indexed;
+};
-constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute);
+constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
-VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
+VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
const auto& src = regs.viewport_transform[index];
const float width = src.scale_x * 2.0f;
const float height = src.scale_y * 2.0f;
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
-
VkViewport viewport{
.x = src.translate_x - src.scale_x,
.y = src.translate_y - src.scale_y,
@@ -75,16 +73,14 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si
.minDepth = src.translate_z - src.scale_z * reduce_z,
.maxDepth = src.translate_z + src.scale_z,
};
-
if (!device.IsExtDepthRangeUnrestrictedSupported()) {
viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
}
-
return viewport;
}
-VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
+VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
const auto& src = regs.scissor_test[index];
VkRect2D scissor;
if (src.enable) {
@@ -104,305 +100,157 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
- for (std::size_t i = 0; i < std::size(addresses); ++i) {
+ for (size_t i = 0; i < std::size(addresses); ++i) {
addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
}
return addresses;
}
-void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage,
- VkAccessFlags access) {
- for (auto& [view, layout] : views) {
- view->Transition(*layout, pipeline_stage, access);
+struct TextureHandle {
+ constexpr TextureHandle(u32 data, bool via_header_index) {
+ const Tegra::Texture::TextureHandle handle{data};
+ image = handle.tic_id;
+ sampler = via_header_index ? image : handle.tsc_id.Value();
}
-}
+
+ u32 image;
+ u32 sampler;
+};
template <typename Engine, typename Entry>
-Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
- std::size_t stage, std::size_t index = 0) {
- const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
+TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
+ size_t stage, size_t index = 0) {
+ const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
if (entry.is_separated) {
const u32 buffer_1 = entry.buffer;
const u32 buffer_2 = entry.secondary_buffer;
const u32 offset_1 = entry.offset;
const u32 offset_2 = entry.secondary_offset;
- const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
- const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
- return engine.GetTextureInfo(handle_1 | handle_2);
+ const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
+ const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
+ return TextureHandle(handle_1 | handle_2, via_header_index);
}
}
if (entry.is_bindless) {
- const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
- return engine.GetTextureInfo(tex_handle);
- }
- const auto& gpu_profile = engine.AccessGuestDriverProfile();
- const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
- const u32 offset = entry.offset + entry_offset;
- if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
- return engine.GetStageTexture(stage_type, offset);
- } else {
- return engine.GetTexture(offset);
- }
-}
-
-/// @brief Determine if an attachment to be updated has to preserve contents
-/// @param is_clear True when a clear is being executed
-/// @param regs 3D registers
-/// @return True when the contents have to be preserved
-bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
- if (!is_clear) {
- return true;
- }
- // First we have to make sure all clear masks are enabled.
- if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
- !regs.clear_buffers.A) {
- return true;
- }
- // If scissors are disabled, the whole screen is cleared
- if (!regs.clear_flags.scissor) {
- return false;
- }
- // Then we have to confirm scissor testing clears the whole image
- const std::size_t index = regs.clear_buffers.RT;
- const auto& scissor = regs.scissor_test[0];
- return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
- scissor.max_y < regs.rt[index].height;
-}
-
-/// @brief Determine if an attachment to be updated has to preserve contents
-/// @param is_clear True when a clear is being executed
-/// @param regs 3D registers
-/// @return True when the contents have to be preserved
-bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
- // If we are not clearing, the contents have to be preserved
- if (!is_clear) {
- return true;
- }
- // For depth stencil clears we only have to confirm scissor test covers the whole image
- if (!regs.clear_flags.scissor) {
- return false;
+ const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
+ return TextureHandle(raw, via_header_index);
+ }
+ const u32 buffer = engine.GetBoundBuffer();
+ const u64 offset = (entry.offset + index) * sizeof(u32);
+ return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
+}
+
+ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
+ if (entry.is_buffer) {
+ return ImageViewType::e2D;
+ }
+ switch (entry.type) {
+ case Tegra::Shader::TextureType::Texture1D:
+ return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
+ case Tegra::Shader::TextureType::Texture2D:
+ return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
+ case Tegra::Shader::TextureType::Texture3D:
+ return ImageViewType::e3D;
+ case Tegra::Shader::TextureType::TextureCube:
+ return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
+ }
+ UNREACHABLE();
+ return ImageViewType::e2D;
+}
+
+ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
+ switch (entry.type) {
+ case Tegra::Shader::ImageType::Texture1D:
+ return ImageViewType::e1D;
+ case Tegra::Shader::ImageType::Texture1DArray:
+ return ImageViewType::e1DArray;
+ case Tegra::Shader::ImageType::Texture2D:
+ return ImageViewType::e2D;
+ case Tegra::Shader::ImageType::Texture2DArray:
+ return ImageViewType::e2DArray;
+ case Tegra::Shader::ImageType::Texture3D:
+ return ImageViewType::e3D;
+ case Tegra::Shader::ImageType::TextureBuffer:
+ return ImageViewType::Buffer;
+ }
+ UNREACHABLE();
+ return ImageViewType::e2D;
+}
+
+void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
+ for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
+ const ImageViewId image_view_id = *image_view_id_ptr++;
+ const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+ update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
}
- // Make sure the clear cover the whole image
- const auto& scissor = regs.scissor_test[0];
- return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
- scissor.max_y < regs.zeta_height;
-}
-
-template <std::size_t N>
-std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
- std::array<VkDeviceSize, N> expanded;
- std::copy(strides.begin(), strides.end(), expanded.begin());
- return expanded;
-}
-
-} // Anonymous namespace
-
-class BufferBindings final {
-public:
- void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) {
- vertex.buffers[vertex.num_buffers] = buffer;
- vertex.offsets[vertex.num_buffers] = offset;
- vertex.sizes[vertex.num_buffers] = size;
- vertex.strides[vertex.num_buffers] = static_cast<u16>(stride);
- ++vertex.num_buffers;
- }
-
- void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) {
- index.buffer = buffer;
- index.offset = offset;
- index.type = type;
- }
-
- void Bind(const VKDevice& device, VKScheduler& scheduler) const {
- // Use this large switch case to avoid dispatching more memory in the record lambda than
- // what we need. It looks horrible, but it's the best we can do on standard C++.
- switch (vertex.num_buffers) {
- case 0:
- return BindStatic<0>(device, scheduler);
- case 1:
- return BindStatic<1>(device, scheduler);
- case 2:
- return BindStatic<2>(device, scheduler);
- case 3:
- return BindStatic<3>(device, scheduler);
- case 4:
- return BindStatic<4>(device, scheduler);
- case 5:
- return BindStatic<5>(device, scheduler);
- case 6:
- return BindStatic<6>(device, scheduler);
- case 7:
- return BindStatic<7>(device, scheduler);
- case 8:
- return BindStatic<8>(device, scheduler);
- case 9:
- return BindStatic<9>(device, scheduler);
- case 10:
- return BindStatic<10>(device, scheduler);
- case 11:
- return BindStatic<11>(device, scheduler);
- case 12:
- return BindStatic<12>(device, scheduler);
- case 13:
- return BindStatic<13>(device, scheduler);
- case 14:
- return BindStatic<14>(device, scheduler);
- case 15:
- return BindStatic<15>(device, scheduler);
- case 16:
- return BindStatic<16>(device, scheduler);
- case 17:
- return BindStatic<17>(device, scheduler);
- case 18:
- return BindStatic<18>(device, scheduler);
- case 19:
- return BindStatic<19>(device, scheduler);
- case 20:
- return BindStatic<20>(device, scheduler);
- case 21:
- return BindStatic<21>(device, scheduler);
- case 22:
- return BindStatic<22>(device, scheduler);
- case 23:
- return BindStatic<23>(device, scheduler);
- case 24:
- return BindStatic<24>(device, scheduler);
- case 25:
- return BindStatic<25>(device, scheduler);
- case 26:
- return BindStatic<26>(device, scheduler);
- case 27:
- return BindStatic<27>(device, scheduler);
- case 28:
- return BindStatic<28>(device, scheduler);
- case 29:
- return BindStatic<29>(device, scheduler);
- case 30:
- return BindStatic<30>(device, scheduler);
- case 31:
- return BindStatic<31>(device, scheduler);
- case 32:
- return BindStatic<32>(device, scheduler);
- }
- UNREACHABLE();
- }
-
-private:
- // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
- struct {
- std::size_t num_buffers = 0;
- std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
- std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
- std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
- std::array<u16, Maxwell::NumVertexArrays> strides;
- } vertex;
-
- struct {
- VkBuffer buffer = nullptr;
- VkDeviceSize offset;
- VkIndexType type;
- } index;
-
- template <std::size_t N>
- void BindStatic(const VKDevice& device, VKScheduler& scheduler) const {
- if (device.IsExtExtendedDynamicStateSupported()) {
- if (index.buffer) {
- BindStatic<N, true, true>(scheduler);
- } else {
- BindStatic<N, false, true>(scheduler);
- }
- } else {
- if (index.buffer) {
- BindStatic<N, true, false>(scheduler);
- } else {
- BindStatic<N, false, false>(scheduler);
- }
+ for (const auto& entry : entries.samplers) {
+ for (size_t i = 0; i < entry.size; ++i) {
+ const VkSampler sampler = *sampler_ptr++;
+ const ImageViewId image_view_id = *image_view_id_ptr++;
+ const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+ const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
+ update_descriptor_queue.AddSampledImage(handle, sampler);
}
}
-
- template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state>
- void BindStatic(VKScheduler& scheduler) const {
- static_assert(N <= Maxwell::NumVertexArrays);
- if constexpr (N == 0) {
- return;
- }
-
- std::array<VkBuffer, N> buffers;
- std::array<VkDeviceSize, N> offsets;
- std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
- std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
-
- if constexpr (has_extended_dynamic_state) {
- // With extended dynamic states we can specify the length and stride of a vertex buffer
- std::array<VkDeviceSize, N> sizes;
- std::array<u16, N> strides;
- std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
- std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
-
- if constexpr (is_indexed) {
- scheduler.Record(
- [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
- cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
- offsets.data(), sizes.data(),
- ExpandStrides(strides).data());
- });
- } else {
- scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
- offsets.data(), sizes.data(),
- ExpandStrides(strides).data());
- });
- }
- return;
- }
-
- if constexpr (is_indexed) {
- // Indexed draw
- scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
- cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
- });
- } else {
- // Array draw
- scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
- });
- }
+ for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
+ const ImageViewId image_view_id = *image_view_id_ptr++;
+ const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+ update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
}
-};
-
-void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
- if (is_indexed) {
- cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
- } else {
- cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
+ for (const auto& entry : entries.images) {
+ // TODO: Mark as modified
+ const ImageViewId image_view_id = *image_view_id_ptr++;
+ const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+ const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
+ update_descriptor_queue.AddImage(handle);
+ }
+}
+
+DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
+ bool is_indexed) {
+ DrawParams params{
+ .base_instance = regs.vb_base_instance,
+ .num_instances = is_instanced ? num_instances : 1,
+ .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
+ .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
+ .is_indexed = is_indexed,
+ };
+ if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
+ // 6 triangle vertices per quad, base vertex is part of the index
+ // See BindQuadArrayIndexBuffer for more details
+ params.num_vertices = (params.num_vertices / 4) * 6;
+ params.base_vertex = 0;
+ params.is_indexed = true;
}
+ return params;
}
+} // Anonymous namespace
-RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
- VKScreenInfo& screen_info, const VKDevice& device,
- VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, StateTracker& state_tracker,
- VKScheduler& scheduler)
- : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
- screen_info{screen_info}, device{device}, resource_manager{resource_manager},
- memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
- staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
- update_descriptor_queue(device, scheduler), renderpass_cache(device),
- quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
- staging_pool),
- pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
- renderpass_cache),
- buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
- sampler_cache(device),
- fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
- query_cache(system, *this, device, scheduler),
- wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
+RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+ Tegra::MemoryManager& gpu_memory_,
+ Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_,
+ const Device& device_, MemoryAllocator& memory_allocator_,
+ StateTracker& state_tracker_, VKScheduler& scheduler_)
+ : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
+ gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
+ screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
+ state_tracker{state_tracker_}, scheduler{scheduler_},
+ staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
+ update_descriptor_queue(device, scheduler),
+ blit_image(device, scheduler, state_tracker, descriptor_pool),
+ texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
+ texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+ buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
+ update_descriptor_queue, descriptor_pool),
+ buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
+ pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
+ descriptor_pool, update_descriptor_queue),
+ query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
+ wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
scheduler.SetQueryCache(query_cache);
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
@@ -414,64 +262,57 @@ RasterizerVulkan::~RasterizerVulkan() = default;
void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
+ SCOPE_EXIT({ gpu.TickWork(); });
FlushWork();
query_cache.UpdateCounters();
- SCOPE_EXIT({ system.GPU().TickWork(); });
-
- const auto& gpu = system.GPU().Maxwell3D();
- GraphicsPipelineCacheKey key;
- key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
-
- buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
+ graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
- BufferBindings buffer_bindings;
- const DrawParameters draw_params =
- SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- update_descriptor_queue.Acquire();
- sampled_views.clear();
- image_views.clear();
+ texture_cache.SynchronizeGraphicsDescriptors();
+ texture_cache.UpdateRenderTargets(false);
const auto shaders = pipeline_cache.GetShaders();
- key.shaders = GetShaderAddresses(shaders);
- SetupShaderDescriptors(shaders);
-
- buffer_cache.Unmap();
+ graphics_key.shaders = GetShaderAddresses(shaders);
- const Texceptions texceptions = UpdateAttachments(false);
- SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
+ graphics_key.shaders = GetShaderAddresses(shaders);
+ SetupShaderDescriptors(shaders, is_indexed);
- key.renderpass_params = GetRenderPassParams(texceptions);
- key.padding = 0;
+ const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
+ graphics_key.renderpass = framebuffer->RenderPass();
- auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+ VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
+ graphics_key, framebuffer->NumColorBuffers(), async_shaders);
if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
// Async graphics pipeline was not ready.
return;
}
- scheduler.BindGraphicsPipeline(pipeline->GetHandle());
-
- const auto renderpass = pipeline->GetRenderPass();
- const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
- scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
+ BeginTransformFeedback();
+ scheduler.RequestRenderpass(framebuffer);
+ scheduler.BindGraphicsPipeline(pipeline->GetHandle());
UpdateDynamicStates();
- buffer_bindings.Bind(device, scheduler);
-
- BeginTransformFeedback();
-
- const auto pipeline_layout = pipeline->GetLayout();
- const auto descriptor_set = pipeline->CommitDescriptorSet();
+ const auto& regs = maxwell3d.regs;
+ const u32 num_instances = maxwell3d.mme_draw.instance_count;
+ const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
+ const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
+ const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
if (descriptor_set) {
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
- DESCRIPTOR_SET, descriptor_set, {});
+ DESCRIPTOR_SET, descriptor_set, nullptr);
+ }
+ if (draw_params.is_indexed) {
+ cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
+ draw_params.base_vertex, draw_params.base_instance);
+ } else {
+ cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
+ draw_params.base_vertex, draw_params.base_instance);
}
- draw_params.Draw(cmdbuf);
});
EndTransformFeedback();
@@ -480,17 +321,13 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
- const auto& gpu = system.GPU().Maxwell3D();
- if (!system.GPU().Maxwell3D().ShouldExecute()) {
+ if (!maxwell3d.ShouldExecute()) {
return;
}
- sampled_views.clear();
- image_views.clear();
-
query_cache.UpdateCounters();
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
const bool use_depth = regs.clear_buffers.Z;
@@ -499,20 +336,24 @@ void RasterizerVulkan::Clear() {
return;
}
- [[maybe_unused]] const auto texceptions = UpdateAttachments(true);
- DEBUG_ASSERT(texceptions.none());
- SetupImageTransitions(0, color_attachments, zeta_attachment);
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UpdateRenderTargets(true);
+ const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
+ const VkExtent2D render_area = framebuffer->RenderArea();
+ scheduler.RequestRenderpass(framebuffer);
- const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0));
- const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
- scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
-
- VkClearRect clear_rect;
- clear_rect.baseArrayLayer = regs.clear_buffers.layer;
- clear_rect.layerCount = 1;
- clear_rect.rect = GetScissorState(regs, 0);
- clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width);
- clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height);
+ VkClearRect clear_rect{
+ .rect = GetScissorState(regs, 0),
+ .baseArrayLayer = regs.clear_buffers.layer,
+ .layerCount = 1,
+ };
+ if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
+ return;
+ }
+ clear_rect.rect.extent = VkExtent2D{
+ .width = std::min(clear_rect.rect.extent.width, render_area.width),
+ .height = std::min(clear_rect.rect.extent.height, render_area.height),
+ };
if (use_color) {
VkClearValue clear_value;
@@ -539,7 +380,6 @@ void RasterizerVulkan::Clear() {
if (use_stencil) {
aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
-
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
VkClearAttachment attachment;
@@ -553,51 +393,69 @@ void RasterizerVulkan::Clear() {
void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
MICROPROFILE_SCOPE(Vulkan_Compute);
- update_descriptor_queue.Acquire();
- sampled_views.clear();
- image_views.clear();
query_cache.UpdateCounters();
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
auto& pipeline = pipeline_cache.GetComputePipeline({
.shader = code_addr,
.shared_memory_size = launch_desc.shared_alloc,
- .workgroup_size =
- {
- launch_desc.block_dim_x,
- launch_desc.block_dim_y,
- launch_desc.block_dim_z,
- },
+ .workgroup_size{
+ launch_desc.block_dim_x,
+ launch_desc.block_dim_y,
+ launch_desc.block_dim_z,
+ },
});
// Compute dispatches can't be executed inside a renderpass
scheduler.RequestOutsideRenderPassOperationContext();
- buffer_cache.Map(CalculateComputeStreamBufferSize());
+ image_view_indices.clear();
+ sampler_handles.clear();
+
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
const auto& entries = pipeline.GetEntries();
- SetupComputeConstBuffers(entries);
- SetupComputeGlobalBuffers(entries);
+ buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
+ buffer_cache.UnbindComputeStorageBuffers();
+ u32 ssbo_index = 0;
+ for (const auto& buffer : entries.global_buffers) {
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
+ buffer.is_written);
+ ++ssbo_index;
+ }
+ buffer_cache.UpdateComputeBuffers();
+
+ texture_cache.SynchronizeComputeDescriptors();
+
SetupComputeUniformTexels(entries);
SetupComputeTextures(entries);
SetupComputeStorageTexels(entries);
SetupComputeImages(entries);
- buffer_cache.Unmap();
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+ update_descriptor_queue.Acquire();
- TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_ACCESS_SHADER_READ_BIT);
- TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
+ buffer_cache.BindHostComputeBuffers();
+ ImageViewId* image_view_id_ptr = image_view_ids.data();
+ VkSampler* sampler_ptr = sampler_handles.data();
+ PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
+ sampler_ptr);
+
+ const VkPipeline pipeline_handle = pipeline.GetHandle();
+ const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
+ const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
- grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
- layout = pipeline.GetLayout(),
- descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) {
+ grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
+ descriptor_set](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET,
- descriptor_set, {});
+ if (descriptor_set) {
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
+ DESCRIPTOR_SET, descriptor_set, nullptr);
+ }
cmdbuf.Dispatch(grid_x, grid_y, grid_z);
});
}
@@ -611,31 +469,50 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
query_cache.Query(gpu_addr, type, timestamp);
}
+void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) {
+ buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
+}
+
void RasterizerVulkan::FlushAll() {}
void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.FlushRegion(addr, size);
- buffer_cache.FlushRegion(addr, size);
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.DownloadMemory(addr, size);
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.DownloadMemory(addr, size);
+ }
query_cache.FlushRegion(addr, size);
}
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
+ std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
- return buffer_cache.MustFlushRegion(addr, size);
+ return buffer_cache.IsRegionGpuModified(addr, size);
}
- return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+ return texture_cache.IsRegionGpuModified(addr, size) ||
+ buffer_cache.IsRegionGpuModified(addr, size);
}
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.InvalidateRegion(addr, size);
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.WriteMemory(addr, size);
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
pipeline_cache.InvalidateRegion(addr, size);
- buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}
@@ -643,28 +520,46 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- texture_cache.OnCPUWrite(addr, size);
pipeline_cache.OnCPUWrite(addr, size);
- buffer_cache.OnCPUWrite(addr, size);
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.WriteMemory(addr, size);
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.CachedWriteMemory(addr, size);
+ }
}
void RasterizerVulkan::SyncGuestHost() {
- texture_cache.SyncGuestHost();
- buffer_cache.SyncGuestHost();
pipeline_cache.SyncGuestHost();
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.FlushCachedWrites();
+ }
+}
+
+void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UnmapMemory(addr, size);
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
+ pipeline_cache.OnCPUWrite(addr, size);
}
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
- gpu.MemoryManager().Write<u32>(addr, value);
+ gpu_memory.Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;
@@ -673,7 +568,6 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
}
void RasterizerVulkan::ReleaseFences() {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
return;
}
@@ -707,6 +601,14 @@ void RasterizerVulkan::WaitForIdle() {
});
}
+void RasterizerVulkan::FragmentBarrier() {
+ // We already put barriers when a render pass finishes
+}
+
+void RasterizerVulkan::TiledCacheBarrier() {
+ // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend
+}
+
void RasterizerVulkan::FlushCommands() {
if (draw_counter > 0) {
draw_counter = 0;
@@ -717,14 +619,23 @@ void RasterizerVulkan::FlushCommands() {
void RasterizerVulkan::TickFrame() {
draw_counter = 0;
update_descriptor_queue.TickFrame();
- buffer_cache.TickFrame();
+ fence_manager.TickFrame();
staging_pool.TickFrame();
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.TickFrame();
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.TickFrame();
+ }
}
-bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
- texture_cache.DoFermiCopy(src, dst, copy_config);
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.BlitImage(dst, src, copy_config);
return true;
}
@@ -733,28 +644,18 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
if (!framebuffer_addr) {
return false;
}
-
- const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
- if (!surface) {
+ std::scoped_lock lock{texture_cache.mutex};
+ ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
+ if (!image_view) {
return false;
}
-
- // Verify that the cached surface is the same size and format as the requested framebuffer
- const auto& params{surface->GetSurfaceParams()};
- ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
- ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
-
- screen_info.image = &surface->GetImage();
- screen_info.width = params.width;
- screen_info.height = params.height;
- screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion;
+ screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
+ screen_info.width = image_view->size.width;
+ screen_info.height = image_view->size.height;
+ screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}
-void RasterizerVulkan::SetupDirtyFlags() {
- state_tracker.Initialize();
-}
-
void RasterizerVulkan::FlushWork() {
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
@@ -776,178 +677,54 @@ void RasterizerVulkan::FlushWork() {
draw_counter = 0;
}
-RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
- MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- auto& maxwell3d = system.GPU().Maxwell3D();
- auto& dirty = maxwell3d.dirty.flags;
- auto& regs = maxwell3d.regs;
-
- const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
- dirty[VideoCommon::Dirty::RenderTargets] = false;
-
- texture_cache.GuardRenderTargets(true);
-
- Texceptions texceptions;
- for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
- if (update_rendertargets) {
- const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
- color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
- }
- if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
- texceptions[rt] = true;
- }
- }
-
- if (update_rendertargets) {
- const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
- zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
- }
- if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
- texceptions[ZETA_TEXCEPTION_INDEX] = true;
- }
-
- texture_cache.GuardRenderTargets(false);
-
- return texceptions;
-}
-
-bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) {
- bool overlap = false;
- for (auto& [view, layout] : sampled_views) {
- if (!attachment.IsSameSurface(*view)) {
- continue;
- }
- overlap = true;
- *layout = VK_IMAGE_LAYOUT_GENERAL;
- }
- return overlap;
-}
-
-std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
- VkRenderPass renderpass) {
- FramebufferCacheKey key{
- .renderpass = renderpass,
- .width = std::numeric_limits<u32>::max(),
- .height = std::numeric_limits<u32>::max(),
- .layers = std::numeric_limits<u32>::max(),
- .views = {},
- };
-
- const auto try_push = [&key](const View& view) {
- if (!view) {
- return false;
- }
- key.views.push_back(view->GetAttachment());
- key.width = std::min(key.width, view->GetWidth());
- key.height = std::min(key.height, view->GetHeight());
- key.layers = std::min(key.layers, view->GetNumLayers());
- return true;
- };
-
- const auto& regs = system.GPU().Maxwell3D().regs;
- const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
- for (std::size_t index = 0; index < num_attachments; ++index) {
- if (try_push(color_attachments[index])) {
- texture_cache.MarkColorBufferInUse(index);
- }
- }
- if (try_push(zeta_attachment)) {
- texture_cache.MarkDepthBufferInUse();
- }
-
- const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
- auto& framebuffer = fbentry->second;
- if (is_cache_miss) {
- framebuffer = device.GetLogical().CreateFramebuffer({
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .renderPass = key.renderpass,
- .attachmentCount = static_cast<u32>(key.views.size()),
- .pAttachments = key.views.data(),
- .width = key.width,
- .height = key.height,
- .layers = key.layers,
- });
- }
-
- return {*framebuffer, VkExtent2D{key.width, key.height}};
-}
-
-RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
- BufferBindings& buffer_bindings,
- bool is_indexed,
- bool is_instanced) {
- MICROPROFILE_SCOPE(Vulkan_Geometry);
-
- const auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
-
- SetupVertexArrays(buffer_bindings);
-
- const u32 base_instance = regs.vb_base_instance;
- const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1;
- const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
- const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
-
- DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
- SetupIndexBuffer(buffer_bindings, params, is_indexed);
-
- return params;
-}
-
void RasterizerVulkan::SetupShaderDescriptors(
- const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
- texture_cache.GuardSamplers(true);
-
- for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- // Skip VertexA stage
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
+ image_view_indices.clear();
+ sampler_handles.clear();
+ for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
Shader* const shader = shaders[stage + 1];
if (!shader) {
continue;
}
- const auto& entries = shader->GetEntries();
- SetupGraphicsConstBuffers(entries, stage);
- SetupGraphicsGlobalBuffers(entries, stage);
+ const ShaderEntries& entries = shader->GetEntries();
SetupGraphicsUniformTexels(entries, stage);
SetupGraphicsTextures(entries, stage);
SetupGraphicsStorageTexels(entries, stage);
SetupGraphicsImages(entries, stage);
+
+ buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ u32 ssbo_index = 0;
+ for (const auto& buffer : entries.global_buffers) {
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
+ buffer.cbuf_offset, buffer.is_written);
+ ++ssbo_index;
+ }
}
- texture_cache.GuardSamplers(false);
-}
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
+ texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
-void RasterizerVulkan::SetupImageTransitions(
- Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
- const View& zeta_attachment) {
- TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT);
- TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
- VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
+ update_descriptor_queue.Acquire();
- for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) {
- const auto color_attachment = color_attachments[rt];
- if (color_attachment == nullptr) {
+ ImageViewId* image_view_id_ptr = image_view_ids.data();
+ VkSampler* sampler_ptr = sampler_handles.data();
+ for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+ // Skip VertexA stage
+ Shader* const shader = shaders[stage + 1];
+ if (!shader) {
continue;
}
- const auto image_layout =
- texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
- }
-
- if (zeta_attachment != nullptr) {
- const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX]
- ? VK_IMAGE_LAYOUT_GENERAL
- : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
- zeta_attachment->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
+ buffer_cache.BindHostStageBuffers(stage);
+ PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
+ image_view_id_ptr, sampler_ptr);
}
}
void RasterizerVulkan::UpdateDynamicStates() {
- auto& regs = system.GPU().Maxwell3D().regs;
+ auto& regs = maxwell3d.regs;
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
@@ -961,14 +738,13 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthWriteEnable(regs);
UpdateDepthCompareOp(regs);
UpdateFrontFace(regs);
- UpdatePrimitiveTopology(regs);
UpdateStencilOp(regs);
UpdateStencilTestEnable(regs);
}
}
void RasterizerVulkan::BeginTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -976,322 +752,108 @@ void RasterizerVulkan::BeginTransformFeedback() {
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
return;
}
-
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
-
- UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
- UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
- UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
-
- const auto& binding = regs.tfb_bindings[0];
- UNIMPLEMENTED_IF(binding.buffer_enable == 0);
- UNIMPLEMENTED_IF(binding.buffer_offset != 0);
-
- const GPUVAddr gpu_addr = binding.Address();
- const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
- const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
-
- scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
- cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
- });
+ scheduler.Record(
+ [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
void RasterizerVulkan::EndTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
if (!device.IsExtTransformFeedbackSupported()) {
return;
}
-
scheduler.Record(
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
-void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- const auto& vertex_array = regs.vertex_array[index];
- if (!vertex_array.IsEnabled()) {
- continue;
- }
- const GPUVAddr start{vertex_array.StartAddress()};
- const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
-
- ASSERT(end >= start);
- const std::size_t size = end - start;
- if (size == 0) {
- buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
- continue;
- }
- const auto info = buffer_cache.UploadMemory(start, size);
- buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
- }
-}
-
-void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
- bool is_indexed) {
- if (params.num_vertices == 0) {
- return;
- }
- const auto& regs = system.GPU().Maxwell3D().regs;
- switch (regs.draw.topology) {
- case Maxwell::PrimitiveTopology::Quads: {
- if (!params.is_indexed) {
- const auto [buffer, offset] =
- quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
- buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
- params.base_vertex = 0;
- params.num_vertices = params.num_vertices * 6 / 4;
- params.is_indexed = true;
- break;
- }
- const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
- VkBuffer buffer = info.handle;
- u64 offset = info.offset;
- std::tie(buffer, offset) = quad_indexed_pass.Assemble(
- regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
-
- buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
- params.num_vertices = (params.num_vertices / 4) * 6;
- params.base_vertex = 0;
- break;
- }
- default: {
- if (!is_indexed) {
- break;
- }
- const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
- VkBuffer buffer = info.handle;
- u64 offset = info.offset;
-
- auto format = regs.index_array.format;
- const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
- if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
- std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
- format = Maxwell::IndexFormat::UnsignedShort;
- }
-
- buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
- break;
- }
- }
-}
-
-void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& gpu = system.GPU().Maxwell3D();
- const auto& shader_stage = gpu.state.shader_stages[stage];
- for (const auto& entry : entries.const_buffers) {
- SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
- }
-}
-
-void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- auto& gpu{system.GPU()};
- const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
-
- for (const auto& entry : entries.global_buffers) {
- const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
- SetupGlobalBuffer(entry, addr);
- }
-}
-
-void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
+void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
+ const auto& regs = maxwell3d.regs;
+ const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.uniform_texels) {
- const auto image = GetTextureInfo(gpu, entry, stage).tic;
- SetupUniformTexels(image, entry);
+ const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
+ image_view_indices.push_back(handle.image);
}
}
-void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
+void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
+ const auto& regs = maxwell3d.regs;
+ const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.samplers) {
- for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(gpu, entry, stage, i);
- SetupTexture(texture, entry);
+ for (size_t index = 0; index < entry.size; ++index) {
+ const TextureHandle handle =
+ GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
+ image_view_indices.push_back(handle.image);
+
+ Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
+ sampler_handles.push_back(sampler->Handle());
}
}
}
-void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
+void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
+ const auto& regs = maxwell3d.regs;
+ const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.storage_texels) {
- const auto image = GetTextureInfo(gpu, entry, stage).tic;
- SetupStorageTexel(image, entry);
+ const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
+ image_view_indices.push_back(handle.image);
}
}
-void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Images);
- const auto& gpu = system.GPU().Maxwell3D();
+void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
+ const auto& regs = maxwell3d.regs;
+ const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.images) {
- const auto tic = GetTextureInfo(gpu, entry, stage).tic;
- SetupImage(tic, entry);
- }
-}
-
-void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
- for (const auto& entry : entries.const_buffers) {
- const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
- const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
- Tegra::Engines::ConstBufferInfo buffer;
- buffer.address = config.Address();
- buffer.size = config.size;
- buffer.enabled = mask[entry.GetIndex()];
- SetupConstBuffer(entry, buffer);
- }
-}
-
-void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config};
- for (const auto& entry : entries.global_buffers) {
- const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
- SetupGlobalBuffer(entry, addr);
+ const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
+ image_view_indices.push_back(handle.image);
}
}
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
+ const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.uniform_texels) {
- const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
- SetupUniformTexels(image, entry);
+ const TextureHandle handle =
+ GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
+ image_view_indices.push_back(handle.image);
}
}
void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
+ const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.samplers) {
- for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i);
- SetupTexture(texture, entry);
+ for (size_t index = 0; index < entry.size; ++index) {
+ const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
+ COMPUTE_SHADER_INDEX, index);
+ image_view_indices.push_back(handle.image);
+
+ Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
+ sampler_handles.push_back(sampler->Handle());
}
}
}
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
+ const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.storage_texels) {
- const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
- SetupStorageTexel(image, entry);
+ const TextureHandle handle =
+ GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
+ image_view_indices.push_back(handle.image);
}
}
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Images);
- const auto& gpu = system.GPU().KeplerCompute();
+ const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.images) {
- const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
- SetupImage(tic, entry);
+ const TextureHandle handle =
+ GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
+ image_view_indices.push_back(handle.image);
}
}
-void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
- const Tegra::Engines::ConstBufferInfo& buffer) {
- if (!buffer.enabled) {
- // Set values to zero to unbind buffers
- update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
- return;
- }
-
- // Align the size to avoid bad std140 interactions
- const std::size_t size =
- Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
- ASSERT(size <= MaxConstbufferSize);
-
- const auto info =
- buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
- update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
-}
-
-void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
- auto& memory_manager{system.GPU().MemoryManager()};
- const auto actual_addr = memory_manager.Read<u64>(address);
- const auto size = memory_manager.Read<u32>(address + 8);
-
- if (size == 0) {
- // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
- // because Vulkan doesn't like empty buffers.
- // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
- // default buffer.
- static constexpr std::size_t dummy_size = 4;
- const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
- update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
- return;
- }
-
- const auto info = buffer_cache.UploadMemory(
- actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
- update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
-}
-
-void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
- const UniformTexelEntry& entry) {
- const auto view = texture_cache.GetTextureSurface(tic, entry);
- ASSERT(view->IsBufferView());
-
- update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
-}
-
-void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture,
- const SamplerEntry& entry) {
- auto view = texture_cache.GetTextureSurface(texture.tic, entry);
- ASSERT(!view->IsBufferView());
-
- const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
- texture.tic.z_source, texture.tic.w_source);
- const auto sampler = sampler_cache.GetSampler(texture.tsc);
- update_descriptor_queue.AddSampledImage(sampler, image_view);
-
- VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
- *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- sampled_views.push_back(ImageView{std::move(view), image_layout});
-}
-
-void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
- const StorageTexelEntry& entry) {
- const auto view = texture_cache.GetImageSurface(tic, entry);
- ASSERT(view->IsBufferView());
-
- update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
-}
-
-void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
- auto view = texture_cache.GetImageSurface(tic, entry);
-
- if (entry.is_written) {
- view->MarkAsModified(texture_cache.Tick());
- }
-
- UNIMPLEMENTED_IF(tic.IsBuffer());
-
- const VkImageView image_view =
- view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
- update_descriptor_queue.AddImage(image_view);
-
- VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
- *image_layout = VK_IMAGE_LAYOUT_GENERAL;
- image_views.push_back(ImageView{std::move(view), image_layout});
-}
-
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchViewports()) {
return;
@@ -1304,7 +866,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
- GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)};
+ GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
+ };
scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
}
@@ -1312,13 +875,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchScissors()) {
return;
}
- const std::array scissors = {
+ const std::array scissors{
GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
- GetScissorState(regs, 15)};
+ GetScissorState(regs, 15),
+ };
scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
}
@@ -1442,16 +1006,6 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
[front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); });
}
-void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) {
- const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value();
- if (!state_tracker.ChangePrimitiveTopology(primitive_topology)) {
- return;
- }
- scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) {
- cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology));
- });
-}
-
void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchStencilOp()) {
return;
@@ -1493,101 +1047,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
});
}
-std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
- std::size_t size = CalculateVertexArraysSize();
- if (is_indexed) {
- size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
- }
- size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
- return size;
-}
-
-std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
- return Tegra::Engines::KeplerCompute::NumConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-}
-
-std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- std::size_t size = 0;
- for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- // This implementation assumes that all attributes are used in the shader.
- const GPUVAddr start{regs.vertex_array[index].StartAddress()};
- const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
- DEBUG_ASSERT(end >= start);
-
- size += (end - start) * regs.vertex_array[index].enable;
- }
- return size;
-}
-
-std::size_t RasterizerVulkan::CalculateIndexBufferSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
- return static_cast<std::size_t>(regs.index_array.count) *
- static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
-}
-
-std::size_t RasterizerVulkan::CalculateConstBufferSize(
- const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
- if (entry.IsIndirect()) {
- // Buffer is accessed indirectly, so upload the entire thing
- return buffer.size;
- } else {
- // Buffer is accessed directly, upload just what we use
- return entry.GetSize();
- }
-}
-
-RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
- const auto& regs = system.GPU().Maxwell3D().regs;
- const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
-
- RenderPassParams params;
- params.color_formats = {};
- std::size_t color_texceptions = 0;
-
- std::size_t index = 0;
- for (std::size_t rt = 0; rt < num_attachments; ++rt) {
- const auto& rendertarget = regs.rt[rt];
- if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) {
- continue;
- }
- params.color_formats[index] = static_cast<u8>(rendertarget.format);
- color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index;
- ++index;
- }
- params.num_color_attachments = static_cast<u8>(index);
- params.texceptions = static_cast<u8>(color_texceptions);
-
- params.zeta_format = regs.zeta_enable ? static_cast<u8>(regs.zeta.format) : 0;
- params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
- return params;
-}
-
-VkBuffer RasterizerVulkan::DefaultBuffer() {
- if (default_buffer) {
- return *default_buffer;
- }
-
- default_buffer = device.GetLogical().CreateBuffer({
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .size = DEFAULT_BUFFER_SIZE,
- .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 0,
- .pQueueFamilyIndices = nullptr,
- });
- default_buffer_commit = memory_manager.Commit(default_buffer, false);
-
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
- cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0);
- });
- return *default_buffer;
-}
-
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index f640ba649..acea1ba2d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -11,28 +11,25 @@
#include <vector>
#include <boost/container/static_vector.hpp>
-#include <boost/functional/hash.hpp>
#include "common/common_types.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
-#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/async_shaders.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class System;
@@ -50,66 +47,15 @@ namespace Vulkan {
struct VKScreenInfo;
-using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>;
-
-struct FramebufferCacheKey {
- VkRenderPass renderpass{};
- u32 width = 0;
- u32 height = 0;
- u32 layers = 0;
- ImageViewsPack views;
-
- std::size_t Hash() const noexcept {
- std::size_t hash = 0;
- boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass));
- for (const auto& view : views) {
- boost::hash_combine(hash, static_cast<VkImageView>(view));
- }
- boost::hash_combine(hash, width);
- boost::hash_combine(hash, height);
- boost::hash_combine(hash, layers);
- return hash;
- }
-
- bool operator==(const FramebufferCacheKey& rhs) const noexcept {
- return std::tie(renderpass, views, width, height, layers) ==
- std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers);
- }
-
- bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
- return !operator==(rhs);
- }
-};
-
-} // namespace Vulkan
-
-namespace std {
-
-template <>
-struct hash<Vulkan::FramebufferCacheKey> {
- std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
-
-namespace Vulkan {
-
class StateTracker;
-class BufferBindings;
-
-struct ImageView {
- View view;
- VkImageLayout* layout = nullptr;
-};
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
- explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
- VKScreenInfo& screen_info, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- StateTracker& state_tracker, VKScheduler& scheduler);
+ explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ VKScreenInfo& screen_info_, const Device& device_,
+ MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
+ VKScheduler& scheduler_);
~RasterizerVulkan() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -117,25 +63,28 @@ public:
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
+ void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
+ void UnmapMemory(VAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
+ void FragmentBarrier() override;
+ void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
- bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+ bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void SetupDirtyFlags() override;
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
return async_shaders;
@@ -146,45 +95,22 @@ public:
}
/// Maximum supported size that a constbuffer can have in bytes.
- static constexpr std::size_t MaxConstbufferSize = 0x10000;
+ static constexpr size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
private:
- struct DrawParameters {
- void Draw(vk::CommandBuffer cmdbuf) const;
+ static constexpr size_t MAX_TEXTURES = 192;
+ static constexpr size_t MAX_IMAGES = 48;
+ static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
- u32 base_instance = 0;
- u32 num_instances = 0;
- u32 base_vertex = 0;
- u32 num_vertices = 0;
- bool is_indexed = 0;
- };
-
- using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
-
- static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
void FlushWork();
- /// @brief Updates the currently bound attachments
- /// @param is_clear True when the framebuffer is updated as a clear
- /// @return Bitfield of attachments being used as sampled textures
- Texceptions UpdateAttachments(bool is_clear);
-
- std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
-
- /// Setups geometry buffers and state.
- DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
- bool is_indexed, bool is_instanced);
-
/// Setup descriptors in the graphics pipeline.
- void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
-
- void SetupImageTransitions(Texceptions texceptions,
- const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
- const View& zeta_attachment);
+ void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
+ bool is_indexed);
void UpdateDynamicStates();
@@ -192,18 +118,6 @@ private:
void EndTransformFeedback();
- bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
-
- void SetupVertexArrays(BufferBindings& buffer_bindings);
-
- void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
-
- /// Setup constant buffers in the graphics pipeline.
- void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup global buffers in the graphics pipeline.
- void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
-
/// Setup uniform texels in the graphics pipeline.
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
@@ -216,12 +130,6 @@ private:
/// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
- /// Setup constant buffers in the compute pipeline.
- void SetupComputeConstBuffers(const ShaderEntries& entries);
-
- /// Setup global buffers in the compute pipeline.
- void SetupComputeGlobalBuffers(const ShaderEntries& entries);
-
/// Setup texel buffers in the compute pipeline.
void SetupComputeUniformTexels(const ShaderEntries& entries);
@@ -234,19 +142,6 @@ private:
/// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
- void SetupConstBuffer(const ConstBufferEntry& entry,
- const Tegra::Engines::ConstBufferInfo& buffer);
-
- void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
-
- void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
-
- void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
-
- void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
-
- void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
-
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -260,64 +155,43 @@ private:
void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
- void UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
- std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
-
- std::size_t CalculateComputeStreamBufferSize() const;
-
- std::size_t CalculateVertexArraysSize() const;
-
- std::size_t CalculateIndexBufferSize() const;
-
- std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
- const Tegra::Engines::ConstBufferInfo& buffer) const;
-
- RenderPassParams GetRenderPassParams(Texceptions texceptions) const;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
- VkBuffer DefaultBuffer();
-
- Core::System& system;
- Core::Frontend::EmuWindow& render_window;
VKScreenInfo& screen_info;
- const VKDevice& device;
- VKResourceManager& resource_manager;
- VKMemoryManager& memory_manager;
+ const Device& device;
+ MemoryAllocator& memory_allocator;
StateTracker& state_tracker;
VKScheduler& scheduler;
- VKStagingBufferPool staging_pool;
+ StagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
- VKRenderPassCache renderpass_cache;
- QuadArrayPass quad_array_pass;
- QuadIndexedPass quad_indexed_pass;
- Uint8Pass uint8_pass;
+ BlitImageHelper blit_image;
+
+ GraphicsPipelineCacheKey graphics_key;
- VKTextureCache texture_cache;
+ TextureCacheRuntime texture_cache_runtime;
+ TextureCache texture_cache;
+ BufferCacheRuntime buffer_cache_runtime;
+ BufferCache buffer_cache;
VKPipelineCache pipeline_cache;
- VKBufferCache buffer_cache;
- VKSamplerCache sampler_cache;
- VKFenceManager fence_manager;
VKQueryCache query_cache;
+ VKFenceManager fence_manager;
- vk::Buffer default_buffer;
- VKMemoryCommit default_buffer_commit;
vk::Event wfi_event;
VideoCommon::Shader::AsyncShaders async_shaders;
- std::array<View, Maxwell::NumRenderTargets> color_attachments;
- View zeta_attachment;
-
- std::vector<ImageView> sampled_views;
- std::vector<ImageView> image_views;
+ boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
+ std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
+ boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
u32 draw_counter = 0;
-
- // TODO(Rodrigo): Invalidate on image destruction
- std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
deleted file mode 100644
index 80284cf92..000000000
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-#include <memory>
-#include <vector>
-
-#include "common/cityhash.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_vulkan/maxwell_to_vk.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-std::size_t RenderPassParams::Hash() const noexcept {
- const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
- return static_cast<std::size_t>(hash);
-}
-
-bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept {
- return std::memcmp(&rhs, this, sizeof *this) == 0;
-}
-
-VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}
-
-VKRenderPassCache::~VKRenderPassCache() = default;
-
-VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
- const auto [pair, is_cache_miss] = cache.try_emplace(params);
- auto& entry = pair->second;
- if (is_cache_miss) {
- entry = CreateRenderPass(params);
- }
- return *entry;
-}
-
-vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
- using namespace VideoCore::Surface;
- const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
-
- std::vector<VkAttachmentDescription> descriptors;
- descriptors.reserve(num_attachments);
-
- std::vector<VkAttachmentReference> color_references;
- color_references.reserve(num_attachments);
-
- for (std::size_t rt = 0; rt < num_attachments; ++rt) {
- const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
- const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
- const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
- ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
- static_cast<int>(pixel_format));
-
- // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed.
- const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
- ? VK_IMAGE_LAYOUT_GENERAL
- : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- descriptors.push_back({
- .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
- .format = format.format,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
- .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
- .initialLayout = color_layout,
- .finalLayout = color_layout,
- });
-
- color_references.push_back({
- .attachment = static_cast<u32>(rt),
- .layout = color_layout,
- });
- }
-
- VkAttachmentReference zeta_attachment_ref;
- const bool has_zeta = params.zeta_format != 0;
- if (has_zeta) {
- const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format);
- const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format);
- const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
- ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
- static_cast<int>(pixel_format));
-
- const VkImageLayout zeta_layout = params.zeta_texception != 0
- ? VK_IMAGE_LAYOUT_GENERAL
- : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
- descriptors.push_back({
- .flags = 0,
- .format = format.format,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = zeta_layout,
- .finalLayout = zeta_layout,
- });
-
- zeta_attachment_ref = {
- .attachment = static_cast<u32>(num_attachments),
- .layout = zeta_layout,
- };
- }
-
- const VkSubpassDescription subpass_description{
- .flags = 0,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .pInputAttachments = nullptr,
- .colorAttachmentCount = static_cast<u32>(color_references.size()),
- .pColorAttachments = color_references.data(),
- .pResolveAttachments = nullptr,
- .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr,
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = nullptr,
- };
-
- VkAccessFlags access = 0;
- VkPipelineStageFlags stage = 0;
- if (!color_references.empty()) {
- access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- }
-
- if (has_zeta) {
- access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
- stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
- }
-
- const VkSubpassDependency subpass_dependency{
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = stage,
- .dstStageMask = stage,
- .srcAccessMask = 0,
- .dstAccessMask = access,
- .dependencyFlags = 0,
- };
-
- return device.GetLogical().CreateRenderPass({
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .attachmentCount = static_cast<u32>(descriptors.size()),
- .pAttachments = descriptors.data(),
- .subpassCount = 1,
- .pSubpasses = &subpass_description,
- .dependencyCount = 1,
- .pDependencies = &subpass_dependency,
- });
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
deleted file mode 100644
index 8b0fec720..000000000
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <type_traits>
-#include <unordered_map>
-
-#include <boost/container/static_vector.hpp>
-#include <boost/functional/hash.hpp>
-
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-#include "video_core/surface.h"
-
-namespace Vulkan {
-
-class VKDevice;
-
-struct RenderPassParams {
- std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats;
- u8 num_color_attachments;
- u8 texceptions;
-
- u8 zeta_format;
- u8 zeta_texception;
-
- std::size_t Hash() const noexcept;
-
- bool operator==(const RenderPassParams& rhs) const noexcept;
-
- bool operator!=(const RenderPassParams& rhs) const noexcept {
- return !operator==(rhs);
- }
-};
-static_assert(std::has_unique_object_representations_v<RenderPassParams>);
-static_assert(std::is_trivially_copyable_v<RenderPassParams>);
-static_assert(std::is_trivially_constructible_v<RenderPassParams>);
-
-} // namespace Vulkan
-
-namespace std {
-
-template <>
-struct hash<Vulkan::RenderPassParams> {
- std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
-
-namespace Vulkan {
-
-class VKRenderPassCache final {
-public:
- explicit VKRenderPassCache(const VKDevice& device);
- ~VKRenderPassCache();
-
- VkRenderPass GetRenderPass(const RenderPassParams& params);
-
-private:
- vk::RenderPass CreateRenderPass(const RenderPassParams& params) const;
-
- const VKDevice& device;
- std::unordered_map<RenderPassParams, vk::RenderPass> cache;
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
deleted file mode 100644
index f19330a36..000000000
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ /dev/null
@@ -1,311 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <optional>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-namespace {
-
-// TODO(Rodrigo): Fine tune these numbers.
-constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
-constexpr std::size_t FENCES_GROW_STEP = 0x40;
-
-constexpr VkFenceCreateInfo BuildFenceCreateInfo() {
- return {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- };
-}
-
-} // Anonymous namespace
-
-class CommandBufferPool final : public VKFencedPool {
-public:
- explicit CommandBufferPool(const VKDevice& device)
- : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
-
- void Allocate(std::size_t begin, std::size_t end) override {
- // Command buffers are going to be commited, recorded, executed every single usage cycle.
- // They are also going to be reseted when commited.
- Pool& pool = pools.emplace_back();
- pool.handle = device.GetLogical().CreateCommandPool({
- .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
- .pNext = nullptr,
- .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
- VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
- .queueFamilyIndex = device.GetGraphicsFamily(),
- });
- pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
- }
-
- VkCommandBuffer Commit(VKFence& fence) {
- const std::size_t index = CommitResource(fence);
- const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
- const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
- return pools[pool_index].cmdbufs[sub_index];
- }
-
-private:
- struct Pool {
- vk::CommandPool handle;
- vk::CommandBuffers cmdbufs;
- };
-
- const VKDevice& device;
- std::vector<Pool> pools;
-};
-
-VKResource::VKResource() = default;
-
-VKResource::~VKResource() = default;
-
-VKFence::VKFence(const VKDevice& device)
- : device{device}, handle{device.GetLogical().CreateFence(BuildFenceCreateInfo())} {}
-
-VKFence::~VKFence() = default;
-
-void VKFence::Wait() {
- switch (const VkResult result = handle.Wait()) {
- case VK_SUCCESS:
- return;
- case VK_ERROR_DEVICE_LOST:
- device.ReportLoss();
- [[fallthrough]];
- default:
- throw vk::Exception(result);
- }
-}
-
-void VKFence::Release() {
- ASSERT(is_owned);
- is_owned = false;
-}
-
-void VKFence::Commit() {
- is_owned = true;
- is_used = true;
-}
-
-bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
- if (!is_used) {
- // If a fence is not used it's always free.
- return true;
- }
- if (is_owned && !owner_wait) {
- // The fence is still being owned (Release has not been called) and ownership wait has
- // not been asked.
- return false;
- }
-
- if (gpu_wait) {
- // Wait for the fence if it has been requested.
- (void)handle.Wait();
- } else {
- if (handle.GetStatus() != VK_SUCCESS) {
- // Vulkan fence is not ready, not much it can do here
- return false;
- }
- }
-
- // Broadcast resources their free state.
- for (auto* resource : protected_resources) {
- resource->OnFenceRemoval(this);
- }
- protected_resources.clear();
-
- // Prepare fence for reusage.
- handle.Reset();
- is_used = false;
- return true;
-}
-
-void VKFence::Protect(VKResource* resource) {
- protected_resources.push_back(resource);
-}
-
-void VKFence::Unprotect(VKResource* resource) {
- const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
- ASSERT(it != protected_resources.end());
-
- resource->OnFenceRemoval(this);
- protected_resources.erase(it);
-}
-
-void VKFence::RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept {
- std::replace(std::begin(protected_resources), std::end(protected_resources), old_resource,
- new_resource);
-}
-
-VKFenceWatch::VKFenceWatch() = default;
-
-VKFenceWatch::VKFenceWatch(VKFence& initial_fence) {
- Watch(initial_fence);
-}
-
-VKFenceWatch::VKFenceWatch(VKFenceWatch&& rhs) noexcept {
- fence = std::exchange(rhs.fence, nullptr);
- if (fence) {
- fence->RedirectProtection(&rhs, this);
- }
-}
-
-VKFenceWatch& VKFenceWatch::operator=(VKFenceWatch&& rhs) noexcept {
- fence = std::exchange(rhs.fence, nullptr);
- if (fence) {
- fence->RedirectProtection(&rhs, this);
- }
- return *this;
-}
-
-VKFenceWatch::~VKFenceWatch() {
- if (fence) {
- fence->Unprotect(this);
- }
-}
-
-void VKFenceWatch::Wait() {
- if (fence == nullptr) {
- return;
- }
- fence->Wait();
- fence->Unprotect(this);
-}
-
-void VKFenceWatch::Watch(VKFence& new_fence) {
- Wait();
- fence = &new_fence;
- fence->Protect(this);
-}
-
-bool VKFenceWatch::TryWatch(VKFence& new_fence) {
- if (fence) {
- return false;
- }
- fence = &new_fence;
- fence->Protect(this);
- return true;
-}
-
-void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
- ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
- fence = nullptr;
-}
-
-VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}
-
-VKFencedPool::~VKFencedPool() = default;
-
-std::size_t VKFencedPool::CommitResource(VKFence& fence) {
- const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
- for (std::size_t iterator = begin; iterator < end; ++iterator) {
- if (watches[iterator]->TryWatch(fence)) {
- // The resource is now being watched, a free resource was successfully found.
- return iterator;
- }
- }
- return {};
- };
- // Try to find a free resource from the hinted position to the end.
- auto found = Search(free_iterator, watches.size());
- if (!found) {
- // Search from beginning to the hinted position.
- found = Search(0, free_iterator);
- if (!found) {
- // Both searches failed, the pool is full; handle it.
- const std::size_t free_resource = ManageOverflow();
-
- // Watch will wait for the resource to be free.
- watches[free_resource]->Watch(fence);
- found = free_resource;
- }
- }
- // Free iterator is hinted to the resource after the one that's been commited.
- free_iterator = (*found + 1) % watches.size();
- return *found;
-}
-
-std::size_t VKFencedPool::ManageOverflow() {
- const std::size_t old_capacity = watches.size();
- Grow();
-
- // The last entry is guaranted to be free, since it's the first element of the freshly
- // allocated resources.
- return old_capacity;
-}
-
-void VKFencedPool::Grow() {
- const std::size_t old_capacity = watches.size();
- watches.resize(old_capacity + grow_step);
- std::generate(watches.begin() + old_capacity, watches.end(),
- []() { return std::make_unique<VKFenceWatch>(); });
- Allocate(old_capacity, old_capacity + grow_step);
-}
-
-VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
- GrowFences(FENCES_GROW_STEP);
- command_buffer_pool = std::make_unique<CommandBufferPool>(device);
-}
-
-VKResourceManager::~VKResourceManager() = default;
-
-VKFence& VKResourceManager::CommitFence() {
- const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
- const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
- const auto hinted = fences.begin() + fences_iterator;
-
- auto it = std::find_if(hinted, fences.end(), Tick);
- if (it == fences.end()) {
- it = std::find_if(fences.begin(), hinted, Tick);
- if (it == hinted) {
- return nullptr;
- }
- }
- fences_iterator = std::distance(fences.begin(), it) + 1;
- if (fences_iterator >= fences.size())
- fences_iterator = 0;
-
- auto& fence = *it;
- fence->Commit();
- return fence.get();
- };
-
- VKFence* found_fence = StepFences(false, false);
- if (!found_fence) {
- // Try again, this time waiting.
- found_fence = StepFences(true, false);
-
- if (!found_fence) {
- // Allocate new fences and try again.
- LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
- fences.size() + FENCES_GROW_STEP);
-
- GrowFences(FENCES_GROW_STEP);
- found_fence = StepFences(true, false);
- ASSERT(found_fence != nullptr);
- }
- }
- return *found_fence;
-}
-
-VkCommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
- return command_buffer_pool->Commit(fence);
-}
-
-void VKResourceManager::GrowFences(std::size_t new_fences_count) {
- const std::size_t previous_size = fences.size();
- fences.resize(previous_size + new_fences_count);
-
- std::generate(fences.begin() + previous_size, fences.end(),
- [this] { return std::make_unique<VKFence>(device); });
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
deleted file mode 100644
index f683d2276..000000000
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-#include <memory>
-#include <vector>
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-class VKDevice;
-class VKFence;
-class VKResourceManager;
-
-class CommandBufferPool;
-
-/// Interface for a Vulkan resource
-class VKResource {
-public:
- explicit VKResource();
- virtual ~VKResource();
-
- /**
- * Signals the object that an owning fence has been signaled.
- * @param signaling_fence Fence that signals its usage end.
- */
- virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
-};
-
-/**
- * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
- * They must be commited from the resource manager. Their usage flow is: commit the fence from the
- * resource manager, protect resources with it and use them, send the fence to an execution queue
- * and Wait for it if needed and then call Release. Used resources will automatically be signaled
- * when they are free to be reused.
- * @brief Protects resources for concurrent usage and signals its release.
- */
-class VKFence {
- friend class VKResourceManager;
-
-public:
- explicit VKFence(const VKDevice& device);
- ~VKFence();
-
- /**
- * Waits for the fence to be signaled.
- * @warning You must have ownership of the fence and it has to be previously sent to a queue to
- * call this function.
- */
- void Wait();
-
- /**
- * Releases ownership of the fence. Pass after it has been sent to an execution queue.
- * Unmanaged usage of the fence after the call will result in undefined behavior because it may
- * be being used for something else.
- */
- void Release();
-
- /// Protects a resource with this fence.
- void Protect(VKResource* resource);
-
- /// Removes protection for a resource.
- void Unprotect(VKResource* resource);
-
- /// Redirects one protected resource to a new address.
- void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept;
-
- /// Retreives the fence.
- operator VkFence() const {
- return *handle;
- }
-
-private:
- /// Take ownership of the fence.
- void Commit();
-
- /**
- * Updates the fence status.
- * @warning Waiting for the owner might soft lock the execution.
- * @param gpu_wait Wait for the fence to be signaled by the driver.
- * @param owner_wait Wait for the owner to signal its freedom.
- * @returns True if the fence is free. Waiting for gpu and owner will always return true.
- */
- bool Tick(bool gpu_wait, bool owner_wait);
-
- const VKDevice& device; ///< Device handler
- vk::Fence handle; ///< Vulkan fence
- std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
- bool is_owned = false; ///< The fence has been commited but not released yet.
- bool is_used = false; ///< The fence has been commited but it has not been checked to be free.
-};
-
-/**
- * A fence watch is used to keep track of the usage of a fence and protect a resource or set of
- * resources without having to inherit VKResource from their handlers.
- */
-class VKFenceWatch final : public VKResource {
-public:
- explicit VKFenceWatch();
- VKFenceWatch(VKFence& initial_fence);
- VKFenceWatch(VKFenceWatch&&) noexcept;
- VKFenceWatch(const VKFenceWatch&) = delete;
- ~VKFenceWatch() override;
-
- VKFenceWatch& operator=(VKFenceWatch&&) noexcept;
-
- /// Waits for the fence to be released.
- void Wait();
-
- /**
- * Waits for a previous fence and watches a new one.
- * @param new_fence New fence to wait to.
- */
- void Watch(VKFence& new_fence);
-
- /**
- * Checks if it's currently being watched and starts watching it if it's available.
- * @returns True if a watch has started, false if it's being watched.
- */
- bool TryWatch(VKFence& new_fence);
-
- void OnFenceRemoval(VKFence* signaling_fence) override;
-
- /**
- * Do not use it paired with Watch. Use TryWatch instead.
- * Returns true when the watch is free.
- */
- bool IsUsed() const {
- return fence != nullptr;
- }
-
-private:
- VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
-};
-
-/**
- * Handles a pool of resources protected by fences. Manages resource overflow allocating more
- * resources.
- */
-class VKFencedPool {
-public:
- explicit VKFencedPool(std::size_t grow_step);
- virtual ~VKFencedPool();
-
-protected:
- /**
- * Commits a free resource and protects it with a fence. It may allocate new resources.
- * @param fence Fence that protects the commited resource.
- * @returns Index of the resource commited.
- */
- std::size_t CommitResource(VKFence& fence);
-
- /// Called when a chunk of resources have to be allocated.
- virtual void Allocate(std::size_t begin, std::size_t end) = 0;
-
-private:
- /// Manages pool overflow allocating new resources.
- std::size_t ManageOverflow();
-
- /// Allocates a new page of resources.
- void Grow();
-
- std::size_t grow_step = 0; ///< Number of new resources created after an overflow
- std::size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found
- std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
-};
-
-/**
- * The resource manager handles all resources that can be protected with a fence avoiding
- * driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
- */
-class VKResourceManager final {
-public:
- explicit VKResourceManager(const VKDevice& device);
- ~VKResourceManager();
-
- /// Commits a fence. It has to be sent to a queue and released.
- VKFence& CommitFence();
-
- /// Commits an unused command buffer and protects it with a fence.
- VkCommandBuffer CommitCommandBuffer(VKFence& fence);
-
-private:
- /// Allocates new fences.
- void GrowFences(std::size_t new_fences_count);
-
- const VKDevice& device; ///< Device handler.
- std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
- std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences.
- std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
new file mode 100644
index 000000000..a8bf7bda8
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -0,0 +1,63 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+
+namespace Vulkan {
+
+ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
+ : master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
+
+ResourcePool::~ResourcePool() = default;
+
+size_t ResourcePool::CommitResource() {
+ // Refresh semaphore to query updated results
+ master_semaphore.Refresh();
+ const u64 gpu_tick = master_semaphore.KnownGpuTick();
+ const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
+ for (size_t iterator = begin; iterator < end; ++iterator) {
+ if (gpu_tick >= ticks[iterator]) {
+ ticks[iterator] = master_semaphore.CurrentTick();
+ return iterator;
+ }
+ }
+ return std::nullopt;
+ };
+ // Try to find a free resource from the hinted position to the end.
+ std::optional<size_t> found = search(hint_iterator, ticks.size());
+ if (!found) {
+ // Search from beginning to the hinted position.
+ found = search(0, hint_iterator);
+ if (!found) {
+ // Both searches failed, the pool is full; handle it.
+ const size_t free_resource = ManageOverflow();
+
+ ticks[free_resource] = master_semaphore.CurrentTick();
+ found = free_resource;
+ }
+ }
+ // Free iterator is hinted to the resource after the one that's been commited.
+ hint_iterator = (*found + 1) % ticks.size();
+ return *found;
+}
+
+size_t ResourcePool::ManageOverflow() {
+ const size_t old_capacity = ticks.size();
+ Grow();
+
+ // The last entry is guaranted to be free, since it's the first element of the freshly
+ // allocated resources.
+ return old_capacity;
+}
+
+void ResourcePool::Grow() {
+ const size_t old_capacity = ticks.size();
+ ticks.resize(old_capacity + grow_step);
+ Allocate(old_capacity, old_capacity + grow_step);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
new file mode 100644
index 000000000..9d0bb3b4d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -0,0 +1,43 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Vulkan {
+
+class MasterSemaphore;
+
+/**
+ * Handles a pool of resources protected by fences. Manages resource overflow allocating more
+ * resources.
+ */
+class ResourcePool {
+public:
+ explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
+ virtual ~ResourcePool();
+
+protected:
+ size_t CommitResource();
+
+ /// Called when a chunk of resources have to be allocated.
+ virtual void Allocate(size_t begin, size_t end) = 0;
+
+private:
+ /// Manages pool overflow allocating new resources.
+ size_t ManageOverflow();
+
+ /// Allocates a new page of resources.
+ void Grow();
+
+ MasterSemaphore& master_semaphore;
+ size_t grow_step = 0; ///< Number of new resources created after an overflow
+ size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
+ std::vector<u64> ticks; ///< Ticks for each resource
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
deleted file mode 100644
index b068888f9..000000000
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <unordered_map>
-
-#include "video_core/renderer_vulkan/maxwell_to_vk.h"
-#include "video_core/renderer_vulkan/vk_sampler_cache.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-#include "video_core/textures/texture.h"
-
-using Tegra::Texture::TextureMipmapFilter;
-
-namespace Vulkan {
-
-namespace {
-
-VkBorderColor ConvertBorderColor(std::array<float, 4> color) {
- // TODO(Rodrigo): Manage integer border colors
- if (color == std::array<float, 4>{0, 0, 0, 0}) {
- return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
- } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
- return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
- } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
- return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
- }
- if (color[0] + color[1] + color[2] > 1.35f) {
- // If color elements are brighter than roughly 0.5 average, use white border
- return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
- } else if (color[3] > 0.5f) {
- return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
- } else {
- return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
- }
-}
-
-} // Anonymous namespace
-
-VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
-
-VKSamplerCache::~VKSamplerCache() = default;
-
-vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
- const bool arbitrary_borders = device.IsExtCustomBorderColorSupported();
- const std::array color = tsc.GetBorderColor();
-
- VkSamplerCustomBorderColorCreateInfoEXT border{
- .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
- .pNext = nullptr,
- .customBorderColor = {},
- .format = VK_FORMAT_UNDEFINED,
- };
- std::memcpy(&border.customBorderColor, color.data(), sizeof(color));
-
- return device.GetLogical().CreateSampler({
- .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
- .pNext = arbitrary_borders ? &border : nullptr,
- .flags = 0,
- .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
- .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
- .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
- .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
- .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
- .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
- .mipLodBias = tsc.GetLodBias(),
- .anisotropyEnable =
- static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE),
- .maxAnisotropy = tsc.GetMaxAnisotropy(),
- .compareEnable = tsc.depth_compare_enabled,
- .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
- .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(),
- .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(),
- .borderColor =
- arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
- .unnormalizedCoordinates = VK_FALSE,
- });
-}
-
-VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const {
- return *sampler;
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
deleted file mode 100644
index a33d1c0ee..000000000
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "video_core/renderer_vulkan/wrapper.h"
-#include "video_core/sampler_cache.h"
-#include "video_core/textures/texture.h"
-
-namespace Vulkan {
-
-class VKDevice;
-
-class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> {
-public:
- explicit VKSamplerCache(const VKDevice& device);
- ~VKSamplerCache();
-
-protected:
- vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
-
- VkSampler ToSamplerType(const vk::Sampler& sampler) const override;
-
-private:
- const VKDevice& device;
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index dbbd0961a..f35c120b0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -10,12 +10,14 @@
#include "common/microprofile.h"
#include "common/thread.h"
-#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_command_pool.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
@@ -35,10 +37,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
last = nullptr;
}
-VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
- StateTracker& state_tracker)
- : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker},
- next_fence{&resource_manager.CommitFence()} {
+VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
+ : device{device_}, state_tracker{state_tracker_},
+ master_semaphore{std::make_unique<MasterSemaphore>(device)},
+ command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
AcquireNewChunk();
AllocateNewContext();
worker_thread = std::thread(&VKScheduler::WorkerThread, this);
@@ -50,20 +52,15 @@ VKScheduler::~VKScheduler() {
worker_thread.join();
}
-void VKScheduler::Flush(bool release_fence, VkSemaphore semaphore) {
+void VKScheduler::Flush(VkSemaphore semaphore) {
SubmitExecution(semaphore);
- if (release_fence) {
- current_fence->Release();
- }
AllocateNewContext();
}
-void VKScheduler::Finish(bool release_fence, VkSemaphore semaphore) {
+void VKScheduler::Finish(VkSemaphore semaphore) {
+ const u64 presubmit_tick = CurrentTick();
SubmitExecution(semaphore);
- current_fence->Wait();
- if (release_fence) {
- current_fence->Release();
- }
+ Wait(presubmit_tick);
AllocateNewContext();
}
@@ -88,38 +85,39 @@ void VKScheduler::DispatchWork() {
AcquireNewChunk();
}
-void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer,
- VkExtent2D render_area) {
- if (renderpass == state.renderpass && framebuffer == state.framebuffer &&
+void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) {
+ const VkRenderPass renderpass = framebuffer->RenderPass();
+ const VkFramebuffer framebuffer_handle = framebuffer->Handle();
+ const VkExtent2D render_area = framebuffer->RenderArea();
+ if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&
render_area.width == state.render_area.width &&
render_area.height == state.render_area.height) {
return;
}
- const bool end_renderpass = state.renderpass != nullptr;
+ EndRenderPass();
state.renderpass = renderpass;
- state.framebuffer = framebuffer;
+ state.framebuffer = framebuffer_handle;
state.render_area = render_area;
- const VkRenderPassBeginInfo renderpass_bi{
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .pNext = nullptr,
- .renderPass = renderpass,
- .framebuffer = framebuffer,
- .renderArea =
- {
- .offset = {.x = 0, .y = 0},
- .extent = render_area,
- },
- .clearValueCount = 0,
- .pClearValues = nullptr,
- };
-
- Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) {
- if (end_renderpass) {
- cmdbuf.EndRenderPass();
- }
+ Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) {
+ const VkRenderPassBeginInfo renderpass_bi{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .pNext = nullptr,
+ .renderPass = renderpass,
+ .framebuffer = framebuffer_handle,
+ .renderArea =
+ {
+ .offset = {.x = 0, .y = 0},
+ .extent = render_area,
+ },
+ .clearValueCount = 0,
+ .pClearValues = nullptr,
+ };
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
});
+ num_renderpass_images = framebuffer->NumImages();
+ renderpass_images = framebuffer->Images();
+ renderpass_image_ranges = framebuffer->ImageRanges();
}
void VKScheduler::RequestOutsideRenderPassOperationContext() {
@@ -160,18 +158,38 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
current_cmdbuf.End();
+ const VkSemaphore timeline_semaphore = master_semaphore->Handle();
+ const u32 num_signal_semaphores = semaphore ? 2U : 1U;
+
+ const u64 signal_value = master_semaphore->CurrentTick();
+ const u64 wait_value = signal_value - 1;
+ const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+
+ master_semaphore->NextTick();
+
+ const std::array signal_values{signal_value, u64(0)};
+ const std::array signal_semaphores{timeline_semaphore, semaphore};
+
+ const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
+ .pNext = nullptr,
+ .waitSemaphoreValueCount = 1,
+ .pWaitSemaphoreValues = &wait_value,
+ .signalSemaphoreValueCount = num_signal_semaphores,
+ .pSignalSemaphoreValues = signal_values.data(),
+ };
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = nullptr,
- .waitSemaphoreCount = 0,
- .pWaitSemaphores = nullptr,
- .pWaitDstStageMask = nullptr,
+ .pNext = &timeline_si,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &timeline_semaphore,
+ .pWaitDstStageMask = &wait_stage_mask,
.commandBufferCount = 1,
.pCommandBuffers = current_cmdbuf.address(),
- .signalSemaphoreCount = semaphore ? 1U : 0U,
- .pSignalSemaphores = &semaphore,
+ .signalSemaphoreCount = num_signal_semaphores,
+ .pSignalSemaphores = signal_semaphores.data(),
};
- switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) {
+ switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
@@ -183,14 +201,9 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
}
void VKScheduler::AllocateNewContext() {
- ++ticks;
-
std::unique_lock lock{mutex};
- current_fence = next_fence;
- next_fence = &resource_manager.CommitFence();
- current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence),
- device.GetDispatchLoader());
+ current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
current_cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
@@ -218,8 +231,37 @@ void VKScheduler::EndRenderPass() {
if (!state.renderpass) {
return;
}
+ Record([num_images = num_renderpass_images, images = renderpass_images,
+ ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
+ std::array<VkImageMemoryBarrier, 9> barriers;
+ for (size_t i = 0; i < num_images; ++i) {
+ barriers[i] = VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = images[i],
+ .subresourceRange = ranges[i],
+ };
+ }
+ cmdbuf.EndRenderPass();
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+ VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
+ vk::Span(barriers.data(), num_images));
+ });
state.renderpass = nullptr;
- Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); });
+ num_renderpass_images = 0;
}
void VKScheduler::AcquireNewChunk() {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 970a65566..3ce48e9d2 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -6,52 +6,37 @@
#include <atomic>
#include <condition_variable>
+#include <cstddef>
#include <memory>
#include <stack>
#include <thread>
#include <utility>
+#include "common/alignment.h"
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
+class CommandPool;
+class Device;
+class Framebuffer;
class StateTracker;
-class VKDevice;
-class VKFence;
class VKQueryCache;
-class VKResourceManager;
-
-class VKFenceView {
-public:
- VKFenceView() = default;
- VKFenceView(VKFence* const& fence) : fence{fence} {}
-
- VKFence* operator->() const noexcept {
- return fence;
- }
-
- operator VKFence&() const noexcept {
- return *fence;
- }
-
-private:
- VKFence* const& fence;
-};
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
public:
- explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
- StateTracker& state_tracker);
+ explicit VKScheduler(const Device& device, StateTracker& state_tracker);
~VKScheduler();
/// Sends the current execution context to the GPU.
- void Flush(bool release_fence = true, VkSemaphore semaphore = nullptr);
+ void Flush(VkSemaphore semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
- void Finish(bool release_fence = true, VkSemaphore semaphore = nullptr);
+ void Finish(VkSemaphore semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
@@ -61,8 +46,7 @@ public:
void DispatchWork();
/// Requests to begin a renderpass.
- void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer,
- VkExtent2D render_area);
+ void RequestRenderpass(const Framebuffer* framebuffer);
/// Requests the current executino context to be able to execute operations only allowed outside
/// of a renderpass.
@@ -71,6 +55,9 @@ public:
/// Binds a pipeline to the current execution context.
void BindGraphicsPipeline(VkPipeline pipeline);
+ /// Invalidates current command buffer state except for render passes
+ void InvalidateState();
+
/// Assigns the query cache.
void SetQueryCache(VKQueryCache& query_cache_) {
query_cache = &query_cache_;
@@ -86,14 +73,24 @@ public:
(void)chunk->Record(command);
}
- /// Gets a reference to the current fence.
- VKFenceView GetFence() const {
- return current_fence;
+ /// Returns the current command buffer tick.
+ [[nodiscard]] u64 CurrentTick() const noexcept {
+ return master_semaphore->CurrentTick();
}
- /// Returns the current command buffer tick.
- u64 Ticks() const {
- return ticks;
+ /// Returns true when a tick has been triggered by the GPU.
+ [[nodiscard]] bool IsFree(u64 tick) const noexcept {
+ return master_semaphore->IsFree(tick);
+ }
+
+ /// Waits for the given tick to trigger on the GPU.
+ void Wait(u64 tick) {
+ master_semaphore->Wait(tick);
+ }
+
+ /// Returns the master timeline semaphore.
+ [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
+ return *master_semaphore;
}
private:
@@ -118,7 +115,7 @@ private:
template <typename T>
class TypedCommand final : public Command {
public:
- explicit TypedCommand(T&& command) : command{std::move(command)} {}
+ explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
~TypedCommand() override = default;
TypedCommand(TypedCommand&&) = delete;
@@ -141,12 +138,11 @@ private:
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
+ command_offset = Common::AlignUp(command_offset, alignof(FuncType));
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
}
-
- Command* current_last = last;
-
+ Command* const current_last = last;
last = new (data.data() + command_offset) FuncType(std::move(command));
if (current_last) {
@@ -154,7 +150,6 @@ private:
} else {
first = last;
}
-
command_offset += sizeof(FuncType);
return true;
}
@@ -167,8 +162,15 @@ private:
Command* first = nullptr;
Command* last = nullptr;
- std::size_t command_offset = 0;
- std::array<u8, 0x8000> data{};
+ size_t command_offset = 0;
+ alignas(std::max_align_t) std::array<u8, 0x8000> data{};
+ };
+
+ struct State {
+ VkRenderPass renderpass = nullptr;
+ VkFramebuffer framebuffer = nullptr;
+ VkExtent2D render_area = {0, 0};
+ VkPipeline graphics_pipeline = nullptr;
};
void WorkerThread();
@@ -177,39 +179,35 @@ private:
void AllocateNewContext();
- void InvalidateState();
-
void EndPendingOperations();
void EndRenderPass();
void AcquireNewChunk();
- const VKDevice& device;
- VKResourceManager& resource_manager;
+ const Device& device;
StateTracker& state_tracker;
+ std::unique_ptr<MasterSemaphore> master_semaphore;
+ std::unique_ptr<CommandPool> command_pool;
+
VKQueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
- VKFence* current_fence = nullptr;
- VKFence* next_fence = nullptr;
-
- struct State {
- VkRenderPass renderpass = nullptr;
- VkFramebuffer framebuffer = nullptr;
- VkExtent2D render_area = {0, 0};
- VkPipeline graphics_pipeline = nullptr;
- } state;
std::unique_ptr<CommandChunk> chunk;
std::thread worker_thread;
+ State state;
+
+ u32 num_renderpass_images = 0;
+ std::array<VkImage, 9> renderpass_images{};
+ std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
+
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex mutex;
std::condition_variable cv;
- std::atomic<u64> ticks = 0;
bool quit = false;
};
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index cd7d7a4e4..40e2e0d38 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -22,11 +22,11 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/shader/transform_feedback.h"
+#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
@@ -55,8 +55,8 @@ enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
class Expression final {
public:
- Expression(Id id, Type type) : id{id}, type{type} {
- ASSERT(type != Type::Void);
+ Expression(Id id_, Type type_) : id{id_}, type{type_} {
+ ASSERT(type_ != Type::Void);
}
Expression() : type{Type::Void} {}
@@ -102,7 +102,7 @@ struct GenericVaryingDescription {
bool is_scalar = false;
};
-spv::Dim GetSamplerDim(const Sampler& sampler) {
+spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
ASSERT(!sampler.is_buffer);
switch (sampler.type) {
case Tegra::Shader::TextureType::Texture1D:
@@ -114,12 +114,12 @@ spv::Dim GetSamplerDim(const Sampler& sampler) {
case Tegra::Shader::TextureType::TextureCube:
return spv::Dim::Cube;
default:
- UNIMPLEMENTED_MSG("Unimplemented sampler type={}", static_cast<int>(sampler.type));
+ UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type);
return spv::Dim::Dim2D;
}
}
-std::pair<spv::Dim, bool> GetImageDim(const Image& image) {
+std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {
switch (image.type) {
case Tegra::Shader::ImageType::Texture1D:
return {spv::Dim::Dim1D, false};
@@ -134,7 +134,7 @@ std::pair<spv::Dim, bool> GetImageDim(const Image& image) {
case Tegra::Shader::ImageType::Texture3D:
return {spv::Dim::Dim3D, false};
default:
- UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(image.type));
+ UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type);
return {spv::Dim::Dim2D, false};
}
}
@@ -274,12 +274,12 @@ bool IsPrecise(Operation operand) {
class SPIRVDecompiler final : public Sirit::Module {
public:
- explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
- const Registry& registry, const Specialization& specialization)
- : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
- registry{registry}, specialization{specialization} {
- if (stage != ShaderType::Compute) {
- transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
+ explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_,
+ const Registry& registry_, const Specialization& specialization_)
+ : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()},
+ registry{registry_}, specialization{specialization_} {
+ if (stage_ != ShaderType::Compute) {
+ transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo());
}
AddCapability(spv::Capability::Shader);
@@ -293,6 +293,7 @@ public:
AddCapability(spv::Capability::DrawParameters);
AddCapability(spv::Capability::SubgroupBallotKHR);
AddCapability(spv::Capability::SubgroupVoteKHR);
+ AddExtension("SPV_KHR_16bit_storage");
AddExtension("SPV_KHR_shader_ballot");
AddExtension("SPV_KHR_subgroup_vote");
AddExtension("SPV_KHR_storage_buffer_storage_class");
@@ -307,7 +308,6 @@ public:
"supported on this device");
}
}
-
if (ir.UsesLayer() || ir.UsesViewportIndex()) {
if (ir.UsesViewportIndex()) {
AddCapability(spv::Capability::MultiViewport);
@@ -317,15 +317,13 @@ public:
AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
}
}
-
if (device.IsFormatlessImageLoadSupported()) {
AddCapability(spv::Capability::StorageImageReadWithoutFormat);
}
-
if (device.IsFloat16Supported()) {
AddCapability(spv::Capability::Float16);
}
- t_scalar_half = Name(TypeFloat(device.IsFloat16Supported() ? 16 : 32), "scalar_half");
+ t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half");
t_half = Name(TypeVector(t_scalar_half, 2), "half");
const Id main = Decompile();
@@ -369,6 +367,9 @@ public:
if (header.ps.omap.depth) {
AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
}
+ if (specialization.early_fragment_tests) {
+ AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
+ }
break;
case ShaderType::Compute:
const auto workgroup_size = specialization.workgroup_size;
@@ -972,7 +973,7 @@ private:
return binding;
}
- void DeclareImage(const Image& image, u32& binding) {
+ void DeclareImage(const ImageEntry& image, u32& binding) {
const auto [dim, arrayed] = GetImageDim(image);
constexpr int depth = 0;
constexpr bool ms = false;
@@ -1080,9 +1081,9 @@ private:
indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size");
}
- const auto& output_attributes = ir.GetOutputAttributes();
- const bool declare_clip_distances =
- std::any_of(output_attributes.begin(), output_attributes.end(), [](const auto& index) {
+ const auto& ir_output_attributes = ir.GetOutputAttributes();
+ const bool declare_clip_distances = std::any_of(
+ ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) {
return index == Attribute::Index::ClipDistances0123 ||
index == Attribute::Index::ClipDistances4567;
});
@@ -1246,7 +1247,7 @@ private:
const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
return {OpLoad(GetTypeDefinition(type), pointer), type};
}
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+ UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
return {v_float_zero, Type::Float};
}
@@ -1333,7 +1334,10 @@ private:
}
if (const auto comment = std::get_if<CommentNode>(&*node)) {
- Name(OpUndef(t_void), comment->GetText());
+ if (device.HasDebuggingToolAttached()) {
+ // We should insert comments with OpString instead of using named variables
+ Name(OpUndef(t_int), comment->GetText());
+ }
return {};
}
@@ -1882,7 +1886,7 @@ private:
case Tegra::Shader::TextureType::Texture3D:
return 3;
default:
- UNREACHABLE_MSG("Invalid texture type={}", static_cast<int>(type));
+ UNREACHABLE_MSG("Invalid texture type={}", type);
return 2;
}
}();
@@ -2067,6 +2071,46 @@ private:
return {};
}
+ Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) {
+ using Compare = Maxwell::ComparisonOp;
+ switch (compare_op) {
+ case Compare::NeverOld:
+ return v_false; // Never let the test pass
+ case Compare::LessOld:
+ return OpFOrdLessThan(t_bool, operand_1, operand_2);
+ case Compare::EqualOld:
+ return OpFOrdEqual(t_bool, operand_1, operand_2);
+ case Compare::LessEqualOld:
+ return OpFOrdLessThanEqual(t_bool, operand_1, operand_2);
+ case Compare::GreaterOld:
+ return OpFOrdGreaterThan(t_bool, operand_1, operand_2);
+ case Compare::NotEqualOld:
+ return OpFOrdNotEqual(t_bool, operand_1, operand_2);
+ case Compare::GreaterEqualOld:
+ return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2);
+ default:
+ UNREACHABLE();
+ return v_true;
+ }
+ }
+
+ void AlphaTest(Id pointer) {
+ if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) {
+ return;
+ }
+ const Id true_label = OpLabel();
+ const Id discard_label = OpLabel();
+ const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref);
+ const Id alpha_value = OpLoad(t_float, pointer);
+ const Id condition =
+ MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference);
+
+ OpBranchConditional(condition, true_label, discard_label);
+ AddLabel(discard_label);
+ OpKill();
+ AddLabel(true_label);
+ }
+
void PreExit() {
if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) {
const u32 position_index = out_indices.position.value();
@@ -2078,8 +2122,7 @@ private:
OpStore(z_pointer, depth);
}
if (stage == ShaderType::Fragment) {
- const auto SafeGetRegister = [&](u32 reg) {
- // TODO(Rodrigo): Replace with contains once C++20 releases
+ const auto SafeGetRegister = [this](u32 reg) {
if (const auto it = registers.find(reg); it != registers.end()) {
return OpLoad(t_float, it->second);
}
@@ -2089,8 +2132,6 @@ private:
UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
"Sample mask write is unimplemented");
- // TODO(Rodrigo): Alpha testing
-
// Write the color outputs using the data in the shader registers, disabled
// rendertargets/components are skipped in the register assignment.
u32 current_reg = 0;
@@ -2102,6 +2143,9 @@ private:
}
const Id pointer = AccessElement(t_out_float, frag_colors[rt], component);
OpStore(pointer, SafeGetRegister(current_reg));
+ if (rt == 0 && component == 3) {
+ AlphaTest(pointer);
+ }
++current_reg;
}
}
@@ -2701,7 +2745,7 @@ private:
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
- const VKDevice& device;
+ const Device& device;
const ShaderIR& ir;
const ShaderType stage;
const Tegra::Shader::Header header;
@@ -2843,7 +2887,7 @@ private:
class ExprDecompiler {
public:
- explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {}
+ explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
Id operator()(const ExprAnd& expr) {
const Id type_def = decomp.GetTypeDefinition(Type::Bool);
@@ -2899,7 +2943,7 @@ private:
class ASTDecompiler {
public:
- explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {}
+ explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
void operator()(const ASTProgram& ast) {
ASTNode current = ast.nodes.GetFirst();
@@ -3062,7 +3106,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
}
for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written);
+ entries.global_buffers.emplace_back(GlobalBufferEntry{
+ .cbuf_index = base.cbuf_index,
+ .cbuf_offset = base.cbuf_offset,
+ .is_written = usage.is_written,
+ });
}
for (const auto& sampler : ir.GetSamplers()) {
if (sampler.is_buffer) {
@@ -3083,13 +3131,16 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
entries.attributes.insert(GetGenericAttributeLocation(attribute));
}
}
+ for (const auto& buffer : entries.const_buffers) {
+ entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
+ }
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
entries.uses_warps = ir.UsesWarps();
return entries;
}
-std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
ShaderType stage, const VideoCommon::Shader::Registry& registry,
const Specialization& specialization) {
return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 2b0e90396..5d94132a5 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,23 +15,21 @@
#include "video_core/shader/shader_ir.h"
namespace Vulkan {
-class VKDevice;
-}
-namespace Vulkan {
+class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using UniformTexelEntry = VideoCommon::Shader::Sampler;
-using SamplerEntry = VideoCommon::Shader::Sampler;
-using StorageTexelEntry = VideoCommon::Shader::Image;
-using ImageEntry = VideoCommon::Shader::Image;
+using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
+using SamplerEntry = VideoCommon::Shader::SamplerEntry;
+using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
+using ImageEntry = VideoCommon::Shader::ImageEntry;
constexpr u32 DESCRIPTOR_SET = 0;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
- explicit constexpr ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, u32 index)
- : VideoCommon::Shader::ConstBuffer{entry}, index{index} {}
+ explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_)
+ : ConstBuffer{entry_}, index{index_} {}
constexpr u32 GetIndex() const {
return index;
@@ -41,24 +39,7 @@ private:
u32 index{};
};
-class GlobalBufferEntry {
-public:
- constexpr explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset, bool is_written)
- : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_written{is_written} {}
-
- constexpr u32 GetCbufIndex() const {
- return cbuf_index;
- }
-
- constexpr u32 GetCbufOffset() const {
- return cbuf_offset;
- }
-
- constexpr bool IsWritten() const {
- return is_written;
- }
-
-private:
+struct GlobalBufferEntry {
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_written{};
@@ -80,6 +61,7 @@ struct ShaderEntries {
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
+ u32 enabled_uniform_buffers{};
bool uses_warps{};
};
@@ -95,6 +77,9 @@ struct Specialization final {
std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
+ bool early_fragment_tests{};
+ float alpha_test_ref{};
+ Maxwell::ComparisonOp alpha_test_func{};
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -106,7 +91,7 @@ struct SPIRVShader {
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
-std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage,
const VideoCommon::Shader::Registry& registry,
const Specialization& specialization);
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index c1a218d76..aaad4f292 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -7,24 +7,19 @@
#include "common/assert.h"
#include "common/common_types.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) {
- // Avoid undefined behavior by copying to a staging allocation
- ASSERT(code_size % sizeof(u32) == 0);
- const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
- std::memcpy(data.get(), code_data, code_size);
-
+vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) {
return device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .codeSize = code_size,
- .pCode = data.get(),
+ .codeSize = static_cast<u32>(code.size_bytes()),
+ .pCode = code.data(),
});
}
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h
index d1d3f3cae..9517cbe84 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.h
+++ b/src/video_core/renderer_vulkan/vk_shader_util.h
@@ -4,13 +4,15 @@
#pragma once
+#include <span>
+
#include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-class VKDevice;
+class Device;
-vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data);
+vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code);
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 5eca0ab91..7a1232497 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -3,76 +3,214 @@
// Refer to the license.txt file included.
#include <algorithm>
-#include <unordered_map>
#include <utility>
#include <vector>
+#include <fmt/format.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
+namespace {
+// Maximum potential alignment of a Vulkan buffer
+constexpr VkDeviceSize MAX_ALIGNMENT = 256;
+// Maximum size to put elements in the stream buffer
+constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
+// Stream buffer size in bytes
+constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
-VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence,
- u64 last_epoch)
- : buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {}
+constexpr VkMemoryPropertyFlags HOST_FLAGS =
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
-VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept {
- buffer = std::move(rhs.buffer);
- watch = std::move(rhs.watch);
- last_epoch = rhs.last_epoch;
+bool IsStreamHeap(VkMemoryHeap heap) noexcept {
+ return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
}
-VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default;
+std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
+ VkMemoryPropertyFlags flags) noexcept {
+ for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+ if (((type_mask >> type_index) & 1) == 0) {
+ // Memory type is incompatible
+ continue;
+ }
+ const VkMemoryType& memory_type = props.memoryTypes[type_index];
+ if ((memory_type.propertyFlags & flags) != flags) {
+ // Memory type doesn't have the flags we want
+ continue;
+ }
+ if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
+ // Memory heap is not suitable for streaming
+ continue;
+ }
+ // Success!
+ return type_index;
+ }
+ return std::nullopt;
+}
-VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=(
- StagingBuffer&& rhs) noexcept {
- buffer = std::move(rhs.buffer);
- watch = std::move(rhs.watch);
- last_epoch = rhs.last_epoch;
- return *this;
+u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
+ // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
+ std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
+ if (type) {
+ return *type;
+ }
+ // Otherwise try without the DEVICE_LOCAL_BIT
+ type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
+ if (type) {
+ return *type;
+ }
+ // This should never happen, and in case it does, signal it as an out of memory situation
+ throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
-VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler)
- : device{device}, memory_manager{memory_manager}, scheduler{scheduler} {}
+size_t Region(size_t iterator) noexcept {
+ return iterator / REGION_SIZE;
+}
+} // Anonymous namespace
-VKStagingBufferPool::~VKStagingBufferPool() = default;
+StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
+ VKScheduler& scheduler_)
+ : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
+ const vk::Device& dev = device.GetLogical();
+ stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = STREAM_BUFFER_SIZE,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
+ if (device.HasDebuggingToolAttached()) {
+ stream_buffer.SetObjectNameEXT("Stream Buffer");
+ }
+ VkMemoryDedicatedRequirements dedicated_reqs{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+ .pNext = nullptr,
+ .prefersDedicatedAllocation = VK_FALSE,
+ .requiresDedicatedAllocation = VK_FALSE,
+ };
+ const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
+ const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
+ dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
+ const VkMemoryDedicatedAllocateInfo dedicated_info{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .image = nullptr,
+ .buffer = *stream_buffer,
+ };
+ const auto memory_properties = device.GetPhysical().GetMemoryProperties();
+ stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = make_dedicated ? &dedicated_info : nullptr,
+ .allocationSize = requirements.size,
+ .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
+ });
+ if (device.HasDebuggingToolAttached()) {
+ stream_memory.SetObjectNameEXT("Stream Buffer Memory");
+ }
+ stream_buffer.BindMemory(*stream_memory, 0);
+ stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
+}
-VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visible) {
- if (const auto buffer = TryGetReservedBuffer(size, host_visible)) {
- return *buffer;
+StagingBufferPool::~StagingBufferPool() = default;
+
+StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
+ if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
+ return GetStreamBuffer(size);
}
- return CreateStagingBuffer(size, host_visible);
+ return GetStagingBuffer(size, usage);
}
-void VKStagingBufferPool::TickFrame() {
- ++epoch;
- current_delete_level = (current_delete_level + 1) % NumLevels;
+void StagingBufferPool::TickFrame() {
+ current_delete_level = (current_delete_level + 1) % NUM_LEVELS;
- ReleaseCache(true);
- ReleaseCache(false);
+ ReleaseCache(MemoryUsage::DeviceLocal);
+ ReleaseCache(MemoryUsage::Upload);
+ ReleaseCache(MemoryUsage::Download);
}
-VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
- for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
- if (entry.watch.TryWatch(scheduler.GetFence())) {
- entry.last_epoch = epoch;
- return &*entry.buffer;
+StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
+ if (AreRegionsActive(Region(free_iterator) + 1,
+ std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
+ // Avoid waiting for the previous usages to be free
+ return GetStagingBuffer(size, MemoryUsage::Upload);
+ }
+ const u64 current_tick = scheduler.CurrentTick();
+ std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
+ current_tick);
+ used_iterator = iterator;
+ free_iterator = std::max(free_iterator, iterator + size);
+
+ if (iterator + size >= STREAM_BUFFER_SIZE) {
+ std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
+ current_tick);
+ used_iterator = 0;
+ iterator = 0;
+ free_iterator = size;
+
+ if (AreRegionsActive(0, Region(size) + 1)) {
+ // Avoid waiting for the previous usages to be free
+ return GetStagingBuffer(size, MemoryUsage::Upload);
}
}
- return nullptr;
+ const size_t offset = iterator;
+ iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+ return StagingBufferRef{
+ .buffer = *stream_buffer,
+ .offset = static_cast<VkDeviceSize>(offset),
+ .mapped_span = std::span<u8>(stream_pointer + offset, size),
+ };
}
-VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
- const u32 log2 = Common::Log2Ceil64(size);
+bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
+ const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
+ return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
+ [gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
+};
- auto buffer = std::make_unique<VKBuffer>();
- buffer->handle = device.GetLogical().CreateBuffer({
+StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
+ if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
+ return *ref;
+ }
+ return CreateStagingBuffer(size, usage);
+}
+
+std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
+ MemoryUsage usage) {
+ StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
+
+ const auto is_free = [this](const StagingBuffer& entry) {
+ return scheduler.IsFree(entry.tick);
+ };
+ auto& entries = cache_level.entries;
+ const auto hint_it = entries.begin() + cache_level.iterate_index;
+ auto it = std::find_if(entries.begin() + cache_level.iterate_index, entries.end(), is_free);
+ if (it == entries.end()) {
+ it = std::find_if(entries.begin(), hint_it, is_free);
+ if (it == hint_it) {
+ return std::nullopt;
+ }
+ }
+ cache_level.iterate_index = std::distance(entries.begin(), it) + 1;
+ it->tick = scheduler.CurrentTick();
+ return it->Ref();
+}
+
+StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) {
+ const u32 log2 = Common::Log2Ceil64(size);
+ vk::Buffer buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -84,48 +222,63 @@ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_v
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
- buffer->commit = memory_manager.Commit(buffer->handle, host_visible);
-
- auto& entries = GetCache(host_visible)[log2].entries;
- return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer;
-}
+ if (device.HasDebuggingToolAttached()) {
+ ++buffer_index;
+ buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
+ }
+ MemoryCommit commit = memory_allocator.Commit(buffer, usage);
+ const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
-VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
- return host_visible ? host_staging_buffers : device_staging_buffers;
+ StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
+ .buffer = std::move(buffer),
+ .commit = std::move(commit),
+ .mapped_span = mapped_span,
+ .tick = scheduler.CurrentTick(),
+ });
+ return entry.Ref();
}
-void VKStagingBufferPool::ReleaseCache(bool host_visible) {
- auto& cache = GetCache(host_visible);
- const u64 size = ReleaseLevel(cache, current_delete_level);
- if (size == 0) {
- return;
+StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(MemoryUsage usage) {
+ switch (usage) {
+ case MemoryUsage::DeviceLocal:
+ return device_local_cache;
+ case MemoryUsage::Upload:
+ return upload_cache;
+ case MemoryUsage::Download:
+ return download_cache;
+ default:
+ UNREACHABLE_MSG("Invalid memory usage={}", usage);
+ return upload_cache;
}
}
-u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
- static constexpr std::size_t deletions_per_tick = 16;
+void StagingBufferPool::ReleaseCache(MemoryUsage usage) {
+ ReleaseLevel(GetCache(usage), current_delete_level);
+}
+void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) {
+ constexpr size_t deletions_per_tick = 16;
auto& staging = cache[log2];
auto& entries = staging.entries;
- const std::size_t old_size = entries.size();
+ const size_t old_size = entries.size();
- const auto is_deleteable = [this](const auto& entry) {
- static constexpr u64 epochs_to_destroy = 180;
- return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
+ const auto is_deleteable = [this](const StagingBuffer& entry) {
+ return scheduler.IsFree(entry.tick);
};
- const std::size_t begin_offset = staging.delete_index;
- const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
- const auto begin = std::begin(entries) + begin_offset;
- const auto end = std::begin(entries) + end_offset;
+ const size_t begin_offset = staging.delete_index;
+ const size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
+ const auto begin = entries.begin() + begin_offset;
+ const auto end = entries.begin() + end_offset;
entries.erase(std::remove_if(begin, end, is_deleteable), end);
- const std::size_t new_size = entries.size();
+ const size_t new_size = entries.size();
staging.delete_index += deletions_per_tick;
if (staging.delete_index >= new_size) {
staging.delete_index = 0;
}
-
- return (1ULL << log2) * (old_size - new_size);
+ if (staging.iterate_index > new_size) {
+ staging.iterate_index = 0;
+ }
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 3c4901437..69f7618de 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -9,73 +9,97 @@
#include "common/common_types.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-class VKDevice;
-class VKFenceWatch;
+class Device;
class VKScheduler;
-struct VKBuffer final {
- vk::Buffer handle;
- VKMemoryCommit commit;
+struct StagingBufferRef {
+ VkBuffer buffer;
+ VkDeviceSize offset;
+ std::span<u8> mapped_span;
};
-class VKStagingBufferPool final {
+class StagingBufferPool {
public:
- explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler);
- ~VKStagingBufferPool();
+ static constexpr size_t NUM_SYNCS = 16;
- VKBuffer& GetUnusedBuffer(std::size_t size, bool host_visible);
+ explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
+ VKScheduler& scheduler);
+ ~StagingBufferPool();
+
+ StagingBufferRef Request(size_t size, MemoryUsage usage);
void TickFrame();
private:
- struct StagingBuffer final {
- explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch);
- StagingBuffer(StagingBuffer&& rhs) noexcept;
- StagingBuffer(const StagingBuffer&) = delete;
- ~StagingBuffer();
-
- StagingBuffer& operator=(StagingBuffer&& rhs) noexcept;
+ struct StreamBufferCommit {
+ size_t upper_bound;
+ u64 tick;
+ };
- std::unique_ptr<VKBuffer> buffer;
- VKFenceWatch watch;
- u64 last_epoch = 0;
+ struct StagingBuffer {
+ vk::Buffer buffer;
+ MemoryCommit commit;
+ std::span<u8> mapped_span;
+ u64 tick = 0;
+
+ StagingBufferRef Ref() const noexcept {
+ return {
+ .buffer = *buffer,
+ .offset = 0,
+ .mapped_span = mapped_span,
+ };
+ }
};
- struct StagingBuffers final {
+ struct StagingBuffers {
std::vector<StagingBuffer> entries;
- std::size_t delete_index = 0;
+ size_t delete_index = 0;
+ size_t iterate_index = 0;
};
- static constexpr std::size_t NumLevels = sizeof(std::size_t) * CHAR_BIT;
- using StagingBuffersCache = std::array<StagingBuffers, NumLevels>;
+ static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
+ using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
+
+ StagingBufferRef GetStreamBuffer(size_t size);
- VKBuffer* TryGetReservedBuffer(std::size_t size, bool host_visible);
+ bool AreRegionsActive(size_t region_begin, size_t region_end) const;
- VKBuffer& CreateStagingBuffer(std::size_t size, bool host_visible);
+ StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
- StagingBuffersCache& GetCache(bool host_visible);
+ std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
- void ReleaseCache(bool host_visible);
+ StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
- u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2);
+ StagingBuffersCache& GetCache(MemoryUsage usage);
- const VKDevice& device;
- VKMemoryManager& memory_manager;
+ void ReleaseCache(MemoryUsage usage);
+
+ void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
+
+ const Device& device;
+ MemoryAllocator& memory_allocator;
VKScheduler& scheduler;
- StagingBuffersCache host_staging_buffers;
- StagingBuffersCache device_staging_buffers;
+ vk::Buffer stream_buffer;
+ vk::DeviceMemory stream_memory;
+ u8* stream_pointer = nullptr;
+
+ size_t iterator = 0;
+ size_t used_iterator = 0;
+ size_t free_iterator = 0;
+ std::array<u64, NUM_SYNCS> sync_ticks{};
- u64 epoch = 0;
+ StagingBuffersCache device_local_cache;
+ StagingBuffersCache upload_cache;
+ StagingBuffersCache download_cache;
- std::size_t current_delete_level = 0;
+ size_t current_delete_level = 0;
+ u64 buffer_index = 0;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 4bd1009f9..956f86845 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <array>
#include <cstddef>
#include <iterator>
@@ -14,12 +15,10 @@
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
-#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
+#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace Vulkan {
-
namespace {
-
using namespace Dirty;
using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D;
@@ -29,21 +28,18 @@ using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
+ static constexpr int INVALIDATION_FLAGS[]{
+ Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
+ StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
+ DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
+ };
Flags flags{};
- flags[Viewports] = true;
- flags[Scissors] = true;
- flags[DepthBias] = true;
- flags[BlendConstants] = true;
- flags[DepthBounds] = true;
- flags[StencilProperties] = true;
- flags[CullMode] = true;
- flags[DepthBoundsEnable] = true;
- flags[DepthTestEnable] = true;
- flags[DepthWriteEnable] = true;
- flags[DepthCompareOp] = true;
- flags[FrontFace] = true;
- flags[StencilOp] = true;
- flags[StencilTestEnable] = true;
+ for (const int flag : INVALIDATION_FLAGS) {
+ flags[flag] = true;
+ }
+ for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
+ flags[index] = true;
+ }
return flags;
}
@@ -130,15 +126,40 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
tables[0][OFF(stencil_enable)] = StencilTestEnable;
}
-} // Anonymous namespace
+void SetupDirtyBlending(Tables& tables) {
+ tables[0][OFF(color_mask_common)] = Blending;
+ tables[0][OFF(independent_blend_enable)] = Blending;
+ FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending);
+ FillBlock(tables[0], OFF(blend), NUM(blend), Blending);
+ FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending);
+}
+
+void SetupDirtyInstanceDivisors(Tables& tables) {
+ static constexpr size_t divisor_offset = 3;
+ for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
+ tables[0][OFF(instanced_arrays) + index] = InstanceDivisors;
+ tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] =
+ InstanceDivisors;
+ }
+}
-StateTracker::StateTracker(Core::System& system)
- : system{system}, invalidation_flags{MakeInvalidationFlags()} {}
+void SetupDirtyVertexAttributes(Tables& tables) {
+ FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes);
+}
+
+void SetupDirtyViewportSwizzles(Tables& tables) {
+ static constexpr size_t swizzle_offset = 6;
+ for (size_t index = 0; index < Regs::NumViewports; ++index) {
+ tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
+ ViewportSwizzles;
+ }
+}
+} // Anonymous namespace
-void StateTracker::Initialize() {
- auto& dirty = system.GPU().Maxwell3D().dirty;
- auto& tables = dirty.tables;
- SetupDirtyRenderTargets(tables);
+StateTracker::StateTracker(Tegra::GPU& gpu)
+ : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
+ auto& tables = gpu.Maxwell3D().dirty.tables;
+ SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyDepthBias(tables);
@@ -153,11 +174,10 @@ void StateTracker::Initialize() {
SetupDirtyFrontFace(tables);
SetupDirtyStencilOp(tables);
SetupDirtyStencilTestEnable(tables);
-}
-
-void StateTracker::InvalidateCommandBufferState() {
- system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
- current_topology = INVALID_TOPOLOGY;
+ SetupDirtyBlending(tables);
+ SetupDirtyInstanceDivisors(tables);
+ SetupDirtyVertexAttributes(tables);
+ SetupDirtyViewportSwizzles(tables);
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 13a6ce786..84e918a71 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -35,6 +35,11 @@ enum : u8 {
StencilOp,
StencilTestEnable,
+ Blending,
+ InstanceDivisors,
+ VertexAttributes,
+ ViewportSwizzles,
+
Last
};
static_assert(Last <= std::numeric_limits<u8>::max());
@@ -45,11 +50,20 @@ class StateTracker {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
- explicit StateTracker(Core::System& system);
+ explicit StateTracker(Tegra::GPU& gpu);
- void Initialize();
+ void InvalidateCommandBufferState() {
+ flags |= invalidation_flags;
+ current_topology = INVALID_TOPOLOGY;
+ }
- void InvalidateCommandBufferState();
+ void InvalidateViewports() {
+ flags[Dirty::Viewports] = true;
+ }
+
+ void InvalidateScissors() {
+ flags[Dirty::Scissors] = true;
+ }
bool TouchViewports() {
return Exchange(Dirty::Viewports, false);
@@ -121,13 +135,12 @@ private:
static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
bool Exchange(std::size_t id, bool new_value) const noexcept {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
const bool is_dirty = flags[id];
flags[id] = new_value;
return is_dirty;
}
- Core::System& system;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
};
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index a5526a3f5..a09fe084e 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -10,16 +10,19 @@
#include "common/alignment.h"
#include "common/assert.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
+constexpr VkBufferUsageFlags BUFFER_USAGE =
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
@@ -57,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
} // Anonymous namespace
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
- VkBufferUsageFlags usage)
- : device{device}, scheduler{scheduler} {
- CreateBuffers(usage);
+VKStreamBuffer::VKStreamBuffer(const Device& device_, VKScheduler& scheduler_)
+ : device{device_}, scheduler{scheduler_} {
+ CreateBuffers();
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
VKStreamBuffer::~VKStreamBuffer() = default;
-std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
+std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) {
ASSERT(size <= stream_buffer_size);
mapped_size = size;
@@ -77,7 +79,6 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
WaitPendingOperations(offset);
- bool invalidated = false;
if (offset + size > stream_buffer_size) {
// The buffer would overflow, save the amount of used watches and reset the state.
invalidation_mark = current_watch_cursor;
@@ -91,11 +92,9 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
// Ensure that we don't wait for uncommitted fences.
scheduler.Flush();
-
- invalidated = true;
}
- return {memory.Map(offset, size), offset, invalidated};
+ return std::make_pair(memory.Map(offset, size), offset);
}
void VKStreamBuffer::Unmap(u64 size) {
@@ -111,23 +110,24 @@ void VKStreamBuffer::Unmap(u64 size) {
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
- watch.fence.Watch(scheduler.GetFence());
+ watch.tick = scheduler.CurrentTick();
}
-void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
+void VKStreamBuffer::CreateBuffers() {
const auto memory_properties = device.GetPhysical().GetMemoryProperties();
const u32 preferred_type = GetMemoryType(memory_properties);
const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
// Substract from the preferred heap size some bytes to avoid getting out of memory.
const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
- const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024;
+ // As per DXVK's example, using `heap_size / 2`
+ const VkDeviceSize allocable_size = heap_size / 2;
buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
- .usage = usage,
+ .usage = BUFFER_USAGE,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@@ -157,7 +157,7 @@ void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
- watch.fence.Wait();
+ scheduler.Wait(watch.tick);
++wait_cursor;
}
}
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 689f0d276..2e9c8cb46 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -5,32 +5,29 @@
#pragma once
#include <optional>
-#include <tuple>
+#include <utility>
#include <vector>
#include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-class VKDevice;
-class VKFence;
+class Device;
class VKFenceWatch;
class VKScheduler;
class VKStreamBuffer final {
public:
- explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
- VkBufferUsageFlags usage);
+ explicit VKStreamBuffer(const Device& device, VKScheduler& scheduler);
~VKStreamBuffer();
/**
* Reserves a region of memory from the stream buffer.
* @param size Size to reserve.
- * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
- * offset and a boolean that's true when buffer has been invalidated.
+ * @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/
- std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
+ std::pair<u8*, u64> Map(u64 size, u64 alignment);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Unmap(u64 size);
@@ -44,20 +41,20 @@ public:
}
private:
- struct Watch final {
- VKFenceWatch fence;
+ struct Watch {
+ u64 tick{};
u64 upper_bound{};
};
/// Creates Vulkan buffer handles committing the required the required memory.
- void CreateBuffers(VkBufferUsageFlags usage);
+ void CreateBuffers();
/// Increases the amount of watches available.
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
void WaitPendingOperations(u64 requested_upper_bound);
- const VKDevice& device; ///< Vulkan device manager.
+ const Device& device; ///< Vulkan device manager.
VKScheduler& scheduler; ///< Command scheduler.
vk::Buffer buffer; ///< Mapped buffer.
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 6bfd2abae..0b63bd6c8 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -11,10 +11,10 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/frontend/framebuffer_layout.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
@@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
} // Anonymous namespace
-VKSwapchain::VKSwapchain(VkSurfaceKHR surface, const VKDevice& device)
- : surface{surface}, device{device} {}
+VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
+ u32 width, u32 height, bool srgb)
+ : surface{surface_}, device{device_}, scheduler{scheduler_} {
+ Create(width, height, srgb);
+}
VKSwapchain::~VKSwapchain() = default;
@@ -75,21 +78,18 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
CreateSemaphores();
CreateImageViews();
- fences.resize(image_count, nullptr);
+ resource_ticks.clear();
+ resource_ticks.resize(image_count);
}
void VKSwapchain::AcquireNextImage() {
device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
*present_semaphores[frame_index], {}, &image_index);
- if (auto& fence = fences[image_index]; fence) {
- fence->Wait();
- fence->Release();
- fence = nullptr;
- }
+ scheduler.Wait(resource_ticks[image_index]);
}
-bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) {
+bool VKSwapchain::Present(VkSemaphore render_semaphore) {
const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
const auto present_queue{device.GetPresentQueue()};
@@ -123,8 +123,7 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) {
break;
}
- ASSERT(fences[image_index] == nullptr);
- fences[image_index] = &fence;
+ resource_ticks[image_index] = scheduler.CurrentTick();
frame_index = (frame_index + 1) % static_cast<u32>(image_count);
return recreated;
}
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index a35d61345..a728511e0 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -7,7 +7,7 @@
#include <vector>
#include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Layout {
struct FramebufferLayout;
@@ -15,12 +15,13 @@ struct FramebufferLayout;
namespace Vulkan {
-class VKDevice;
-class VKFence;
+class Device;
+class VKScheduler;
class VKSwapchain {
public:
- explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device);
+ explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
+ u32 width, u32 height, bool srgb);
~VKSwapchain();
/// Creates (or recreates) the swapchain with a given size.
@@ -31,7 +32,7 @@ public:
/// Presents the rendered image to the swapchain. Returns true when the swapchains had to be
/// recreated. Takes responsability for the ownership of fence.
- bool Present(VkSemaphore render_semaphore, VKFence& fence);
+ bool Present(VkSemaphore render_semaphore);
/// Returns true when the framebuffer layout has changed.
bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
@@ -73,7 +74,8 @@ private:
void Destroy();
const VkSurfaceKHR surface;
- const VKDevice& device;
+ const Device& device;
+ VKScheduler& scheduler;
vk::SwapchainKHR swapchain;
@@ -81,7 +83,7 @@ private:
std::vector<VkImage> images;
std::vector<vk::ImageView> image_views;
std::vector<vk::Framebuffer> framebuffers;
- std::vector<VKFence*> fences;
+ std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> present_semaphores;
u32 image_index{};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 2c6f54101..22a1014a9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -4,614 +4,1167 @@
#include <algorithm>
#include <array>
-#include <cstddef>
-#include <cstring>
-#include <memory>
-#include <variant>
+#include <span>
#include <vector>
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "core/core.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/morton.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-using VideoCore::MortonSwizzle;
-using VideoCore::MortonSwizzleMode;
-
+using Tegra::Engines::Fermi2D;
using Tegra::Texture::SwizzleSource;
-using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::SurfaceTarget;
+using Tegra::Texture::TextureMipmapFilter;
+using VideoCommon::BufferImageCopy;
+using VideoCommon::ImageInfo;
+using VideoCommon::ImageType;
+using VideoCommon::SubresourceRange;
+using VideoCore::Surface::IsPixelFormatASTC;
namespace {
-VkImageType SurfaceTargetToImage(SurfaceTarget target) {
- switch (target) {
- case SurfaceTarget::Texture1D:
- case SurfaceTarget::Texture1DArray:
+constexpr std::array ATTACHMENT_REFERENCES{
+ VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
+ VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
+ VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
+ VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
+ VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
+ VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
+ VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
+ VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
+ VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
+};
+
+constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
+ if (color == std::array<float, 4>{0, 0, 0, 0}) {
+ return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
+ } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
+ return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
+ } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
+ return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
+ }
+ if (color[0] + color[1] + color[2] > 1.35f) {
+ // If color elements are brighter than roughly 0.5 average, use white border
+ return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
+ } else if (color[3] > 0.5f) {
+ return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
+ } else {
+ return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
+ }
+}
+
+[[nodiscard]] VkImageType ConvertImageType(const ImageType type) {
+ switch (type) {
+ case ImageType::e1D:
return VK_IMAGE_TYPE_1D;
- case SurfaceTarget::Texture2D:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubemap:
- case SurfaceTarget::TextureCubeArray:
+ case ImageType::e2D:
+ case ImageType::Linear:
return VK_IMAGE_TYPE_2D;
- case SurfaceTarget::Texture3D:
+ case ImageType::e3D:
return VK_IMAGE_TYPE_3D;
- case SurfaceTarget::TextureBuffer:
- UNREACHABLE();
- return {};
+ case ImageType::Buffer:
+ break;
}
- UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target));
+ UNREACHABLE_MSG("Invalid image type={}", type);
return {};
}
-VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) {
- if (pixel_format < PixelFormat::MaxColorFormat) {
- return VK_IMAGE_ASPECT_COLOR_BIT;
- } else if (pixel_format < PixelFormat::MaxDepthFormat) {
- return VK_IMAGE_ASPECT_DEPTH_BIT;
- } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) {
- return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
- } else {
- UNREACHABLE_MSG("Invalid pixel format={}", static_cast<int>(pixel_format));
- return VK_IMAGE_ASPECT_COLOR_BIT;
+[[nodiscard]] VkSampleCountFlagBits ConvertSampleCount(u32 num_samples) {
+ switch (num_samples) {
+ case 1:
+ return VK_SAMPLE_COUNT_1_BIT;
+ case 2:
+ return VK_SAMPLE_COUNT_2_BIT;
+ case 4:
+ return VK_SAMPLE_COUNT_4_BIT;
+ case 8:
+ return VK_SAMPLE_COUNT_8_BIT;
+ case 16:
+ return VK_SAMPLE_COUNT_16_BIT;
+ default:
+ UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
+ return VK_SAMPLE_COUNT_1_BIT;
}
}
-VkImageViewType GetImageViewType(SurfaceTarget target) {
- switch (target) {
- case SurfaceTarget::Texture1D:
- return VK_IMAGE_VIEW_TYPE_1D;
- case SurfaceTarget::Texture2D:
- return VK_IMAGE_VIEW_TYPE_2D;
- case SurfaceTarget::Texture3D:
- return VK_IMAGE_VIEW_TYPE_3D;
- case SurfaceTarget::Texture1DArray:
- return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
- case SurfaceTarget::Texture2DArray:
- return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
- case SurfaceTarget::TextureCubemap:
- return VK_IMAGE_VIEW_TYPE_CUBE;
- case SurfaceTarget::TextureCubeArray:
- return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
- case SurfaceTarget::TextureBuffer:
- break;
+[[nodiscard]] VkImageUsageFlags ImageUsageFlags(const MaxwellToVK::FormatInfo& info,
+ PixelFormat format) {
+ VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT;
+ if (info.attachable) {
+ switch (VideoCore::Surface::GetFormatType(format)) {
+ case VideoCore::Surface::SurfaceType::ColorTexture:
+ usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ break;
+ case VideoCore::Surface::SurfaceType::Depth:
+ case VideoCore::Surface::SurfaceType::DepthStencil:
+ usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid surface type");
+ }
}
- UNREACHABLE();
- return {};
+ if (info.storage) {
+ usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+ }
+ return usage;
}
-vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
- std::size_t host_memory_size) {
- // TODO(Rodrigo): Move texture buffer creation to the buffer cache
- return device.GetLogical().CreateBuffer({
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+/// Returns the preferred format for a VkImage
+[[nodiscard]] PixelFormat StorageFormat(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::A8B8G8R8_SRGB:
+ return PixelFormat::A8B8G8R8_UNORM;
+ default:
+ return format;
+ }
+}
+
+[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info) {
+ const PixelFormat format = StorageFormat(info.format);
+ const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format);
+ VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ if (info.type == ImageType::e2D && info.resources.layers >= 6 &&
+ info.size.width == info.size.height) {
+ flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
+ }
+ if (info.type == ImageType::e3D) {
+ flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
+ }
+ const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples);
+ return VkImageCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
- .flags = 0,
- .size = static_cast<VkDeviceSize>(host_memory_size),
- .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
- VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .flags = flags,
+ .imageType = ConvertImageType(info.type),
+ .format = format_info.format,
+ .extent{
+ .width = info.size.width >> samples_x,
+ .height = info.size.height >> samples_y,
+ .depth = info.size.depth,
+ },
+ .mipLevels = static_cast<u32>(info.resources.levels),
+ .arrayLayers = static_cast<u32>(info.resources.layers),
+ .samples = ConvertSampleCount(info.num_samples),
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = ImageUsageFlags(format_info, format),
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
-}
-
-VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
- const SurfaceParams& params, VkBuffer buffer,
- std::size_t host_memory_size) {
- ASSERT(params.IsBuffer());
-
- return {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .buffer = buffer,
- .format =
- MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format,
- .offset = 0,
- .range = static_cast<VkDeviceSize>(host_memory_size),
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
}
-VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
- ASSERT(!params.IsBuffer());
-
- const auto [format, attachable, storage] =
- MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format);
+[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) {
+ if (info.type == ImageType::Buffer) {
+ return vk::Image{};
+ }
+ return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
+}
- VkImageCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) {
+ if (info.type != ImageType::Buffer) {
+ return vk::Buffer{};
+ }
+ const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format);
+ return device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .imageType = SurfaceTargetToImage(params.target),
- .format = format,
- .extent = {},
- .mipLevels = params.num_levels,
- .arrayLayers = static_cast<u32>(params.GetNumLayers()),
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .tiling = VK_IMAGE_TILING_OPTIMAL,
- .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
+ .size = info.size.width * bytes_per_block,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- };
- if (attachable) {
- ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
- : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
- }
- if (storage) {
- ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
- }
-
- switch (params.target) {
- case SurfaceTarget::TextureCubemap:
- case SurfaceTarget::TextureCubeArray:
- ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
- [[fallthrough]];
- case SurfaceTarget::Texture1D:
- case SurfaceTarget::Texture1DArray:
- case SurfaceTarget::Texture2D:
- case SurfaceTarget::Texture2DArray:
- ci.extent = {params.width, params.height, 1};
- break;
- case SurfaceTarget::Texture3D:
- ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
- ci.extent = {params.width, params.height, params.depth};
- break;
- case SurfaceTarget::TextureBuffer:
- UNREACHABLE();
- }
-
- return ci;
+ });
}
-u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
- return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
- (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
+ switch (VideoCore::Surface::GetFormatType(format)) {
+ case VideoCore::Surface::SurfaceType::ColorTexture:
+ return VK_IMAGE_ASPECT_COLOR_BIT;
+ case VideoCore::Surface::SurfaceType::Depth:
+ return VK_IMAGE_ASPECT_DEPTH_BIT;
+ case VideoCore::Surface::SurfaceType::DepthStencil:
+ return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+ default:
+ UNREACHABLE_MSG("Invalid surface type");
+ return VkImageAspectFlags{};
+ }
}
-} // Anonymous namespace
-
-CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
- GPUVAddr gpu_addr, const SurfaceParams& params)
- : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system},
- device{device}, resource_manager{resource_manager},
- memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
- if (params.IsBuffer()) {
- buffer = CreateBuffer(device, params, host_memory_size);
- commit = memory_manager.Commit(buffer, false);
-
- const auto buffer_view_ci =
- GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size);
- format = buffer_view_ci.format;
-
- buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci);
- } else {
- const auto image_ci = GenerateImageCreateInfo(device, params);
- format = image_ci.format;
-
- image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format));
- commit = memory_manager.Commit(image->GetHandle(), false);
+[[nodiscard]] VkImageAspectFlags ImageViewAspectMask(const VideoCommon::ImageViewInfo& info) {
+ if (info.IsRenderTarget()) {
+ return ImageAspectMask(info.format);
}
-
- // TODO(Rodrigo): Move this to a virtual function.
- u32 num_layers = 1;
- if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
- num_layers = params.depth;
+ const bool is_first = info.Swizzle()[0] == SwizzleSource::R;
+ switch (info.format) {
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
+ case PixelFormat::S8_UINT_D24_UNORM:
+ return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
+ case PixelFormat::D16_UNORM:
+ case PixelFormat::D32_FLOAT:
+ return VK_IMAGE_ASPECT_DEPTH_BIT;
+ default:
+ return VK_IMAGE_ASPECT_COLOR_BIT;
}
- main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
}
-CachedSurface::~CachedSurface() = default;
-
-void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
- // To upload data we have to be outside of a renderpass
- scheduler.RequestOutsideRenderPassOperationContext();
+[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
+ const ImageView* image_view) {
+ using MaxwellToVK::SurfaceFormat;
+ const PixelFormat pixel_format = image_view->format;
+ return VkAttachmentDescription{
+ .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+ .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format,
+ .samples = image_view->Samples(),
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+}
- if (params.IsBuffer()) {
- UploadBuffer(staging_buffer);
- } else {
- UploadImage(staging_buffer);
+[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
+ switch (swizzle) {
+ case SwizzleSource::Zero:
+ return VK_COMPONENT_SWIZZLE_ZERO;
+ case SwizzleSource::R:
+ return VK_COMPONENT_SWIZZLE_R;
+ case SwizzleSource::G:
+ return VK_COMPONENT_SWIZZLE_G;
+ case SwizzleSource::B:
+ return VK_COMPONENT_SWIZZLE_B;
+ case SwizzleSource::A:
+ return VK_COMPONENT_SWIZZLE_A;
+ case SwizzleSource::OneFloat:
+ case SwizzleSource::OneInt:
+ return VK_COMPONENT_SWIZZLE_ONE;
}
+ UNREACHABLE_MSG("Invalid swizzle={}", swizzle);
+ return VK_COMPONENT_SWIZZLE_ZERO;
}
-void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
- UNIMPLEMENTED_IF(params.IsBuffer());
-
- if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) {
- LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed");
+[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
+ switch (type) {
+ case VideoCommon::ImageViewType::e1D:
+ return VK_IMAGE_VIEW_TYPE_1D;
+ case VideoCommon::ImageViewType::e2D:
+ return VK_IMAGE_VIEW_TYPE_2D;
+ case VideoCommon::ImageViewType::Cube:
+ return VK_IMAGE_VIEW_TYPE_CUBE;
+ case VideoCommon::ImageViewType::e3D:
+ return VK_IMAGE_VIEW_TYPE_3D;
+ case VideoCommon::ImageViewType::e1DArray:
+ return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
+ case VideoCommon::ImageViewType::e2DArray:
+ return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+ case VideoCommon::ImageViewType::CubeArray:
+ return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
+ case VideoCommon::ImageViewType::Rect:
+ LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported");
+ return VK_IMAGE_VIEW_TYPE_2D;
+ case VideoCommon::ImageViewType::Buffer:
+ UNREACHABLE_MSG("Texture buffers can't be image views");
+ return VK_IMAGE_VIEW_TYPE_1D;
}
+ UNREACHABLE_MSG("Invalid image view type={}", type);
+ return VK_IMAGE_VIEW_TYPE_2D;
+}
- // We can't copy images to buffers inside a renderpass
- scheduler.RequestOutsideRenderPassOperationContext();
+[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers(
+ VideoCommon::SubresourceLayers subresource, VkImageAspectFlags aspect_mask) {
+ return VkImageSubresourceLayers{
+ .aspectMask = aspect_mask,
+ .mipLevel = static_cast<u32>(subresource.base_level),
+ .baseArrayLayer = static_cast<u32>(subresource.base_layer),
+ .layerCount = static_cast<u32>(subresource.num_layers),
+ };
+}
- FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+[[nodiscard]] VkOffset3D MakeOffset3D(VideoCommon::Offset3D offset3d) {
+ return VkOffset3D{
+ .x = offset3d.x,
+ .y = offset3d.y,
+ .z = offset3d.z,
+ };
+}
- const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
- // TODO(Rodrigo): Do this in a single copy
- for (u32 level = 0; level < params.num_levels; ++level) {
- scheduler.Record([image = *image->GetHandle(), buffer = *buffer.handle,
- copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy);
- });
- }
- scheduler.Finish();
+[[nodiscard]] VkExtent3D MakeExtent3D(VideoCommon::Extent3D extent3d) {
+ return VkExtent3D{
+ .width = static_cast<u32>(extent3d.width),
+ .height = static_cast<u32>(extent3d.height),
+ .depth = static_cast<u32>(extent3d.depth),
+ };
+}
- // TODO(Rodrigo): Use an intern buffer for staging buffers and avoid this unnecessary memcpy.
- std::memcpy(staging_buffer.data(), buffer.commit->Map(host_memory_size), host_memory_size);
+[[nodiscard]] VkImageCopy MakeImageCopy(const VideoCommon::ImageCopy& copy,
+ VkImageAspectFlags aspect_mask) noexcept {
+ return VkImageCopy{
+ .srcSubresource = MakeImageSubresourceLayers(copy.src_subresource, aspect_mask),
+ .srcOffset = MakeOffset3D(copy.src_offset),
+ .dstSubresource = MakeImageSubresourceLayers(copy.dst_subresource, aspect_mask),
+ .dstOffset = MakeOffset3D(copy.dst_offset),
+ .extent = MakeExtent3D(copy.extent),
+ };
}
-void CachedSurface::DecorateSurfaceName() {
- // TODO(Rodrigo): Add name decorations
+[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
+ std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
+ std::vector<VkBufferCopy> result(copies.size());
+ std::ranges::transform(
+ copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
+ return VkBufferCopy{
+ .srcOffset = static_cast<VkDeviceSize>(copy.src_offset + buffer_offset),
+ .dstOffset = static_cast<VkDeviceSize>(copy.dst_offset),
+ .size = static_cast<VkDeviceSize>(copy.size),
+ };
+ });
+ return result;
}
-View CachedSurface::CreateView(const ViewParams& params) {
- // TODO(Rodrigo): Add name decorations
- return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
+[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies(
+ std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
+ struct Maker {
+ VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
+ return VkBufferImageCopy{
+ .bufferOffset = copy.buffer_offset + buffer_offset,
+ .bufferRowLength = copy.buffer_row_length,
+ .bufferImageHeight = copy.buffer_image_height,
+ .imageSubresource =
+ {
+ .aspectMask = aspect_mask,
+ .mipLevel = static_cast<u32>(copy.image_subresource.base_level),
+ .baseArrayLayer = static_cast<u32>(copy.image_subresource.base_layer),
+ .layerCount = static_cast<u32>(copy.image_subresource.num_layers),
+ },
+ .imageOffset =
+ {
+ .x = copy.image_offset.x,
+ .y = copy.image_offset.y,
+ .z = copy.image_offset.z,
+ },
+ .imageExtent =
+ {
+ .width = copy.image_extent.width,
+ .height = copy.image_extent.height,
+ .depth = copy.image_extent.depth,
+ },
+ };
+ }
+ size_t buffer_offset;
+ VkImageAspectFlags aspect_mask;
+ };
+ if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ std::vector<VkBufferImageCopy> result(copies.size() * 2);
+ std::ranges::transform(copies, result.begin(),
+ Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
+ std::ranges::transform(copies, result.begin() + copies.size(),
+ Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
+ return result;
+ } else {
+ std::vector<VkBufferImageCopy> result(copies.size());
+ std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
+ return result;
+ }
}
-void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
- const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
- std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
+[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(VkImageAspectFlags aspect_mask,
+ const SubresourceRange& range) {
+ return VkImageSubresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = static_cast<u32>(range.base.level),
+ .levelCount = static_cast<u32>(range.extent.levels),
+ .baseArrayLayer = static_cast<u32>(range.base.layer),
+ .layerCount = static_cast<u32>(range.extent.layers),
+ };
+}
- scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
- size = host_memory_size](vk::CommandBuffer cmdbuf) {
- VkBufferCopy copy;
- copy.srcOffset = 0;
- copy.dstOffset = 0;
- copy.size = size;
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) {
+ SubresourceRange range = image_view->range;
+ if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
+ // Slice image views always affect a single layer, but their subresource range corresponds
+ // to the slice. Override the value to affect a single layer.
+ range.base.layer = 0;
+ range.extent.layers = 1;
+ }
+ return MakeSubresourceRange(ImageAspectMask(image_view->format), range);
+}
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = dst_buffer;
- barrier.offset = 0;
- barrier.size = size;
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
- 0, {}, barrier, {});
- });
+[[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) {
+ return VkImageSubresourceLayers{
+ .aspectMask = ImageAspectMask(image_view->format),
+ .mipLevel = static_cast<u32>(image_view->range.base.level),
+ .baseArrayLayer = static_cast<u32>(image_view->range.base.layer),
+ .layerCount = static_cast<u32>(image_view->range.extent.layers),
+ };
}
-void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
- const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
- std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
-
- FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
-
- for (u32 level = 0; level < params.num_levels; ++level) {
- const VkBufferImageCopy copy = GetBufferImageCopy(level);
- if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
- scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(),
- copy](vk::CommandBuffer cmdbuf) {
- std::array<VkBufferImageCopy, 2> copies = {copy, copy};
- copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
- copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
- cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- copies);
- });
- } else {
- scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(),
- copy](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
- });
- }
+[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) {
+ switch (value) {
+ case SwizzleSource::G:
+ return SwizzleSource::R;
+ default:
+ return value;
}
}
-VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
- return {
- .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted),
- .bufferRowLength = 0,
- .bufferImageHeight = 0,
- .imageSubresource =
+void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
+ VkImageAspectFlags aspect_mask, bool is_initialized,
+ std::span<const VkBufferImageCopy> copies) {
+ static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
+ VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+ const VkImageMemoryBarrier read_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = WRITE_ACCESS_FLAGS,
+ .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ };
+ const VkImageMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ };
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
+ read_barrier);
+ cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies);
+ // TODO: Move this to another API
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
+ write_barrier);
+}
+
+[[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ const VkImageSubresourceLayers& dst_layers,
+ const VkImageSubresourceLayers& src_layers) {
+ return VkImageBlit{
+ .srcSubresource = src_layers,
+ .srcOffsets =
{
- .aspectMask = image->GetAspectMask(),
- .mipLevel = level,
- .baseArrayLayer = 0,
- .layerCount = static_cast<u32>(params.GetNumLayers()),
+ {
+ .x = src_region[0].x,
+ .y = src_region[0].y,
+ .z = 0,
+ },
+ {
+ .x = src_region[1].x,
+ .y = src_region[1].y,
+ .z = 1,
+ },
},
- .imageOffset = {.x = 0, .y = 0, .z = 0},
- .imageExtent =
+ .dstSubresource = dst_layers,
+ .dstOffsets =
{
- .width = params.GetMipWidth(level),
- .height = params.GetMipHeight(level),
- .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U,
+ {
+ .x = dst_region[0].x,
+ .y = dst_region[0].y,
+ .z = 0,
+ },
+ {
+ .x = dst_region[1].x,
+ .y = dst_region[1].y,
+ .z = 1,
+ },
},
};
}
-VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
- return {image->GetAspectMask(), 0, params.num_levels, 0,
- static_cast<u32>(params.GetNumLayers())};
+[[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ const VkImageSubresourceLayers& dst_layers,
+ const VkImageSubresourceLayers& src_layers) {
+ return VkImageResolve{
+ .srcSubresource = src_layers,
+ .srcOffset =
+ {
+ .x = src_region[0].x,
+ .y = src_region[0].y,
+ .z = 0,
+ },
+ .dstSubresource = dst_layers,
+ .dstOffset =
+ {
+ .x = dst_region[0].x,
+ .y = dst_region[0].y,
+ .z = 0,
+ },
+ .extent =
+ {
+ .width = static_cast<u32>(dst_region[1].x - dst_region[0].x),
+ .height = static_cast<u32>(dst_region[1].y - dst_region[0].y),
+ .depth = 1,
+ },
+ };
}
-CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
- const ViewParams& params)
- : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
- image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
- aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
- base_level{params.base_level}, num_levels{params.num_levels},
- image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
- if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
- base_layer = 0;
- num_layers = 1;
- base_slice = params.base_layer;
- num_slices = params.num_layers;
- } else {
- base_layer = params.base_layer;
- num_layers = params.num_layers;
+struct RangedBarrierRange {
+ u32 min_mip = std::numeric_limits<u32>::max();
+ u32 max_mip = std::numeric_limits<u32>::min();
+ u32 min_layer = std::numeric_limits<u32>::max();
+ u32 max_layer = std::numeric_limits<u32>::min();
+
+ void AddLayers(const VkImageSubresourceLayers& layers) {
+ min_mip = std::min(min_mip, layers.mipLevel);
+ max_mip = std::max(max_mip, layers.mipLevel + 1);
+ min_layer = std::min(min_layer, layers.baseArrayLayer);
+ max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
+ }
+
+ VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
+ return VkImageSubresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = min_mip,
+ .levelCount = max_mip - min_mip,
+ .baseArrayLayer = min_layer,
+ .layerCount = max_layer - min_layer,
+ };
}
+};
+
+} // Anonymous namespace
+
+void TextureCacheRuntime::Finish() {
+ scheduler.Finish();
}
-CachedSurfaceView::~CachedSurfaceView() = default;
+StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_buffer_pool.Request(size, MemoryUsage::Upload);
+}
-VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
- SwizzleSource z_source, SwizzleSource w_source) {
- const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
- if (last_image_view && last_swizzle == new_swizzle) {
- return last_image_view;
- }
- last_swizzle = new_swizzle;
+StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_buffer_pool.Request(size, MemoryUsage::Download);
+}
- const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
- auto& image_view = entry->second;
- if (!is_cache_miss) {
- return last_image_view = *image_view;
+void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation) {
+ const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format);
+ const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT;
+ const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT;
+ ASSERT(aspect_mask == ImageAspectMask(dst.format));
+ if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
+ blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
+ operation);
+ return;
}
-
- std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source),
- MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
- if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) {
- // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
- std::swap(swizzle[0], swizzle[2]);
+ if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ if (!device.IsBlitDepthStencilSupported()) {
+ UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa);
+ blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(),
+ dst_region, src_region, filter, operation);
+ return;
+ }
}
-
- // Games can sample depth or stencil values on textures. This is decided by the swizzle value on
- // hardware. To emulate this on Vulkan we specify it in the aspect.
- VkImageAspectFlags aspect = aspect_mask;
- if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
- UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
- const bool is_first = x_source == SwizzleSource::R;
- switch (params.pixel_format) {
- case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT:
- case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT:
- aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
- break;
- case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM:
- aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
- break;
- default:
- aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
- UNIMPLEMENTED();
+ ASSERT(src.ImageFormat() == dst.ImageFormat());
+ ASSERT(!(is_dst_msaa && !is_src_msaa));
+ ASSERT(operation == Fermi2D::Operation::SrcCopy);
+
+ const VkImage dst_image = dst.ImageHandle();
+ const VkImage src_image = src.ImageHandle();
+ const VkImageSubresourceLayers dst_layers = MakeSubresourceLayers(&dst);
+ const VkImageSubresourceLayers src_layers = MakeSubresourceLayers(&src);
+ const bool is_resolve = is_src_msaa && !is_dst_msaa;
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers,
+ aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) {
+ const std::array read_barriers{
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = src_image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ },
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = dst_image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ },
+ };
+ VkImageMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = dst_image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ };
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, nullptr, nullptr, read_barriers);
+ if (is_resolve) {
+ cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ MakeImageResolve(dst_region, src_region, dst_layers, src_layers));
+ } else {
+ const bool is_linear = filter == Fermi2D::Filter::Bilinear;
+ const VkFilter vk_filter = is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
+ cmdbuf.BlitImage(
+ src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter);
}
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, write_barrier);
+ });
+}
- // Make sure we sample the first component
- std::transform(
- swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) {
- return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component;
- });
+void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
+ switch (dst_view.format) {
+ case PixelFormat::R16_UNORM:
+ if (src_view.format == PixelFormat::D16_UNORM) {
+ return blit_image_helper.ConvertD16ToR16(dst, src_view);
+ }
+ break;
+ case PixelFormat::R32_FLOAT:
+ if (src_view.format == PixelFormat::D32_FLOAT) {
+ return blit_image_helper.ConvertD32ToR32(dst, src_view);
+ }
+ break;
+ case PixelFormat::D16_UNORM:
+ if (src_view.format == PixelFormat::R16_UNORM) {
+ return blit_image_helper.ConvertR16ToD16(dst, src_view);
+ }
+ break;
+ case PixelFormat::D32_FLOAT:
+ if (src_view.format == PixelFormat::R32_FLOAT) {
+ return blit_image_helper.ConvertR32ToD32(dst, src_view);
+ }
+ break;
+ default:
+ break;
}
+ UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format);
+}
- if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
- ASSERT(base_slice == 0);
- ASSERT(num_slices == params.depth);
- }
+void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
+ std::span<const VideoCommon::ImageCopy> copies) {
+ std::vector<VkImageCopy> vk_copies(copies.size());
+ const VkImageAspectFlags aspect_mask = dst.AspectMask();
+ ASSERT(aspect_mask == src.AspectMask());
- image_view = device.GetLogical().CreateImageView({
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .image = surface.GetImageHandle(),
- .viewType = image_view_type,
- .format = surface.GetImage().GetFormat(),
- .components =
- {
- .r = swizzle[0],
- .g = swizzle[1],
- .b = swizzle[2],
- .a = swizzle[3],
+ std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) {
+ return MakeImageCopy(copy, aspect_mask);
+ });
+ const VkImage dst_image = dst.Handle();
+ const VkImage src_image = src.Handle();
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
+ RangedBarrierRange dst_range;
+ RangedBarrierRange src_range;
+ for (const VkImageCopy& copy : vk_copies) {
+ dst_range.AddLayers(copy.dstSubresource);
+ src_range.AddLayers(copy.srcSubresource);
+ }
+ const std::array read_barriers{
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = src_image,
+ .subresourceRange = src_range.SubresourceRange(aspect_mask),
},
- .subresourceRange =
- {
- .aspectMask = aspect,
- .baseMipLevel = base_level,
- .levelCount = num_levels,
- .baseArrayLayer = base_layer,
- .layerCount = num_layers,
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = dst_image,
+ .subresourceRange = dst_range.SubresourceRange(aspect_mask),
},
+ };
+ const VkImageMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = dst_image,
+ .subresourceRange = dst_range.SubresourceRange(aspect_mask),
+ };
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, {}, {}, read_barriers);
+ cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, write_barrier);
});
-
- return last_image_view = *image_view;
}
-VkImageView CachedSurfaceView::GetAttachment() {
- if (render_target) {
- return *render_target;
+Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
+ VAddr cpu_addr_)
+ : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
+ image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)),
+ aspect_mask(ImageAspectMask(info.format)) {
+ if (image) {
+ commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
+ } else {
+ commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+ }
+ if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
+ flags |= VideoCommon::ImageFlagBits::Converted;
+ }
+ if (runtime.device.HasDebuggingToolAttached()) {
+ if (image) {
+ image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
+ } else {
+ buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
+ }
}
+}
- VkImageViewCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .image = surface.GetImageHandle(),
- .viewType = VK_IMAGE_VIEW_TYPE_1D,
- .format = surface.GetImage().GetFormat(),
- .components =
- {
- .r = VK_COMPONENT_SWIZZLE_IDENTITY,
- .g = VK_COMPONENT_SWIZZLE_IDENTITY,
- .b = VK_COMPONENT_SWIZZLE_IDENTITY,
- .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+ // TODO: Move this to another API
+ scheduler->RequestOutsideRenderPassOperationContext();
+ std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
+ const VkBuffer src_buffer = map.buffer;
+ const VkImage vk_image = *image;
+ const VkImageAspectFlags vk_aspect_mask = aspect_mask;
+ const bool is_initialized = std::exchange(initialized, true);
+ scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
+ vk_copies](vk::CommandBuffer cmdbuf) {
+ CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
+ });
+}
+
+void Image::UploadMemory(const StagingBufferRef& map,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ // TODO: Move this to another API
+ scheduler->RequestOutsideRenderPassOperationContext();
+ std::vector vk_copies = TransformBufferCopies(copies, map.offset);
+ const VkBuffer src_buffer = map.buffer;
+ const VkBuffer dst_buffer = *buffer;
+ scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
+ // TODO: Barriers
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+ });
+}
+
+void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+ std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
+ scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
+ vk_copies](vk::CommandBuffer cmdbuf) {
+ const VkImageMemoryBarrier read_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
},
- .subresourceRange =
- {
+ };
+ const VkImageMemoryBarrier image_write_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
.aspectMask = aspect_mask,
- .baseMipLevel = base_level,
- .levelCount = num_levels,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
- .layerCount = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
},
+ };
+ const VkMemoryBarrier memory_write_barrier{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, read_barrier);
+ cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, memory_write_barrier, nullptr, image_write_barrier);
+ });
+}
+
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
+ ImageId image_id_, Image& image)
+ : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
+ image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount(
+ image.info.num_samples)} {
+ const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
+ std::array<SwizzleSource, 4> swizzle{
+ SwizzleSource::R,
+ SwizzleSource::G,
+ SwizzleSource::B,
+ SwizzleSource::A,
};
- if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
- ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
- ci.subresourceRange.baseArrayLayer = base_slice;
- ci.subresourceRange.layerCount = num_slices;
- } else {
- ci.viewType = image_view_type;
- ci.subresourceRange.baseArrayLayer = base_layer;
- ci.subresourceRange.layerCount = num_layers;
+ if (!info.IsRenderTarget()) {
+ swizzle = info.Swizzle();
+ if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) {
+ std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
+ }
+ }
+ const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
+ const VkFormat vk_format = format_info.format;
+ const VkImageViewUsageCreateInfo image_view_usage{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .usage = ImageUsageFlags(format_info, format),
+ };
+ const VkImageViewCreateInfo create_info{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = &image_view_usage,
+ .flags = 0,
+ .image = image.Handle(),
+ .viewType = VkImageViewType{},
+ .format = vk_format,
+ .components{
+ .r = ComponentSwizzle(swizzle[0]),
+ .g = ComponentSwizzle(swizzle[1]),
+ .b = ComponentSwizzle(swizzle[2]),
+ .a = ComponentSwizzle(swizzle[3]),
+ },
+ .subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
+ };
+ const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) {
+ VkImageViewCreateInfo ci{create_info};
+ ci.viewType = ImageViewType(view_type);
+ if (num_layers) {
+ ci.subresourceRange.layerCount = *num_layers;
+ }
+ vk::ImageView handle = device->GetLogical().CreateImageView(ci);
+ if (device->HasDebuggingToolAttached()) {
+ handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str());
+ }
+ image_views[static_cast<size_t>(view_type)] = std::move(handle);
+ };
+ switch (info.type) {
+ case VideoCommon::ImageViewType::e1D:
+ case VideoCommon::ImageViewType::e1DArray:
+ create(VideoCommon::ImageViewType::e1D, 1);
+ create(VideoCommon::ImageViewType::e1DArray, std::nullopt);
+ render_target = Handle(VideoCommon::ImageViewType::e1DArray);
+ break;
+ case VideoCommon::ImageViewType::e2D:
+ case VideoCommon::ImageViewType::e2DArray:
+ create(VideoCommon::ImageViewType::e2D, 1);
+ create(VideoCommon::ImageViewType::e2DArray, std::nullopt);
+ render_target = Handle(VideoCommon::ImageViewType::e2DArray);
+ break;
+ case VideoCommon::ImageViewType::e3D:
+ create(VideoCommon::ImageViewType::e3D, std::nullopt);
+ render_target = Handle(VideoCommon::ImageViewType::e3D);
+ break;
+ case VideoCommon::ImageViewType::Cube:
+ case VideoCommon::ImageViewType::CubeArray:
+ create(VideoCommon::ImageViewType::Cube, 6);
+ create(VideoCommon::ImageViewType::CubeArray, std::nullopt);
+ break;
+ case VideoCommon::ImageViewType::Rect:
+ UNIMPLEMENTED();
+ break;
+ case VideoCommon::ImageViewType::Buffer:
+ buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .buffer = image.Buffer(),
+ .format = vk_format,
+ .offset = 0, // TODO: Redesign buffer cache to support this
+ .range = image.guest_size_bytes,
+ });
+ break;
}
- render_target = device.GetLogical().CreateImageView(ci);
- return *render_target;
}
-VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, VKScheduler& scheduler,
- VKStagingBufferPool& staging_pool)
- : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device},
- resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
- staging_pool{staging_pool} {}
-
-VKTextureCache::~VKTextureCache() = default;
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
+ : VideoCommon::ImageViewBase{params} {}
-Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
- return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager,
- scheduler, staging_pool, gpu_addr, params);
+VkImageView ImageView::DepthView() {
+ if (depth_view) {
+ return *depth_view;
+ }
+ depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT);
+ return *depth_view;
}
-void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
- const VideoCommon::CopyParams& copy_params) {
- const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
- const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
- UNIMPLEMENTED_IF(src_3d);
-
- // The texture cache handles depth in OpenGL terms, we have to handle it as subresource and
- // dimension respectively.
- const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z;
- const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0;
-
- const u32 extent_z = dst_3d ? copy_params.depth : 1;
- const u32 num_layers = dst_3d ? 1 : copy_params.depth;
-
- // We can't copy inside a renderpass
- scheduler.RequestOutsideRenderPassOperationContext();
-
- src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1,
- VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
- dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1,
- VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
-
- const VkImageCopy copy{
- .srcSubresource =
- {
- .aspectMask = src_surface->GetAspectMask(),
- .mipLevel = copy_params.source_level,
- .baseArrayLayer = copy_params.source_z,
- .layerCount = num_layers,
- },
- .srcOffset =
- {
- .x = static_cast<s32>(copy_params.source_x),
- .y = static_cast<s32>(copy_params.source_y),
- .z = 0,
- },
- .dstSubresource =
- {
- .aspectMask = dst_surface->GetAspectMask(),
- .mipLevel = copy_params.dest_level,
- .baseArrayLayer = dst_base_layer,
- .layerCount = num_layers,
- },
- .dstOffset =
- {
- .x = static_cast<s32>(copy_params.dest_x),
- .y = static_cast<s32>(copy_params.dest_y),
- .z = static_cast<s32>(dst_offset_z),
- },
- .extent =
- {
- .width = copy_params.width,
- .height = copy_params.height,
- .depth = extent_z,
- },
- };
+VkImageView ImageView::StencilView() {
+ if (stencil_view) {
+ return *stencil_view;
+ }
+ stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT);
+ return *stencil_view;
+}
- const VkImage src_image = src_surface->GetImageHandle();
- const VkImage dst_image = dst_surface->GetImageHandle();
- scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
+vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) {
+ return device->GetLogical().CreateImageView({
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .image = image_handle,
+ .viewType = ImageViewType(type),
+ .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format,
+ .components{
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ },
+ .subresourceRange = MakeSubresourceRange(aspect_mask, range),
});
}
-void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
- const Tegra::Engines::Fermi2D::Config& copy_config) {
- // We can't blit inside a renderpass
- scheduler.RequestOutsideRenderPassOperationContext();
-
- src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_READ_BIT);
- dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_WRITE_BIT);
-
- VkImageBlit blit;
- blit.srcSubresource = src_view->GetImageSubresourceLayers();
- blit.srcOffsets[0].x = copy_config.src_rect.left;
- blit.srcOffsets[0].y = copy_config.src_rect.top;
- blit.srcOffsets[0].z = 0;
- blit.srcOffsets[1].x = copy_config.src_rect.right;
- blit.srcOffsets[1].y = copy_config.src_rect.bottom;
- blit.srcOffsets[1].z = 1;
- blit.dstSubresource = dst_view->GetImageSubresourceLayers();
- blit.dstOffsets[0].x = copy_config.dst_rect.left;
- blit.dstOffsets[0].y = copy_config.dst_rect.top;
- blit.dstOffsets[0].z = 0;
- blit.dstOffsets[1].x = copy_config.dst_rect.right;
- blit.dstOffsets[1].y = copy_config.dst_rect.bottom;
- blit.dstOffsets[1].z = 1;
-
- const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
-
- scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
- is_linear](vk::CommandBuffer cmdbuf) {
- cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit,
- is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
+Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) {
+ const auto& device = runtime.device;
+ const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported();
+ const std::array<float, 4> color = tsc.BorderColor();
+ // C++20 bit_cast
+ VkClearColorValue border_color;
+ std::memcpy(&border_color, &color, sizeof(color));
+ const VkSamplerCustomBorderColorCreateInfoEXT border_ci{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .customBorderColor = border_color,
+ .format = VK_FORMAT_UNDEFINED,
+ };
+ const void* pnext = nullptr;
+ if (arbitrary_borders) {
+ pnext = &border_ci;
+ }
+ const VkSamplerReductionModeCreateInfoEXT reduction_ci{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT,
+ .pNext = pnext,
+ .reductionMode = MaxwellToVK::SamplerReduction(tsc.reduction_filter),
+ };
+ if (runtime.device.IsExtSamplerFilterMinmaxSupported()) {
+ pnext = &reduction_ci;
+ } else if (reduction_ci.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT) {
+ LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required");
+ }
+ // Some games have samplers with garbage. Sanitize them here.
+ const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
+ sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = pnext,
+ .flags = 0,
+ .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
+ .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
+ .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
+ .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+ .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+ .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
+ .mipLodBias = tsc.LodBias(),
+ .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
+ .maxAnisotropy = max_anisotropy,
+ .compareEnable = tsc.depth_compare_enabled,
+ .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
+ .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
+ .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
+ .borderColor =
+ arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
+ .unnormalizedCoordinates = VK_FALSE,
});
}
-void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) {
- // Currently unimplemented. PBO copies should be dropped and we should use a render pass to
- // convert from color to depth and viceversa.
- LOG_WARNING(Render_Vulkan, "Unimplemented");
+Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
+ ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
+ std::vector<VkAttachmentDescription> descriptions;
+ std::vector<VkImageView> attachments;
+ RenderPassKey renderpass_key{};
+ s32 num_layers = 1;
+
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ const ImageView* const color_buffer = color_buffers[index];
+ if (!color_buffer) {
+ renderpass_key.color_formats[index] = PixelFormat::Invalid;
+ continue;
+ }
+ descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
+ attachments.push_back(color_buffer->RenderTarget());
+ renderpass_key.color_formats[index] = color_buffer->format;
+ num_layers = std::max(num_layers, color_buffer->range.extent.layers);
+ images[num_images] = color_buffer->ImageHandle();
+ image_ranges[num_images] = MakeSubresourceRange(color_buffer);
+ samples = color_buffer->Samples();
+ ++num_images;
+ }
+ const size_t num_colors = attachments.size();
+ const VkAttachmentReference* depth_attachment =
+ depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
+ if (depth_buffer) {
+ descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
+ attachments.push_back(depth_buffer->RenderTarget());
+ renderpass_key.depth_format = depth_buffer->format;
+ num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
+ images[num_images] = depth_buffer->ImageHandle();
+ image_ranges[num_images] = MakeSubresourceRange(depth_buffer);
+ samples = depth_buffer->Samples();
+ ++num_images;
+ } else {
+ renderpass_key.depth_format = PixelFormat::Invalid;
+ }
+ renderpass_key.samples = samples;
+
+ const auto& device = runtime.device.GetLogical();
+ const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key);
+ if (is_new) {
+ const VkSubpassDescription subpass{
+ .flags = 0,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .pInputAttachments = nullptr,
+ .colorAttachmentCount = static_cast<u32>(num_colors),
+ .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
+ .pResolveAttachments = nullptr,
+ .pDepthStencilAttachment = depth_attachment,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = nullptr,
+ };
+ cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .attachmentCount = static_cast<u32>(descriptions.size()),
+ .pAttachments = descriptions.data(),
+ .subpassCount = 1,
+ .pSubpasses = &subpass,
+ .dependencyCount = 0,
+ .pDependencies = nullptr,
+ });
+ }
+ renderpass = *cache_pair->second;
+ render_area = VkExtent2D{
+ .width = key.size.width,
+ .height = key.size.height,
+ };
+ num_color_buffers = static_cast<u32>(num_colors);
+ framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .renderPass = renderpass,
+ .attachmentCount = static_cast<u32>(attachments.size()),
+ .pAttachments = attachments.data(),
+ .width = key.size.width,
+ .height = key.size.height,
+ .layers = static_cast<u32>(std::max(num_layers, 1)),
+ });
+ if (runtime.device.HasDebuggingToolAttached()) {
+ framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
+ }
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 807e26c8a..b08c23459 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -4,225 +4,253 @@
#pragma once
-#include <memory>
-#include <unordered_map>
-
-#include "common/common_types.h"
-#include "video_core/renderer_vulkan/vk_image.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-#include "video_core/texture_cache/surface_base.h"
-#include "video_core/texture_cache/texture_cache.h"
-
-namespace Core {
-class System;
-}
+#include <compare>
+#include <span>
-namespace VideoCore {
-class RasterizerInterface;
-}
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-class RasterizerVulkan;
-class VKDevice;
-class VKResourceManager;
+using VideoCommon::ImageId;
+using VideoCommon::NUM_RT;
+using VideoCommon::Offset2D;
+using VideoCommon::RenderTargets;
+using VideoCore::Surface::PixelFormat;
+
+class BlitImageHelper;
+class Device;
+class Image;
+class ImageView;
+class Framebuffer;
+class StagingBufferPool;
class VKScheduler;
-class VKStagingBufferPool;
-class CachedSurfaceView;
-class CachedSurface;
+struct RenderPassKey {
+ constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
-using Surface = std::shared_ptr<CachedSurface>;
-using View = std::shared_ptr<CachedSurfaceView>;
-using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
+ std::array<PixelFormat, NUM_RT> color_formats;
+ PixelFormat depth_format;
+ VkSampleCountFlagBits samples;
+};
-using VideoCommon::SurfaceParams;
-using VideoCommon::ViewParams;
+} // namespace Vulkan
-class CachedSurface final : public VideoCommon::SurfaceBase<View> {
- friend CachedSurfaceView;
+namespace std {
+template <>
+struct hash<Vulkan::RenderPassKey> {
+ [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
+ size_t value = static_cast<size_t>(key.depth_format) << 48;
+ value ^= static_cast<size_t>(key.samples) << 52;
+ for (size_t i = 0; i < key.color_formats.size(); ++i) {
+ value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
+ }
+ return value;
+ }
+};
+} // namespace std
-public:
- explicit CachedSurface(Core::System& system, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
- GPUVAddr gpu_addr, const SurfaceParams& params);
- ~CachedSurface();
+namespace Vulkan {
- void UploadTexture(const std::vector<u8>& staging_buffer) override;
- void DownloadTexture(std::vector<u8>& staging_buffer) override;
+struct TextureCacheRuntime {
+ const Device& device;
+ VKScheduler& scheduler;
+ MemoryAllocator& memory_allocator;
+ StagingBufferPool& staging_buffer_pool;
+ BlitImageHelper& blit_image_helper;
+ std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{};
- void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
- VkImageLayout new_layout) {
- image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels,
- new_stage_mask, new_access, new_layout);
- }
+ void Finish();
- void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
- VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
- VkImageLayout new_layout) {
- image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
- new_access, new_layout);
- }
+ [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
- VKImage& GetImage() {
- return *image;
- }
+ [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
- const VKImage& GetImage() const {
- return *image;
- }
+ void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
+ const std::array<Offset2D, 2>& dst_region,
+ const std::array<Offset2D, 2>& src_region,
+ Tegra::Engines::Fermi2D::Filter filter,
+ Tegra::Engines::Fermi2D::Operation operation);
+
+ void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
- VkImage GetImageHandle() const {
- return *image->GetHandle();
+ void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
+
+ [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept {
+ return false;
}
- VkImageAspectFlags GetAspectMask() const {
- return image->GetAspectMask();
+ void AccelerateImageUpload(Image&, const StagingBufferRef&,
+ std::span<const VideoCommon::SwizzleParameters>) {
+ UNREACHABLE();
}
- VkBufferView GetBufferViewHandle() const {
- return *buffer_view;
+ void InsertUploadMemoryBarrier() {}
+
+ bool HasBrokenTextureViewFormats() const noexcept {
+ // No known Vulkan driver has broken image views
+ return false;
}
+};
-protected:
- void DecorateSurfaceName();
+class Image : public VideoCommon::ImageBase {
+public:
+ explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
+ VAddr cpu_addr);
- View CreateView(const ViewParams& params) override;
+ void UploadMemory(const StagingBufferRef& map,
+ std::span<const VideoCommon::BufferImageCopy> copies);
-private:
- void UploadBuffer(const std::vector<u8>& staging_buffer);
+ void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
- void UploadImage(const std::vector<u8>& staging_buffer);
+ void DownloadMemory(const StagingBufferRef& map,
+ std::span<const VideoCommon::BufferImageCopy> copies);
- VkBufferImageCopy GetBufferImageCopy(u32 level) const;
+ [[nodiscard]] VkImage Handle() const noexcept {
+ return *image;
+ }
- VkImageSubresourceRange GetImageSubresourceRange() const;
+ [[nodiscard]] VkBuffer Buffer() const noexcept {
+ return *buffer;
+ }
- Core::System& system;
- const VKDevice& device;
- VKResourceManager& resource_manager;
- VKMemoryManager& memory_manager;
- VKScheduler& scheduler;
- VKStagingBufferPool& staging_pool;
+ [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept {
+ return aspect_mask;
+ }
- std::optional<VKImage> image;
+private:
+ VKScheduler* scheduler;
+ vk::Image image;
vk::Buffer buffer;
- vk::BufferView buffer_view;
- VKMemoryCommit commit;
-
- VkFormat format = VK_FORMAT_UNDEFINED;
+ MemoryCommit commit;
+ VkImageAspectFlags aspect_mask = 0;
+ bool initialized = false;
};
-class CachedSurfaceView final : public VideoCommon::ViewBase {
+class ImageView : public VideoCommon::ImageViewBase {
public:
- explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
- const ViewParams& params);
- ~CachedSurfaceView();
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
- VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source);
+ [[nodiscard]] VkImageView DepthView();
- VkImageView GetAttachment();
+ [[nodiscard]] VkImageView StencilView();
- bool IsSameSurface(const CachedSurfaceView& rhs) const {
- return &surface == &rhs.surface;
+ [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept {
+ return *image_views[static_cast<size_t>(query_type)];
}
- u32 GetWidth() const {
- return params.GetMipWidth(base_level);
+ [[nodiscard]] VkBufferView BufferView() const noexcept {
+ return *buffer_view;
}
- u32 GetHeight() const {
- return params.GetMipHeight(base_level);
+ [[nodiscard]] VkImage ImageHandle() const noexcept {
+ return image_handle;
}
- u32 GetNumLayers() const {
- return num_layers;
+ [[nodiscard]] VkImageView RenderTarget() const noexcept {
+ return render_target;
}
- bool IsBufferView() const {
- return buffer_view;
+ [[nodiscard]] PixelFormat ImageFormat() const noexcept {
+ return image_format;
}
- VkImage GetImage() const {
- return image;
+ [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
+ return samples;
}
- VkBufferView GetBufferView() const {
- return buffer_view;
- }
+private:
+ [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask);
- VkImageSubresourceRange GetImageSubresourceRange() const {
- return {aspect_mask, base_level, num_levels, base_layer, num_layers};
- }
+ const Device* device = nullptr;
+ std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views;
+ vk::ImageView depth_view;
+ vk::ImageView stencil_view;
+ vk::BufferView buffer_view;
+ VkImage image_handle = VK_NULL_HANDLE;
+ VkImageView render_target = VK_NULL_HANDLE;
+ PixelFormat image_format = PixelFormat::Invalid;
+ VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
+};
- VkImageSubresourceLayers GetImageSubresourceLayers() const {
- return {surface.GetAspectMask(), base_level, base_layer, num_layers};
- }
+class ImageAlloc : public VideoCommon::ImageAllocBase {};
- void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask,
- VkAccessFlags new_access) const {
- surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
- new_access, new_layout);
- }
+class Sampler {
+public:
+ explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
- void MarkAsModified(u64 tick) {
- surface.MarkAsModified(true, tick);
+ [[nodiscard]] VkSampler Handle() const noexcept {
+ return *sampler;
}
private:
- // Store a copy of these values to avoid double dereference when reading them
- const SurfaceParams params;
- const VkImage image;
- const VkBufferView buffer_view;
- const VkImageAspectFlags aspect_mask;
-
- const VKDevice& device;
- CachedSurface& surface;
- const u32 base_level;
- const u32 num_levels;
- const VkImageViewType image_view_type;
- u32 base_layer = 0;
- u32 num_layers = 0;
- u32 base_slice = 0;
- u32 num_slices = 0;
-
- VkImageView last_image_view = nullptr;
- u32 last_swizzle = 0;
-
- vk::ImageView render_target;
- std::unordered_map<u32, vk::ImageView> view_cache;
+ vk::Sampler sampler;
};
-class VKTextureCache final : public TextureCacheBase {
+class Framebuffer {
public:
- explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, VKScheduler& scheduler,
- VKStagingBufferPool& staging_pool);
- ~VKTextureCache();
+ explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
+ ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
-private:
- Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
+ [[nodiscard]] VkFramebuffer Handle() const noexcept {
+ return *framebuffer;
+ }
- void ImageCopy(Surface& src_surface, Surface& dst_surface,
- const VideoCommon::CopyParams& copy_params) override;
+ [[nodiscard]] VkRenderPass RenderPass() const noexcept {
+ return renderpass;
+ }
- void ImageBlit(View& src_view, View& dst_view,
- const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ [[nodiscard]] VkExtent2D RenderArea() const noexcept {
+ return render_area;
+ }
- void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
+ [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
+ return samples;
+ }
- const VKDevice& device;
- VKResourceManager& resource_manager;
- VKMemoryManager& memory_manager;
- VKScheduler& scheduler;
- VKStagingBufferPool& staging_pool;
+ [[nodiscard]] u32 NumColorBuffers() const noexcept {
+ return num_color_buffers;
+ }
+
+ [[nodiscard]] u32 NumImages() const noexcept {
+ return num_images;
+ }
+
+ [[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
+ return images;
+ }
+
+ [[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
+ return image_ranges;
+ }
+
+private:
+ vk::Framebuffer framebuffer;
+ VkRenderPass renderpass{};
+ VkExtent2D render_area{};
+ VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
+ u32 num_color_buffers = 0;
+ u32 num_images = 0;
+ std::array<VkImage, 9> images{};
+ std::array<VkImageSubresourceRange, 9> image_ranges{};
};
+struct TextureCacheParams {
+ static constexpr bool ENABLE_VALIDATION = true;
+ static constexpr bool FRAMEBUFFER_BLITS = false;
+ static constexpr bool HAS_EMULATED_COPIES = false;
+
+ using Runtime = Vulkan::TextureCacheRuntime;
+ using Image = Vulkan::Image;
+ using ImageAlloc = Vulkan::ImageAlloc;
+ using ImageView = Vulkan::ImageView;
+ using Sampler = Vulkan::Sampler;
+ using Framebuffer = Vulkan::Framebuffer;
+};
+
+using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 351c048d2..f99273c6a 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -7,15 +7,15 @@
#include "common/assert.h"
#include "common/logging/log.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler)
- : device{device}, scheduler{scheduler} {}
+VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_)
+ : device{device_}, scheduler{scheduler_} {}
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 945320c72..e214f7195 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -8,11 +8,11 @@
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-class VKDevice;
+class Device;
class VKScheduler;
struct DescriptorUpdateEntry {
@@ -31,7 +31,7 @@ struct DescriptorUpdateEntry {
class VKUpdateDescriptorQueue final {
public:
- explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler);
+ explicit VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_);
~VKUpdateDescriptorQueue();
void TickFrame();
@@ -40,32 +40,36 @@ public:
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
- void AddSampledImage(VkSampler sampler, VkImageView image_view) {
- payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
+ void AddSampledImage(VkImageView image_view, VkSampler sampler) {
+ payload.emplace_back(VkDescriptorImageInfo{
+ .sampler = sampler,
+ .imageView = image_view,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ });
}
void AddImage(VkImageView image_view) {
- payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
+ payload.emplace_back(VkDescriptorImageInfo{
+ .sampler = VK_NULL_HANDLE,
+ .imageView = image_view,
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ });
}
- void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
- payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
+ void AddBuffer(VkBuffer buffer, u64 offset, size_t size) {
+ payload.emplace_back(VkDescriptorBufferInfo{
+ .buffer = buffer,
+ .offset = offset,
+ .range = size,
+ });
}
void AddTexelBuffer(VkBufferView texel_buffer) {
payload.emplace_back(texel_buffer);
}
- VkImageLayout* LastImageLayout() {
- return &payload.back().image.imageLayout;
- }
-
- const VkImageLayout* LastImageLayout() const {
- return &payload.back().image.imageLayout;
- }
-
private:
- const VKDevice& device;
+ const Device& device;
VKScheduler& scheduler;
const DescriptorUpdateEntry* upload_start = nullptr;
diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp
deleted file mode 100644
index 53c7ef12d..000000000
--- a/src/video_core/sampler_cache.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/cityhash.h"
-#include "common/common_types.h"
-#include "video_core/sampler_cache.h"
-
-namespace VideoCommon {
-
-std::size_t SamplerCacheKey::Hash() const {
- static_assert(sizeof(raw) % sizeof(u64) == 0);
- return static_cast<std::size_t>(
- Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
-}
-
-bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
- return raw == rhs.raw;
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h
deleted file mode 100644
index cbe3ad071..000000000
--- a/src/video_core/sampler_cache.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-#include <unordered_map>
-
-#include "video_core/textures/texture.h"
-
-namespace VideoCommon {
-
-struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
- std::size_t Hash() const;
-
- bool operator==(const SamplerCacheKey& rhs) const;
-
- bool operator!=(const SamplerCacheKey& rhs) const {
- return !operator==(rhs);
- }
-};
-
-} // namespace VideoCommon
-
-namespace std {
-
-template <>
-struct hash<VideoCommon::SamplerCacheKey> {
- std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
-
-namespace VideoCommon {
-
-template <typename SamplerType, typename SamplerStorageType>
-class SamplerCache {
-public:
- SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) {
- const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
- auto& sampler = entry->second;
- if (is_cache_miss) {
- sampler = CreateSampler(tsc);
- }
- return ToSamplerType(sampler);
- }
-
-protected:
- virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0;
-
- virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0;
-
-private:
- std::unordered_map<SamplerCacheKey, SamplerStorageType> cache;
-};
-
-} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
index 3f96d9076..db11144c7 100644
--- a/src/video_core/shader/ast.cpp
+++ b/src/video_core/shader/ast.cpp
@@ -212,16 +212,15 @@ public:
}
void operator()(const ExprPredicate& expr) {
- inner += "P" + std::to_string(expr.predicate);
+ inner += fmt::format("P{}", expr.predicate);
}
void operator()(const ExprCondCode& expr) {
- u32 cc = static_cast<u32>(expr.cc);
- inner += "CC" + std::to_string(cc);
+ inner += fmt::format("CC{}", expr.cc);
}
void operator()(const ExprVar& expr) {
- inner += "V" + std::to_string(expr.var_index);
+ inner += fmt::format("V{}", expr.var_index);
}
void operator()(const ExprBoolean& expr) {
@@ -229,7 +228,7 @@ public:
}
void operator()(const ExprGprEqual& expr) {
- inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')';
+ inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value);
}
const std::string& GetResult() const {
@@ -374,8 +373,8 @@ std::string ASTManager::Print() const {
return printer.GetResult();
}
-ASTManager::ASTManager(bool full_decompile, bool disable_else_derivation)
- : full_decompile{full_decompile}, disable_else_derivation{disable_else_derivation} {};
+ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_)
+ : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {}
ASTManager::~ASTManager() {
Clear();
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
index cca13bcde..dc49b369e 100644
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@@ -76,7 +76,7 @@ public:
class ASTIfThen {
public:
- explicit ASTIfThen(Expr condition) : condition{std::move(condition)} {}
+ explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
Expr condition;
ASTZipper nodes{};
};
@@ -88,63 +88,68 @@ public:
class ASTBlockEncoded {
public:
- explicit ASTBlockEncoded(u32 start, u32 end) : start{start}, end{end} {}
+ explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {}
u32 start;
u32 end;
};
class ASTBlockDecoded {
public:
- explicit ASTBlockDecoded(NodeBlock&& new_nodes) : nodes(std::move(new_nodes)) {}
+ explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {}
NodeBlock nodes;
};
class ASTVarSet {
public:
- explicit ASTVarSet(u32 index, Expr condition) : index{index}, condition{std::move(condition)} {}
+ explicit ASTVarSet(u32 index_, Expr condition_)
+ : index{index_}, condition{std::move(condition_)} {}
+
u32 index;
Expr condition;
};
class ASTLabel {
public:
- explicit ASTLabel(u32 index) : index{index} {}
+ explicit ASTLabel(u32 index_) : index{index_} {}
u32 index;
bool unused{};
};
class ASTGoto {
public:
- explicit ASTGoto(Expr condition, u32 label) : condition{std::move(condition)}, label{label} {}
+ explicit ASTGoto(Expr condition_, u32 label_)
+ : condition{std::move(condition_)}, label{label_} {}
+
Expr condition;
u32 label;
};
class ASTDoWhile {
public:
- explicit ASTDoWhile(Expr condition) : condition{std::move(condition)} {}
+ explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
Expr condition;
ASTZipper nodes{};
};
class ASTReturn {
public:
- explicit ASTReturn(Expr condition, bool kills)
- : condition{std::move(condition)}, kills{kills} {}
+ explicit ASTReturn(Expr condition_, bool kills_)
+ : condition{std::move(condition_)}, kills{kills_} {}
+
Expr condition;
bool kills;
};
class ASTBreak {
public:
- explicit ASTBreak(Expr condition) : condition{std::move(condition)} {}
+ explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
Expr condition;
};
class ASTBase {
public:
- explicit ASTBase(ASTNode parent, ASTData data)
- : data{std::move(data)}, parent{std::move(parent)} {}
+ explicit ASTBase(ASTNode parent_, ASTData data_)
+ : data{std::move(data_)}, parent{std::move(parent_)} {}
template <class U, class... Args>
static ASTNode Make(ASTNode parent, Args&&... args) {
@@ -199,55 +204,48 @@ public:
}
std::optional<u32> GetGotoLabel() const {
- auto inner = std::get_if<ASTGoto>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTGoto>(&data)) {
return {inner->label};
}
- return {};
+ return std::nullopt;
}
Expr GetGotoCondition() const {
- auto inner = std::get_if<ASTGoto>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTGoto>(&data)) {
return inner->condition;
}
return nullptr;
}
void MarkLabelUnused() {
- auto inner = std::get_if<ASTLabel>(&data);
- if (inner) {
+ if (auto* inner = std::get_if<ASTLabel>(&data)) {
inner->unused = true;
}
}
bool IsLabelUnused() const {
- auto inner = std::get_if<ASTLabel>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTLabel>(&data)) {
return inner->unused;
}
return true;
}
std::optional<u32> GetLabelIndex() const {
- auto inner = std::get_if<ASTLabel>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTLabel>(&data)) {
return {inner->index};
}
- return {};
+ return std::nullopt;
}
Expr GetIfCondition() const {
- auto inner = std::get_if<ASTIfThen>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
return inner->condition;
}
return nullptr;
}
void SetGotoCondition(Expr new_condition) {
- auto inner = std::get_if<ASTGoto>(&data);
- if (inner) {
+ if (auto* inner = std::get_if<ASTGoto>(&data)) {
inner->condition = std::move(new_condition);
}
}
@@ -307,7 +305,7 @@ private:
class ASTManager final {
public:
- ASTManager(bool full_decompile, bool disable_else_derivation);
+ explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_);
~ASTManager();
ASTManager(const ASTManager& o) = delete;
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index f815584f7..02adcf9c7 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -13,21 +13,22 @@
namespace VideoCommon::Shader {
-AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}
+AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {}
AsyncShaders::~AsyncShaders() {
KillWorkers();
}
void AsyncShaders::AllocateWorkers() {
- // Max worker threads we should allow
- constexpr u32 MAX_THREADS = 4;
- // Deduce how many threads we can use
- const u32 threads_used = std::thread::hardware_concurrency() / 4;
- // Always allow at least 1 thread regardless of our settings
- const auto max_worker_count = std::max(1U, threads_used);
- // Don't use more than MAX_THREADS
- const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+ // Use at least one thread
+ u32 num_workers = 1;
+
+ // Deduce how many more threads we can use
+ const u32 thread_count = std::thread::hardware_concurrency();
+ if (thread_count >= 8) {
+ // Increase async workers by 1 for every 2 threads >= 8
+ num_workers += 1 + (thread_count - 8) / 2;
+ }
// If we already have workers queued, ignore
if (num_workers == worker_threads.size()) {
@@ -42,8 +43,8 @@ void AsyncShaders::AllocateWorkers() {
// Create workers
for (std::size_t i = 0; i < num_workers; i++) {
context_list.push_back(emu_window.CreateSharedContext());
- worker_threads.push_back(
- std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()));
+ worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
+ context_list[i].get());
}
}
@@ -63,6 +64,7 @@ void AsyncShaders::FreeWorkers() {
void AsyncShaders::KillWorkers() {
is_thread_exiting.store(true);
+ cv.notify_all();
for (auto& thread : worker_threads) {
thread.detach();
}
@@ -73,11 +75,11 @@ void AsyncShaders::KillWorkers() {
worker_threads.clear();
}
-bool AsyncShaders::HasWorkQueued() {
+bool AsyncShaders::HasWorkQueued() const {
return !pending_queue.empty();
}
-bool AsyncShaders::HasCompletedWork() {
+bool AsyncShaders::HasCompletedWork() const {
std::shared_lock lock{completed_mutex};
return !finished_work.empty();
}
@@ -102,11 +104,10 @@ bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
}
std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
- std::vector<AsyncShaders::Result> results;
+ std::vector<Result> results;
{
std::unique_lock lock{completed_mutex};
- results.assign(std::make_move_iterator(finished_work.begin()),
- std::make_move_iterator(finished_work.end()));
+ results = std::move(finished_work);
finished_work.clear();
}
return results;
@@ -115,11 +116,10 @@ std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
Tegra::Engines::ShaderType shader_type, u64 uid,
std::vector<u64> code, std::vector<u64> code_b,
- u32 main_offset,
- VideoCommon::Shader::CompilerSettings compiler_settings,
- const VideoCommon::Shader::Registry& registry,
- VAddr cpu_addr) {
- WorkerParams params{
+ u32 main_offset, CompilerSettings compiler_settings,
+ const Registry& registry, VAddr cpu_addr) {
+ std::unique_lock lock(queue_mutex);
+ pending_queue.push({
.backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
.device = &device,
.shader_type = shader_type,
@@ -130,35 +130,48 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
.compiler_settings = compiler_settings,
.registry = registry,
.cpu_address = cpu_addr,
- };
- std::unique_lock lock(queue_mutex);
- pending_queue.push(std::move(params));
+ .pp_cache = nullptr,
+ .vk_device = nullptr,
+ .scheduler = nullptr,
+ .descriptor_pool = nullptr,
+ .update_descriptor_queue = nullptr,
+ .bindings{},
+ .program{},
+ .key{},
+ .num_color_buffers = 0,
+ });
cv.notify_one();
}
void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
- const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+ const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
Vulkan::VKDescriptorPool& descriptor_pool,
Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
- Vulkan::VKRenderPassCache& renderpass_cache,
std::vector<VkDescriptorSetLayoutBinding> bindings,
Vulkan::SPIRVProgram program,
- Vulkan::GraphicsPipelineCacheKey key) {
- WorkerParams params{
+ Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
+ std::unique_lock lock(queue_mutex);
+ pending_queue.push({
.backend = Backend::Vulkan,
+ .device = nullptr,
+ .shader_type{},
+ .uid = 0,
+ .code{},
+ .code_b{},
+ .main_offset = 0,
+ .compiler_settings{},
+ .registry{},
+ .cpu_address = 0,
.pp_cache = pp_cache,
.vk_device = &device,
.scheduler = &scheduler,
.descriptor_pool = &descriptor_pool,
.update_descriptor_queue = &update_descriptor_queue,
- .renderpass_cache = &renderpass_cache,
- .bindings = bindings,
- .program = program,
+ .bindings = std::move(bindings),
+ .program = std::move(program),
.key = key,
- };
-
- std::unique_lock lock(queue_mutex);
- pending_queue.push(std::move(params));
+ .num_color_buffers = num_color_buffers,
+ });
cv.notify_one();
}
@@ -210,8 +223,8 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
} else if (work.backend == Backend::Vulkan) {
auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
*work.vk_device, *work.scheduler, *work.descriptor_pool,
- *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
- work.program);
+ *work.update_descriptor_queue, work.key, work.bindings, work.program,
+ work.num_color_buffers);
work.pp_cache->EmplacePipeline(std::move(pipeline));
}
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index d5ae814d5..7fdff6e56 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -5,19 +5,19 @@
#pragma once
#include <condition_variable>
-#include <deque>
#include <memory>
#include <shared_mutex>
#include <thread>
-#include "common/bit_field.h"
+
+#include <glad/glad.h>
+
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/vulkan_common/vulkan_device.h"
namespace Core::Frontend {
class EmuWindow;
@@ -57,7 +57,7 @@ public:
Tegra::Engines::ShaderType shader_type;
};
- explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
+ explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_);
~AsyncShaders();
/// Start up shader worker threads
@@ -70,34 +70,34 @@ public:
void KillWorkers();
/// Check to see if any shaders have actually been compiled
- bool HasCompletedWork();
+ [[nodiscard]] bool HasCompletedWork() const;
/// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build
/// every shader async as some shaders are only built and executed once. We try to "guess" which
/// shader would be used only once
- bool IsShaderAsync(const Tegra::GPU& gpu) const;
+ [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
/// Pulls completed compiled shaders
- std::vector<Result> GetCompletedWork();
+ [[nodiscard]] std::vector<Result> GetCompletedWork();
void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
- VideoCommon::Shader::CompilerSettings compiler_settings,
- const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
+ CompilerSettings compiler_settings, const Registry& registry,
+ VAddr cpu_addr);
- void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+ void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
Vulkan::VKScheduler& scheduler,
Vulkan::VKDescriptorPool& descriptor_pool,
Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
- Vulkan::VKRenderPassCache& renderpass_cache,
std::vector<VkDescriptorSetLayoutBinding> bindings,
- Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
+ Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
+ u32 num_color_buffers);
private:
void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
/// Check our worker queue to see if we have any work queued already
- bool HasWorkQueued();
+ [[nodiscard]] bool HasWorkQueued() const;
struct WorkerParams {
Backend backend;
@@ -108,30 +108,30 @@ private:
std::vector<u64> code;
std::vector<u64> code_b;
u32 main_offset;
- VideoCommon::Shader::CompilerSettings compiler_settings;
- std::optional<VideoCommon::Shader::Registry> registry;
+ CompilerSettings compiler_settings;
+ std::optional<Registry> registry;
VAddr cpu_address;
// For Vulkan
Vulkan::VKPipelineCache* pp_cache;
- const Vulkan::VKDevice* vk_device;
+ const Vulkan::Device* vk_device;
Vulkan::VKScheduler* scheduler;
Vulkan::VKDescriptorPool* descriptor_pool;
Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
- Vulkan::VKRenderPassCache* renderpass_cache;
std::vector<VkDescriptorSetLayoutBinding> bindings;
Vulkan::SPIRVProgram program;
Vulkan::GraphicsPipelineCacheKey key;
+ u32 num_color_buffers;
};
std::condition_variable cv;
- std::mutex queue_mutex;
- std::shared_mutex completed_mutex;
+ mutable std::mutex queue_mutex;
+ mutable std::shared_mutex completed_mutex;
std::atomic<bool> is_thread_exiting{};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
std::vector<std::thread> worker_threads;
std::queue<WorkerParams> pending_queue;
- std::vector<AsyncShaders::Result> finished_work;
+ std::vector<Result> finished_work;
Core::Frontend::EmuWindow& emu_window;
};
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 336397cdb..43d965f2f 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -66,8 +66,8 @@ struct BlockInfo {
};
struct CFGRebuildState {
- explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry)
- : program_code{program_code}, registry{registry}, start{start} {}
+ explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
+ : program_code{program_code_}, registry{registry_}, start{start_} {}
const ProgramCode& program_code;
Registry& registry;
@@ -241,10 +241,10 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
ParseInfo parse_info{};
SingleBranch single_branch{};
- const auto insert_label = [](CFGRebuildState& state, u32 address) {
- const auto pair = state.labels.emplace(address);
+ const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) {
+ const auto pair = rebuild_state.labels.emplace(label_address);
if (pair.second) {
- state.inspect_queries.push_back(address);
+ rebuild_state.inspect_queries.push_back(label_address);
}
};
@@ -257,7 +257,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
single_branch.ignore = false;
break;
}
- if (state.registered.count(offset) != 0) {
+ if (state.registered.contains(offset)) {
single_branch.address = offset;
single_branch.ignore = true;
break;
@@ -547,13 +547,13 @@ bool TryQuery(CFGRebuildState& state) {
gather_labels(q2.ssy_stack, state.ssy_labels, block);
gather_labels(q2.pbk_stack, state.pbk_labels, block);
if (std::holds_alternative<SingleBranch>(*block.branch)) {
- const auto branch = std::get_if<SingleBranch>(block.branch.get());
+ auto* branch = std::get_if<SingleBranch>(block.branch.get());
if (!branch->condition.IsUnconditional()) {
q2.address = block.end + 1;
state.queries.push_back(q2);
}
- Query conditional_query{q2};
+ auto& conditional_query = state.queries.emplace_back(q2);
if (branch->is_sync) {
if (branch->address == unassigned_branch) {
branch->address = conditional_query.ssy_stack.top();
@@ -567,21 +567,21 @@ bool TryQuery(CFGRebuildState& state) {
conditional_query.pbk_stack.pop();
}
conditional_query.address = branch->address;
- state.queries.push_back(std::move(conditional_query));
return true;
}
- const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+
+ const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
for (const auto& branch_case : multi_branch->branches) {
- Query conditional_query{q2};
+ auto& conditional_query = state.queries.emplace_back(q2);
conditional_query.address = branch_case.address;
- state.queries.push_back(std::move(conditional_query));
}
+
return true;
}
void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
- const auto get_expr = ([&](const Condition& cond) -> Expr {
- Expr result{};
+ const auto get_expr = [](const Condition& cond) -> Expr {
+ Expr result;
if (cond.cc != ConditionCode::T) {
result = MakeExpr<ExprCondCode>(cond.cc);
}
@@ -594,10 +594,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
}
Expr extra = MakeExpr<ExprPredicate>(pred);
if (negate) {
- extra = MakeExpr<ExprNot>(extra);
+ extra = MakeExpr<ExprNot>(std::move(extra));
}
if (result) {
- return MakeExpr<ExprAnd>(extra, result);
+ return MakeExpr<ExprAnd>(std::move(extra), std::move(result));
}
return extra;
}
@@ -605,9 +605,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
return result;
}
return MakeExpr<ExprBoolean>(true);
- });
+ };
+
if (std::holds_alternative<SingleBranch>(*branch_info)) {
- const auto branch = std::get_if<SingleBranch>(branch_info.get());
+ const auto* branch = std::get_if<SingleBranch>(branch_info.get());
if (branch->address < 0) {
if (branch->kill) {
mm.InsertReturn(get_expr(branch->condition), true);
@@ -619,7 +620,7 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
mm.InsertGoto(get_expr(branch->condition), branch->address);
return;
}
- const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
+ const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get());
for (const auto& branch_case : multi_branch->branches) {
mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
branch_case.address);
@@ -631,12 +632,12 @@ void DecompileShader(CFGRebuildState& state) {
for (auto label : state.labels) {
state.manager->DeclareLabel(label);
}
- for (auto& block : state.block_info) {
- if (state.labels.count(block.start) != 0) {
+ for (const auto& block : state.block_info) {
+ if (state.labels.contains(block.start)) {
state.manager->InsertLabel(block.start);
}
const bool ignore = BlockBranchIsIgnored(block.branch);
- u32 end = ignore ? block.end + 1 : block.end;
+ const u32 end = ignore ? block.end + 1 : block.end;
state.manager->InsertBlock(block.start, end);
if (!ignore) {
InsertBranch(*state.manager, block.branch);
@@ -736,7 +737,7 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
auto back = result_out->blocks.begin();
auto next = std::next(back);
while (next != result_out->blocks.end()) {
- if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
+ if (!state.labels.contains(next->start) && next->start == back->end + 1) {
back->end = next->end;
next = result_out->blocks.erase(next);
continue;
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 62a3510d8..37bf96492 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -42,10 +42,10 @@ struct Condition {
class SingleBranch {
public:
SingleBranch() = default;
- SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk,
- bool ignore)
- : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk},
- ignore{ignore} {}
+ explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_,
+ bool is_brk_, bool ignore_)
+ : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_},
+ ignore{ignore_} {}
bool operator==(const SingleBranch& b) const {
return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
@@ -65,15 +65,15 @@ public:
};
struct CaseBranch {
- CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {}
+ explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {}
u32 cmp_value;
u32 address;
};
class MultiBranch {
public:
- MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches)
- : gpr{gpr}, branches{std::move(branches)} {}
+ explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_)
+ : gpr{gpr_}, branches{std::move(branches_)} {}
u32 gpr{};
std::vector<CaseBranch> branches{};
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index eeac328a6..6576d1208 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -25,7 +25,7 @@ using Tegra::Shader::OpCode;
namespace {
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
- const std::list<Sampler>& used_samplers) {
+ const std::list<SamplerEntry>& used_samplers) {
if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
return;
}
@@ -43,9 +43,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
}
}
-std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
+std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
VideoCore::GuestDriverProfile& gpu_driver,
- const std::list<Sampler>& used_samplers) {
+ const std::list<SamplerEntry>& used_samplers) {
const u32 base_offset = sampler_to_deduce.offset;
u32 max_offset{std::numeric_limits<u32>::max()};
for (const auto& sampler : used_samplers) {
@@ -66,7 +66,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
class ASTDecoder {
public:
- ASTDecoder(ShaderIR& ir) : ir(ir) {}
+ explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
void operator()(ASTProgram& ast) {
ASTNode current = ast.nodes.GetFirst();
@@ -153,8 +153,8 @@ void ShaderIR::Decode() {
const auto& blocks = shader_info.blocks;
NodeBlock current_block;
u32 current_label = static_cast<u32>(exit_branch);
- for (auto& block : blocks) {
- if (shader_info.labels.count(block.start) != 0) {
+ for (const auto& block : blocks) {
+ if (shader_info.labels.contains(block.start)) {
insert_block(current_block, current_label);
current_block.clear();
current_label = block.start;
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 4db329fa5..15eb700e7 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -110,8 +110,7 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case SubOp::Sqrt:
return Operation(OperationCode::FSqrt, PRECISE, op_a);
default:
- UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
- static_cast<unsigned>(instr.sub_op.Value()));
+ UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
return Immediate(0);
}
}();
@@ -137,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::FCMP_RR:
- case OpCode::Id::FCMP_RC: {
+ case OpCode::Id::FCMP_RC:
+ case OpCode::Id::FCMP_IMMR: {
UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
Node op_c = GetRegister(instr.gpr39);
Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index a276aee44..88103fede 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -53,6 +53,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
absolute_a = ((instr.value >> 44) & 1) != 0;
absolute_b = ((instr.value >> 54) & 1) != 0;
break;
+ default:
+ UNREACHABLE();
+ break;
}
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 73155966f..7b5bb7003 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -83,7 +83,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
case IAdd3Height::UpperHalfWord:
return BitfieldExtract(value, 16, 16);
default:
- UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
+ UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
return Immediate(0);
}
};
@@ -258,7 +258,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
case OpCode::Id::LEA_IMM:
case OpCode::Id::LEA_RZ:
case OpCode::Id::LEA_HI: {
- auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
+ auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::LEA_R2: {
return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
@@ -294,8 +294,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
"Unhandled LEA Predicate");
- Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c));
- value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value));
+ Node value =
+ Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_));
+ value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value));
SetRegister(bb, instr.gpr0, std::move(value));
break;
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 73880db0e..73580277a 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -28,23 +28,26 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
case OpCode::Id::IADD32I: {
UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
- op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
+ op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
- const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+ Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
- SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
- SetRegister(bb, instr.gpr0, value);
+ SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
+ SetRegister(bb, instr.gpr0, std::move(value));
break;
}
case OpCode::Id::LOP32I: {
- if (instr.alu.lop32i.invert_a)
- op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
+ if (instr.alu.lop32i.invert_a) {
+ op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
+ }
- if (instr.alu.lop32i.invert_b)
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+ if (instr.alu.lop32i.invert_b) {
+ op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
+ }
- WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
- PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
+ WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
+ std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
+ instr.op_32.generates_cc != 0);
break;
}
default:
@@ -58,18 +61,18 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
Node op_b, PredicateResultMode predicate_mode, Pred predicate,
bool sets_cc) {
- const Node result = [&]() {
+ Node result = [&] {
switch (logic_op) {
case LogicOperation::And:
- return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
+ return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
case LogicOperation::Or:
- return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
+ return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
case LogicOperation::Xor:
- return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
+ return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
case LogicOperation::PassB:
return op_b;
default:
- UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
+ UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
return Immediate(0);
}
}();
@@ -84,13 +87,12 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
return;
case PredicateResultMode::NotZero: {
// Set the predicate to true if the result is not zero.
- const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
- SetPredicate(bb, static_cast<u64>(predicate), compare);
+ Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
+ SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
break;
}
default:
- UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
- static_cast<u32>(predicate_mode));
+ UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
}
}
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index b9989c88c..fea7a54df 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -244,7 +244,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::FTrunc, value);
default:
UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
- static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+ instr.conversion.f2f.rounding.Value());
return value;
}
}();
@@ -300,7 +300,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::FTrunc, PRECISE, value);
default:
UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
- static_cast<u32>(instr.conversion.f2i.rounding.Value()));
+ instr.conversion.f2i.rounding.Value());
return Immediate(0);
}
}();
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index b2e88fa20..fa83108cd 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -22,13 +22,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- PredCondition cond;
- bool bf;
- bool ftz;
- bool neg_a;
- bool abs_a;
- bool neg_b;
- bool abs_b;
+ PredCondition cond{};
+ bool bf = false;
+ bool ftz = false;
+ bool neg_a = false;
+ bool abs_a = false;
+ bool neg_b = false;
+ bool abs_b = false;
switch (opcode->get().GetId()) {
case OpCode::Id::HSET2_C:
case OpCode::Id::HSET2_IMM:
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index e75ca4fdb..5470e8cf4 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -119,6 +119,8 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
return descriptor.r_type;
}
break;
+ default:
+ break;
}
UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
return ComponentType::FLOAT;
@@ -210,19 +212,20 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 0;
case TextureFormat::R8G24:
if (component == 0) {
- return 8;
+ return 24;
}
if (component == 1) {
- return 24;
+ return 8;
}
return 0;
case TextureFormat::R8G8:
return (component == 0 || component == 1) ? 8 : 0;
case TextureFormat::G4R4:
return (component == 0 || component == 1) ? 4 : 0;
+ default:
+ UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+ return 0;
}
- UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
- return 0;
}
std::size_t GetImageComponentMask(TextureFormat format) {
@@ -257,9 +260,10 @@ std::size_t GetImageComponentMask(TextureFormat format) {
case TextureFormat::R8:
case TextureFormat::R1:
return std::size_t{R};
+ default:
+ UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+ return std::size_t{R | G | B | A};
}
- UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
- return std::size_t{R | G | B | A};
}
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
@@ -354,9 +358,9 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
auto descriptor = [this, instr] {
- std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
+ std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor;
if (instr.suldst.is_immediate) {
- descriptor =
+ sampler_descriptor =
registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
} else {
const Node image_register = GetRegister(instr.gpr39);
@@ -364,12 +368,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
static_cast<s64>(global_code.size()));
const auto buffer = std::get<1>(result);
const auto offset = std::get<2>(result);
- descriptor = registry.ObtainBindlessSampler(buffer, offset);
+ sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset);
}
- if (!descriptor) {
+ if (!sampler_descriptor) {
UNREACHABLE_MSG("Failed to obtain image descriptor");
}
- return *descriptor;
+ return *sampler_descriptor;
}();
const auto comp_mask = GetImageComponentMask(descriptor.format);
@@ -463,7 +467,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
return OperationCode::AtomicImageXor;
case Tegra::Shader::ImageAtomicOperation::Exch:
return OperationCode::AtomicImageExchange;
+ default:
+ break;
}
+ break;
default:
break;
}
@@ -490,11 +497,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
return pc;
}
-Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
+ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
const auto offset = static_cast<u32>(image.index.Value());
- const auto it = std::find_if(std::begin(used_images), std::end(used_images),
- [offset](const Image& entry) { return entry.offset == offset; });
+ const auto it =
+ std::find_if(std::begin(used_images), std::end(used_images),
+ [offset](const ImageEntry& entry) { return entry.offset == offset; });
if (it != std::end(used_images)) {
ASSERT(!it->is_bindless && it->type == type);
return *it;
@@ -504,7 +512,7 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t
return used_images.emplace_back(next_index, offset, type);
}
-Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
+ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
const Node image_register = GetRegister(reg);
const auto result =
TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
@@ -513,7 +521,7 @@ Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::Im
const auto offset = std::get<2>(result);
const auto it = std::find_if(std::begin(used_images), std::end(used_images),
- [buffer, offset](const Image& entry) {
+ [buffer, offset](const ImageEntry& entry) {
return entry.buffer == buffer && entry.offset == offset;
});
if (it != std::end(used_images)) {
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index e2bba88dd..50f4e7d35 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -47,7 +47,7 @@ OperationCode GetAtomOperation(AtomicOp op) {
case AtomicOp::Exch:
return OperationCode::AtomicIExchange;
default:
- UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
+ UNIMPLEMENTED_MSG("op={}", op);
return OperationCode::AtomicIAdd;
}
}
@@ -83,7 +83,7 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
case Tegra::Shader::UniformType::UnsignedQuad:
return 128;
default:
- UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
+ UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
return 32;
}
}
@@ -175,12 +175,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
default:
- UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value()));
+ UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
}
break;
}
case OpCode::Id::LD_L:
- LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
+ LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
[[fallthrough]];
case OpCode::Id::LD_S: {
const auto GetAddress = [&](s32 offset) {
@@ -224,7 +224,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
default:
UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
- static_cast<u32>(instr.ldst_sl.type.Value()));
+ instr.ldst_sl.type.Value());
}
break;
}
@@ -306,8 +306,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::ST_L:
- LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}",
- static_cast<u64>(instr.st_l.cache_management.Value()));
+ LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
[[fallthrough]];
case OpCode::Id::ST_S: {
const auto GetAddress = [&](s32 offset) {
@@ -340,7 +339,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
default:
UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
- static_cast<u32>(instr.ldst_sl.type.Value()));
+ instr.ldst_sl.type.Value());
}
break;
}
@@ -387,7 +386,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
case OpCode::Id::RED: {
UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
- static_cast<int>(instr.red.type.Value()));
+ instr.red.type.Value());
const auto [real_address, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true, true);
if (!real_address || !base_address) {
@@ -403,12 +402,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
instr.atom.operation == AtomicOp::Dec ||
instr.atom.operation == AtomicOp::SafeAdd,
- "operation={}", static_cast<int>(instr.atom.operation.Value()));
+ "operation={}", instr.atom.operation.Value());
UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
instr.atom.type == GlobalAtomicType::U64 ||
instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
- "type={}", static_cast<int>(instr.atom.type.Value()));
+ "type={}", instr.atom.type.Value());
const auto [real_address, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true, true);
@@ -428,10 +427,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
case OpCode::Id::ATOMS: {
UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
instr.atoms.operation == AtomicOp::Dec,
- "operation={}", static_cast<int>(instr.atoms.operation.Value()));
+ "operation={}", instr.atoms.operation.Value());
UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
instr.atoms.type == AtomicType::U64,
- "type={}", static_cast<int>(instr.atoms.type.Value()));
+ "type={}", instr.atoms.type.Value());
const bool is_signed =
instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
const s32 offset = instr.atoms.GetImmediateOffset();
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 29a7cfbfe..5f88537bc 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -34,14 +34,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::EXIT: {
- const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
- static_cast<u32>(cc));
+ const ConditionCode cc = instr.flow_condition_code;
+ UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
switch (instr.flow.cond) {
case Tegra::Shader::FlowCondition::Always:
bb.push_back(Operation(OperationCode::Exit));
- if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
+ if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
// If this is an unconditional exit then just end processing here,
// otherwise we have to account for the possibility of the condition
// not being met, so continue processing the next instruction.
@@ -56,17 +55,15 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
default:
- UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
- static_cast<u32>(instr.flow.cond.Value()));
+ UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
}
break;
}
case OpCode::Id::KIL: {
UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
- const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}",
- static_cast<u32>(cc));
+ const ConditionCode cc = instr.flow_condition_code;
+ UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);
bb.push_back(Operation(OperationCode::Discard));
break;
@@ -79,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
+ uses_y_negate = true;
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
@@ -90,11 +88,11 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
return Immediate(0U);
case SystemVariable::Tid: {
- Node value = Immediate(0);
- value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9);
- value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9);
- value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
- return value;
+ Node val = Immediate(0);
+ val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9);
+ val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9);
+ val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
+ return val;
}
case SystemVariable::TidX:
return Operation(OperationCode::LocalInvocationIdX);
@@ -130,8 +128,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
return Immediate(0u);
}
default:
- UNIMPLEMENTED_MSG("Unhandled system move: {}",
- static_cast<u32>(instr.sys20.Value()));
+ UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
return Immediate(0u);
}
}();
@@ -181,8 +178,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
}
const Node branch = Operation(OperationCode::BranchIndirect, operand);
- const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
- if (cc != Tegra::Shader::ConditionCode::T) {
+ const ConditionCode cc = instr.flow_condition_code;
+ if (cc != ConditionCode::T) {
bb.push_back(Conditional(GetConditionCode(cc), {branch}));
} else {
bb.push_back(branch);
@@ -218,9 +215,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::SYNC: {
- const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
- static_cast<u32>(cc));
+ const ConditionCode cc = instr.flow_condition_code;
+ UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);
if (decompiled) {
break;
@@ -231,9 +227,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::BRK: {
- const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
- static_cast<u32>(cc));
+ const ConditionCode cc = instr.flow_condition_code;
+ UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);
if (decompiled) {
break;
}
@@ -306,7 +301,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
case Tegra::Shader::MembarType::GL:
return OperationCode::MemoryBarrierGlobal;
default:
- UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
+ UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
return OperationCode::MemoryBarrierGlobal;
}
}();
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index d4ffa8014..a53819c15 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -125,7 +125,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
case OpCode::Id::SHF_LEFT_IMM: {
UNIMPLEMENTED_IF(instr.generates_cc);
UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
- static_cast<int>(instr.shf.xmode.Value()));
+ instr.shf.xmode.Value());
if (instr.is_b_imm) {
op_b = Immediate(static_cast<u32>(instr.shf.immediate));
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 29ebf65ba..833fa2a39 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -34,7 +34,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
case TextureType::TextureCube:
return 3;
default:
- UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
+ UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
return 0;
}
}
@@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
SamplerInfo info;
info.is_shadow = is_depth_compare;
- const std::optional<Sampler> sampler = GetSampler(instr.sampler, info);
+ const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@@ -173,9 +173,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
SamplerInfo info;
info.type = texture_type;
info.is_array = is_array;
- const std::optional<Sampler> sampler = is_bindless
- ? GetBindlessSampler(base_reg, info, index_var)
- : GetSampler(instr.sampler, info);
+ const std::optional<SamplerEntry> sampler =
+ is_bindless ? GetBindlessSampler(base_reg, info, index_var)
+ : GetSampler(instr.sampler, info);
Node4 values;
if (!sampler) {
std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
@@ -217,9 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
[[fallthrough]];
case OpCode::Id::TXQ: {
Node index_var;
- const std::optional<Sampler> sampler = is_bindless
- ? GetBindlessSampler(instr.gpr8, {}, index_var)
- : GetSampler(instr.sampler, {});
+ const std::optional<SamplerEntry> sampler =
+ is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
+ : GetSampler(instr.sampler, {});
if (!sampler) {
u32 indexer = 0;
@@ -255,8 +255,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
break;
}
default:
- UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
- static_cast<u32>(instr.txq.query_type.Value()));
+ UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
}
break;
}
@@ -273,7 +272,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
info.type = texture_type;
info.is_array = is_array;
Node index_var;
- const std::optional<Sampler> sampler =
+ const std::optional<SamplerEntry> sampler =
is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
: GetSampler(instr.sampler, info);
@@ -292,33 +291,36 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
break;
}
- std::vector<Node> coords;
-
- // TODO: Add coordinates for different samplers once other texture types are implemented.
- switch (texture_type) {
- case TextureType::Texture1D:
- coords.push_back(GetRegister(instr.gpr8));
- break;
- case TextureType::Texture2D:
- coords.push_back(GetRegister(instr.gpr8.Value() + 0));
- coords.push_back(GetRegister(instr.gpr8.Value() + 1));
- break;
- default:
- UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type));
+ const u64 base_index = is_array ? 1 : 0;
+ const u64 num_components = [texture_type] {
+ switch (texture_type) {
+ case TextureType::Texture1D:
+ return 1;
+ case TextureType::Texture2D:
+ return 2;
+ case TextureType::TextureCube:
+ return 3;
+ default:
+ UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
+ return 2;
+ }
+ }();
+ // TODO: What's the array component used for?
- // Fallback to interpreting as a 2D texture for now
- coords.push_back(GetRegister(instr.gpr8.Value() + 0));
- coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+ std::vector<Node> coords;
+ coords.reserve(num_components);
+ for (u64 component = 0; component < num_components; ++component) {
+ coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
}
+
u32 indexer = 0;
for (u32 element = 0; element < 2; ++element) {
if (!instr.tmml.IsComponentEnabled(element)) {
continue;
}
- auto params = coords;
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
- const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
- SetTemporary(bb, indexer++, value);
+ Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
+ SetTemporary(bb, indexer++, std::move(value));
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
@@ -377,14 +379,15 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
return info;
}
-std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
- SamplerInfo sampler_info) {
+std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
+ SamplerInfo sampler_info) {
const u32 offset = static_cast<u32>(sampler.index.Value());
const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
// If this sampler has already been used, return the existing mapping.
- const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
- [offset](const Sampler& entry) { return entry.offset == offset; });
+ const auto it =
+ std::find_if(used_samplers.begin(), used_samplers.end(),
+ [offset](const SamplerEntry& entry) { return entry.offset == offset; });
if (it != used_samplers.end()) {
ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
@@ -397,8 +400,8 @@ std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
*info.is_shadow, *info.is_buffer, false);
}
-std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
- Node& index_var) {
+std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
+ SamplerInfo info, Node& index_var) {
const Node sampler_register = GetRegister(reg);
const auto [base_node, tracked_sampler_info] =
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
@@ -414,7 +417,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
- [buffer, offset](const Sampler& entry) {
+ [buffer, offset](const SamplerEntry& entry) {
return entry.buffer == buffer && entry.offset == offset;
});
if (it != used_samplers.end()) {
@@ -434,11 +437,12 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
// Try to use an already created sampler if it exists
- const auto it = std::find_if(
- used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
- return offsets == std::pair{entry.offset, entry.secondary_offset} &&
- indices == std::pair{entry.buffer, entry.secondary_buffer};
- });
+ const auto it =
+ std::find_if(used_samplers.begin(), used_samplers.end(),
+ [indices, offsets](const SamplerEntry& entry) {
+ return offsets == std::pair{entry.offset, entry.secondary_offset} &&
+ indices == std::pair{entry.buffer, entry.secondary_buffer};
+ });
if (it != used_samplers.end()) {
ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
@@ -458,7 +462,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(
used_samplers.begin(), used_samplers.end(),
- [base_offset](const Sampler& entry) { return entry.offset == base_offset; });
+ [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
if (it != used_samplers.end()) {
ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
@@ -553,7 +557,6 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
const bool is_shadow = depth_compare != nullptr;
const bool is_bindless = bindless_reg.has_value();
- UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow);
ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
"Illegal texture type");
@@ -564,9 +567,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
info.is_buffer = false;
Node index_var;
- const std::optional<Sampler> sampler = is_bindless
- ? GetBindlessSampler(*bindless_reg, info, index_var)
- : GetSampler(instr.sampler, info);
+ const std::optional<SamplerEntry> sampler =
+ is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
+ : GetSampler(instr.sampler, info);
if (!sampler) {
return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
}
@@ -593,7 +596,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
lod = GetRegister(instr.gpr20.Value() + bias_offset);
break;
default:
- UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
+ UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
break;
}
@@ -723,7 +726,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
info.is_shadow = depth_compare;
Node index_var;
- const std::optional<Sampler> sampler =
+ const std::optional<SamplerEntry> sampler =
is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
: GetSampler(instr.sampler, info);
Node4 values;
@@ -763,7 +766,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
const auto texture_type{instr.tld.texture_type};
- const bool is_array{instr.tld.is_array};
+ const bool is_array{instr.tld.is_array != 0};
const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
const std::size_t coord_count{GetCoordCount(texture_type)};
@@ -782,7 +785,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
// const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
// const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
- const std::optional<Sampler> sampler = GetSampler(instr.sampler, {});
+ const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@@ -799,7 +802,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
info.type = texture_type;
info.is_array = is_array;
info.is_shadow = false;
- const std::optional<Sampler> sampler = GetSampler(instr.sampler, info);
+ const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index 11b77f795..37433d783 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -27,7 +27,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
case VoteOperation::Eq:
return OperationCode::VoteEqual;
default:
- UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op));
+ UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
return OperationCode::VoteAll;
}
}
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
index 4e8264367..cda284c72 100644
--- a/src/video_core/shader/expr.h
+++ b/src/video_core/shader/expr.h
@@ -76,7 +76,7 @@ public:
class ExprPredicate final {
public:
- explicit ExprPredicate(u32 predicate) : predicate{predicate} {}
+ explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {}
bool operator==(const ExprPredicate& b) const {
return predicate == b.predicate;
@@ -91,7 +91,7 @@ public:
class ExprCondCode final {
public:
- explicit ExprCondCode(ConditionCode cc) : cc{cc} {}
+ explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {}
bool operator==(const ExprCondCode& b) const {
return cc == b.cc;
@@ -121,7 +121,7 @@ public:
class ExprGprEqual final {
public:
- ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {}
+ explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {}
bool operator==(const ExprGprEqual& b) const {
return gpr == b.gpr && value == b.value;
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 5071c83ca..e18ccba8e 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -16,11 +16,10 @@
namespace VideoCommon::Shader {
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
- const auto& gpu{system.GPU().Maxwell3D()};
- const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
- return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+ const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
+ return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
}
bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
index be90d24fd..4624d38e6 100644
--- a/src/video_core/shader/memory_util.h
+++ b/src/video_core/shader/memory_util.h
@@ -11,10 +11,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
-namespace Core {
-class System;
-}
-
namespace Tegra {
class MemoryManager;
}
@@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
/// Gets if the current instruction offset is a scheduler instruction
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 8f230d57a..b54d33763 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -282,26 +282,27 @@ struct SeparateSamplerNode;
using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
using TrackSampler = std::shared_ptr<TrackSamplerData>;
-struct Sampler {
+struct SamplerEntry {
/// Bound samplers constructor
- constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
- : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_buffer{is_buffer}, is_indexed{is_indexed} {}
+ explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
+ bool is_shadow_, bool is_buffer_, bool is_indexed_)
+ : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
+ is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
/// Separate sampler constructor
- constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
- Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
- bool is_buffer)
- : index{index}, offset{offsets.first}, secondary_offset{offsets.second},
- buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
- is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
+ explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
+ Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
+ bool is_buffer_)
+ : index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
+ buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
+ is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
/// Bindless samplers constructor
- constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
- : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
- is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
+ explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
+ bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
+ : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
+ is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
+ }
u32 index = 0; ///< Emulated index given for the this sampler.
u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
@@ -338,15 +339,15 @@ struct BindlessSamplerNode {
u32 offset;
};
-struct Image {
+struct ImageEntry {
public:
/// Bound images constructor
- constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type)
- : index{index}, offset{offset}, type{type} {}
+ explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
+ : index{index_}, offset{offset_}, type{type_} {}
/// Bindless samplers constructor
- constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type)
- : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {}
+ explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
+ : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
void MarkWrite() {
is_written = true;
@@ -377,7 +378,7 @@ struct GlobalMemoryBase {
u32 cbuf_index = 0;
u32 cbuf_offset = 0;
- bool operator<(const GlobalMemoryBase& rhs) const {
+ [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const {
return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
}
};
@@ -389,7 +390,7 @@ struct MetaArithmetic {
/// Parameters describing a texture sampler
struct MetaTexture {
- Sampler sampler;
+ SamplerEntry sampler;
Node array;
Node depth_compare;
std::vector<Node> aoffi;
@@ -403,7 +404,7 @@ struct MetaTexture {
};
struct MetaImage {
- const Image& image;
+ const ImageEntry& image;
std::vector<Node> values;
u32 element{};
};
@@ -414,7 +415,7 @@ using Meta =
class AmendNode {
public:
- std::optional<std::size_t> GetAmendIndex() const {
+ [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const {
if (amend_index == amend_null_index) {
return std::nullopt;
}
@@ -437,34 +438,34 @@ private:
/// Holds any kind of operation that can be done in the IR
class OperationNode final : public AmendNode {
public:
- explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
+ explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {}
- explicit OperationNode(OperationCode code, Meta meta)
- : OperationNode(code, std::move(meta), std::vector<Node>{}) {}
+ explicit OperationNode(OperationCode code_, Meta meta_)
+ : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {}
- explicit OperationNode(OperationCode code, std::vector<Node> operands)
- : OperationNode(code, Meta{}, std::move(operands)) {}
+ explicit OperationNode(OperationCode code_, std::vector<Node> operands_)
+ : OperationNode(code_, Meta{}, std::move(operands_)) {}
- explicit OperationNode(OperationCode code, Meta meta, std::vector<Node> operands)
- : code{code}, meta{std::move(meta)}, operands{std::move(operands)} {}
+ explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_)
+ : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {}
template <typename... Args>
- explicit OperationNode(OperationCode code, Meta meta, Args&&... operands)
- : code{code}, meta{std::move(meta)}, operands{operands...} {}
+ explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_)
+ : code{code_}, meta{std::move(meta_)}, operands{operands_...} {}
- OperationCode GetCode() const {
+ [[nodiscard]] OperationCode GetCode() const {
return code;
}
- const Meta& GetMeta() const {
+ [[nodiscard]] const Meta& GetMeta() const {
return meta;
}
- std::size_t GetOperandsCount() const {
+ [[nodiscard]] std::size_t GetOperandsCount() const {
return operands.size();
}
- const Node& operator[](std::size_t operand_index) const {
+ [[nodiscard]] const Node& operator[](std::size_t operand_index) const {
return operands.at(operand_index);
}
@@ -477,14 +478,14 @@ private:
/// Encloses inside any kind of node that returns a boolean conditionally-executed code
class ConditionalNode final : public AmendNode {
public:
- explicit ConditionalNode(Node condition, std::vector<Node>&& code)
- : condition{std::move(condition)}, code{std::move(code)} {}
+ explicit ConditionalNode(Node condition_, std::vector<Node>&& code_)
+ : condition{std::move(condition_)}, code{std::move(code_)} {}
- const Node& GetCondition() const {
+ [[nodiscard]] const Node& GetCondition() const {
return condition;
}
- const std::vector<Node>& GetCode() const {
+ [[nodiscard]] const std::vector<Node>& GetCode() const {
return code;
}
@@ -496,9 +497,9 @@ private:
/// A general purpose register
class GprNode final {
public:
- explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {}
+ explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {}
- u32 GetIndex() const {
+ [[nodiscard]] constexpr u32 GetIndex() const {
return static_cast<u32>(index);
}
@@ -509,9 +510,9 @@ private:
/// A custom variable
class CustomVarNode final {
public:
- explicit constexpr CustomVarNode(u32 index) : index{index} {}
+ explicit constexpr CustomVarNode(u32 index_) : index{index_} {}
- constexpr u32 GetIndex() const {
+ [[nodiscard]] constexpr u32 GetIndex() const {
return index;
}
@@ -522,9 +523,9 @@ private:
/// A 32-bits value that represents an immediate value
class ImmediateNode final {
public:
- explicit constexpr ImmediateNode(u32 value) : value{value} {}
+ explicit constexpr ImmediateNode(u32 value_) : value{value_} {}
- u32 GetValue() const {
+ [[nodiscard]] constexpr u32 GetValue() const {
return value;
}
@@ -535,9 +536,9 @@ private:
/// One of Maxwell's internal flags
class InternalFlagNode final {
public:
- explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {}
+ explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {}
- InternalFlag GetFlag() const {
+ [[nodiscard]] constexpr InternalFlag GetFlag() const {
return flag;
}
@@ -548,14 +549,14 @@ private:
/// A predicate register, it can be negated without additional nodes
class PredicateNode final {
public:
- explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated)
- : index{index}, negated{negated} {}
+ explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_)
+ : index{index_}, negated{negated_} {}
- Tegra::Shader::Pred GetIndex() const {
+ [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const {
return index;
}
- bool IsNegated() const {
+ [[nodiscard]] constexpr bool IsNegated() const {
return negated;
}
@@ -568,30 +569,30 @@ private:
class AbufNode final {
public:
// Initialize for standard attributes (index is explicit).
- explicit AbufNode(Tegra::Shader::Attribute::Index index, u32 element, Node buffer = {})
- : buffer{std::move(buffer)}, index{index}, element{element} {}
+ explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {})
+ : buffer{std::move(buffer_)}, index{index_}, element{element_} {}
// Initialize for physical attributes (index is a variable value).
- explicit AbufNode(Node physical_address, Node buffer = {})
- : physical_address{std::move(physical_address)}, buffer{std::move(buffer)} {}
+ explicit AbufNode(Node physical_address_, Node buffer_ = {})
+ : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {}
- Tegra::Shader::Attribute::Index GetIndex() const {
+ [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const {
return index;
}
- u32 GetElement() const {
+ [[nodiscard]] u32 GetElement() const {
return element;
}
- const Node& GetBuffer() const {
+ [[nodiscard]] const Node& GetBuffer() const {
return buffer;
}
- bool IsPhysicalBuffer() const {
+ [[nodiscard]] bool IsPhysicalBuffer() const {
return static_cast<bool>(physical_address);
}
- const Node& GetPhysicalAddress() const {
+ [[nodiscard]] const Node& GetPhysicalAddress() const {
return physical_address;
}
@@ -605,9 +606,9 @@ private:
/// Patch memory (used to communicate tessellation stages).
class PatchNode final {
public:
- explicit PatchNode(u32 offset) : offset{offset} {}
+ explicit constexpr PatchNode(u32 offset_) : offset{offset_} {}
- u32 GetOffset() const {
+ [[nodiscard]] constexpr u32 GetOffset() const {
return offset;
}
@@ -618,13 +619,13 @@ private:
/// Constant buffer node, usually mapped to uniform buffers in GLSL
class CbufNode final {
public:
- explicit CbufNode(u32 index, Node offset) : index{index}, offset{std::move(offset)} {}
+ explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {}
- u32 GetIndex() const {
+ [[nodiscard]] u32 GetIndex() const {
return index;
}
- const Node& GetOffset() const {
+ [[nodiscard]] const Node& GetOffset() const {
return offset;
}
@@ -636,9 +637,9 @@ private:
/// Local memory node
class LmemNode final {
public:
- explicit LmemNode(Node address) : address{std::move(address)} {}
+ explicit LmemNode(Node address_) : address{std::move(address_)} {}
- const Node& GetAddress() const {
+ [[nodiscard]] const Node& GetAddress() const {
return address;
}
@@ -649,9 +650,9 @@ private:
/// Shared memory node
class SmemNode final {
public:
- explicit SmemNode(Node address) : address{std::move(address)} {}
+ explicit SmemNode(Node address_) : address{std::move(address_)} {}
- const Node& GetAddress() const {
+ [[nodiscard]] const Node& GetAddress() const {
return address;
}
@@ -662,19 +663,19 @@ private:
/// Global memory node
class GmemNode final {
public:
- explicit GmemNode(Node real_address, Node base_address, const GlobalMemoryBase& descriptor)
- : real_address{std::move(real_address)}, base_address{std::move(base_address)},
- descriptor{descriptor} {}
+ explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_)
+ : real_address{std::move(real_address_)}, base_address{std::move(base_address_)},
+ descriptor{descriptor_} {}
- const Node& GetRealAddress() const {
+ [[nodiscard]] const Node& GetRealAddress() const {
return real_address;
}
- const Node& GetBaseAddress() const {
+ [[nodiscard]] const Node& GetBaseAddress() const {
return base_address;
}
- const GlobalMemoryBase& GetDescriptor() const {
+ [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const {
return descriptor;
}
@@ -687,9 +688,9 @@ private:
/// Commentary, can be dropped
class CommentNode final {
public:
- explicit CommentNode(std::string text) : text{std::move(text)} {}
+ explicit CommentNode(std::string text_) : text{std::move(text_)} {}
- const std::string& GetText() const {
+ [[nodiscard]] const std::string& GetText() const {
return text;
}
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 7bf4ff387..6a5b6940d 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -107,7 +107,7 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
return {};
default:
- UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
+ UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code);
return {};
}
}
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index cdf274e54..148d91fcb 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -24,44 +24,45 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac
if (shader_stage == ShaderType::Compute) {
return {};
}
- auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
-
- GraphicsInfo info;
- info.tfb_layouts = graphics.regs.tfb_layouts;
- info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
- info.primitive_topology = graphics.regs.draw.topology;
- info.tessellation_primitive = graphics.regs.tess_mode.prim;
- info.tessellation_spacing = graphics.regs.tess_mode.spacing;
- info.tfb_enabled = graphics.regs.tfb_enabled;
- info.tessellation_clockwise = graphics.regs.tess_mode.cw;
- return info;
+
+ auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
+
+ return {
+ .tfb_layouts = graphics.regs.tfb_layouts,
+ .tfb_varying_locs = graphics.regs.tfb_varying_locs,
+ .primitive_topology = graphics.regs.draw.topology,
+ .tessellation_primitive = graphics.regs.tess_mode.prim,
+ .tessellation_spacing = graphics.regs.tess_mode.spacing,
+ .tfb_enabled = graphics.regs.tfb_enabled != 0,
+ .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0,
+ };
}
ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
if (shader_stage != ShaderType::Compute) {
return {};
}
- auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
+
+ auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
const auto& launch = compute.launch_description;
- ComputeInfo info;
- info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
- info.local_memory_size_in_words = launch.local_pos_alloc;
- info.shared_memory_size_in_words = launch.shared_alloc;
- return info;
+ return {
+ .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
+ .shared_memory_size_in_words = launch.shared_alloc,
+ .local_memory_size_in_words = launch.local_pos_alloc,
+ };
}
} // Anonymous namespace
-Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
+Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
: stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
-Registry::Registry(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine)
- : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
- graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
- shader_stage, engine)} {}
+Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
+ : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
+ graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
+ shader_stage, engine_)} {}
Registry::~Registry() = default;
@@ -113,8 +114,7 @@ std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler
return value;
}
-std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
- u32 offset) {
+std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
const std::pair key = {buffer, offset};
const auto iter = bindless_samplers.find(key);
if (iter != bindless_samplers.end()) {
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 231206765..4bebefdde 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -94,7 +94,7 @@ public:
explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
explicit Registry(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine);
+ Tegra::Engines::ConstBufferEngineInterface& engine_);
~Registry();
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 29d794b34..a4987ffc6 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -25,9 +25,10 @@ using Tegra::Shader::PredCondition;
using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
-ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
- Registry& registry)
- : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
+ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_,
+ Registry& registry_)
+ : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{
+ registry_} {
Decode();
PostDecode();
}
@@ -170,7 +171,7 @@ Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signe
// Default - do nothing
return value;
default:
- UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
+ UNREACHABLE_MSG("Unimplemented conversion size: {}", size);
return value;
}
}
@@ -335,15 +336,15 @@ OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
return operation_table[index];
}
-Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
+Node ShaderIR::GetConditionCode(ConditionCode cc) const {
switch (cc) {
- case Tegra::Shader::ConditionCode::NEU:
+ case ConditionCode::NEU:
return GetInternalFlag(InternalFlag::Zero, true);
- case Tegra::Shader::ConditionCode::FCSM_TR:
+ case ConditionCode::FCSM_TR:
UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
return MakeNode<PredicateNode>(Pred::NeverExecute, false);
default:
- UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
+ UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc);
return MakeNode<PredicateNode>(Pred::NeverExecute, false);
}
}
@@ -451,8 +452,8 @@ void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
}
std::size_t ShaderIR::DeclareAmend(Node new_amend) {
- const std::size_t id = amend_code.size();
- amend_code.push_back(new_amend);
+ const auto id = amend_code.size();
+ amend_code.push_back(std::move(new_amend));
return id;
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 3a98b2104..1cd7c14d7 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -29,8 +29,8 @@ struct ShaderBlock;
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
struct ConstBuffer {
- constexpr explicit ConstBuffer(u32 max_offset, bool is_indirect)
- : max_offset{max_offset}, is_indirect{is_indirect} {}
+ constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
+ : max_offset{max_offset_}, is_indirect{is_indirect_} {}
constexpr ConstBuffer() = default;
@@ -66,8 +66,8 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
- explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
- Registry& registry);
+ explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
+ CompilerSettings settings_, Registry& registry_);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -94,11 +94,11 @@ public:
return used_cbufs;
}
- const std::list<Sampler>& GetSamplers() const {
+ const std::list<SamplerEntry>& GetSamplers() const {
return used_samplers;
}
- const std::list<Image>& GetImages() const {
+ const std::list<ImageEntry>& GetImages() const {
return used_images;
}
@@ -139,6 +139,10 @@ public:
return uses_legacy_varyings;
}
+ bool UsesYNegate() const {
+ return uses_y_negate;
+ }
+
bool UsesWarps() const {
return uses_warps;
}
@@ -334,17 +338,17 @@ private:
std::optional<Tegra::Engines::SamplerDescriptor> sampler);
/// Accesses a texture sampler.
- std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
+ std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
/// Accesses a texture sampler for a bindless texture.
- std::optional<Sampler> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
- Node& index_var);
+ std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
+ Node& index_var);
/// Accesses an image.
- Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
+ ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
/// Access a bindless image sampler.
- Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
+ ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -454,8 +458,8 @@ private:
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
- std::list<Sampler> used_samplers;
- std::list<Image> used_images;
+ std::list<SamplerEntry> used_samplers;
+ std::list<ImageEntry> used_images;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
bool uses_layer{};
@@ -465,6 +469,7 @@ private:
bool uses_instance_id{};
bool uses_vertex_id{};
bool uses_legacy_varyings{};
+ bool uses_y_negate{};
bool uses_warps{};
bool uses_indexed_samplers{};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index d5ed81442..6be3ea92b 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -205,12 +205,12 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
const auto& found = result.first;
if (!found) {
- return {};
+ return std::nullopt;
}
if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
return immediate->GetValue();
}
- return {};
+ return std::nullopt;
}
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 1688267bb..6308aef94 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -28,7 +28,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
case Tegra::Texture::TextureType::Texture2DArray:
return SurfaceTarget::Texture2DArray;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type));
+ LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", texture_type);
UNREACHABLE();
return SurfaceTarget::Texture2D;
}
@@ -47,7 +47,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
case SurfaceTarget::TextureCubeArray:
return true;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
+ LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target);
UNREACHABLE();
return false;
}
@@ -66,7 +66,7 @@ bool SurfaceTargetIsArray(SurfaceTarget target) {
case SurfaceTarget::TextureCubeArray:
return true;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
+ LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target);
UNREACHABLE();
return false;
}
@@ -85,7 +85,7 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT:
return PixelFormat::D32_FLOAT_S8_UINT;
default:
- UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
+ UNIMPLEMENTED_MSG("Unimplemented format={}", format);
return PixelFormat::S8_UINT_D24_UNORM;
}
}
@@ -183,7 +183,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
case Tegra::RenderTargetFormat::R8_UINT:
return PixelFormat::R8_UINT;
default:
- UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<int>(format));
+ UNIMPLEMENTED_MSG("Unimplemented format={}", format);
return PixelFormat::A8B8G8R8_UNORM;
}
}
@@ -197,7 +197,7 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM:
return PixelFormat::B8G8R8A8_UNORM;
default:
- UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
+ UNIMPLEMENTED_MSG("Unimplemented format={}", format);
return PixelFormat::A8B8G8R8_UNORM;
}
}
@@ -280,7 +280,7 @@ bool IsPixelFormatSRGB(PixelFormat format) {
}
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
- return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)};
+ return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
}
} // namespace VideoCore::Surface
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index cfd12fa61..c40ab89d0 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -120,7 +120,7 @@ enum class PixelFormat {
Max = MaxDepthStencilFormat,
Invalid = 255,
};
-static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
+constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
enum class SurfaceType {
ColorTexture = 0,
@@ -140,117 +140,7 @@ enum class SurfaceTarget {
TextureCubeArray,
};
-constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
- 0, // A8B8G8R8_UNORM
- 0, // A8B8G8R8_SNORM
- 0, // A8B8G8R8_SINT
- 0, // A8B8G8R8_UINT
- 0, // R5G6B5_UNORM
- 0, // B5G6R5_UNORM
- 0, // A1R5G5B5_UNORM
- 0, // A2B10G10R10_UNORM
- 0, // A2B10G10R10_UINT
- 0, // A1B5G5R5_UNORM
- 0, // R8_UNORM
- 0, // R8_SNORM
- 0, // R8_SINT
- 0, // R8_UINT
- 0, // R16G16B16A16_FLOAT
- 0, // R16G16B16A16_UNORM
- 0, // R16G16B16A16_SNORM
- 0, // R16G16B16A16_SINT
- 0, // R16G16B16A16_UINT
- 0, // B10G11R11_FLOAT
- 0, // R32G32B32A32_UINT
- 2, // BC1_RGBA_UNORM
- 2, // BC2_UNORM
- 2, // BC3_UNORM
- 2, // BC4_UNORM
- 2, // BC4_SNORM
- 2, // BC5_UNORM
- 2, // BC5_SNORM
- 2, // BC7_UNORM
- 2, // BC6H_UFLOAT
- 2, // BC6H_SFLOAT
- 2, // ASTC_2D_4X4_UNORM
- 0, // B8G8R8A8_UNORM
- 0, // R32G32B32A32_FLOAT
- 0, // R32G32B32A32_SINT
- 0, // R32G32_FLOAT
- 0, // R32G32_SINT
- 0, // R32_FLOAT
- 0, // R16_FLOAT
- 0, // R16_UNORM
- 0, // R16_SNORM
- 0, // R16_UINT
- 0, // R16_SINT
- 0, // R16G16_UNORM
- 0, // R16G16_FLOAT
- 0, // R16G16_UINT
- 0, // R16G16_SINT
- 0, // R16G16_SNORM
- 0, // R32G32B32_FLOAT
- 0, // A8B8G8R8_SRGB
- 0, // R8G8_UNORM
- 0, // R8G8_SNORM
- 0, // R8G8_SINT
- 0, // R8G8_UINT
- 0, // R32G32_UINT
- 0, // R16G16B16X16_FLOAT
- 0, // R32_UINT
- 0, // R32_SINT
- 2, // ASTC_2D_8X8_UNORM
- 2, // ASTC_2D_8X5_UNORM
- 2, // ASTC_2D_5X4_UNORM
- 0, // B8G8R8A8_SRGB
- 2, // BC1_RGBA_SRGB
- 2, // BC2_SRGB
- 2, // BC3_SRGB
- 2, // BC7_SRGB
- 0, // A4B4G4R4_UNORM
- 2, // ASTC_2D_4X4_SRGB
- 2, // ASTC_2D_8X8_SRGB
- 2, // ASTC_2D_8X5_SRGB
- 2, // ASTC_2D_5X4_SRGB
- 2, // ASTC_2D_5X5_UNORM
- 2, // ASTC_2D_5X5_SRGB
- 2, // ASTC_2D_10X8_UNORM
- 2, // ASTC_2D_10X8_SRGB
- 2, // ASTC_2D_6X6_UNORM
- 2, // ASTC_2D_6X6_SRGB
- 2, // ASTC_2D_10X10_UNORM
- 2, // ASTC_2D_10X10_SRGB
- 2, // ASTC_2D_12X12_UNORM
- 2, // ASTC_2D_12X12_SRGB
- 2, // ASTC_2D_8X6_UNORM
- 2, // ASTC_2D_8X6_SRGB
- 2, // ASTC_2D_6X5_UNORM
- 2, // ASTC_2D_6X5_SRGB
- 0, // E5B9G9R9_FLOAT
- 0, // D32_FLOAT
- 0, // D16_UNORM
- 0, // D24_UNORM_S8_UINT
- 0, // S8_UINT_D24_UNORM
- 0, // D32_FLOAT_S8_UINT
-}};
-
-/**
- * Gets the compression factor for the specified PixelFormat. This applies to just the
- * "compressed width" and "compressed height", not the overall compression factor of a
- * compressed image. This is used for maintaining proper surface sizes for compressed
- * texture formats.
- */
-inline constexpr u32 GetCompressionFactorShift(PixelFormat format) {
- DEBUG_ASSERT(format != PixelFormat::Invalid);
- DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size());
- return compression_factor_shift_table[static_cast<std::size_t>(format)];
-}
-
-inline constexpr u32 GetCompressionFactor(PixelFormat format) {
- return 1U << GetCompressionFactorShift(format);
-}
-
-constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
+constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
1, // A8B8G8R8_UNORM
1, // A8B8G8R8_SNORM
1, // A8B8G8R8_SINT
@@ -344,15 +234,12 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
1, // D32_FLOAT_S8_UINT
}};
-static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
- if (format == PixelFormat::Invalid)
- return 0;
-
- ASSERT(static_cast<std::size_t>(format) < block_width_table.size());
- return block_width_table[static_cast<std::size_t>(format)];
+constexpr u32 DefaultBlockWidth(PixelFormat format) {
+ ASSERT(static_cast<std::size_t>(format) < BLOCK_WIDTH_TABLE.size());
+ return BLOCK_WIDTH_TABLE[static_cast<std::size_t>(format)];
}
-constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
+constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
1, // A8B8G8R8_UNORM
1, // A8B8G8R8_SNORM
1, // A8B8G8R8_SINT
@@ -446,15 +333,12 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
1, // D32_FLOAT_S8_UINT
}};
-static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
- if (format == PixelFormat::Invalid)
- return 0;
-
- ASSERT(static_cast<std::size_t>(format) < block_height_table.size());
- return block_height_table[static_cast<std::size_t>(format)];
+constexpr u32 DefaultBlockHeight(PixelFormat format) {
+ ASSERT(static_cast<std::size_t>(format) < BLOCK_HEIGHT_TABLE.size());
+ return BLOCK_HEIGHT_TABLE[static_cast<std::size_t>(format)];
}
-constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
+constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
32, // A8B8G8R8_UNORM
32, // A8B8G8R8_SNORM
32, // A8B8G8R8_SINT
@@ -548,20 +432,14 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
64, // D32_FLOAT_S8_UINT
}};
-static constexpr u32 GetFormatBpp(PixelFormat format) {
- if (format == PixelFormat::Invalid)
- return 0;
-
- ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
- return bpp_table[static_cast<std::size_t>(format)];
+constexpr u32 BitsPerBlock(PixelFormat format) {
+ ASSERT(static_cast<std::size_t>(format) < BITS_PER_BLOCK_TABLE.size());
+ return BITS_PER_BLOCK_TABLE[static_cast<std::size_t>(format)];
}
/// Returns the sizer in bytes of the specified pixel format
-static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
- if (pixel_format == PixelFormat::Invalid) {
- return 0;
- }
- return GetFormatBpp(pixel_format) / CHAR_BIT;
+constexpr u32 BytesPerBlock(PixelFormat pixel_format) {
+ return BitsPerBlock(pixel_format) / CHAR_BIT;
}
SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp
new file mode 100644
index 000000000..15585caeb
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.cpp
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/accelerated_swizzle.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/decoders.h"
+
+namespace VideoCommon::Accelerated {
+
+using Tegra::Texture::GOB_SIZE_SHIFT;
+using Tegra::Texture::GOB_SIZE_X;
+using Tegra::Texture::GOB_SIZE_X_SHIFT;
+using Tegra::Texture::GOB_SIZE_Y_SHIFT;
+using VideoCore::Surface::BytesPerBlock;
+
+BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle,
+ const ImageInfo& info) {
+ const Extent3D block = swizzle.block;
+ const Extent3D num_tiles = swizzle.num_tiles;
+ const u32 bytes_per_block = BytesPerBlock(info.format);
+ const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
+ const u32 stride = Common::AlignUpLog2(num_tiles.width, stride_alignment) * bytes_per_block;
+ const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
+ return BlockLinearSwizzle2DParams{
+ .origin{0, 0, 0},
+ .destination{0, 0, 0},
+ .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
+ .layer_stride = info.layer_stride,
+ .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth),
+ .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
+ .block_height = block.height,
+ .block_height_mask = (1U << block.height) - 1,
+ };
+}
+
+BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle,
+ const ImageInfo& info) {
+ const Extent3D block = swizzle.block;
+ const Extent3D num_tiles = swizzle.num_tiles;
+ const u32 bytes_per_block = BytesPerBlock(info.format);
+ const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
+ const u32 stride = Common::AlignUpLog2(num_tiles.width, stride_alignment) * bytes_per_block;
+
+ const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
+ const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
+ const u32 slice_size =
+ Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
+ return BlockLinearSwizzle3DParams{
+ .origin{0, 0, 0},
+ .destination{0, 0, 0},
+ .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
+ .slice_size = slice_size,
+ .block_size = block_size,
+ .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
+ .block_height = block.height,
+ .block_height_mask = (1U << block.height) - 1,
+ .block_depth = block.depth,
+ .block_depth_mask = (1U << block.depth) - 1,
+ };
+}
+
+} // namespace VideoCommon::Accelerated \ No newline at end of file
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h
new file mode 100644
index 000000000..6ec5c78c4
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.h
@@ -0,0 +1,45 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+
+#include "common/common_types.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon::Accelerated {
+
+struct BlockLinearSwizzle2DParams {
+ std::array<u32, 3> origin;
+ std::array<s32, 3> destination;
+ u32 bytes_per_block_log2;
+ u32 layer_stride;
+ u32 block_size;
+ u32 x_shift;
+ u32 block_height;
+ u32 block_height_mask;
+};
+
+struct BlockLinearSwizzle3DParams {
+ std::array<u32, 3> origin;
+ std::array<s32, 3> destination;
+ u32 bytes_per_block_log2;
+ u32 slice_size;
+ u32 block_size;
+ u32 x_shift;
+ u32 block_height;
+ u32 block_height_mask;
+ u32 block_depth;
+ u32 block_depth_mask;
+};
+
+[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(
+ const SwizzleParameters& swizzle, const ImageInfo& info);
+
+[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(
+ const SwizzleParameters& swizzle, const ImageInfo& info);
+
+} // namespace VideoCommon::Accelerated
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h
deleted file mode 100644
index 9c21a0649..000000000
--- a/src/video_core/texture_cache/copy_params.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace VideoCommon {
-
-struct CopyParams {
- constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y,
- u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height,
- u32 depth)
- : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x},
- dest_y{dest_y}, dest_z{dest_z}, source_level{source_level},
- dest_level{dest_level}, width{width}, height{height}, depth{depth} {}
-
- constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level)
- : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level},
- dest_level{level}, width{width}, height{height}, depth{depth} {}
-
- u32 source_x;
- u32 source_y;
- u32 source_z;
- u32 dest_x;
- u32 dest_y;
- u32 dest_z;
- u32 source_level;
- u32 dest_level;
- u32 width;
- u32 height;
- u32 depth;
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
new file mode 100644
index 000000000..017327975
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.cpp
@@ -0,0 +1,97 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <span>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/texture_cache/decode_bc4.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
+[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
+ const u32 code_offset = 16 + 3 * (4 * y + x);
+ const u32 code = (bits >> code_offset) & 7;
+ const u32 red0 = (bits >> 0) & 0xff;
+ const u32 red1 = (bits >> 8) & 0xff;
+ if (red0 > red1) {
+ switch (code) {
+ case 0:
+ return red0;
+ case 1:
+ return red1;
+ case 2:
+ return (6 * red0 + 1 * red1) / 7;
+ case 3:
+ return (5 * red0 + 2 * red1) / 7;
+ case 4:
+ return (4 * red0 + 3 * red1) / 7;
+ case 5:
+ return (3 * red0 + 4 * red1) / 7;
+ case 6:
+ return (2 * red0 + 5 * red1) / 7;
+ case 7:
+ return (1 * red0 + 6 * red1) / 7;
+ }
+ } else {
+ switch (code) {
+ case 0:
+ return red0;
+ case 1:
+ return red1;
+ case 2:
+ return (4 * red0 + 1 * red1) / 5;
+ case 3:
+ return (3 * red0 + 2 * red1) / 5;
+ case 4:
+ return (2 * red0 + 3 * red1) / 5;
+ case 5:
+ return (1 * red0 + 4 * red1) / 5;
+ case 6:
+ return 0;
+ case 7:
+ return 0xff;
+ }
+ }
+ return 0;
+}
+
+void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
+ UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
+ UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
+ static constexpr u32 BLOCK_SIZE = 4;
+ size_t input_offset = 0;
+ for (u32 slice = 0; slice < extent.depth; ++slice) {
+ for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) {
+ for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) {
+ u64 bits;
+ std::memcpy(&bits, &input[input_offset], sizeof(bits));
+ input_offset += sizeof(bits);
+
+ for (u32 y = 0; y < BLOCK_SIZE; ++y) {
+ for (u32 x = 0; x < BLOCK_SIZE; ++x) {
+ const u32 linear_z = slice;
+ const u32 linear_y = block_y * BLOCK_SIZE + y;
+ const u32 linear_x = block_x * BLOCK_SIZE + x;
+ const u32 offset_z = linear_z * extent.width * extent.height;
+ const u32 offset_y = linear_y * extent.width;
+ const u32 offset_x = linear_x;
+ const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL;
+ const u32 color = DecompressBlock(bits, x, y);
+ output[output_offset + 0] = static_cast<u8>(color);
+ output[output_offset + 1] = 0;
+ output[output_offset + 2] = 0;
+ output[output_offset + 3] = 0xff;
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h
new file mode 100644
index 000000000..63fb23508
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.h
@@ -0,0 +1,16 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include "common/common_types.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output);
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
new file mode 100644
index 000000000..3a03b786f
--- /dev/null
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -0,0 +1,82 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "common/logging/log.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <typename Descriptor>
+class DescriptorTable {
+public:
+ explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
+
+ [[nodiscard]] bool Synchornize(GPUVAddr gpu_addr, u32 limit) {
+ [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) {
+ return false;
+ }
+ Refresh(gpu_addr, limit);
+ return true;
+ }
+
+ void Invalidate() noexcept {
+ std::ranges::fill(read_descriptors, 0);
+ }
+
+ [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
+ DEBUG_ASSERT(index <= current_limit);
+ const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
+ std::pair<Descriptor, bool> result;
+ gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
+ if (IsDescriptorRead(index)) {
+ result.second = result.first != descriptors[index];
+ } else {
+ MarkDescriptorAsRead(index);
+ result.second = true;
+ }
+ if (result.second) {
+ descriptors[index] = result.first;
+ }
+ return result;
+ }
+
+ [[nodiscard]] u32 Limit() const noexcept {
+ return current_limit;
+ }
+
+private:
+ void Refresh(GPUVAddr gpu_addr, u32 limit) {
+ current_gpu_addr = gpu_addr;
+ current_limit = limit;
+
+ const size_t num_descriptors = static_cast<size_t>(limit) + 1;
+ read_descriptors.clear();
+ read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
+ descriptors.resize(num_descriptors);
+ }
+
+ void MarkDescriptorAsRead(u32 index) noexcept {
+ read_descriptors[index / 64] |= 1ULL << (index % 64);
+ }
+
+ [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
+ return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
+ }
+
+ Tegra::MemoryManager& gpu_memory;
+ GPUVAddr current_gpu_addr{};
+ u32 current_limit{};
+ std::vector<u64> read_descriptors;
+ std::vector<Descriptor> descriptors;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7d5a75648..ddfb726fe 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <array>
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/texture_cache/format_lookup_table.h"
@@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM;
constexpr auto SINT = ComponentType::SINT;
constexpr auto UINT = ComponentType::UINT;
constexpr auto FLOAT = ComponentType::FLOAT;
-constexpr bool C = false; // Normal color
-constexpr bool S = true; // Srgb
-
-struct Table {
- constexpr Table(TextureFormat texture_format, bool is_srgb, ComponentType red_component,
- ComponentType green_component, ComponentType blue_component,
- ComponentType alpha_component, PixelFormat pixel_format)
- : texture_format{texture_format}, pixel_format{pixel_format}, red_component{red_component},
- green_component{green_component}, blue_component{blue_component},
- alpha_component{alpha_component}, is_srgb{is_srgb} {}
-
- TextureFormat texture_format;
- PixelFormat pixel_format;
- ComponentType red_component;
- ComponentType green_component;
- ComponentType blue_component;
- ComponentType alpha_component;
- bool is_srgb;
-};
-constexpr std::array<Table, 86> DefinitionTable = {{
- {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
- {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
- {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
- {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
- {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB},
-
- {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM},
-
- {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM},
- {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT},
-
- {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM},
-
- {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM},
-
- {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM},
- {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM},
- {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT},
- {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT},
-
- {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM},
- {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM},
- {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT},
- {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT},
-
- {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM},
- {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM},
- {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT},
- {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT},
- {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT},
-
- {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT},
- {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM},
- {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM},
- {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT},
- {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT},
-
- {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT},
- {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM},
- {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM},
- {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT},
- {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT},
-
- {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT},
-
- {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT},
- {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT},
- {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT},
-
- {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT},
-
- {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT},
- {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT},
- {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT},
-
- {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT},
- {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT},
- {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT},
-
- {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT},
-
- {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT},
- {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM},
- {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
- {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
- {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT},
-
- {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM},
- {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB},
-
- {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM},
- {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB},
-
- {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM},
- {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB},
-
- {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM},
- {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM},
-
- {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM},
- {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM},
-
- {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM},
- {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB},
-
- {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT},
- {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT},
-
- {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM},
- {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
-
- {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM},
- {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
-
- {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM},
- {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
-
- {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM},
- {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
-
- {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM},
- {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
-
- {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM},
- {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
-
- {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM},
- {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
-
- {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM},
- {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
-
- {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM},
- {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
-
- {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM},
- {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
+constexpr bool LINEAR = false;
+constexpr bool SRGB = true;
+
+constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
+ ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
+ u32 hash = is_srgb ? 1 : 0;
+ hash |= static_cast<u32>(red_component) << 1;
+ hash |= static_cast<u32>(green_component) << 4;
+ hash |= static_cast<u32>(blue_component) << 7;
+ hash |= static_cast<u32>(alpha_component) << 10;
+ hash |= static_cast<u32>(format) << 13;
+ return hash;
+}
- {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM},
- {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB},
-}};
+constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) {
+ return Hash(format, component, component, component, component, is_srgb);
+}
} // Anonymous namespace
-FormatLookupTable::FormatLookupTable() {
- table.fill(static_cast<u8>(PixelFormat::Invalid));
-
- for (const auto& entry : DefinitionTable) {
- table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component,
- entry.green_component, entry.blue_component, entry.alpha_component)] =
- static_cast<u8>(entry.pixel_format);
- }
-}
-
-PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb,
- ComponentType red_component,
- ComponentType green_component,
- ComponentType blue_component,
- ComponentType alpha_component) const noexcept {
- const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex(
- format, is_srgb, red_component, green_component, blue_component, alpha_component)]);
- // [[likely]]
- if (pixel_format != PixelFormat::Invalid) {
- return pixel_format;
+PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
+ ComponentType blue, ComponentType alpha,
+ bool is_srgb) noexcept {
+ switch (Hash(format, red, green, blue, alpha, is_srgb)) {
+ case Hash(TextureFormat::A8R8G8B8, UNORM):
+ return PixelFormat::A8B8G8R8_UNORM;
+ case Hash(TextureFormat::A8R8G8B8, SNORM):
+ return PixelFormat::A8B8G8R8_SNORM;
+ case Hash(TextureFormat::A8R8G8B8, UINT):
+ return PixelFormat::A8B8G8R8_UINT;
+ case Hash(TextureFormat::A8R8G8B8, SINT):
+ return PixelFormat::A8B8G8R8_SINT;
+ case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB):
+ return PixelFormat::A8B8G8R8_SRGB;
+ case Hash(TextureFormat::B5G6R5, UNORM):
+ return PixelFormat::B5G6R5_UNORM;
+ case Hash(TextureFormat::A2B10G10R10, UNORM):
+ return PixelFormat::A2B10G10R10_UNORM;
+ case Hash(TextureFormat::A2B10G10R10, UINT):
+ return PixelFormat::A2B10G10R10_UINT;
+ case Hash(TextureFormat::A1B5G5R5, UNORM):
+ return PixelFormat::A1B5G5R5_UNORM;
+ case Hash(TextureFormat::A4B4G4R4, UNORM):
+ return PixelFormat::A4B4G4R4_UNORM;
+ case Hash(TextureFormat::R8, UNORM):
+ return PixelFormat::R8_UNORM;
+ case Hash(TextureFormat::R8, SNORM):
+ return PixelFormat::R8_SNORM;
+ case Hash(TextureFormat::R8, UINT):
+ return PixelFormat::R8_UINT;
+ case Hash(TextureFormat::R8, SINT):
+ return PixelFormat::R8_SINT;
+ case Hash(TextureFormat::R8G8, UNORM):
+ return PixelFormat::R8G8_UNORM;
+ case Hash(TextureFormat::R8G8, SNORM):
+ return PixelFormat::R8G8_SNORM;
+ case Hash(TextureFormat::R8G8, UINT):
+ return PixelFormat::R8G8_UINT;
+ case Hash(TextureFormat::R8G8, SINT):
+ return PixelFormat::R8G8_SINT;
+ case Hash(TextureFormat::R16G16B16A16, FLOAT):
+ return PixelFormat::R16G16B16A16_FLOAT;
+ case Hash(TextureFormat::R16G16B16A16, UNORM):
+ return PixelFormat::R16G16B16A16_UNORM;
+ case Hash(TextureFormat::R16G16B16A16, SNORM):
+ return PixelFormat::R16G16B16A16_SNORM;
+ case Hash(TextureFormat::R16G16B16A16, UINT):
+ return PixelFormat::R16G16B16A16_UINT;
+ case Hash(TextureFormat::R16G16B16A16, SINT):
+ return PixelFormat::R16G16B16A16_SINT;
+ case Hash(TextureFormat::R16G16, FLOAT):
+ return PixelFormat::R16G16_FLOAT;
+ case Hash(TextureFormat::R16G16, UNORM):
+ return PixelFormat::R16G16_UNORM;
+ case Hash(TextureFormat::R16G16, SNORM):
+ return PixelFormat::R16G16_SNORM;
+ case Hash(TextureFormat::R16G16, UINT):
+ return PixelFormat::R16G16_UINT;
+ case Hash(TextureFormat::R16G16, SINT):
+ return PixelFormat::R16G16_SINT;
+ case Hash(TextureFormat::R16, FLOAT):
+ return PixelFormat::R16_FLOAT;
+ case Hash(TextureFormat::R16, UNORM):
+ return PixelFormat::R16_UNORM;
+ case Hash(TextureFormat::R16, SNORM):
+ return PixelFormat::R16_SNORM;
+ case Hash(TextureFormat::R16, UINT):
+ return PixelFormat::R16_UINT;
+ case Hash(TextureFormat::R16, SINT):
+ return PixelFormat::R16_SINT;
+ case Hash(TextureFormat::B10G11R11, FLOAT):
+ return PixelFormat::B10G11R11_FLOAT;
+ case Hash(TextureFormat::R32G32B32A32, FLOAT):
+ return PixelFormat::R32G32B32A32_FLOAT;
+ case Hash(TextureFormat::R32G32B32A32, UINT):
+ return PixelFormat::R32G32B32A32_UINT;
+ case Hash(TextureFormat::R32G32B32A32, SINT):
+ return PixelFormat::R32G32B32A32_SINT;
+ case Hash(TextureFormat::R32G32B32, FLOAT):
+ return PixelFormat::R32G32B32_FLOAT;
+ case Hash(TextureFormat::R32G32, FLOAT):
+ return PixelFormat::R32G32_FLOAT;
+ case Hash(TextureFormat::R32G32, UINT):
+ return PixelFormat::R32G32_UINT;
+ case Hash(TextureFormat::R32G32, SINT):
+ return PixelFormat::R32G32_SINT;
+ case Hash(TextureFormat::R32, FLOAT):
+ return PixelFormat::R32_FLOAT;
+ case Hash(TextureFormat::R32, UINT):
+ return PixelFormat::R32_UINT;
+ case Hash(TextureFormat::R32, SINT):
+ return PixelFormat::R32_SINT;
+ case Hash(TextureFormat::E5B9G9R9, FLOAT):
+ return PixelFormat::E5B9G9R9_FLOAT;
+ case Hash(TextureFormat::D32, FLOAT):
+ return PixelFormat::D32_FLOAT;
+ case Hash(TextureFormat::D16, UNORM):
+ return PixelFormat::D16_UNORM;
+ case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
+ return PixelFormat::S8_UINT_D24_UNORM;
+ case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
+ return PixelFormat::S8_UINT_D24_UNORM;
+ case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+ return PixelFormat::D32_FLOAT_S8_UINT;
+ case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
+ return PixelFormat::BC1_RGBA_UNORM;
+ case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
+ return PixelFormat::BC1_RGBA_SRGB;
+ case Hash(TextureFormat::BC2, UNORM, LINEAR):
+ return PixelFormat::BC2_UNORM;
+ case Hash(TextureFormat::BC2, UNORM, SRGB):
+ return PixelFormat::BC2_SRGB;
+ case Hash(TextureFormat::BC3, UNORM, LINEAR):
+ return PixelFormat::BC3_UNORM;
+ case Hash(TextureFormat::BC3, UNORM, SRGB):
+ return PixelFormat::BC3_SRGB;
+ case Hash(TextureFormat::BC4, UNORM):
+ return PixelFormat::BC4_UNORM;
+ case Hash(TextureFormat::BC4, SNORM):
+ return PixelFormat::BC4_SNORM;
+ case Hash(TextureFormat::BC5, UNORM):
+ return PixelFormat::BC5_UNORM;
+ case Hash(TextureFormat::BC5, SNORM):
+ return PixelFormat::BC5_SNORM;
+ case Hash(TextureFormat::BC7, UNORM, LINEAR):
+ return PixelFormat::BC7_UNORM;
+ case Hash(TextureFormat::BC7, UNORM, SRGB):
+ return PixelFormat::BC7_SRGB;
+ case Hash(TextureFormat::BC6H_SFLOAT, FLOAT):
+ return PixelFormat::BC6H_SFLOAT;
+ case Hash(TextureFormat::BC6H_UFLOAT, FLOAT):
+ return PixelFormat::BC6H_UFLOAT;
+ case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_4X4_UNORM;
+ case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_4X4_SRGB;
+ case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_5X4_UNORM;
+ case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_5X4_SRGB;
+ case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_5X5_UNORM;
+ case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_5X5_SRGB;
+ case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_8X8_UNORM;
+ case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_8X8_SRGB;
+ case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_8X5_UNORM;
+ case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_8X5_SRGB;
+ case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_10X8_UNORM;
+ case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_10X8_SRGB;
+ case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_6X6_UNORM;
+ case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_6X6_SRGB;
+ case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_10X10_UNORM;
+ case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_10X10_SRGB;
+ case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_12X12_UNORM;
+ case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_12X12_SRGB;
+ case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_8X6_UNORM;
+ case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_8X6_SRGB;
+ case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR):
+ return PixelFormat::ASTC_2D_6X5_UNORM;
+ case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB):
+ return PixelFormat::ASTC_2D_6X5_SRGB;
}
UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
- static_cast<int>(format), is_srgb, static_cast<int>(red_component),
- static_cast<int>(green_component), static_cast<int>(blue_component),
- static_cast<int>(alpha_component));
+ static_cast<int>(format), is_srgb, static_cast<int>(red),
+ static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
return PixelFormat::A8B8G8R8_UNORM;
}
-void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
- ComponentType green_component, ComponentType blue_component,
- ComponentType alpha_component, PixelFormat pixel_format) {}
-
-std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb,
- ComponentType red_component,
- ComponentType green_component,
- ComponentType blue_component,
- ComponentType alpha_component) noexcept {
- const auto format_index = static_cast<std::size_t>(format);
- const auto red_index = static_cast<std::size_t>(red_component);
- const auto green_index = static_cast<std::size_t>(green_component);
- const auto blue_index = static_cast<std::size_t>(blue_component);
- const auto alpha_index = static_cast<std::size_t>(alpha_component);
- const std::size_t srgb_index = is_srgb ? 1 : 0;
-
- return format_index * PerFormat +
- srgb_index * PerComponent * PerComponent * PerComponent * PerComponent +
- alpha_index * PerComponent * PerComponent * PerComponent +
- blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index;
-}
-
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h
index aa77e0a5a..729533999 100644
--- a/src/video_core/texture_cache/format_lookup_table.h
+++ b/src/video_core/texture_cache/format_lookup_table.h
@@ -4,48 +4,14 @@
#pragma once
-#include <array>
-#include <limits>
#include "video_core/surface.h"
#include "video_core/textures/texture.h"
namespace VideoCommon {
-class FormatLookupTable {
-public:
- explicit FormatLookupTable();
-
- VideoCore::Surface::PixelFormat GetPixelFormat(
- Tegra::Texture::TextureFormat format, bool is_srgb,
- Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component,
- Tegra::Texture::ComponentType blue_component,
- Tegra::Texture::ComponentType alpha_component) const noexcept;
-
-private:
- static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max());
-
- static constexpr std::size_t NumTextureFormats = 128;
-
- static constexpr std::size_t PerComponent = 8;
- static constexpr std::size_t PerComponents2 = PerComponent * PerComponent;
- static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent;
- static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent;
- static constexpr std::size_t PerFormat = PerComponents4 * 2;
-
- static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb,
- Tegra::Texture::ComponentType red_component,
- Tegra::Texture::ComponentType green_component,
- Tegra::Texture::ComponentType blue_component,
- Tegra::Texture::ComponentType alpha_component) noexcept;
-
- void Set(Tegra::Texture::TextureFormat format, bool is_srgb,
- Tegra::Texture::ComponentType red_component,
- Tegra::Texture::ComponentType green_component,
- Tegra::Texture::ComponentType blue_component,
- Tegra::Texture::ComponentType alpha_component,
- VideoCore::Surface::PixelFormat pixel_format);
-
- std::array<u8, NumTextureFormats * PerFormat> table;
-};
+VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo(
+ Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component,
+ Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component,
+ Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept;
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
new file mode 100644
index 000000000..d10ba4ccd
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -0,0 +1,95 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <string>
+
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/image_base.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_base.h"
+#include "video_core/texture_cache/render_targets.h"
+
+namespace VideoCommon {
+
+std::string Name(const ImageBase& image) {
+ const GPUVAddr gpu_addr = image.gpu_addr;
+ const ImageInfo& info = image.info;
+ const u32 width = info.size.width;
+ const u32 height = info.size.height;
+ const u32 depth = info.size.depth;
+ const u32 num_layers = image.info.resources.layers;
+ const u32 num_levels = image.info.resources.levels;
+ std::string resource;
+ if (num_layers > 1) {
+ resource += fmt::format(":L{}", num_layers);
+ }
+ if (num_levels > 1) {
+ resource += fmt::format(":M{}", num_levels);
+ }
+ switch (image.info.type) {
+ case ImageType::e1D:
+ return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource);
+ case ImageType::e2D:
+ return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource);
+ case ImageType::e3D:
+ return fmt::format("Image 2D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource);
+ case ImageType::Linear:
+ return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height);
+ case ImageType::Buffer:
+ return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width);
+ }
+ return "Invalid";
+}
+
+std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
+ const u32 width = image_view.size.width;
+ const u32 height = image_view.size.height;
+ const u32 depth = image_view.size.depth;
+ const u32 num_levels = image_view.range.extent.levels;
+ const u32 num_layers = image_view.range.extent.layers;
+
+ const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
+ switch (type.value_or(image_view.type)) {
+ case ImageViewType::e1D:
+ return fmt::format("ImageView 1D {}{}", width, level);
+ case ImageViewType::e2D:
+ return fmt::format("ImageView 2D {}x{}{}", width, height, level);
+ case ImageViewType::Cube:
+ return fmt::format("ImageView Cube {}x{}{}", width, height, level);
+ case ImageViewType::e3D:
+ return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level);
+ case ImageViewType::e1DArray:
+ return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers);
+ case ImageViewType::e2DArray:
+ return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers);
+ case ImageViewType::CubeArray:
+ return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers);
+ case ImageViewType::Rect:
+ return fmt::format("ImageView Rect {}x{}{}", width, height, level);
+ case ImageViewType::Buffer:
+ return fmt::format("BufferView {}", width);
+ }
+ return "Invalid";
+}
+
+std::string Name(const RenderTargets& render_targets) {
+ std::string_view debug_prefix;
+ const auto num_color = std::ranges::count_if(
+ render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); });
+ if (render_targets.depth_buffer_id) {
+ debug_prefix = num_color > 0 ? "R" : "Z";
+ } else {
+ debug_prefix = num_color > 0 ? "C" : "X";
+ }
+ const Extent2D size = render_targets.size;
+ if (num_color > 0) {
+ return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width,
+ size.height);
+ } else {
+ return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height);
+ }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
new file mode 100644
index 000000000..a48413983
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.h
@@ -0,0 +1,263 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+
+template <>
+struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> {
+ template <typename FormatContext>
+ auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) {
+ using VideoCore::Surface::PixelFormat;
+ const string_view name = [format] {
+ switch (format) {
+ case PixelFormat::A8B8G8R8_UNORM:
+ return "A8B8G8R8_UNORM";
+ case PixelFormat::A8B8G8R8_SNORM:
+ return "A8B8G8R8_SNORM";
+ case PixelFormat::A8B8G8R8_SINT:
+ return "A8B8G8R8_SINT";
+ case PixelFormat::A8B8G8R8_UINT:
+ return "A8B8G8R8_UINT";
+ case PixelFormat::R5G6B5_UNORM:
+ return "R5G6B5_UNORM";
+ case PixelFormat::B5G6R5_UNORM:
+ return "B5G6R5_UNORM";
+ case PixelFormat::A1R5G5B5_UNORM:
+ return "A1R5G5B5_UNORM";
+ case PixelFormat::A2B10G10R10_UNORM:
+ return "A2B10G10R10_UNORM";
+ case PixelFormat::A2B10G10R10_UINT:
+ return "A2B10G10R10_UINT";
+ case PixelFormat::A1B5G5R5_UNORM:
+ return "A1B5G5R5_UNORM";
+ case PixelFormat::R8_UNORM:
+ return "R8_UNORM";
+ case PixelFormat::R8_SNORM:
+ return "R8_SNORM";
+ case PixelFormat::R8_SINT:
+ return "R8_SINT";
+ case PixelFormat::R8_UINT:
+ return "R8_UINT";
+ case PixelFormat::R16G16B16A16_FLOAT:
+ return "R16G16B16A16_FLOAT";
+ case PixelFormat::R16G16B16A16_UNORM:
+ return "R16G16B16A16_UNORM";
+ case PixelFormat::R16G16B16A16_SNORM:
+ return "R16G16B16A16_SNORM";
+ case PixelFormat::R16G16B16A16_SINT:
+ return "R16G16B16A16_SINT";
+ case PixelFormat::R16G16B16A16_UINT:
+ return "R16G16B16A16_UINT";
+ case PixelFormat::B10G11R11_FLOAT:
+ return "B10G11R11_FLOAT";
+ case PixelFormat::R32G32B32A32_UINT:
+ return "R32G32B32A32_UINT";
+ case PixelFormat::BC1_RGBA_UNORM:
+ return "BC1_RGBA_UNORM";
+ case PixelFormat::BC2_UNORM:
+ return "BC2_UNORM";
+ case PixelFormat::BC3_UNORM:
+ return "BC3_UNORM";
+ case PixelFormat::BC4_UNORM:
+ return "BC4_UNORM";
+ case PixelFormat::BC4_SNORM:
+ return "BC4_SNORM";
+ case PixelFormat::BC5_UNORM:
+ return "BC5_UNORM";
+ case PixelFormat::BC5_SNORM:
+ return "BC5_SNORM";
+ case PixelFormat::BC7_UNORM:
+ return "BC7_UNORM";
+ case PixelFormat::BC6H_UFLOAT:
+ return "BC6H_UFLOAT";
+ case PixelFormat::BC6H_SFLOAT:
+ return "BC6H_SFLOAT";
+ case PixelFormat::ASTC_2D_4X4_UNORM:
+ return "ASTC_2D_4X4_UNORM";
+ case PixelFormat::B8G8R8A8_UNORM:
+ return "B8G8R8A8_UNORM";
+ case PixelFormat::R32G32B32A32_FLOAT:
+ return "R32G32B32A32_FLOAT";
+ case PixelFormat::R32G32B32A32_SINT:
+ return "R32G32B32A32_SINT";
+ case PixelFormat::R32G32_FLOAT:
+ return "R32G32_FLOAT";
+ case PixelFormat::R32G32_SINT:
+ return "R32G32_SINT";
+ case PixelFormat::R32_FLOAT:
+ return "R32_FLOAT";
+ case PixelFormat::R16_FLOAT:
+ return "R16_FLOAT";
+ case PixelFormat::R16_UNORM:
+ return "R16_UNORM";
+ case PixelFormat::R16_SNORM:
+ return "R16_SNORM";
+ case PixelFormat::R16_UINT:
+ return "R16_UINT";
+ case PixelFormat::R16_SINT:
+ return "R16_SINT";
+ case PixelFormat::R16G16_UNORM:
+ return "R16G16_UNORM";
+ case PixelFormat::R16G16_FLOAT:
+ return "R16G16_FLOAT";
+ case PixelFormat::R16G16_UINT:
+ return "R16G16_UINT";
+ case PixelFormat::R16G16_SINT:
+ return "R16G16_SINT";
+ case PixelFormat::R16G16_SNORM:
+ return "R16G16_SNORM";
+ case PixelFormat::R32G32B32_FLOAT:
+ return "R32G32B32_FLOAT";
+ case PixelFormat::A8B8G8R8_SRGB:
+ return "A8B8G8R8_SRGB";
+ case PixelFormat::R8G8_UNORM:
+ return "R8G8_UNORM";
+ case PixelFormat::R8G8_SNORM:
+ return "R8G8_SNORM";
+ case PixelFormat::R8G8_SINT:
+ return "R8G8_SINT";
+ case PixelFormat::R8G8_UINT:
+ return "R8G8_UINT";
+ case PixelFormat::R32G32_UINT:
+ return "R32G32_UINT";
+ case PixelFormat::R16G16B16X16_FLOAT:
+ return "R16G16B16X16_FLOAT";
+ case PixelFormat::R32_UINT:
+ return "R32_UINT";
+ case PixelFormat::R32_SINT:
+ return "R32_SINT";
+ case PixelFormat::ASTC_2D_8X8_UNORM:
+ return "ASTC_2D_8X8_UNORM";
+ case PixelFormat::ASTC_2D_8X5_UNORM:
+ return "ASTC_2D_8X5_UNORM";
+ case PixelFormat::ASTC_2D_5X4_UNORM:
+ return "ASTC_2D_5X4_UNORM";
+ case PixelFormat::B8G8R8A8_SRGB:
+ return "B8G8R8A8_SRGB";
+ case PixelFormat::BC1_RGBA_SRGB:
+ return "BC1_RGBA_SRGB";
+ case PixelFormat::BC2_SRGB:
+ return "BC2_SRGB";
+ case PixelFormat::BC3_SRGB:
+ return "BC3_SRGB";
+ case PixelFormat::BC7_SRGB:
+ return "BC7_SRGB";
+ case PixelFormat::A4B4G4R4_UNORM:
+ return "A4B4G4R4_UNORM";
+ case PixelFormat::ASTC_2D_4X4_SRGB:
+ return "ASTC_2D_4X4_SRGB";
+ case PixelFormat::ASTC_2D_8X8_SRGB:
+ return "ASTC_2D_8X8_SRGB";
+ case PixelFormat::ASTC_2D_8X5_SRGB:
+ return "ASTC_2D_8X5_SRGB";
+ case PixelFormat::ASTC_2D_5X4_SRGB:
+ return "ASTC_2D_5X4_SRGB";
+ case PixelFormat::ASTC_2D_5X5_UNORM:
+ return "ASTC_2D_5X5_UNORM";
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ return "ASTC_2D_5X5_SRGB";
+ case PixelFormat::ASTC_2D_10X8_UNORM:
+ return "ASTC_2D_10X8_UNORM";
+ case PixelFormat::ASTC_2D_10X8_SRGB:
+ return "ASTC_2D_10X8_SRGB";
+ case PixelFormat::ASTC_2D_6X6_UNORM:
+ return "ASTC_2D_6X6_UNORM";
+ case PixelFormat::ASTC_2D_6X6_SRGB:
+ return "ASTC_2D_6X6_SRGB";
+ case PixelFormat::ASTC_2D_10X10_UNORM:
+ return "ASTC_2D_10X10_UNORM";
+ case PixelFormat::ASTC_2D_10X10_SRGB:
+ return "ASTC_2D_10X10_SRGB";
+ case PixelFormat::ASTC_2D_12X12_UNORM:
+ return "ASTC_2D_12X12_UNORM";
+ case PixelFormat::ASTC_2D_12X12_SRGB:
+ return "ASTC_2D_12X12_SRGB";
+ case PixelFormat::ASTC_2D_8X6_UNORM:
+ return "ASTC_2D_8X6_UNORM";
+ case PixelFormat::ASTC_2D_8X6_SRGB:
+ return "ASTC_2D_8X6_SRGB";
+ case PixelFormat::ASTC_2D_6X5_UNORM:
+ return "ASTC_2D_6X5_UNORM";
+ case PixelFormat::ASTC_2D_6X5_SRGB:
+ return "ASTC_2D_6X5_SRGB";
+ case PixelFormat::E5B9G9R9_FLOAT:
+ return "E5B9G9R9_FLOAT";
+ case PixelFormat::D32_FLOAT:
+ return "D32_FLOAT";
+ case PixelFormat::D16_UNORM:
+ return "D16_UNORM";
+ case PixelFormat::D24_UNORM_S8_UINT:
+ return "D24_UNORM_S8_UINT";
+ case PixelFormat::S8_UINT_D24_UNORM:
+ return "S8_UINT_D24_UNORM";
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ return "D32_FLOAT_S8_UINT";
+ case PixelFormat::MaxDepthStencilFormat:
+ case PixelFormat::Invalid:
+ return "Invalid";
+ }
+ return "Invalid";
+ }();
+ return formatter<string_view>::format(name, ctx);
+ }
+};
+
+template <>
+struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> {
+ template <typename FormatContext>
+ auto format(VideoCommon::ImageType type, FormatContext& ctx) {
+ const string_view name = [type] {
+ using VideoCommon::ImageType;
+ switch (type) {
+ case ImageType::e1D:
+ return "1D";
+ case ImageType::e2D:
+ return "2D";
+ case ImageType::e3D:
+ return "3D";
+ case ImageType::Linear:
+ return "Linear";
+ case ImageType::Buffer:
+ return "Buffer";
+ }
+ return "Invalid";
+ }();
+ return formatter<string_view>::format(name, ctx);
+ }
+};
+
+template <>
+struct fmt::formatter<VideoCommon::Extent3D> {
+ constexpr auto parse(fmt::format_parse_context& ctx) {
+ return ctx.begin();
+ }
+
+ template <typename FormatContext>
+ auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height,
+ extent.depth);
+ }
+};
+
+namespace VideoCommon {
+
+struct ImageBase;
+struct ImageViewBase;
+struct RenderTargets;
+
+[[nodiscard]] std::string Name(const ImageBase& image);
+
+[[nodiscard]] std::string Name(const ImageViewBase& image_view,
+ std::optional<ImageViewType> type = std::nullopt);
+
+[[nodiscard]] std::string Name(const RenderTargets& render_targets);
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
new file mode 100644
index 000000000..959b3f115
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -0,0 +1,218 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <optional>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/image_base.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/util.h"
+
+namespace VideoCommon {
+
+using VideoCore::Surface::DefaultBlockHeight;
+using VideoCore::Surface::DefaultBlockWidth;
+
+namespace {
+/// Returns the base layer and mip level offset
+[[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) {
+ if (layer_stride == 0) {
+ return {0, diff};
+ } else {
+ return {diff / layer_stride, diff % layer_stride};
+ }
+}
+
+[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) {
+ return layers.base_level < info.resources.levels &&
+ layers.base_layer + layers.num_layers <= info.resources.layers;
+}
+
+[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) {
+ const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level);
+ const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level);
+ if (!ValidateLayers(copy.src_subresource, src)) {
+ return false;
+ }
+ if (!ValidateLayers(copy.dst_subresource, dst)) {
+ return false;
+ }
+ if (copy.src_offset.x + copy.extent.width > src_size.width ||
+ copy.src_offset.y + copy.extent.height > src_size.height ||
+ copy.src_offset.z + copy.extent.depth > src_size.depth) {
+ return false;
+ }
+ if (copy.dst_offset.x + copy.extent.width > dst_size.width ||
+ copy.dst_offset.y + copy.extent.height > dst_size.height ||
+ copy.dst_offset.z + copy.extent.depth > dst_size.depth) {
+ return false;
+ }
+ return true;
+}
+} // Anonymous namespace
+
+ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
+ : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
+ unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
+ converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
+ cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
+ mip_level_offsets{CalculateMipLevelOffsets(info)} {
+ if (info.type == ImageType::e3D) {
+ slice_offsets = CalculateSliceOffsets(info);
+ slice_subresources = CalculateSliceSubresources(info);
+ }
+}
+
+std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
+ if (other_addr < gpu_addr) {
+ // Subresource address can't be lower than the base
+ return std::nullopt;
+ }
+ const u32 diff = static_cast<u32>(other_addr - gpu_addr);
+ if (diff > guest_size_bytes) {
+ // This can happen when two CPU addresses are used for different GPU addresses
+ return std::nullopt;
+ }
+ if (info.type != ImageType::e3D) {
+ const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
+ const auto end = mip_level_offsets.begin() + info.resources.levels;
+ const auto it = std::find(mip_level_offsets.begin(), end, mip_offset);
+ if (layer > info.resources.layers || it == end) {
+ return std::nullopt;
+ }
+ return SubresourceBase{
+ .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)),
+ .layer = layer,
+ };
+ } else {
+ // TODO: Consider using binary_search after a threshold
+ const auto it = std::ranges::find(slice_offsets, diff);
+ if (it == slice_offsets.cend()) {
+ return std::nullopt;
+ }
+ return slice_subresources[std::distance(slice_offsets.begin(), it)];
+ }
+}
+
+ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept {
+ const auto it = std::ranges::find(image_view_infos, view_info);
+ if (it == image_view_infos.end()) {
+ return ImageViewId{};
+ }
+ return image_view_ids[std::distance(image_view_infos.begin(), it)];
+}
+
+void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) {
+ image_view_infos.push_back(view_info);
+ image_view_ids.push_back(image_view_id);
+}
+
+void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
+ static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
+ ASSERT(lhs.info.type == rhs.info.type);
+ std::optional<SubresourceBase> base;
+ if (lhs.info.type == ImageType::Linear) {
+ base = SubresourceBase{.level = 0, .layer = 0};
+ } else {
+ // We are passing relaxed formats as an option, having broken views or not won't matter
+ static constexpr bool broken_views = false;
+ base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views);
+ }
+ if (!base) {
+ LOG_ERROR(HW_GPU, "Image alias should have been flipped");
+ return;
+ }
+ const PixelFormat lhs_format = lhs.info.format;
+ const PixelFormat rhs_format = rhs.info.format;
+ const Extent2D lhs_block{
+ .width = DefaultBlockWidth(lhs_format),
+ .height = DefaultBlockHeight(lhs_format),
+ };
+ const Extent2D rhs_block{
+ .width = DefaultBlockWidth(rhs_format),
+ .height = DefaultBlockHeight(rhs_format),
+ };
+ const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1;
+ const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1;
+ if (is_lhs_compressed && is_rhs_compressed) {
+ LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented");
+ return;
+ }
+ const s32 lhs_mips = lhs.info.resources.levels;
+ const s32 rhs_mips = rhs.info.resources.levels;
+ const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips);
+ AliasedImage lhs_alias;
+ AliasedImage rhs_alias;
+ lhs_alias.id = rhs_id;
+ rhs_alias.id = lhs_id;
+ lhs_alias.copies.reserve(num_mips);
+ rhs_alias.copies.reserve(num_mips);
+ for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) {
+ Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level);
+ Extent3D rhs_size = MipSize(rhs.info.size, mip_level);
+ if (is_lhs_compressed) {
+ lhs_size.width /= lhs_block.width;
+ lhs_size.height /= lhs_block.height;
+ }
+ if (is_rhs_compressed) {
+ rhs_size.width /= rhs_block.width;
+ rhs_size.height /= rhs_block.height;
+ }
+ const Extent3D copy_size{
+ .width = std::min(lhs_size.width, rhs_size.width),
+ .height = std::min(lhs_size.height, rhs_size.height),
+ .depth = std::min(lhs_size.depth, rhs_size.depth),
+ };
+ if (copy_size.width == 0 || copy_size.height == 0) {
+ LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased.");
+ continue;
+ }
+ const bool is_lhs_3d = lhs.info.type == ImageType::e3D;
+ const bool is_rhs_3d = rhs.info.type == ImageType::e3D;
+ const Offset3D lhs_offset{0, 0, 0};
+ const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0};
+ const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer;
+ const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers;
+ const s32 num_layers = std::min(lhs_layers, rhs_layers);
+ const SubresourceLayers lhs_subresource{
+ .base_level = mip_level,
+ .base_layer = 0,
+ .num_layers = num_layers,
+ };
+ const SubresourceLayers rhs_subresource{
+ .base_level = base->level + mip_level,
+ .base_layer = is_rhs_3d ? 0 : base->layer,
+ .num_layers = num_layers,
+ };
+ [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{
+ .src_subresource = lhs_subresource,
+ .dst_subresource = rhs_subresource,
+ .src_offset = lhs_offset,
+ .dst_offset = rhs_offset,
+ .extent = copy_size,
+ });
+ [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{
+ .src_subresource = rhs_subresource,
+ .dst_subresource = lhs_subresource,
+ .src_offset = rhs_offset,
+ .dst_offset = lhs_offset,
+ .extent = copy_size,
+ });
+ ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy");
+ ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy");
+ }
+ ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
+ if (lhs_alias.copies.empty()) {
+ return;
+ }
+ lhs.aliased_images.push_back(std::move(lhs_alias));
+ rhs.aliased_images.push_back(std::move(rhs_alias));
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
new file mode 100644
index 000000000..b7f3b7e43
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.h
@@ -0,0 +1,83 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <optional>
+#include <vector>
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+enum class ImageFlagBits : u32 {
+ AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU
+ Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted
+ CpuModified = 1 << 2, ///< Contents have been modified from the CPU
+ GpuModified = 1 << 3, ///< Contents have been modified from the GPU
+ Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT
+ Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
+ Registered = 1 << 6, ///< True when the image is registered
+ Picked = 1 << 7, ///< Temporary flag to mark the image as picked
+};
+DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
+
+struct ImageViewInfo;
+
+struct AliasedImage {
+ std::vector<ImageCopy> copies;
+ ImageId id;
+};
+
+struct ImageBase {
+ explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
+
+ [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
+
+ [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
+
+ void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
+
+ [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
+ const VAddr overlap_end = overlap_cpu_addr + overlap_size;
+ return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
+ }
+
+ ImageInfo info;
+
+ u32 guest_size_bytes = 0;
+ u32 unswizzled_size_bytes = 0;
+ u32 converted_size_bytes = 0;
+ ImageFlagBits flags = ImageFlagBits::CpuModified;
+
+ GPUVAddr gpu_addr = 0;
+ VAddr cpu_addr = 0;
+ VAddr cpu_addr_end = 0;
+
+ u64 modification_tick = 0;
+ u64 frame_tick = 0;
+
+ std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
+
+ std::vector<ImageViewInfo> image_view_infos;
+ std::vector<ImageViewId> image_view_ids;
+
+ std::vector<u32> slice_offsets;
+ std::vector<SubresourceBase> slice_subresources;
+
+ std::vector<AliasedImage> aliased_images;
+};
+
+struct ImageAllocBase {
+ std::vector<ImageId> images;
+};
+
+void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
new file mode 100644
index 000000000..64fd7010a
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -0,0 +1,189 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/samples_helper.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using VideoCore::Surface::PixelFormat;
+
+ImageInfo::ImageInfo(const TICEntry& config) noexcept {
+ format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
+ config.a_type, config.srgb_conversion);
+ num_samples = NumSamples(config.msaa_mode);
+ resources.levels = config.max_mip_level + 1;
+ if (config.IsPitchLinear()) {
+ pitch = config.Pitch();
+ } else if (config.IsBlockLinear()) {
+ block = Extent3D{
+ .width = config.block_width,
+ .height = config.block_height,
+ .depth = config.block_depth,
+ };
+ }
+ tile_width_spacing = config.tile_width_spacing;
+ if (config.texture_type != TextureType::Texture2D &&
+ config.texture_type != TextureType::Texture2DNoMipmap) {
+ ASSERT(!config.IsPitchLinear());
+ }
+ switch (config.texture_type) {
+ case TextureType::Texture1D:
+ ASSERT(config.BaseLayer() == 0);
+ type = ImageType::e1D;
+ size.width = config.Width();
+ break;
+ case TextureType::Texture1DArray:
+ UNIMPLEMENTED_IF(config.BaseLayer() != 0);
+ type = ImageType::e1D;
+ size.width = config.Width();
+ resources.layers = config.Depth();
+ break;
+ case TextureType::Texture2D:
+ case TextureType::Texture2DNoMipmap:
+ ASSERT(config.Depth() == 1);
+ type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
+ size.width = config.Width();
+ size.height = config.Height();
+ resources.layers = config.BaseLayer() + 1;
+ break;
+ case TextureType::Texture2DArray:
+ type = ImageType::e2D;
+ size.width = config.Width();
+ size.height = config.Height();
+ resources.layers = config.BaseLayer() + config.Depth();
+ break;
+ case TextureType::TextureCubemap:
+ ASSERT(config.Depth() == 1);
+ type = ImageType::e2D;
+ size.width = config.Width();
+ size.height = config.Height();
+ resources.layers = config.BaseLayer() + 6;
+ break;
+ case TextureType::TextureCubeArray:
+ UNIMPLEMENTED_IF(config.load_store_hint != 0);
+ type = ImageType::e2D;
+ size.width = config.Width();
+ size.height = config.Height();
+ resources.layers = config.BaseLayer() + config.Depth() * 6;
+ break;
+ case TextureType::Texture3D:
+ ASSERT(config.BaseLayer() == 0);
+ type = ImageType::e3D;
+ size.width = config.Width();
+ size.height = config.Height();
+ size.depth = config.Depth();
+ break;
+ case TextureType::Texture1DBuffer:
+ type = ImageType::Buffer;
+ size.width = config.Width();
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
+ break;
+ }
+ if (type != ImageType::Linear) {
+ // FIXME: Call this without passing *this
+ layer_stride = CalculateLayerStride(*this);
+ maybe_unaligned_layer_stride = CalculateLayerSize(*this);
+ }
+}
+
+ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
+ const auto& rt = regs.rt[index];
+ format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
+ if (rt.tile_mode.is_pitch_linear) {
+ ASSERT(rt.tile_mode.is_3d == 0);
+ type = ImageType::Linear;
+ pitch = rt.width;
+ size = Extent3D{
+ .width = pitch / BytesPerBlock(format),
+ .height = rt.height,
+ .depth = 1,
+ };
+ return;
+ }
+ size.width = rt.width;
+ size.height = rt.height;
+ layer_stride = rt.layer_stride * 4;
+ maybe_unaligned_layer_stride = layer_stride;
+ num_samples = NumSamples(regs.multisample_mode);
+ block = Extent3D{
+ .width = rt.tile_mode.block_width,
+ .height = rt.tile_mode.block_height,
+ .depth = rt.tile_mode.block_depth,
+ };
+ if (rt.tile_mode.is_3d) {
+ type = ImageType::e3D;
+ size.depth = rt.depth;
+ } else {
+ type = ImageType::e2D;
+ resources.layers = rt.depth;
+ }
+}
+
+ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
+ format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
+ size.width = regs.zeta_width;
+ size.height = regs.zeta_height;
+ resources.levels = 1;
+ layer_stride = regs.zeta.layer_stride * 4;
+ maybe_unaligned_layer_stride = layer_stride;
+ num_samples = NumSamples(regs.multisample_mode);
+ block = Extent3D{
+ .width = regs.zeta.tile_mode.block_width,
+ .height = regs.zeta.tile_mode.block_height,
+ .depth = regs.zeta.tile_mode.block_depth,
+ };
+ if (regs.zeta.tile_mode.is_pitch_linear) {
+ ASSERT(regs.zeta.tile_mode.is_3d == 0);
+ type = ImageType::Linear;
+ pitch = size.width * BytesPerBlock(format);
+ } else if (regs.zeta.tile_mode.is_3d) {
+ ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0);
+ type = ImageType::e3D;
+ size.depth = regs.zeta_depth;
+ } else {
+ type = ImageType::e2D;
+ resources.layers = regs.zeta_depth;
+ }
+}
+
+ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
+ UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
+ format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
+ if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
+ type = ImageType::Linear;
+ size = Extent3D{
+ .width = config.pitch / VideoCore::Surface::BytesPerBlock(format),
+ .height = config.height,
+ .depth = 1,
+ };
+ pitch = config.pitch;
+ } else {
+ type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
+ block = Extent3D{
+ .width = config.block_width,
+ .height = config.block_height,
+ .depth = config.block_depth,
+ };
+ // 3D blits with more than once slice are not implemented for now
+ // Render to individual slices
+ size = Extent3D{
+ .width = config.width,
+ .height = config.height,
+ .depth = 1,
+ };
+ }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
new file mode 100644
index 000000000..5049fc36e
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.h
@@ -0,0 +1,38 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::TICEntry;
+using VideoCore::Surface::PixelFormat;
+
+struct ImageInfo {
+ explicit ImageInfo() = default;
+ explicit ImageInfo(const TICEntry& config) noexcept;
+ explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
+ explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
+ explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept;
+
+ PixelFormat format = PixelFormat::Invalid;
+ ImageType type = ImageType::e1D;
+ SubresourceExtent resources;
+ Extent3D size{1, 1, 1};
+ union {
+ Extent3D block{0, 0, 0};
+ u32 pitch;
+ };
+ u32 layer_stride = 0;
+ u32 maybe_unaligned_layer_stride = 0;
+ u32 num_samples = 1;
+ u32 tile_width_spacing = 0;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
new file mode 100644
index 000000000..18f72e508
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -0,0 +1,41 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+
+#include "common/assert.h"
+#include "core/settings.h"
+#include "video_core/compatible_formats.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_base.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
+ ImageId image_id_)
+ : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range},
+ size{
+ .width = std::max(image_info.size.width >> range.base.level, 1u),
+ .height = std::max(image_info.size.height >> range.base.level, 1u),
+ .depth = std::max(image_info.size.depth >> range.base.level, 1u),
+ } {
+ ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false),
+ "Image view format {} is incompatible with image format {}", info.format,
+ image_info.format);
+ const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
+ if (image_info.type == ImageType::Linear && is_async) {
+ flags |= ImageViewFlagBits::PreemtiveDownload;
+ }
+ if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) {
+ flags |= ImageViewFlagBits::Slice;
+ }
+}
+
+ImageViewBase::ImageViewBase(const NullImageParams&) {}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
new file mode 100644
index 000000000..73954167e
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -0,0 +1,47 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+using VideoCore::Surface::PixelFormat;
+
+struct ImageViewInfo;
+struct ImageInfo;
+
+struct NullImageParams {};
+
+enum class ImageViewFlagBits : u16 {
+ PreemtiveDownload = 1 << 0,
+ Strong = 1 << 1,
+ Slice = 1 << 2,
+};
+DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
+
+struct ImageViewBase {
+ explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
+ ImageId image_id);
+ explicit ImageViewBase(const NullImageParams&);
+
+ [[nodiscard]] bool IsBuffer() const noexcept {
+ return type == ImageViewType::Buffer;
+ }
+
+ ImageId image_id{};
+ PixelFormat format{};
+ ImageViewType type{};
+ SubresourceRange range;
+ Extent3D size{0, 0, 0};
+ ImageViewFlagBits flags{};
+
+ u64 invalidation_tick = 0;
+ u64 modification_tick = 0;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
new file mode 100644
index 000000000..faf5b151f
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -0,0 +1,88 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <limits>
+
+#include "common/assert.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+namespace {
+
+constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();
+
+[[nodiscard]] u8 CastSwizzle(SwizzleSource source) {
+ const u8 casted = static_cast<u8>(source);
+ ASSERT(static_cast<SwizzleSource>(casted) == source);
+ return casted;
+}
+
+} // Anonymous namespace
+
+ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept
+ : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)},
+ y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)},
+ w_source{CastSwizzle(config.w_source)} {
+ range.base = SubresourceBase{
+ .level = static_cast<s32>(config.res_min_mip_level),
+ .layer = base_layer,
+ };
+ range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1;
+
+ switch (config.texture_type) {
+ case TextureType::Texture1D:
+ ASSERT(config.Height() == 1);
+ ASSERT(config.Depth() == 1);
+ type = ImageViewType::e1D;
+ break;
+ case TextureType::Texture2D:
+ case TextureType::Texture2DNoMipmap:
+ ASSERT(config.Depth() == 1);
+ type = config.normalized_coords ? ImageViewType::e2D : ImageViewType::Rect;
+ break;
+ case TextureType::Texture3D:
+ type = ImageViewType::e3D;
+ break;
+ case TextureType::TextureCubemap:
+ ASSERT(config.Depth() == 1);
+ type = ImageViewType::Cube;
+ range.extent.layers = 6;
+ break;
+ case TextureType::Texture1DArray:
+ type = ImageViewType::e1DArray;
+ range.extent.layers = config.Depth();
+ break;
+ case TextureType::Texture2DArray:
+ type = ImageViewType::e2DArray;
+ range.extent.layers = config.Depth();
+ break;
+ case TextureType::Texture1DBuffer:
+ type = ImageViewType::Buffer;
+ break;
+ case TextureType::TextureCubeArray:
+ type = ImageViewType::CubeArray;
+ range.extent.layers = config.Depth() * 6;
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
+ break;
+ }
+}
+
+ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_,
+ SubresourceRange range_) noexcept
+ : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE},
+ y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE},
+ w_source{RENDER_TARGET_SWIZZLE} {}
+
+bool ImageViewInfo::IsRenderTarget() const noexcept {
+ return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE &&
+ z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h
new file mode 100644
index 000000000..0c1f99117
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.h
@@ -0,0 +1,50 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TICEntry;
+using VideoCore::Surface::PixelFormat;
+
+/// Properties used to determine a image view
+struct ImageViewInfo {
+ explicit ImageViewInfo() noexcept = default;
+ explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept;
+ explicit ImageViewInfo(ImageViewType type, PixelFormat format,
+ SubresourceRange range = {}) noexcept;
+
+ auto operator<=>(const ImageViewInfo&) const noexcept = default;
+
+ [[nodiscard]] bool IsRenderTarget() const noexcept;
+
+ [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept {
+ return std::array{
+ static_cast<SwizzleSource>(x_source),
+ static_cast<SwizzleSource>(y_source),
+ static_cast<SwizzleSource>(z_source),
+ static_cast<SwizzleSource>(w_source),
+ };
+ }
+
+ ImageViewType type{};
+ PixelFormat format{};
+ SubresourceRange range;
+ u8 x_source = static_cast<u8>(SwizzleSource::R);
+ u8 y_source = static_cast<u8>(SwizzleSource::G);
+ u8 z_source = static_cast<u8>(SwizzleSource::B);
+ u8 w_source = static_cast<u8>(SwizzleSource::A);
+};
+static_assert(std::has_unique_object_representations_v<ImageViewInfo>);
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
new file mode 100644
index 000000000..9b9544b07
--- /dev/null
+++ b/src/video_core/texture_cache/render_targets.h
@@ -0,0 +1,51 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <span>
+#include <utility>
+
+#include "common/bit_cast.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+/// Framebuffer properties used to lookup a framebuffer
+struct RenderTargets {
+ constexpr auto operator<=>(const RenderTargets&) const noexcept = default;
+
+ constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept {
+ const auto contains = [elements](ImageViewId item) {
+ return std::ranges::find(elements, item) != elements.end();
+ };
+ return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id);
+ }
+
+ std::array<ImageViewId, NUM_RT> color_buffer_ids;
+ ImageViewId depth_buffer_id;
+ std::array<u8, NUM_RT> draw_buffers{};
+ Extent2D size;
+};
+
+} // namespace VideoCommon
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::RenderTargets> {
+ size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept {
+ using VideoCommon::ImageViewId;
+ size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id);
+ for (const ImageViewId color_buffer_id : rt.color_buffer_ids) {
+ value ^= std::hash<ImageViewId>{}(color_buffer_id);
+ }
+ value ^= Common::BitCast<u64>(rt.draw_buffers);
+ value ^= Common::BitCast<u64>(rt.size);
+ return value;
+ }
+};
+
+} // namespace std
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
new file mode 100644
index 000000000..04539a43c
--- /dev/null
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -0,0 +1,55 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <utility>
+
+#include "common/assert.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
+ switch (num_samples) {
+ case 1:
+ return {0, 0};
+ case 2:
+ return {1, 0};
+ case 4:
+ return {1, 1};
+ case 8:
+ return {2, 1};
+ case 16:
+ return {2, 2};
+ }
+ UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
+ return {1, 1};
+}
+
+[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
+ using Tegra::Texture::MsaaMode;
+ switch (msaa_mode) {
+ case MsaaMode::Msaa1x1:
+ return 1;
+ case MsaaMode::Msaa2x1:
+ case MsaaMode::Msaa2x1_D3D:
+ return 2;
+ case MsaaMode::Msaa2x2:
+ case MsaaMode::Msaa2x2_VC4:
+ case MsaaMode::Msaa2x2_VC12:
+ return 4;
+ case MsaaMode::Msaa4x2:
+ case MsaaMode::Msaa4x2_D3D:
+ case MsaaMode::Msaa4x2_VC8:
+ case MsaaMode::Msaa4x2_VC24:
+ return 8;
+ case MsaaMode::Msaa4x4:
+ return 16;
+ }
+ UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode));
+ return 1;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
new file mode 100644
index 000000000..eae3be6ea
--- /dev/null
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -0,0 +1,156 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <concepts>
+#include <numeric>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+struct SlotId {
+ static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
+
+ constexpr auto operator<=>(const SlotId&) const noexcept = default;
+
+ constexpr explicit operator bool() const noexcept {
+ return index != INVALID_INDEX;
+ }
+
+ u32 index = INVALID_INDEX;
+};
+
+template <class T>
+requires std::is_nothrow_move_assignable_v<T>&&
+ std::is_nothrow_move_constructible_v<T> class SlotVector {
+public:
+ ~SlotVector() noexcept {
+ size_t index = 0;
+ for (u64 bits : stored_bitset) {
+ for (size_t bit = 0; bits; ++bit, bits >>= 1) {
+ if ((bits & 1) != 0) {
+ values[index + bit].object.~T();
+ }
+ }
+ index += 64;
+ }
+ delete[] values;
+ }
+
+ [[nodiscard]] T& operator[](SlotId id) noexcept {
+ ValidateIndex(id);
+ return values[id.index].object;
+ }
+
+ [[nodiscard]] const T& operator[](SlotId id) const noexcept {
+ ValidateIndex(id);
+ return values[id.index].object;
+ }
+
+ template <typename... Args>
+ [[nodiscard]] SlotId insert(Args&&... args) noexcept {
+ const u32 index = FreeValueIndex();
+ new (&values[index].object) T(std::forward<Args>(args)...);
+ SetStorageBit(index);
+
+ return SlotId{index};
+ }
+
+ void erase(SlotId id) noexcept {
+ values[id.index].object.~T();
+ free_list.push_back(id.index);
+ ResetStorageBit(id.index);
+ }
+
+private:
+ struct NonTrivialDummy {
+ NonTrivialDummy() noexcept {}
+ };
+
+ union Entry {
+ Entry() noexcept : dummy{} {}
+ ~Entry() noexcept {}
+
+ NonTrivialDummy dummy;
+ T object;
+ };
+
+ void SetStorageBit(u32 index) noexcept {
+ stored_bitset[index / 64] |= u64(1) << (index % 64);
+ }
+
+ void ResetStorageBit(u32 index) noexcept {
+ stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
+ }
+
+ bool ReadStorageBit(u32 index) noexcept {
+ return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
+ }
+
+ void ValidateIndex(SlotId id) const noexcept {
+ DEBUG_ASSERT(id);
+ DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
+ DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
+ }
+
+ [[nodiscard]] u32 FreeValueIndex() noexcept {
+ if (free_list.empty()) {
+ Reserve(values_capacity ? (values_capacity << 1) : 1);
+ }
+ const u32 free_index = free_list.back();
+ free_list.pop_back();
+ return free_index;
+ }
+
+ void Reserve(size_t new_capacity) noexcept {
+ Entry* const new_values = new Entry[new_capacity];
+ size_t index = 0;
+ for (u64 bits : stored_bitset) {
+ for (size_t bit = 0; bits; ++bit, bits >>= 1) {
+ const size_t i = index + bit;
+ if ((bits & 1) == 0) {
+ continue;
+ }
+ T& old_value = values[i].object;
+ new (&new_values[i].object) T(std::move(old_value));
+ old_value.~T();
+ }
+ index += 64;
+ }
+
+ stored_bitset.resize((new_capacity + 63) / 64);
+
+ const size_t old_free_size = free_list.size();
+ free_list.resize(old_free_size + (new_capacity - values_capacity));
+ std::iota(free_list.begin() + old_free_size, free_list.end(),
+ static_cast<u32>(values_capacity));
+
+ delete[] values;
+ values = new_values;
+ values_capacity = new_capacity;
+ }
+
+ Entry* values = nullptr;
+ size_t values_capacity = 0;
+ size_t values_size = 0;
+
+ std::vector<u64> stored_bitset;
+ std::vector<u32> free_list;
+};
+
+} // namespace VideoCommon
+
+template <>
+struct std::hash<VideoCommon::SlotId> {
+ size_t operator()(const VideoCommon::SlotId& id) const noexcept {
+ return std::hash<u32>{}(id.index);
+ }
+};
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
deleted file mode 100644
index dfcf36e0b..000000000
--- a/src/video_core/texture_cache/surface_base.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/algorithm.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/microprofile.h"
-#include "video_core/memory_manager.h"
-#include "video_core/texture_cache/surface_base.h"
-#include "video_core/texture_cache/surface_params.h"
-#include "video_core/textures/convert.h"
-
-namespace VideoCommon {
-
-MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128));
-
-using Tegra::Texture::ConvertFromGuestToHost;
-using VideoCore::MortonSwizzleMode;
-using VideoCore::Surface::IsPixelFormatASTC;
-using VideoCore::Surface::PixelFormat;
-
-StagingCache::StagingCache() = default;
-
-StagingCache::~StagingCache() = default;
-
-SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
- bool is_astc_supported)
- : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels),
- mipmap_offsets(params.num_levels) {
- is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported;
- host_memory_size = params.GetHostSizeInBytes(is_converted);
-
- std::size_t offset = 0;
- for (u32 level = 0; level < params.num_levels; ++level) {
- const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
- mipmap_sizes[level] = mipmap_size;
- mipmap_offsets[level] = offset;
- offset += mipmap_size;
- }
- layer_size = offset;
- if (params.is_layered) {
- if (params.is_tiled) {
- layer_size =
- SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
- }
- guest_memory_size = layer_size * params.depth;
- } else {
- guest_memory_size = layer_size;
- }
-}
-
-MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
- const u32 src_bpp{params.GetBytesPerPixel()};
- const u32 dst_bpp{rhs.GetBytesPerPixel()};
- const bool ib1 = params.IsBuffer();
- const bool ib2 = rhs.IsBuffer();
- if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) {
- const bool cb1 = params.IsCompressed();
- const bool cb2 = rhs.IsCompressed();
- if (cb1 == cb2) {
- return MatchTopologyResult::FullMatch;
- }
- return MatchTopologyResult::CompressUnmatch;
- }
- return MatchTopologyResult::None;
-}
-
-MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
- // Buffer surface Check
- if (params.IsBuffer()) {
- const std::size_t wd1 = params.width * params.GetBytesPerPixel();
- const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
- if (wd1 == wd2) {
- return MatchStructureResult::FullMatch;
- }
- return MatchStructureResult::None;
- }
-
- // Linear Surface check
- if (!params.is_tiled) {
- if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
- if (params.width == rhs.width) {
- return MatchStructureResult::FullMatch;
- } else {
- return MatchStructureResult::SemiMatch;
- }
- }
- return MatchStructureResult::None;
- }
-
- // Tiled Surface check
- if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
- params.tile_width_spacing, params.num_levels) ==
- std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
- rhs.tile_width_spacing, rhs.num_levels)) {
- if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
- return MatchStructureResult::FullMatch;
- }
- const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
- rhs.pixel_format);
- const u32 hs =
- SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
- const u32 w1 = params.GetBlockAlignedWidth();
- if (std::tie(w1, params.height) == std::tie(ws, hs)) {
- return MatchStructureResult::SemiMatch;
- }
- }
- return MatchStructureResult::None;
-}
-
-std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
- const GPUVAddr candidate_gpu_addr) const {
- if (gpu_addr == candidate_gpu_addr) {
- return {{0, 0}};
- }
- if (candidate_gpu_addr < gpu_addr) {
- return {};
- }
- const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
- const auto layer{static_cast<u32>(relative_address / layer_size)};
- if (layer >= params.depth) {
- return {};
- }
- const GPUVAddr mipmap_address = relative_address - layer_size * layer;
- const auto mipmap_it =
- Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
- if (mipmap_it == mipmap_offsets.end()) {
- return {};
- }
- const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
- return std::make_pair(layer, level);
-}
-
-std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
- const u32 layers{params.depth};
- const u32 mipmaps{params.num_levels};
- std::vector<CopyParams> result;
- result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));
-
- for (u32 layer = 0; layer < layers; layer++) {
- for (u32 level = 0; level < mipmaps; level++) {
- const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
- const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
- result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1);
- }
- }
- return result;
-}
-
-std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const {
- const u32 mipmaps{params.num_levels};
- std::vector<CopyParams> result;
- result.reserve(mipmaps);
-
- for (u32 level = 0; level < mipmaps; level++) {
- const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
- const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
- const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))};
- result.emplace_back(width, height, depth, level);
- }
- return result;
-}
-
-void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,
- u8* buffer, u32 level) {
- const u32 width{params.GetMipWidth(level)};
- const u32 height{params.GetMipHeight(level)};
- const u32 block_height{params.GetMipBlockHeight(level)};
- const u32 block_depth{params.GetMipBlockDepth(level)};
-
- std::size_t guest_offset{mipmap_offsets[level]};
- if (params.is_layered) {
- std::size_t host_offset = 0;
- const std::size_t guest_stride = layer_size;
- const std::size_t host_stride = params.GetHostLayerSize(level);
- for (u32 layer = 0; layer < params.depth; ++layer) {
- MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1,
- params.tile_width_spacing, buffer + host_offset, memory + guest_offset);
- guest_offset += guest_stride;
- host_offset += host_stride;
- }
- } else {
- MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth,
- params.GetMipDepth(level), params.tile_width_spacing, buffer,
- memory + guest_offset);
- }
-}
-
-void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
- StagingCache& staging_cache) {
- MICROPROFILE_SCOPE(GPU_Load_Texture);
- auto& staging_buffer = staging_cache.GetBuffer(0);
- u8* host_ptr;
- // Use an extra temporal buffer
- auto& tmp_buffer = staging_cache.GetBuffer(1);
- tmp_buffer.resize(guest_memory_size);
- host_ptr = tmp_buffer.data();
- memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
-
- if (params.is_tiled) {
- ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
- params.block_width, static_cast<u32>(params.target));
- for (u32 level = 0; level < params.num_levels; ++level) {
- const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
- SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
- staging_buffer.data() + host_offset, level);
- }
- } else {
- ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
- const u32 bpp{params.GetBytesPerPixel()};
- const u32 block_width{params.GetDefaultBlockWidth()};
- const u32 block_height{params.GetDefaultBlockHeight()};
- const u32 width{(params.width + block_width - 1) / block_width};
- const u32 height{(params.height + block_height - 1) / block_height};
- const u32 copy_size{width * bpp};
- if (params.pitch == copy_size) {
- std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
- } else {
- const u8* start{host_ptr};
- u8* write_to{staging_buffer.data()};
- for (u32 h = height; h > 0; --h) {
- std::memcpy(write_to, start, copy_size);
- start += params.pitch;
- write_to += copy_size;
- }
- }
- }
-
- if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
- return;
- }
-
- for (u32 level = params.num_levels; level--;) {
- const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
- const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
- u8* const in_buffer = staging_buffer.data() + in_host_offset;
- u8* const out_buffer = staging_buffer.data() + out_host_offset;
- ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
- params.GetMipWidth(level), params.GetMipHeight(level),
- params.GetMipDepth(level), true, true);
- }
-}
-
-void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
- StagingCache& staging_cache) {
- MICROPROFILE_SCOPE(GPU_Flush_Texture);
- auto& staging_buffer = staging_cache.GetBuffer(0);
- u8* host_ptr;
-
- // Use an extra temporal buffer
- auto& tmp_buffer = staging_cache.GetBuffer(1);
- tmp_buffer.resize(guest_memory_size);
- host_ptr = tmp_buffer.data();
-
- if (params.target == SurfaceTarget::Texture3D) {
- // Special case for 3D texture segments
- memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
- }
-
- if (params.is_tiled) {
- ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
- for (u32 level = 0; level < params.num_levels; ++level) {
- const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
- SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
- staging_buffer.data() + host_offset, level);
- }
- } else if (params.IsBuffer()) {
- // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest
- // memory.
- std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
- } else {
- ASSERT(params.target == SurfaceTarget::Texture2D);
- ASSERT(params.num_levels == 1);
-
- const u32 bpp{params.GetBytesPerPixel()};
- const u32 copy_size{params.width * bpp};
- if (params.pitch == copy_size) {
- std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
- } else {
- u8* start{host_ptr};
- const u8* read_to{staging_buffer.data()};
- for (u32 h = params.height; h > 0; --h) {
- std::memcpy(start, read_to, copy_size);
- start += params.pitch;
- read_to += copy_size;
- }
- }
- }
- memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
deleted file mode 100644
index 173f2edba..000000000
--- a/src/video_core/texture_cache/surface_base.h
+++ /dev/null
@@ -1,333 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <optional>
-#include <tuple>
-#include <unordered_map>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/gpu.h"
-#include "video_core/morton.h"
-#include "video_core/texture_cache/copy_params.h"
-#include "video_core/texture_cache/surface_params.h"
-#include "video_core/texture_cache/surface_view.h"
-
-namespace Tegra {
-class MemoryManager;
-}
-
-namespace VideoCommon {
-
-using VideoCore::MortonSwizzleMode;
-using VideoCore::Surface::SurfaceTarget;
-
-enum class MatchStructureResult : u32 {
- FullMatch = 0,
- SemiMatch = 1,
- None = 2,
-};
-
-enum class MatchTopologyResult : u32 {
- FullMatch = 0,
- CompressUnmatch = 1,
- None = 2,
-};
-
-class StagingCache {
-public:
- explicit StagingCache();
- ~StagingCache();
-
- std::vector<u8>& GetBuffer(std::size_t index) {
- return staging_buffer[index];
- }
-
- const std::vector<u8>& GetBuffer(std::size_t index) const {
- return staging_buffer[index];
- }
-
- void SetSize(std::size_t size) {
- staging_buffer.resize(size);
- }
-
-private:
- std::vector<std::vector<u8>> staging_buffer;
-};
-
-class SurfaceBaseImpl {
-public:
- void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
-
- void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
-
- GPUVAddr GetGpuAddr() const {
- return gpu_addr;
- }
-
- bool Overlaps(const VAddr start, const VAddr end) const {
- return (cpu_addr < end) && (cpu_addr_end > start);
- }
-
- bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const {
- const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
- return gpu_addr <= other_start && other_end <= gpu_addr_end;
- }
-
- // Use only when recycling a surface
- void SetGpuAddr(const GPUVAddr new_addr) {
- gpu_addr = new_addr;
- }
-
- VAddr GetCpuAddr() const {
- return cpu_addr;
- }
-
- VAddr GetCpuAddrEnd() const {
- return cpu_addr_end;
- }
-
- void SetCpuAddr(const VAddr new_addr) {
- cpu_addr = new_addr;
- cpu_addr_end = new_addr + guest_memory_size;
- }
-
- const SurfaceParams& GetSurfaceParams() const {
- return params;
- }
-
- std::size_t GetSizeInBytes() const {
- return guest_memory_size;
- }
-
- std::size_t GetHostSizeInBytes() const {
- return host_memory_size;
- }
-
- std::size_t GetMipmapSize(const u32 level) const {
- return mipmap_sizes[level];
- }
-
- bool IsLinear() const {
- return !params.is_tiled;
- }
-
- bool IsConverted() const {
- return is_converted;
- }
-
- bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
- return params.pixel_format == pixel_format;
- }
-
- VideoCore::Surface::PixelFormat GetFormat() const {
- return params.pixel_format;
- }
-
- bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
- return params.target == target;
- }
-
- MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;
-
- MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;
-
- bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
- return std::tie(gpu_addr, params.target, params.num_levels) ==
- std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
- params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
- }
-
- std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;
-
- std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
- return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
- }
-
-protected:
- explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
- bool is_astc_supported);
- ~SurfaceBaseImpl() = default;
-
- virtual void DecorateSurfaceName() = 0;
-
- const SurfaceParams params;
- std::size_t layer_size;
- std::size_t guest_memory_size;
- std::size_t host_memory_size;
- GPUVAddr gpu_addr{};
- VAddr cpu_addr{};
- VAddr cpu_addr_end{};
- bool is_converted{};
-
- std::vector<std::size_t> mipmap_sizes;
- std::vector<std::size_t> mipmap_offsets;
-
-private:
- void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer,
- u32 level);
-
- std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;
-
- std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
-};
-
-template <typename TView>
-class SurfaceBase : public SurfaceBaseImpl {
-public:
- virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;
-
- virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;
-
- void MarkAsModified(bool is_modified_, u64 tick) {
- is_modified = is_modified_ || is_target;
- modification_tick = tick;
- }
-
- void MarkAsRenderTarget(bool is_target_, u32 index_) {
- is_target = is_target_;
- index = index_;
- }
-
- void SetMemoryMarked(bool is_memory_marked_) {
- is_memory_marked = is_memory_marked_;
- }
-
- bool IsMemoryMarked() const {
- return is_memory_marked;
- }
-
- void SetSyncPending(bool is_sync_pending_) {
- is_sync_pending = is_sync_pending_;
- }
-
- bool IsSyncPending() const {
- return is_sync_pending;
- }
-
- void MarkAsPicked(bool is_picked_) {
- is_picked = is_picked_;
- }
-
- bool IsModified() const {
- return is_modified;
- }
-
- bool IsProtected() const {
- // Only 3D slices are to be protected
- return is_target && params.target == SurfaceTarget::Texture3D;
- }
-
- bool IsRenderTarget() const {
- return is_target;
- }
-
- u32 GetRenderTarget() const {
- return index;
- }
-
- bool IsRegistered() const {
- return is_registered;
- }
-
- bool IsPicked() const {
- return is_picked;
- }
-
- void MarkAsRegistered(bool is_reg) {
- is_registered = is_reg;
- }
-
- u64 GetModificationTick() const {
- return modification_tick;
- }
-
- TView EmplaceOverview(const SurfaceParams& overview_params) {
- const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
- return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
- }
-
- TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
- return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
- base_level, num_levels));
- }
-
- std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
- const GPUVAddr view_addr,
- const std::size_t candidate_size, const u32 mipmap,
- const u32 layer) {
- const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
- if (!layer_mipmap) {
- return {};
- }
- const auto [end_layer, end_mipmap] = *layer_mipmap;
- if (layer != end_layer) {
- if (mipmap == 0 && end_mipmap == 0) {
- return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1));
- }
- return {};
- } else {
- return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap));
- }
- }
-
- std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
- const std::size_t candidate_size) {
- if (params.target == SurfaceTarget::Texture3D ||
- view_params.target == SurfaceTarget::Texture3D ||
- (params.num_levels == 1 && !params.is_layered)) {
- return {};
- }
- const auto layer_mipmap{GetLayerMipmap(view_addr)};
- if (!layer_mipmap) {
- return {};
- }
- const auto [layer, mipmap] = *layer_mipmap;
- if (GetMipmapSize(mipmap) != candidate_size) {
- return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
- }
- return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
- }
-
- TView GetMainView() const {
- return main_view;
- }
-
-protected:
- explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params,
- bool is_astc_supported)
- : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {}
-
- ~SurfaceBase() = default;
-
- virtual TView CreateView(const ViewParams& view_key) = 0;
-
- TView main_view;
- std::unordered_map<ViewParams, TView> views;
-
-private:
- TView GetView(const ViewParams& key) {
- const auto [entry, is_cache_miss] = views.try_emplace(key);
- auto& view{entry->second};
- if (is_cache_miss) {
- view = CreateView(key);
- }
- return view;
- }
-
- static constexpr u32 NO_RT = 0xFFFFFFFF;
-
- bool is_modified{};
- bool is_target{};
- bool is_registered{};
- bool is_picked{};
- bool is_memory_marked{};
- bool is_sync_pending{};
- u32 index{NO_RT};
- u64 modification_tick{};
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
deleted file mode 100644
index e614a92df..000000000
--- a/src/video_core/texture_cache/surface_params.cpp
+++ /dev/null
@@ -1,445 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <string>
-#include <tuple>
-
-#include "common/alignment.h"
-#include "common/bit_util.h"
-#include "core/core.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/surface.h"
-#include "video_core/texture_cache/format_lookup_table.h"
-#include "video_core/texture_cache/surface_params.h"
-
-namespace VideoCommon {
-
-using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::PixelFormatFromDepthFormat;
-using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
-using VideoCore::Surface::SurfaceTarget;
-using VideoCore::Surface::SurfaceTargetFromTextureType;
-using VideoCore::Surface::SurfaceType;
-
-namespace {
-
-SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
- switch (type) {
- case Tegra::Shader::TextureType::Texture1D:
- return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D;
- case Tegra::Shader::TextureType::Texture2D:
- return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
- case Tegra::Shader::TextureType::Texture3D:
- ASSERT(!is_array);
- return SurfaceTarget::Texture3D;
- case Tegra::Shader::TextureType::TextureCube:
- return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap;
- default:
- UNREACHABLE();
- return SurfaceTarget::Texture2D;
- }
-}
-
-SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) {
- switch (type) {
- case Tegra::Shader::ImageType::Texture1D:
- return SurfaceTarget::Texture1D;
- case Tegra::Shader::ImageType::TextureBuffer:
- return SurfaceTarget::TextureBuffer;
- case Tegra::Shader::ImageType::Texture1DArray:
- return SurfaceTarget::Texture1DArray;
- case Tegra::Shader::ImageType::Texture2D:
- return SurfaceTarget::Texture2D;
- case Tegra::Shader::ImageType::Texture2DArray:
- return SurfaceTarget::Texture2DArray;
- case Tegra::Shader::ImageType::Texture3D:
- return SurfaceTarget::Texture3D;
- default:
- UNREACHABLE();
- return SurfaceTarget::Texture2D;
- }
-}
-
-constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
- return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
-}
-
-} // Anonymous namespace
-
-SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table,
- const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Sampler& entry) {
- SurfaceParams params;
- params.is_tiled = tic.IsTiled();
- params.srgb_conversion = tic.IsSrgbConversionEnabled();
- params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
- params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
- params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
- params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
- params.pixel_format = lookup_table.GetPixelFormat(
- tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
- params.type = GetFormatType(params.pixel_format);
- if (entry.is_shadow && params.type == SurfaceType::ColorTexture) {
- switch (params.pixel_format) {
- case PixelFormat::R16_UNORM:
- case PixelFormat::R16_FLOAT:
- params.pixel_format = PixelFormat::D16_UNORM;
- break;
- case PixelFormat::R32_FLOAT:
- params.pixel_format = PixelFormat::D32_FLOAT;
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
- static_cast<u32>(params.pixel_format));
- }
- params.type = GetFormatType(params.pixel_format);
- }
- // TODO: on 1DBuffer we should use the tic info.
- if (tic.IsBuffer()) {
- params.target = SurfaceTarget::TextureBuffer;
- params.width = tic.Width();
- params.pitch = params.width * params.GetBytesPerPixel();
- params.height = 1;
- params.depth = 1;
- params.num_levels = 1;
- params.emulated_levels = 1;
- params.is_layered = false;
- } else {
- params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array);
- params.width = tic.Width();
- params.height = tic.Height();
- params.depth = tic.Depth();
- params.pitch = params.is_tiled ? 0 : tic.Pitch();
- if (params.target == SurfaceTarget::TextureCubemap ||
- params.target == SurfaceTarget::TextureCubeArray) {
- params.depth *= 6;
- }
- params.num_levels = tic.max_mip_level + 1;
- params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
- params.is_layered = params.IsLayered();
- }
- return params;
-}
-
-SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table,
- const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Image& entry) {
- SurfaceParams params;
- params.is_tiled = tic.IsTiled();
- params.srgb_conversion = tic.IsSrgbConversionEnabled();
- params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
- params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
- params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
- params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
- params.pixel_format = lookup_table.GetPixelFormat(
- tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
- params.type = GetFormatType(params.pixel_format);
- params.target = ImageTypeToSurfaceTarget(entry.type);
- // TODO: on 1DBuffer we should use the tic info.
- if (tic.IsBuffer()) {
- params.target = SurfaceTarget::TextureBuffer;
- params.width = tic.Width();
- params.pitch = params.width * params.GetBytesPerPixel();
- params.height = 1;
- params.depth = 1;
- params.num_levels = 1;
- params.emulated_levels = 1;
- params.is_layered = false;
- } else {
- params.width = tic.Width();
- params.height = tic.Height();
- params.depth = tic.Depth();
- params.pitch = params.is_tiled ? 0 : tic.Pitch();
- if (params.target == SurfaceTarget::TextureCubemap ||
- params.target == SurfaceTarget::TextureCubeArray) {
- params.depth *= 6;
- }
- params.num_levels = tic.max_mip_level + 1;
- params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
- params.is_layered = params.IsLayered();
- }
- return params;
-}
-
-SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
- const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
- const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
-
- return {
- .is_tiled = regs.zeta.memory_layout.type ==
- Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
- .srgb_conversion = false,
- .is_layered = is_layered,
- .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U),
- .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U),
- .block_depth = block_depth,
- .tile_width_spacing = 1,
- .width = regs.zeta_width,
- .height = regs.zeta_height,
- .depth = is_layered ? regs.zeta_layers.Value() : 1U,
- .pitch = 0,
- .num_levels = 1,
- .emulated_levels = 1,
- .pixel_format = pixel_format,
- .type = GetFormatType(pixel_format),
- .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D,
- };
-}
-
-SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
- const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
- SurfaceParams params;
- params.is_tiled =
- config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
- params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
- config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB;
- params.block_width = config.memory_layout.block_width;
- params.block_height = config.memory_layout.block_height;
- params.block_depth = config.memory_layout.block_depth;
- params.tile_width_spacing = 1;
- params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
- params.type = GetFormatType(params.pixel_format);
- if (params.is_tiled) {
- params.pitch = 0;
- params.width = config.width;
- } else {
- const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
- params.pitch = config.width;
- params.width = params.pitch / bpp;
- }
- params.height = config.height;
- params.num_levels = 1;
- params.emulated_levels = 1;
-
- if (config.memory_layout.is_3d != 0) {
- params.depth = config.layers.Value();
- params.is_layered = false;
- params.target = SurfaceTarget::Texture3D;
- } else if (config.layers > 1) {
- params.depth = config.layers.Value();
- params.is_layered = true;
- params.target = SurfaceTarget::Texture2DArray;
- } else {
- params.depth = 1;
- params.is_layered = false;
- params.target = SurfaceTarget::Texture2D;
- }
- return params;
-}
-
-SurfaceParams SurfaceParams::CreateForFermiCopySurface(
- const Tegra::Engines::Fermi2D::Regs::Surface& config) {
- const bool is_tiled = !config.linear;
- const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format);
-
- SurfaceParams params{
- .is_tiled = is_tiled,
- .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
- config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
- .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
- .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
- .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,
- .tile_width_spacing = 1,
- .width = config.width,
- .height = config.height,
- .depth = 1,
- .pitch = config.pitch,
- .num_levels = 1,
- .emulated_levels = 1,
- .pixel_format = pixel_format,
- .type = GetFormatType(pixel_format),
- // TODO(Rodrigo): Try to guess texture arrays from parameters
- .target = SurfaceTarget::Texture2D,
- };
-
- params.is_layered = params.IsLayered();
- return params;
-}
-
-VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
- const VideoCommon::Shader::Sampler& entry) {
- return TextureTypeToSurfaceTarget(entry.type, entry.is_array);
-}
-
-VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
- const VideoCommon::Shader::Image& entry) {
- return ImageTypeToSurfaceTarget(entry.type);
-}
-
-bool SurfaceParams::IsLayered() const {
- switch (target) {
- case SurfaceTarget::Texture1DArray:
- case SurfaceTarget::Texture2DArray:
- case SurfaceTarget::TextureCubemap:
- case SurfaceTarget::TextureCubeArray:
- return true;
- default:
- return false;
- }
-}
-
-// Auto block resizing algorithm from:
-// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
-u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
- if (level == 0) {
- return this->block_height;
- }
-
- const u32 height_new{GetMipHeight(level)};
- const u32 default_block_height{GetDefaultBlockHeight()};
- const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
- const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
- return std::clamp(block_height_new, 3U, 7U) - 3U;
-}
-
-u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
- if (level == 0) {
- return this->block_depth;
- }
- if (is_layered) {
- return 0;
- }
-
- const u32 depth_new{GetMipDepth(level)};
- const u32 block_depth_new = Common::Log2Ceil32(depth_new);
- if (block_depth_new > 4) {
- return 5 - (GetMipBlockHeight(level) >= 2);
- }
- return block_depth_new;
-}
-
-std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
- std::size_t offset = 0;
- for (u32 i = 0; i < level; i++) {
- offset += GetInnerMipmapMemorySize(i, false, false);
- }
- return offset;
-}
-
-std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
- std::size_t offset = 0;
- if (is_converted) {
- for (u32 i = 0; i < level; ++i) {
- offset += GetConvertedMipmapSize(i) * GetNumLayers();
- }
- } else {
- for (u32 i = 0; i < level; ++i) {
- offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
- }
- }
- return offset;
-}
-
-std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
- constexpr std::size_t rgba8_bpp = 4ULL;
- const std::size_t mip_width = GetMipWidth(level);
- const std::size_t mip_height = GetMipHeight(level);
- const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
- return mip_width * mip_height * mip_depth * rgba8_bpp;
-}
-
-std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
- std::size_t size = 0;
- for (u32 level = 0; level < num_levels; ++level) {
- size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
- }
- if (is_tiled && is_layered) {
- return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
- }
- return size;
-}
-
-std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
- bool uncompressed) const {
- const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
- const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
- const u32 depth{is_layered ? 1U : GetMipDepth(level)};
- if (is_tiled) {
- return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
- depth, GetMipBlockHeight(level),
- GetMipBlockDepth(level));
- } else if (as_host_size || IsBuffer()) {
- return GetBytesPerPixel() * width * height * depth;
- } else {
- // Linear Texture Case
- return pitch * height * depth;
- }
-}
-
-bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
- return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
- height, depth, pitch, num_levels, pixel_format, type, target) ==
- std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
- rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
- rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target);
-}
-
-std::string SurfaceParams::TargetName() const {
- switch (target) {
- case SurfaceTarget::Texture1D:
- return "1D";
- case SurfaceTarget::TextureBuffer:
- return "TexBuffer";
- case SurfaceTarget::Texture2D:
- return "2D";
- case SurfaceTarget::Texture3D:
- return "3D";
- case SurfaceTarget::Texture1DArray:
- return "1DArray";
- case SurfaceTarget::Texture2DArray:
- return "2DArray";
- case SurfaceTarget::TextureCubemap:
- return "Cube";
- case SurfaceTarget::TextureCubeArray:
- return "CubeArray";
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
- UNREACHABLE();
- return fmt::format("TUK({})", static_cast<u32>(target));
- }
-}
-
-u32 SurfaceParams::GetBlockSize() const {
- const u32 x = 64U << block_width;
- const u32 y = 8U << block_height;
- const u32 z = 1U << block_depth;
- return x * y * z;
-}
-
-std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
- const u32 x_pixels = 64U / GetBytesPerPixel();
- const u32 x = x_pixels << block_width;
- const u32 y = 8U << block_height;
- return {x, y};
-}
-
-std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
- const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
- const u32 block_size = GetBlockSize();
- const u32 block_index = offset / block_size;
- const u32 gob_offset = offset % block_size;
- const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE);
- const u32 x_gob_pixels = 64U / GetBytesPerPixel();
- const u32 x_block_pixels = x_gob_pixels << block_width;
- const u32 y_block_pixels = 8U << block_height;
- const u32 z_block_pixels = 1U << block_depth;
- const u32 x_blocks = div_ceil(width, x_block_pixels);
- const u32 y_blocks = div_ceil(height, y_block_pixels);
- const u32 z_blocks = div_ceil(depth, z_block_pixels);
- const u32 base_x = block_index % x_blocks;
- const u32 base_y = (block_index / x_blocks) % y_blocks;
- const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks;
- u32 x = base_x * x_block_pixels;
- u32 y = base_y * y_block_pixels;
- u32 z = base_z * z_block_pixels;
- z += gob_index >> block_height;
- y += (gob_index * 8U) % y_block_pixels;
- return {x, y, z};
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
deleted file mode 100644
index 118aa689e..000000000
--- a/src/video_core/texture_cache/surface_params.h
+++ /dev/null
@@ -1,293 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <utility>
-
-#include "common/alignment.h"
-#include "common/bit_util.h"
-#include "common/cityhash.h"
-#include "common/common_types.h"
-#include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/surface.h"
-#include "video_core/textures/decoders.h"
-
-namespace VideoCommon {
-
-class FormatLookupTable;
-
-class SurfaceParams {
-public:
- /// Creates SurfaceCachedParams from a texture configuration.
- static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table,
- const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Sampler& entry);
-
- /// Creates SurfaceCachedParams from an image configuration.
- static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table,
- const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Image& entry);
-
- /// Creates SurfaceCachedParams for a depth buffer configuration.
- static SurfaceParams CreateForDepthBuffer(Core::System& system);
-
- /// Creates SurfaceCachedParams from a framebuffer configuration.
- static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
-
- /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
- static SurfaceParams CreateForFermiCopySurface(
- const Tegra::Engines::Fermi2D::Regs::Surface& config);
-
- /// Obtains the texture target from a shader's sampler entry.
- static VideoCore::Surface::SurfaceTarget ExpectedTarget(
- const VideoCommon::Shader::Sampler& entry);
-
- /// Obtains the texture target from a shader's sampler entry.
- static VideoCore::Surface::SurfaceTarget ExpectedTarget(
- const VideoCommon::Shader::Image& entry);
-
- std::size_t Hash() const {
- return static_cast<std::size_t>(
- Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
- }
-
- bool operator==(const SurfaceParams& rhs) const;
-
- bool operator!=(const SurfaceParams& rhs) const {
- return !operator==(rhs);
- }
-
- std::size_t GetGuestSizeInBytes() const {
- return GetInnerMemorySize(false, false, false);
- }
-
- std::size_t GetHostSizeInBytes(bool is_converted) const {
- if (!is_converted) {
- return GetInnerMemorySize(true, false, false);
- }
- // ASTC is uncompressed in software, in emulated as RGBA8
- std::size_t host_size_in_bytes = 0;
- for (u32 level = 0; level < num_levels; ++level) {
- host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
- }
- return host_size_in_bytes;
- }
-
- u32 GetBlockAlignedWidth() const {
- return Common::AlignUp(width, 64 / GetBytesPerPixel());
- }
-
- /// Returns the width of a given mipmap level.
- u32 GetMipWidth(u32 level) const {
- return std::max(1U, width >> level);
- }
-
- /// Returns the height of a given mipmap level.
- u32 GetMipHeight(u32 level) const {
- return std::max(1U, height >> level);
- }
-
- /// Returns the depth of a given mipmap level.
- u32 GetMipDepth(u32 level) const {
- return is_layered ? depth : std::max(1U, depth >> level);
- }
-
- /// Returns the block height of a given mipmap level.
- u32 GetMipBlockHeight(u32 level) const;
-
- /// Returns the block depth of a given mipmap level.
- u32 GetMipBlockDepth(u32 level) const;
-
- /// Returns the best possible row/pitch alignment for the surface.
- u32 GetRowAlignment(u32 level, bool is_converted) const {
- const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
- return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
- }
-
- /// Returns the offset in bytes in guest memory of a given mipmap level.
- std::size_t GetGuestMipmapLevelOffset(u32 level) const;
-
- /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
- std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
-
- /// Returns the size in bytes in guest memory of a given mipmap level.
- std::size_t GetGuestMipmapSize(u32 level) const {
- return GetInnerMipmapMemorySize(level, false, false);
- }
-
- /// Returns the size in bytes in host memory (linear) of a given mipmap level.
- std::size_t GetHostMipmapSize(u32 level) const {
- return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
- }
-
- std::size_t GetConvertedMipmapSize(u32 level) const;
-
- /// Get this texture Tegra Block size in guest memory layout
- u32 GetBlockSize() const;
-
- /// Get X, Y coordinates max sizes of a single block.
- std::pair<u32, u32> GetBlockXY() const;
-
- /// Get the offset in x, y, z coordinates from a memory offset
- std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
-
- /// Returns the size of a layer in bytes in guest memory.
- std::size_t GetGuestLayerSize() const {
- return GetLayerSize(false, false);
- }
-
- /// Returns the size of a layer in bytes in host memory for a given mipmap level.
- std::size_t GetHostLayerSize(u32 level) const {
- ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D);
- return GetInnerMipmapMemorySize(level, true, false);
- }
-
- /// Returns the max possible mipmap that the texture can have in host gpu
- u32 MaxPossibleMipmap() const {
- const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U;
- const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U;
- const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h);
- if (target != VideoCore::Surface::SurfaceTarget::Texture3D)
- return max_mipmap;
- return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U);
- }
-
- /// Returns if the guest surface is a compressed surface.
- bool IsCompressed() const {
- return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1;
- }
-
- /// Returns the default block width.
- u32 GetDefaultBlockWidth() const {
- return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
- }
-
- /// Returns the default block height.
- u32 GetDefaultBlockHeight() const {
- return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
- }
-
- /// Returns the bits per pixel.
- u32 GetBitsPerPixel() const {
- return VideoCore::Surface::GetFormatBpp(pixel_format);
- }
-
- /// Returns the bytes per pixel.
- u32 GetBytesPerPixel() const {
- return VideoCore::Surface::GetBytesPerPixel(pixel_format);
- }
-
- /// Returns true if the pixel format is a depth and/or stencil format.
- bool IsPixelFormatZeta() const {
- return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
- pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
- }
-
- /// Returns is the surface is a TextureBuffer type of surface.
- bool IsBuffer() const {
- return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
- }
-
- /// Returns the number of layers in the surface.
- std::size_t GetNumLayers() const {
- return is_layered ? depth : 1;
- }
-
- /// Returns the debug name of the texture for use in graphic debuggers.
- std::string TargetName() const;
-
- // Helper used for out of class size calculations
- static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
- const u32 block_depth) {
- return Common::AlignBits(out_size,
- Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
- }
-
- /// Converts a width from a type of surface into another. This helps represent the
- /// equivalent value between compressed/non-compressed textures.
- static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from,
- VideoCore::Surface::PixelFormat pixel_format_to) {
- const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from);
- const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to);
- return (width * bw2 + bw1 - 1) / bw1;
- }
-
- /// Converts a height from a type of surface into another. This helps represent the
- /// equivalent value between compressed/non-compressed textures.
- static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from,
- VideoCore::Surface::PixelFormat pixel_format_to) {
- const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from);
- const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to);
- return (height * bh2 + bh1 - 1) / bh1;
- }
-
- // Finds the maximun possible width between 2 2D layers of different formats
- static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params,
- const u32 src_level, const u32 dst_level) {
- const u32 bw1 = src_params.GetDefaultBlockWidth();
- const u32 bw2 = dst_params.GetDefaultBlockWidth();
- const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1;
- const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2;
- return std::min(t_src_width, t_dst_width);
- }
-
- // Finds the maximun possible height between 2 2D layers of different formats
- static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params,
- const u32 src_level, const u32 dst_level) {
- const u32 bh1 = src_params.GetDefaultBlockHeight();
- const u32 bh2 = dst_params.GetDefaultBlockHeight();
- const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1;
- const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2;
- return std::min(t_src_height, t_dst_height);
- }
-
- bool is_tiled;
- bool srgb_conversion;
- bool is_layered;
- u32 block_width;
- u32 block_height;
- u32 block_depth;
- u32 tile_width_spacing;
- u32 width;
- u32 height;
- u32 depth;
- u32 pitch;
- u32 num_levels;
- u32 emulated_levels;
- VideoCore::Surface::PixelFormat pixel_format;
- VideoCore::Surface::SurfaceType type;
- VideoCore::Surface::SurfaceTarget target;
-
-private:
- /// Returns the size of a given mipmap level inside a layer.
- std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;
-
- /// Returns the size of all mipmap levels and aligns as needed.
- std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
- return GetLayerSize(as_host_size, uncompressed) *
- (layer_only ? 1U : (is_layered ? depth : 1U));
- }
-
- /// Returns the size of a layer
- std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
-
- /// Returns true if these parameters are from a layered surface.
- bool IsLayered() const;
-};
-
-} // namespace VideoCommon
-
-namespace std {
-
-template <>
-struct hash<VideoCommon::SurfaceParams> {
- std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
deleted file mode 100644
index 6b5f5984b..000000000
--- a/src/video_core/texture_cache/surface_view.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <tuple>
-
-#include "common/common_types.h"
-#include "video_core/texture_cache/surface_view.h"
-
-namespace VideoCommon {
-
-std::size_t ViewParams::Hash() const {
- return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^
- (static_cast<std::size_t>(base_level) << 24) ^
- (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
-}
-
-bool ViewParams::operator==(const ViewParams& rhs) const {
- return std::tie(base_layer, num_layers, base_level, num_levels, target) ==
- std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
-}
-
-bool ViewParams::operator!=(const ViewParams& rhs) const {
- return !operator==(rhs);
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
deleted file mode 100644
index 90a8bb0ae..000000000
--- a/src/video_core/texture_cache/surface_view.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <functional>
-
-#include "common/common_types.h"
-#include "video_core/surface.h"
-#include "video_core/texture_cache/surface_params.h"
-
-namespace VideoCommon {
-
-struct ViewParams {
- constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer,
- u32 num_layers, u32 base_level, u32 num_levels)
- : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level},
- num_levels{num_levels} {}
-
- std::size_t Hash() const;
-
- bool operator==(const ViewParams& rhs) const;
- bool operator!=(const ViewParams& rhs) const;
-
- bool IsLayered() const {
- switch (target) {
- case VideoCore::Surface::SurfaceTarget::Texture1DArray:
- case VideoCore::Surface::SurfaceTarget::Texture2DArray:
- case VideoCore::Surface::SurfaceTarget::TextureCubemap:
- case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
- return true;
- default:
- return false;
- }
- }
-
- VideoCore::Surface::SurfaceTarget target{};
- u32 base_layer{};
- u32 num_layers{};
- u32 base_level{};
- u32 num_levels{};
-};
-
-class ViewBase {
-public:
- constexpr explicit ViewBase(const ViewParams& params) : params{params} {}
-
- constexpr const ViewParams& GetViewParams() const {
- return params;
- }
-
-protected:
- ViewParams params;
-};
-
-} // namespace VideoCommon
-
-namespace std {
-
-template <>
-struct hash<VideoCommon::ViewParams> {
- std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 96c4e4cc2..b1da69971 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,1304 +6,1446 @@
#include <algorithm>
#include <array>
-#include <list>
+#include <bit>
#include <memory>
#include <mutex>
-#include <set>
-#include <tuple>
+#include <optional>
+#include <span>
+#include <type_traits>
#include <unordered_map>
+#include <utility>
#include <vector>
#include <boost/container/small_vector.hpp>
-#include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range.hpp>
-#include "common/assert.h"
+#include "common/alignment.h"
+#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "common/math_util.h"
-#include "core/core.h"
-#include "core/memory.h"
-#include "core/settings.h"
+#include "common/logging/log.h"
#include "video_core/compatible_formats.h"
+#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
-#include "video_core/texture_cache/copy_params.h"
+#include "video_core/texture_cache/descriptor_table.h"
#include "video_core/texture_cache/format_lookup_table.h"
-#include "video_core/texture_cache/surface_base.h"
-#include "video_core/texture_cache/surface_params.h"
-#include "video_core/texture_cache/surface_view.h"
-
-namespace Tegra::Texture {
-struct FullTextureInfo;
-}
-
-namespace VideoCore {
-class RasterizerInterface;
-}
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/image_base.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_base.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/render_targets.h"
+#include "video_core/texture_cache/samples_helper.h"
+#include "video_core/texture_cache/slot_vector.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/texture.h"
namespace VideoCommon {
-using VideoCore::Surface::FormatCompatibility;
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+using VideoCore::Surface::GetFormatType;
+using VideoCore::Surface::IsCopyCompatible;
using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::SurfaceTarget;
-using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using VideoCore::Surface::SurfaceType;
-template <typename TSurface, typename TView>
+template <class P>
class TextureCache {
- using VectorSurface = boost::container::small_vector<TSurface, 1>;
+ /// Address shift for caching images into a hash table
+ static constexpr u64 PAGE_BITS = 20;
+
+ /// Enables debugging features to the texture cache
+ static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
+ /// Implement blits as copies between framebuffers
+ static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
+ /// True when some copies have to be emulated
+ static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
+
+ /// Image view ID for null descriptors
+ static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
+ /// Sampler ID for bugged sampler ids
+ static constexpr SamplerId NULL_SAMPLER_ID{0};
+
+ using Runtime = typename P::Runtime;
+ using Image = typename P::Image;
+ using ImageAlloc = typename P::ImageAlloc;
+ using ImageView = typename P::ImageView;
+ using Sampler = typename P::Sampler;
+ using Framebuffer = typename P::Framebuffer;
+
+ struct BlitImages {
+ ImageId dst_id;
+ ImageId src_id;
+ PixelFormat dst_format;
+ PixelFormat src_format;
+ };
+
+ template <typename T>
+ struct IdentityHash {
+ [[nodiscard]] size_t operator()(T value) const noexcept {
+ return static_cast<size_t>(value);
+ }
+ };
public:
- void InvalidateRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
+ Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
- for (const auto& surface : GetSurfacesInRegion(addr, size)) {
- Unregister(surface);
- }
- }
+ /// Notify the cache that a new frame has been queued
+ void TickFrame();
- void OnCPUWrite(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ /// Return a constant reference to the given image view id
+ [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
- for (const auto& surface : GetSurfacesInRegion(addr, size)) {
- if (surface->IsMemoryMarked()) {
- UnmarkMemory(surface);
- surface->SetSyncPending(true);
- marked_for_unregister.emplace_back(surface);
- }
- }
- }
+ /// Return a reference to the given image view id
+ [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
- void SyncGuestHost() {
- std::lock_guard lock{mutex};
+ /// Fill image_view_ids with the graphics images in indices
+ void FillGraphicsImageViews(std::span<const u32> indices,
+ std::span<ImageViewId> image_view_ids);
- for (const auto& surface : marked_for_unregister) {
- if (surface->IsRegistered()) {
- surface->SetSyncPending(false);
- Unregister(surface);
- }
- }
- marked_for_unregister.clear();
- }
+ /// Fill image_view_ids with the compute images in indices
+ void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
- /**
- * Guarantees that rendertargets don't unregister themselves if the
- * collide. Protection is currently only done on 3D slices.
- */
- void GuardRenderTargets(bool new_guard) {
- guard_render_targets = new_guard;
- }
+ /// Get the sampler from the graphics descriptor table in the specified index
+ Sampler* GetGraphicsSampler(u32 index);
- void GuardSamplers(bool new_guard) {
- guard_samplers = new_guard;
- }
+ /// Get the sampler from the compute descriptor table in the specified index
+ Sampler* GetComputeSampler(u32 index);
- void FlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ /// Refresh the state for graphics image view and sampler descriptors
+ void SynchronizeGraphicsDescriptors();
- auto surfaces = GetSurfacesInRegion(addr, size);
- if (surfaces.empty()) {
- return;
- }
- std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
- return a->GetModificationTick() < b->GetModificationTick();
- });
- for (const auto& surface : surfaces) {
- mutex.unlock();
- FlushSurface(surface);
- mutex.lock();
- }
- }
+ /// Refresh the state for compute image view and sampler descriptors
+ void SynchronizeComputeDescriptors();
- bool MustFlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ /// Update bound render targets and upload memory if necessary
+ /// @param is_clear True when the render targets are being used for clears
+ void UpdateRenderTargets(bool is_clear);
- const auto surfaces = GetSurfacesInRegion(addr, size);
- return std::any_of(surfaces.cbegin(), surfaces.cend(),
- [](const TSurface& surface) { return surface->IsModified(); });
- }
+ /// Find a framebuffer with the currently bound render targets
+ /// UpdateRenderTargets should be called before this
+ Framebuffer* GetFramebuffer();
- TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Sampler& entry) {
- std::lock_guard lock{mutex};
- const auto gpu_addr{tic.Address()};
- if (!gpu_addr) {
- return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
- }
+ /// Mark images in a range as modified from the CPU
+ void WriteMemory(VAddr cpu_addr, size_t size);
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
- }
+ /// Download contents of host images to guest memory in a region
+ void DownloadMemory(VAddr cpu_addr, size_t size);
- if (!IsTypeCompatible(tic.texture_type, entry)) {
- return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
- }
+ /// Remove images in a region
+ void UnmapMemory(VAddr cpu_addr, size_t size);
- const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
- const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
- if (guard_samplers) {
- sampled_textures.push_back(surface);
- }
- return view;
- }
+ /// Blit an image with the given parameters
+ void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+ const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Config& copy);
- TView GetImageSurface(const Tegra::Texture::TICEntry& tic,
- const VideoCommon::Shader::Image& entry) {
- std::lock_guard lock{mutex};
- const auto gpu_addr{tic.Address()};
- if (!gpu_addr) {
- return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
- }
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
- }
- const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
- const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
- if (guard_samplers) {
- sampled_textures.push_back(surface);
- }
- return view;
- }
+ /// Invalidate the contents of the color buffer index
+ /// These contents become unspecified, the cache can assume aggressive optimizations.
+ void InvalidateColorBuffer(size_t index);
- bool TextureBarrier() {
- const bool any_rt =
- std::any_of(sampled_textures.begin(), sampled_textures.end(),
- [](const auto& surface) { return surface->IsRenderTarget(); });
- sampled_textures.clear();
- return any_rt;
- }
+ /// Invalidate the contents of the depth buffer
+ /// These contents become unspecified, the cache can assume aggressive optimizations.
+ void InvalidateDepthBuffer();
- TView GetDepthBufferSurface(bool preserve_contents) {
- std::lock_guard lock{mutex};
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
- return depth_buffer.view;
- }
- maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
+ /// Try to find a cached image view in the given CPU address
+ [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
- const auto& regs{maxwell3d.regs};
- const auto gpu_addr{regs.zeta.Address()};
- if (!gpu_addr || !regs.zeta_enable) {
- SetEmptyDepthBuffer();
- return {};
- }
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- SetEmptyDepthBuffer();
- return {};
- }
- const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
- auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
- if (depth_buffer.target)
- depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
- depth_buffer.target = surface_view.first;
- depth_buffer.view = surface_view.second;
- if (depth_buffer.target)
- depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
- return surface_view.second;
- }
-
- TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
- std::lock_guard lock{mutex};
- ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
- return render_targets[index].view;
- }
- maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
+ /// Return true when there are uncommitted images to be downloaded
+ [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
- const auto& regs{maxwell3d.regs};
- if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
- regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
- SetEmptyColorBuffer(index);
- return {};
- }
+ /// Return true when the caller should wait for async downloads
+ [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
- const auto& config{regs.rt[index]};
- const auto gpu_addr{config.Address()};
- if (!gpu_addr) {
- SetEmptyColorBuffer(index);
- return {};
- }
+ /// Commit asynchronous downloads
+ void CommitAsyncFlushes();
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- SetEmptyColorBuffer(index);
- return {};
- }
+ /// Pop asynchronous downloads
+ void PopAsyncFlushes();
+
+ /// Return true when a CPU region is modified from the GPU
+ [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
- auto surface_view =
- GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
- preserve_contents, true);
- if (render_targets[index].target) {
- auto& surface = render_targets[index].target;
- surface->MarkAsRenderTarget(false, NO_RT);
- const auto& cr_params = surface->GetSurfaceParams();
- if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- AsyncFlushSurface(surface);
+ std::mutex mutex;
+
+private:
+ /// Iterate over all page indices in a range
+ template <typename Func>
+ static void ForEachPage(VAddr addr, size_t size, Func&& func) {
+ static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
+ const u64 page_end = (addr + size - 1) >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+ if constexpr (RETURNS_BOOL) {
+ if (func(page)) {
+ break;
+ }
+ } else {
+ func(page);
}
}
- render_targets[index].target = surface_view.first;
- render_targets[index].view = surface_view.second;
- if (render_targets[index].target)
- render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
- return surface_view.second;
}
- void MarkColorBufferInUse(std::size_t index) {
- if (auto& render_target = render_targets[index].target) {
- render_target->MarkAsModified(true, Tick());
- }
- }
+ /// Fills image_view_ids in the image views in indices
+ void FillImageViews(DescriptorTable<TICEntry>& table,
+ std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
+ std::span<ImageViewId> image_view_ids);
- void MarkDepthBufferInUse() {
- if (depth_buffer.target) {
- depth_buffer.target->MarkAsModified(true, Tick());
- }
- }
+ /// Find or create an image view in the guest descriptor table
+ ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
+ std::span<ImageViewId> cached_image_view_ids, u32 index);
- void SetEmptyDepthBuffer() {
- if (depth_buffer.target == nullptr) {
- return;
- }
- depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
- depth_buffer.target = nullptr;
- depth_buffer.view = nullptr;
- }
+ /// Find or create a framebuffer with the given render target parameters
+ FramebufferId GetFramebufferId(const RenderTargets& key);
- void SetEmptyColorBuffer(std::size_t index) {
- if (render_targets[index].target == nullptr) {
- return;
- }
- render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
- render_targets[index].target = nullptr;
- render_targets[index].view = nullptr;
- }
-
- void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
- const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
- const Tegra::Engines::Fermi2D::Config& copy_config) {
- std::lock_guard lock{mutex};
- SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
- SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
- const GPUVAddr src_gpu_addr = src_config.Address();
- const GPUVAddr dst_gpu_addr = dst_config.Address();
- DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
-
- const auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
- const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
- std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
- TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
- ImageBlit(src_surface, dst_surface.second, copy_config);
- dst_surface.first->MarkAsModified(true, Tick());
- }
-
- TSurface TryFindFramebufferSurface(VAddr addr) const {
- if (!addr) {
- return nullptr;
- }
- const VAddr page = addr >> registry_page_bits;
- const auto it = registry.find(page);
- if (it == registry.end()) {
- return nullptr;
- }
- const auto& list = it->second;
- const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
- return surface->GetCpuAddr() == addr;
- });
- return found != list.end() ? *found : nullptr;
- }
+ /// Refresh the contents (pixel data) of an image
+ void RefreshContents(Image& image);
- u64 Tick() {
- return ++ticks;
- }
+ /// Upload data from guest to an image
+ template <typename StagingBuffer>
+ void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
- void CommitAsyncFlushes() {
- committed_flushes.push_back(uncommitted_flushes);
- uncommitted_flushes.reset();
- }
+ /// Find or create an image view from a guest descriptor
+ [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
- bool HasUncommittedFlushes() const {
- return uncommitted_flushes != nullptr;
- }
+ /// Create a new image view from a guest descriptor
+ [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
- bool ShouldWaitAsyncFlushes() const {
- return !committed_flushes.empty() && committed_flushes.front() != nullptr;
- }
+ /// Find or create an image from the given parameters
+ [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options = RelaxedOptions{});
- void PopAsyncFlushes() {
- if (committed_flushes.empty()) {
- return;
- }
- auto& flush_list = committed_flushes.front();
- if (!flush_list) {
- committed_flushes.pop_front();
- return;
- }
- for (TSurface& surface : *flush_list) {
- FlushSurface(surface);
- }
- committed_flushes.pop_front();
- }
+ /// Find an image from the given parameters
+ [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options);
-protected:
- explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- bool is_astc_supported)
- : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} {
- for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- SetEmptyColorBuffer(i);
- }
+ /// Create an image from the given parameters
+ [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options);
- SetEmptyDepthBuffer();
- staging_cache.SetSize(2);
+ /// Create a new image and join perfectly matching existing images
+ /// Remove joined images from the cache
+ [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
- const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
- siblings_table[static_cast<std::size_t>(a)] = b;
- siblings_table[static_cast<std::size_t>(b)] = a;
- };
- std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
- make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
- make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
- make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);
+ /// Return a blit image pair from the given guest blit parameters
+ [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
+ const Tegra::Engines::Fermi2D::Surface& src);
- sampled_textures.reserve(64);
- }
+ /// Find or create a sampler from a guest descriptor sampler
+ [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
- ~TextureCache() = default;
+ /// Find or create an image view for the given color buffer index
+ [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
- virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;
+ /// Find or create an image view for the depth buffer
+ [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
- virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
- const CopyParams& copy_params) = 0;
+ /// Find or create a view for a render target with the given image parameters
+ [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
+ bool is_clear);
- virtual void ImageBlit(TView& src_view, TView& dst_view,
- const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
+ /// Iterates over all the images in a region calling func
+ template <typename Func>
+ void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
- // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
- // and reading it from a separate buffer.
- virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
+ /// Find or create an image view in the given image with the passed parameters
+ [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
- void ManageRenderTargetUnregister(TSurface& surface) {
- auto& dirty = system.GPU().Maxwell3D().dirty;
- const u32 index = surface->GetRenderTarget();
- if (index == DEPTH_RT) {
- dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
- } else {
- dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true;
- }
- dirty.flags[VideoCommon::Dirty::RenderTargets] = true;
+ /// Register image in the page table
+ void RegisterImage(ImageId image);
+
+ /// Unregister image from the page table
+ void UnregisterImage(ImageId image);
+
+ /// Track CPU reads and writes for image
+ void TrackImage(ImageBase& image);
+
+ /// Stop tracking CPU reads and writes for image
+ void UntrackImage(ImageBase& image);
+
+ /// Delete image from the cache
+ void DeleteImage(ImageId image);
+
+ /// Remove image views references from the cache
+ void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
+
+ /// Remove framebuffers using the given image views from the cache
+ void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
+
+ /// Mark an image as modified from the GPU
+ void MarkModification(ImageBase& image) noexcept;
+
+ /// Synchronize image aliases, copying data if needed
+ void SynchronizeAliases(ImageId image_id);
+
+ /// Prepare an image to be used
+ void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
+
+ /// Prepare an image view to be used
+ void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
+
+ /// Execute copies from one image to the other, even if they are incompatible
+ void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
+
+ /// Bind an image view as render target, downloading resources preemtively if needed
+ void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
+
+ /// Create a render target from a given image and image view parameters
+ [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
+ ImageId, const ImageViewInfo& view_info);
+
+ /// Returns true if the current clear parameters clear the whole image of a given image view
+ [[nodiscard]] bool IsFullClear(ImageViewId id);
+
+ Runtime& runtime;
+ VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+
+ DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
+ DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
+ std::vector<SamplerId> graphics_sampler_ids;
+ std::vector<ImageViewId> graphics_image_view_ids;
+
+ DescriptorTable<TICEntry> compute_image_table{gpu_memory};
+ DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
+ std::vector<SamplerId> compute_sampler_ids;
+ std::vector<ImageViewId> compute_image_view_ids;
+
+ RenderTargets render_targets;
+
+ std::unordered_map<TICEntry, ImageViewId> image_views;
+ std::unordered_map<TSCEntry, SamplerId> samplers;
+ std::unordered_map<RenderTargets, FramebufferId> framebuffers;
+
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
+
+ bool has_deleted_images = false;
+
+ SlotVector<Image> slot_images;
+ SlotVector<ImageView> slot_image_views;
+ SlotVector<ImageAlloc> slot_image_allocs;
+ SlotVector<Sampler> slot_samplers;
+ SlotVector<Framebuffer> slot_framebuffers;
+
+ // TODO: This data structure is not optimal and it should be reworked
+ std::vector<ImageId> uncommitted_downloads;
+ std::queue<std::vector<ImageId>> committed_downloads;
+
+ static constexpr size_t TICKS_TO_DESTROY = 6;
+ DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
+ DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
+ DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
+
+ std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
+
+ u64 modification_tick = 0;
+ u64 frame_tick = 0;
+};
+
+template <class P>
+TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_)
+ : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
+ // Configure null sampler
+ TSCEntry sampler_descriptor{};
+ sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
+ sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
+ sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
+ sampler_descriptor.cubemap_anisotropy.Assign(1);
+
+ // Make sure the first index is reserved for the null resources
+ // This way the null resource becomes a compile time constant
+ void(slot_image_views.insert(runtime, NullImageParams{}));
+ void(slot_samplers.insert(runtime, sampler_descriptor));
+}
+
+template <class P>
+void TextureCache<P>::TickFrame() {
+ // Tick sentenced resources in this order to ensure they are destroyed in the right order
+ sentenced_images.Tick();
+ sentenced_framebuffers.Tick();
+ sentenced_image_view.Tick();
+ ++frame_tick;
+}
+
+template <class P>
+const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
+ return slot_image_views[id];
+}
+
+template <class P>
+typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
+ return slot_image_views[id];
+}
+
+template <class P>
+void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
+ std::span<ImageViewId> image_view_ids) {
+ FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
+}
+
+template <class P>
+void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
+ std::span<ImageViewId> image_view_ids) {
+ FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
+}
+
+template <class P>
+typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
+ [[unlikely]] if (index > graphics_sampler_table.Limit()) {
+ LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+ return &slot_samplers[NULL_SAMPLER_ID];
+ }
+ const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
+ SamplerId& id = graphics_sampler_ids[index];
+ [[unlikely]] if (is_new) {
+ id = FindSampler(descriptor);
}
+ return &slot_samplers[id];
+}
- void Register(TSurface surface) {
- const GPUVAddr gpu_addr = surface->GetGpuAddr();
- const std::size_t size = surface->GetSizeInBytes();
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
- gpu_addr);
- return;
- }
- surface->SetCpuAddr(*cpu_addr);
- RegisterInnerCache(surface);
- surface->MarkAsRegistered(true);
- surface->SetMemoryMarked(true);
- rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
+template <class P>
+typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
+ [[unlikely]] if (index > compute_sampler_table.Limit()) {
+ LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+ return &slot_samplers[NULL_SAMPLER_ID];
}
+ const auto [descriptor, is_new] = compute_sampler_table.Read(index);
+ SamplerId& id = compute_sampler_ids[index];
+ [[unlikely]] if (is_new) {
+ id = FindSampler(descriptor);
+ }
+ return &slot_samplers[id];
+}
- void UnmarkMemory(TSurface surface) {
- if (!surface->IsMemoryMarked()) {
- return;
- }
- const std::size_t size = surface->GetSizeInBytes();
- const VAddr cpu_addr = surface->GetCpuAddr();
- rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
- surface->SetMemoryMarked(false);
+template <class P>
+void TextureCache<P>::SynchronizeGraphicsDescriptors() {
+ using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
+ const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+ const u32 tic_limit = maxwell3d.regs.tic.limit;
+ const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
+ if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
+ graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+ }
+ if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
+ graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
}
+}
- void Unregister(TSurface surface) {
- if (guard_render_targets && surface->IsProtected()) {
- return;
- }
- if (!guard_render_targets && surface->IsRenderTarget()) {
- ManageRenderTargetUnregister(surface);
- }
- UnmarkMemory(surface);
- if (surface->IsSyncPending()) {
- marked_for_unregister.remove(surface);
- surface->SetSyncPending(false);
- }
- UnregisterInnerCache(surface);
- surface->MarkAsRegistered(false);
- ReserveSurface(surface->GetSurfaceParams(), surface);
+template <class P>
+void TextureCache<P>::SynchronizeComputeDescriptors() {
+ const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
+ const u32 tic_limit = kepler_compute.regs.tic.limit;
+ const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
+ const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
+ if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+ compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+ }
+ if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
+ compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
}
+}
- TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
- if (const auto surface = TryGetReservedSurface(params); surface) {
- surface->SetGpuAddr(gpu_addr);
- return surface;
- }
- // No reserved surface available, create a new one and reserve it
- auto new_surface{CreateSurface(gpu_addr, params)};
- return new_surface;
+template <class P>
+void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
+ using namespace VideoCommon::Dirty;
+ auto& flags = maxwell3d.dirty.flags;
+ if (!flags[Dirty::RenderTargets]) {
+ return;
}
+ flags[Dirty::RenderTargets] = false;
- Core::System& system;
- const bool is_astc_supported;
+ // Render target control is used on all render targets, so force look ups when this one is up
+ const bool force = flags[Dirty::RenderTargetControl];
+ flags[Dirty::RenderTargetControl] = false;
-private:
- enum class RecycleStrategy : u32 {
- Ignore = 0,
- Flush = 1,
- BufferCopy = 3,
- };
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+ if (flags[Dirty::ColorBuffer0 + index] || force) {
+ flags[Dirty::ColorBuffer0 + index] = false;
+ BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+ }
+ PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
+ }
+ if (flags[Dirty::ZetaBuffer] || force) {
+ flags[Dirty::ZetaBuffer] = false;
+ BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+ }
+ const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
+ PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
- enum class DeductionType : u32 {
- DeductionComplete,
- DeductionIncomplete,
- DeductionFailed,
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+ }
+ render_targets.size = Extent2D{
+ maxwell3d.regs.render_area.width,
+ maxwell3d.regs.render_area.height,
};
+}
- struct Deduction {
- DeductionType type{DeductionType::DeductionFailed};
- TSurface surface{};
+template <class P>
+typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
+ return &slot_framebuffers[GetFramebufferId(render_targets)];
+}
- bool Failed() const {
- return type == DeductionType::DeductionFailed;
- }
+template <class P>
+void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
+ std::span<ImageViewId> cached_image_view_ids,
+ std::span<const u32> indices,
+ std::span<ImageViewId> image_view_ids) {
+ ASSERT(indices.size() <= image_view_ids.size());
+ do {
+ has_deleted_images = false;
+ std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
+ return VisitImageView(table, cached_image_view_ids, index);
+ });
+ } while (has_deleted_images);
+}
- bool Incomplete() const {
- return type == DeductionType::DeductionIncomplete;
- }
+template <class P>
+ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
+ std::span<ImageViewId> cached_image_view_ids,
+ u32 index) {
+ if (index > table.Limit()) {
+ LOG_ERROR(HW_GPU, "Invalid image view index={}", index);
+ return NULL_IMAGE_VIEW_ID;
+ }
+ const auto [descriptor, is_new] = table.Read(index);
+ ImageViewId& image_view_id = cached_image_view_ids[index];
+ if (is_new) {
+ image_view_id = FindImageView(descriptor);
+ }
+ if (image_view_id != NULL_IMAGE_VIEW_ID) {
+ PrepareImageView(image_view_id, false, false);
+ }
+ return image_view_id;
+}
- bool IsDepth() const {
- return surface->GetSurfaceParams().IsPixelFormatZeta();
- }
- };
+template <class P>
+FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
+ const auto [pair, is_new] = framebuffers.try_emplace(key);
+ FramebufferId& framebuffer_id = pair->second;
+ if (!is_new) {
+ return framebuffer_id;
+ }
+ std::array<ImageView*, NUM_RT> color_buffers;
+ std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
+ [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
+ ImageView* const depth_buffer =
+ key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
+ framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
+ return framebuffer_id;
+}
- /**
- * Takes care of selecting a proper strategy to deal with a texture recycle.
- *
- * @param overlaps The overlapping surfaces registered in the cache.
- * @param params The parameters on the new surface.
- * @param gpu_addr The starting address of the new surface.
- * @param untopological Indicates to the recycler that the texture has no way
- * to match the overlaps due to topological reasons.
- **/
- RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
- const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
- if (Settings::IsGPULevelExtreme()) {
- return RecycleStrategy::Flush;
- }
- // 3D Textures decision
- if (params.target == SurfaceTarget::Texture3D) {
- return RecycleStrategy::Flush;
- }
- for (const auto& s : overlaps) {
- const auto& s_params = s->GetSurfaceParams();
- if (s_params.target == SurfaceTarget::Texture3D) {
- return RecycleStrategy::Flush;
- }
- }
- // Untopological decision
- if (untopological == MatchTopologyResult::CompressUnmatch) {
- return RecycleStrategy::Flush;
- }
- if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
- return RecycleStrategy::Flush;
- }
- return RecycleStrategy::Ignore;
- }
-
- /**
- * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented
- * strategies: Ignore and Flush.
- *
- * - Ignore: Just unregisters all the overlaps and loads the new texture.
- * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
- *
- * @param overlaps The overlapping surfaces registered in the cache.
- * @param params The parameters for the new surface.
- * @param gpu_addr The starting address of the new surface.
- * @param preserve_contents Indicates that the new surface should be loaded from memory or left
- * blank.
- * @param untopological Indicates to the recycler that the texture has no way to match the
- * overlaps due to topological reasons.
- **/
- std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
- const GPUVAddr gpu_addr, const bool preserve_contents,
- const MatchTopologyResult untopological) {
- const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
- for (auto& surface : overlaps) {
- Unregister(surface);
- }
- switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
- case RecycleStrategy::Ignore: {
- return InitializeSurface(gpu_addr, params, do_load);
- }
- case RecycleStrategy::Flush: {
- std::sort(overlaps.begin(), overlaps.end(),
- [](const TSurface& a, const TSurface& b) -> bool {
- return a->GetModificationTick() < b->GetModificationTick();
- });
- for (auto& surface : overlaps) {
- FlushSurface(surface);
- }
- return InitializeSurface(gpu_addr, params, preserve_contents);
+template <class P>
+void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
+ ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
+ if (True(image.flags & ImageFlagBits::CpuModified)) {
+ return;
}
- case RecycleStrategy::BufferCopy: {
- auto new_surface = GetUncachedSurface(gpu_addr, params);
- BufferCopy(overlaps[0], new_surface);
- return {new_surface, new_surface->GetMainView()};
+ image.flags |= ImageFlagBits::CpuModified;
+ UntrackImage(image);
+ });
+}
+
+template <class P>
+void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
+ std::vector<ImageId> images;
+ ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
+ // Skip images that were not modified from the GPU
+ if (False(image.flags & ImageFlagBits::GpuModified)) {
+ return;
}
- default: {
- UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
- return InitializeSurface(gpu_addr, params, do_load);
+ // Skip images that .are. modified from the CPU
+ // We don't want to write sensitive data from the guest
+ if (True(image.flags & ImageFlagBits::CpuModified)) {
+ return;
}
+ if (image.info.num_samples > 1) {
+ LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+ return;
}
+ image.flags &= ~ImageFlagBits::GpuModified;
+ images.push_back(image_id);
+ });
+ if (images.empty()) {
+ return;
}
+ std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
+ return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
+ });
+ for (const ImageId image_id : images) {
+ Image& image = slot_images[image_id];
+ auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
+ const auto copies = FullDownloadCopies(image.info);
+ image.DownloadMemory(map, copies);
+ runtime.Finish();
+ SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+ }
+}
- /**
- * Takes a single surface and recreates into another that may differ in
- * format, target or width alignment.
- *
- * @param current_surface The registered surface in the cache which we want to convert.
- * @param params The new surface params which we'll use to recreate the surface.
- * @param is_render Whether or not the surface is a render target.
- **/
- std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
- bool is_render) {
- const auto gpu_addr = current_surface->GetGpuAddr();
- const auto& cr_params = current_surface->GetSurfaceParams();
- TSurface new_surface;
- if (cr_params.pixel_format != params.pixel_format && !is_render &&
- GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
- SurfaceParams new_params = params;
- new_params.pixel_format = cr_params.pixel_format;
- new_params.type = cr_params.type;
- new_surface = GetUncachedSurface(gpu_addr, new_params);
- } else {
- new_surface = GetUncachedSurface(gpu_addr, params);
- }
- const SurfaceParams& final_params = new_surface->GetSurfaceParams();
- if (cr_params.type != final_params.type) {
- if (Settings::IsGPULevelExtreme()) {
- BufferCopy(current_surface, new_surface);
- }
- } else {
- std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
- for (auto& brick : bricks) {
- TryCopyImage(current_surface, new_surface, brick);
- }
- }
- Unregister(current_surface);
- Register(new_surface);
- new_surface->MarkAsModified(current_surface->IsModified(), Tick());
- return {new_surface, new_surface->GetMainView()};
- }
-
- /**
- * Takes a single surface and checks with the new surface's params if it's an exact
- * match, we return the main view of the registered surface. If its formats don't
- * match, we rebuild the surface. We call this last method a `Mirage`. If formats
- * match but the targets don't, we create an overview View of the registered surface.
- *
- * @param current_surface The registered surface in the cache which we want to convert.
- * @param params The new surface params which we want to check.
- * @param is_render Whether or not the surface is a render target.
- **/
- std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
- const SurfaceParams& params, bool is_render) {
- const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
- const bool matches_target = current_surface->MatchTarget(params.target);
- const auto match_check = [&]() -> std::pair<TSurface, TView> {
- if (matches_target) {
- return {current_surface, current_surface->GetMainView()};
- }
- return {current_surface, current_surface->EmplaceOverview(params)};
- };
- if (!is_mirage) {
- return match_check();
- }
- if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
- return match_check();
- }
- return RebuildSurface(current_surface, params, is_render);
- }
-
- /**
- * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate
- * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
- * of the new surface, if they all match we end up recreating a surface for them,
- * else we return nothing.
- *
- * @param overlaps The overlapping surfaces registered in the cache.
- * @param params The parameters on the new surface.
- * @param gpu_addr The starting address of the new surface.
- **/
- std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
- const SurfaceParams& params,
- GPUVAddr gpu_addr) {
- if (params.target == SurfaceTarget::Texture3D) {
- return std::nullopt;
- }
- const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
- TSurface new_surface = GetUncachedSurface(gpu_addr, params);
+template <class P>
+void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
+ std::vector<ImageId> deleted_images;
+ ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
+ for (const ImageId id : deleted_images) {
+ Image& image = slot_images[id];
+ if (True(image.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(image);
+ }
+ UnregisterImage(id);
+ DeleteImage(id);
+ }
+}
- if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
- LoadSurface(new_surface);
- for (const auto& surface : overlaps) {
- Unregister(surface);
- }
- Register(new_surface);
- return {{new_surface, new_surface->GetMainView()}};
- }
+template <class P>
+void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+ const Tegra::Engines::Fermi2D::Surface& src,
+ const Tegra::Engines::Fermi2D::Config& copy) {
+ const BlitImages images = GetBlitImages(dst, src);
+ const ImageId dst_id = images.dst_id;
+ const ImageId src_id = images.src_id;
+ PrepareImage(src_id, false, false);
+ PrepareImage(dst_id, true, false);
+
+ ImageBase& dst_image = slot_images[dst_id];
+ const ImageBase& src_image = slot_images[src_id];
+
+ // TODO: Deduplicate
+ const std::optional dst_base = dst_image.TryFindBase(dst.Address());
+ const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
+ const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
+ const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+ const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
+ const std::array src_region{
+ Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
+ Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
+ };
- std::size_t passed_tests = 0;
- for (auto& surface : overlaps) {
- const SurfaceParams& src_params = surface->GetSurfaceParams();
- const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
- if (!mipmap_layer) {
- continue;
- }
- const auto [base_layer, base_mipmap] = *mipmap_layer;
- if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
- continue;
- }
- ++passed_tests;
-
- // Copy all mipmaps and layers
- const u32 block_width = params.GetDefaultBlockWidth();
- const u32 block_height = params.GetDefaultBlockHeight();
- for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
- const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
- const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
- if (width < block_width || height < block_height) {
- // Current APIs forbid copying small compressed textures, avoid errors
- break;
- }
- const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
- src_params.depth);
- TryCopyImage(surface, new_surface, copy_params);
- }
- }
- if (passed_tests == 0) {
- return std::nullopt;
- }
- if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
- // In Accurate GPU all tests should pass, else we recycle
- return std::nullopt;
- }
+ const std::optional src_base = src_image.TryFindBase(src.Address());
+ const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
+ const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
+ const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
+ const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+ const std::array dst_region{
+ Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
+ Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
+ };
- const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
- for (const auto& surface : overlaps) {
- Unregister(surface);
- }
+ // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
+ Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
+ if constexpr (FRAMEBUFFER_BLITS) {
+ // OpenGL blits from framebuffers, not images
+ Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
+ runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
+ copy.filter, copy.operation);
+ } else {
+ // Vulkan can blit images, but it lacks format reinterpretations
+ // Provide a framebuffer in case it's necessary
+ ImageView& dst_view = slot_image_views[dst_view_id];
+ ImageView& src_view = slot_image_views[src_view_id];
+ runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
+ copy.operation);
+ }
+}
- new_surface->MarkAsModified(modified, Tick());
- Register(new_surface);
- return {{new_surface, new_surface->GetMainView()}};
- }
-
- /**
- * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D
- * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of
- * the HLE methods.
- *
- * @param overlaps The overlapping surfaces registered in the cache.
- * @param params The parameters on the new surface.
- * @param gpu_addr The starting address of the new surface.
- * @param cpu_addr The starting address of the new surface on physical memory.
- * @param preserve_contents Indicates that the new surface should be loaded from memory or
- * left blank.
- */
- std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
- const SurfaceParams& params,
- GPUVAddr gpu_addr, VAddr cpu_addr,
- bool preserve_contents) {
- if (params.target != SurfaceTarget::Texture3D) {
- for (const auto& surface : overlaps) {
- if (!surface->MatchTarget(params.target)) {
- if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
- if (Settings::IsGPULevelExtreme()) {
- return std::nullopt;
- }
- Unregister(surface);
- return InitializeSurface(gpu_addr, params, preserve_contents);
- }
- return std::nullopt;
- }
- if (surface->GetCpuAddr() != cpu_addr) {
- continue;
- }
- if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
- return std::make_pair(surface, surface->GetMainView());
- }
- }
- return InitializeSurface(gpu_addr, params, preserve_contents);
- }
+template <class P>
+void TextureCache<P>::InvalidateColorBuffer(size_t index) {
+ ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+ color_buffer_id = FindColorBuffer(index, false);
+ if (!color_buffer_id) {
+ LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
+ return;
+ }
+ // When invalidating a color buffer, the old contents are no longer relevant
+ ImageView& color_buffer = slot_image_views[color_buffer_id];
+ Image& image = slot_images[color_buffer.image_id];
+ image.flags &= ~ImageFlagBits::CpuModified;
+ image.flags &= ~ImageFlagBits::GpuModified;
- if (params.num_levels > 1) {
- // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
- return std::nullopt;
- }
+ runtime.InvalidateColorBuffer(color_buffer, index);
+}
- if (overlaps.size() == 1) {
- const auto& surface = overlaps[0];
- const SurfaceParams& overlap_params = surface->GetSurfaceParams();
- // Don't attempt to render to textures with more than one level for now
- // The texture has to be to the right or the sample address if we want to render to it
- if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
- const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
- const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
- if (slice < overlap_params.depth) {
- auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
- return std::make_pair(std::move(surface), std::move(view));
- }
- }
- }
+template <class P>
+void TextureCache<P>::InvalidateDepthBuffer() {
+ ImageViewId& depth_buffer_id = render_targets.depth_buffer_id;
+ depth_buffer_id = FindDepthBuffer(false);
+ if (!depth_buffer_id) {
+ LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
+ return;
+ }
+ // When invalidating the depth buffer, the old contents are no longer relevant
+ ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id];
+ image.flags &= ~ImageFlagBits::CpuModified;
+ image.flags &= ~ImageFlagBits::GpuModified;
- TSurface new_surface = GetUncachedSurface(gpu_addr, params);
- bool modified = false;
+ ImageView& depth_buffer = slot_image_views[depth_buffer_id];
+ runtime.InvalidateDepthBuffer(depth_buffer);
+}
- for (auto& surface : overlaps) {
- const SurfaceParams& src_params = surface->GetSurfaceParams();
- if (src_params.target != SurfaceTarget::Texture2D ||
- src_params.height != params.height ||
- src_params.block_depth != params.block_depth ||
- src_params.block_height != params.block_height) {
- return std::nullopt;
- }
- modified |= surface->IsModified();
-
- const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
- const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
- const u32 width = params.width;
- const u32 height = params.height;
- const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
- TryCopyImage(surface, new_surface, copy_params);
+template <class P>
+typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
+ // TODO: Properly implement this
+ const auto it = page_table.find(cpu_addr >> PAGE_BITS);
+ if (it == page_table.end()) {
+ return nullptr;
+ }
+ const auto& image_ids = it->second;
+ for (const ImageId image_id : image_ids) {
+ const ImageBase& image = slot_images[image_id];
+ if (image.cpu_addr != cpu_addr) {
+ continue;
}
- for (const auto& surface : overlaps) {
- Unregister(surface);
+ if (image.image_view_ids.empty()) {
+ continue;
}
- new_surface->MarkAsModified(modified, Tick());
- Register(new_surface);
-
- TView view = new_surface->GetMainView();
- return std::make_pair(std::move(new_surface), std::move(view));
- }
-
- /**
- * Gets the starting address and parameters of a candidate surface and tries
- * to find a matching surface within the cache. This is done in 3 big steps:
- *
- * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
- *
- * 2. Check if there are any overlaps at all, if there are none, we just load the texture from
- * memory else we move to step 3.
- *
- * 3. Consists of figuring out the relationship between the candidate texture and the
- * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If
- * there's many, we just try to reconstruct a new surface out of them based on the
- * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we
- * have to check if the candidate is a view (layer/mipmap) of the overlap or if the
- * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
- * a new surface.
- *
- * @param gpu_addr The starting address of the candidate surface.
- * @param params The parameters on the candidate surface.
- * @param preserve_contents Indicates that the new surface should be loaded from memory or
- * left blank.
- * @param is_render Whether or not the surface is a render target.
- **/
- std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
- const SurfaceParams& params, bool preserve_contents,
- bool is_render) {
- // Step 1
- // Check Level 1 Cache for a fast structural match. If candidate surface
- // matches at certain level we are pretty much done.
- if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
- TSurface& current_surface = iter->second;
- const auto topological_result = current_surface->MatchesTopology(params);
- if (topological_result != MatchTopologyResult::FullMatch) {
- VectorSurface overlaps{current_surface};
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
- topological_result);
- }
+ return &slot_image_views[image.image_view_ids.at(0)];
+ }
+ return nullptr;
+}
- const auto struct_result = current_surface->MatchesStructure(params);
- if (struct_result != MatchStructureResult::None) {
- const auto& old_params = current_surface->GetSurfaceParams();
- const bool not_3d = params.target != SurfaceTarget::Texture3D &&
- old_params.target != SurfaceTarget::Texture3D;
- if (not_3d || current_surface->MatchTarget(params.target)) {
- if (struct_result == MatchStructureResult::FullMatch) {
- return ManageStructuralMatch(current_surface, params, is_render);
- } else {
- return RebuildSurface(current_surface, params, is_render);
- }
- }
- }
- }
+template <class P>
+bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
+ return !uncommitted_downloads.empty();
+}
- // Step 2
- // Obtain all possible overlaps in the memory region
- const std::size_t candidate_size = params.GetGuestSizeInBytes();
- auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
+template <class P>
+bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+ return !committed_downloads.empty() && !committed_downloads.front().empty();
+}
- // If none are found, we are done. we just load the surface and create it.
- if (overlaps.empty()) {
- return InitializeSurface(gpu_addr, params, preserve_contents);
- }
+template <class P>
+void TextureCache<P>::CommitAsyncFlushes() {
+ // This is intentionally passing the value by copy
+ committed_downloads.push(uncommitted_downloads);
+ uncommitted_downloads.clear();
+}
- // Step 3
- // Now we need to figure the relationship between the texture and its overlaps
- // we do a topological test to ensure we can find some relationship. If it fails
- // immediately recycle the texture
- for (const auto& surface : overlaps) {
- const auto topological_result = surface->MatchesTopology(params);
- if (topological_result != MatchTopologyResult::FullMatch) {
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
- topological_result);
- }
- }
+template <class P>
+void TextureCache<P>::PopAsyncFlushes() {
+ if (committed_downloads.empty()) {
+ return;
+ }
+ const std::span<const ImageId> download_ids = committed_downloads.front();
+ if (download_ids.empty()) {
+ committed_downloads.pop();
+ return;
+ }
+ size_t total_size_bytes = 0;
+ for (const ImageId image_id : download_ids) {
+ total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
+ }
+ auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
+ const size_t original_offset = download_map.offset;
+ for (const ImageId image_id : download_ids) {
+ Image& image = slot_images[image_id];
+ const auto copies = FullDownloadCopies(image.info);
+ image.DownloadMemory(download_map, copies);
+ download_map.offset += image.unswizzled_size_bytes;
+ }
+ // Wait for downloads to finish
+ runtime.Finish();
+
+ download_map.offset = original_offset;
+ std::span<u8> download_span = download_map.mapped_span;
+ for (const ImageId image_id : download_ids) {
+ const ImageBase& image = slot_images[image_id];
+ const auto copies = FullDownloadCopies(image.info);
+ SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
+ download_map.offset += image.unswizzled_size_bytes;
+ download_span = download_span.subspan(image.unswizzled_size_bytes);
+ }
+ committed_downloads.pop();
+}
- // Manage 3D textures
- if (params.block_depth > 0) {
- auto surface =
- Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
- if (surface) {
- return *surface;
- }
+template <class P>
+bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+ bool is_modified = false;
+ ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
+ if (False(image.flags & ImageFlagBits::GpuModified)) {
+ return false;
}
+ is_modified = true;
+ return true;
+ });
+ return is_modified;
+}
- // Split cases between 1 overlap or many.
- if (overlaps.size() == 1) {
- TSurface current_surface = overlaps[0];
- // First check if the surface is within the overlap. If not, it means
- // two things either the candidate surface is a supertexture of the overlap
- // or they don't match in any known way.
- if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
- const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
- if (view) {
- return *view;
- }
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
- MatchTopologyResult::FullMatch);
- }
- // Now we check if the candidate is a mipmap/layer of the overlap
- std::optional<TView> view =
- current_surface->EmplaceView(params, gpu_addr, candidate_size);
- if (view) {
- const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
- if (is_mirage) {
- // On a mirage view, we need to recreate the surface under this new view
- // and then obtain a view again.
- SurfaceParams new_params = current_surface->GetSurfaceParams();
- const u32 wh = SurfaceParams::ConvertWidth(
- new_params.width, new_params.pixel_format, params.pixel_format);
- const u32 hh = SurfaceParams::ConvertHeight(
- new_params.height, new_params.pixel_format, params.pixel_format);
- new_params.width = wh;
- new_params.height = hh;
- new_params.pixel_format = params.pixel_format;
- std::pair<TSurface, TView> pair =
- RebuildSurface(current_surface, new_params, is_render);
- std::optional<TView> mirage_view =
- pair.first->EmplaceView(params, gpu_addr, candidate_size);
- if (mirage_view)
- return {pair.first, *mirage_view};
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
- MatchTopologyResult::FullMatch);
- }
- return {current_surface, *view};
- }
- } else {
- // If there are many overlaps, odds are they are subtextures of the candidate
- // surface. We try to construct a new surface based on the candidate parameters,
- // using the overlaps. If a single overlap fails, this will fail.
- std::optional<std::pair<TSurface, TView>> view =
- TryReconstructSurface(overlaps, params, gpu_addr);
- if (view) {
- return *view;
- }
- }
- // We failed all the tests, recycle the overlaps into a new texture.
- return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
- MatchTopologyResult::FullMatch);
- }
-
- /**
- * Gets the starting address and parameters of a candidate surface and tries to find a
- * matching surface within the cache that's similar to it. If there are many textures
- * or the texture found if entirely incompatible, it will fail. If no texture is found, the
- * blit will be unsuccessful.
- *
- * @param gpu_addr The starting address of the candidate surface.
- * @param params The parameters on the candidate surface.
- **/
- Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
-
- if (!cpu_addr) {
- Deduction result{};
- result.type = DeductionType::DeductionFailed;
- return result;
- }
+template <class P>
+void TextureCache<P>::RefreshContents(Image& image) {
+ if (False(image.flags & ImageFlagBits::CpuModified)) {
+ // Only upload modified images
+ return;
+ }
+ image.flags &= ~ImageFlagBits::CpuModified;
+ TrackImage(image);
- if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
- TSurface& current_surface = iter->second;
- const auto topological_result = current_surface->MatchesTopology(params);
- if (topological_result != MatchTopologyResult::FullMatch) {
- Deduction result{};
- result.type = DeductionType::DeductionFailed;
- return result;
- }
- const auto struct_result = current_surface->MatchesStructure(params);
- if (struct_result != MatchStructureResult::None &&
- current_surface->MatchTarget(params.target)) {
- Deduction result{};
- result.type = DeductionType::DeductionComplete;
- result.surface = current_surface;
- return result;
- }
- }
+ if (image.info.num_samples > 1) {
+ LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
+ return;
+ }
+ auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
+ UploadImageContents(image, staging);
+ runtime.InsertUploadMemoryBarrier();
+}
- const std::size_t candidate_size = params.GetGuestSizeInBytes();
- auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
+template <class P>
+template <typename StagingBuffer>
+void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
+ const std::span<u8> mapped_span = staging.mapped_span;
+ const GPUVAddr gpu_addr = image.gpu_addr;
+
+ if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
+ gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+ const auto uploads = FullUploadSwizzles(image.info);
+ runtime.AccelerateImageUpload(image, staging, uploads);
+ } else if (True(image.flags & ImageFlagBits::Converted)) {
+ std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
+ auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
+ ConvertImage(unswizzled_data, image.info, mapped_span, copies);
+ image.UploadMemory(staging, copies);
+ } else if (image.info.type == ImageType::Buffer) {
+ const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
+ image.UploadMemory(staging, copies);
+ } else {
+ const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
+ image.UploadMemory(staging, copies);
+ }
+}
- if (overlaps.empty()) {
- Deduction result{};
- result.type = DeductionType::DeductionIncomplete;
- return result;
- }
+template <class P>
+ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
+ if (!IsValidAddress(gpu_memory, config)) {
+ return NULL_IMAGE_VIEW_ID;
+ }
+ const auto [pair, is_new] = image_views.try_emplace(config);
+ ImageViewId& image_view_id = pair->second;
+ if (is_new) {
+ image_view_id = CreateImageView(config);
+ }
+ return image_view_id;
+}
- if (overlaps.size() > 1) {
- Deduction result{};
- result.type = DeductionType::DeductionFailed;
- return result;
- } else {
- Deduction result{};
- result.type = DeductionType::DeductionComplete;
- result.surface = overlaps[0];
- return result;
- }
+template <class P>
+ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
+ const ImageInfo info(config);
+ const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride;
+ const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
+ if (!image_id) {
+ return NULL_IMAGE_VIEW_ID;
}
+ ImageBase& image = slot_images[image_id];
+ const SubresourceBase base = image.TryFindBase(config.Address()).value();
+ ASSERT(base.level == 0);
+ const ImageViewInfo view_info(config, base.layer);
+ const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
+ ImageViewBase& image_view = slot_image_views[image_view_id];
+ image_view.flags |= ImageViewFlagBits::Strong;
+ image.flags |= ImageFlagBits::Strong;
+ return image_view_id;
+}
- /**
- * Gets a null surface based on a target texture.
- * @param target The target of the null surface.
- */
- TView GetNullSurface(SurfaceTarget target) {
- const u32 i_target = static_cast<u32>(target);
- if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) {
- return it->second->GetMainView();
- }
- SurfaceParams params{};
- params.target = target;
- params.is_tiled = false;
- params.srgb_conversion = false;
- params.is_layered =
- target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
- target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
- params.block_width = 0;
- params.block_height = 0;
- params.block_depth = 0;
- params.tile_width_spacing = 1;
- params.width = 1;
- params.height = 1;
- params.depth = 1;
- if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
- params.depth = 6;
- }
- params.pitch = 4;
- params.num_levels = 1;
- params.emulated_levels = 1;
- params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM;
- params.type = VideoCore::Surface::SurfaceType::ColorTexture;
- auto surface = CreateSurface(0ULL, params);
- invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
- surface->UploadTexture(invalid_memory);
- surface->MarkAsModified(false, Tick());
- invalid_cache.emplace(i_target, surface);
- return surface->GetMainView();
- }
-
- /**
- * Gets the a source and destination starting address and parameters,
- * and tries to deduce if they are supposed to be depth textures. If so, their
- * parameters are modified and fixed into so.
- *
- * @param src_params The parameters of the candidate surface.
- * @param dst_params The parameters of the destination surface.
- * @param src_gpu_addr The starting address of the candidate surface.
- * @param dst_gpu_addr The starting address of the destination surface.
- **/
- void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
- const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
- auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
- auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
- if (deduced_src.Failed() || deduced_dst.Failed()) {
- return;
+template <class P>
+ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options) {
+ if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
+ return image_id;
+ }
+ return InsertImage(info, gpu_addr, options);
+}
+
+template <class P>
+ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
+ return ImageId{};
+ }
+ const bool broken_views = runtime.HasBrokenTextureViewFormats();
+ ImageId image_id;
+ const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+ if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
+ const bool strict_size = False(options & RelaxedOptions::Size) &&
+ True(existing_image.flags & ImageFlagBits::Strong);
+ const ImageInfo& existing = existing_image.info;
+ if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
+ existing.pitch == info.pitch &&
+ IsPitchLinearSameSize(existing, info, strict_size) &&
+ IsViewCompatible(existing.format, info.format, broken_views)) {
+ image_id = existing_image_id;
+ return true;
+ }
+ } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) {
+ image_id = existing_image_id;
+ return true;
}
+ return false;
+ };
+ ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
+ return image_id;
+}
- const bool incomplete_src = deduced_src.Incomplete();
- const bool incomplete_dst = deduced_dst.Incomplete();
+template <class P>
+ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+ RelaxedOptions options) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
+ const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
+ const Image& image = slot_images[image_id];
+ // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
+ const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
+ if (is_new) {
+ it->second = slot_image_allocs.insert();
+ }
+ slot_image_allocs[it->second].images.push_back(image_id);
+ return image_id;
+}
- if (incomplete_src && incomplete_dst) {
+template <class P>
+ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
+ ImageInfo new_info = info;
+ const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
+ const bool broken_views = runtime.HasBrokenTextureViewFormats();
+ std::vector<ImageId> overlap_ids;
+ std::vector<ImageId> left_aliased_ids;
+ std::vector<ImageId> right_aliased_ids;
+ ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
+ if (info.type != overlap.info.type) {
return;
}
-
- const bool any_incomplete = incomplete_src || incomplete_dst;
-
- if (!any_incomplete) {
- if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
- return;
- }
- } else {
- if (incomplete_src && !(deduced_dst.IsDepth())) {
- return;
- }
-
- if (incomplete_dst && !(deduced_src.IsDepth())) {
- return;
+ if (info.type == ImageType::Linear) {
+ if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
+ // Alias linear images with the same pitch
+ left_aliased_ids.push_back(overlap_id);
}
+ return;
}
-
- const auto inherit_format = [](SurfaceParams& to, TSurface from) {
- const SurfaceParams& params = from->GetSurfaceParams();
- to.pixel_format = params.pixel_format;
- to.type = params.type;
- };
- // Now we got the cases where one or both is Depth and the other is not known
- if (!incomplete_src) {
- inherit_format(src_params, deduced_src.surface);
- } else {
- inherit_format(src_params, deduced_dst.surface);
+ static constexpr bool strict_size = true;
+ const std::optional<OverlapResult> solution =
+ ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views);
+ if (solution) {
+ gpu_addr = solution->gpu_addr;
+ cpu_addr = solution->cpu_addr;
+ new_info.resources = solution->resources;
+ overlap_ids.push_back(overlap_id);
+ return;
}
- if (!incomplete_dst) {
- inherit_format(dst_params, deduced_dst.surface);
+ static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
+ const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
+ if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) {
+ left_aliased_ids.push_back(overlap_id);
+ } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
+ broken_views)) {
+ right_aliased_ids.push_back(overlap_id);
+ }
+ });
+ const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
+ Image& new_image = slot_images[new_image_id];
+
+ // TODO: Only upload what we need
+ RefreshContents(new_image);
+
+ for (const ImageId overlap_id : overlap_ids) {
+ Image& overlap = slot_images[overlap_id];
+ if (overlap.info.num_samples != new_image.info.num_samples) {
+ LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
} else {
- inherit_format(dst_params, deduced_src.surface);
+ const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
+ const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
+ runtime.CopyImage(new_image, overlap, copies);
+ }
+ if (True(overlap.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(overlap);
}
+ UnregisterImage(overlap_id);
+ DeleteImage(overlap_id);
+ }
+ ImageBase& new_image_base = new_image;
+ for (const ImageId aliased_id : right_aliased_ids) {
+ ImageBase& aliased = slot_images[aliased_id];
+ AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
}
+ for (const ImageId aliased_id : left_aliased_ids) {
+ ImageBase& aliased = slot_images[aliased_id];
+ AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
+ }
+ RegisterImage(new_image_id);
+ return new_image_id;
+}
- std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
- bool preserve_contents) {
- auto new_surface{GetUncachedSurface(gpu_addr, params)};
- Register(new_surface);
- if (preserve_contents) {
- LoadSurface(new_surface);
- }
- return {new_surface, new_surface->GetMainView()};
+template <class P>
+typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
+ const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
+ static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
+ const GPUVAddr dst_addr = dst.Address();
+ const GPUVAddr src_addr = src.Address();
+ ImageInfo dst_info(dst);
+ ImageInfo src_info(src);
+ ImageId dst_id;
+ ImageId src_id;
+ do {
+ has_deleted_images = false;
+ dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
+ src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
+ const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
+ const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
+ DeduceBlitImages(dst_info, src_info, dst_image, src_image);
+ if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
+ continue;
+ }
+ if (!dst_id) {
+ dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
+ }
+ if (!src_id) {
+ src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
+ }
+ } while (has_deleted_images);
+ return BlitImages{
+ .dst_id = dst_id,
+ .src_id = src_id,
+ .dst_format = dst_info.format,
+ .src_format = src_info.format,
+ };
+}
+
+template <class P>
+SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
+ if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
+ return NULL_SAMPLER_ID;
}
+ const auto [pair, is_new] = samplers.try_emplace(config);
+ if (is_new) {
+ pair->second = slot_samplers.insert(runtime, config);
+ }
+ return pair->second;
+}
- void LoadSurface(const TSurface& surface) {
- staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
- surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
- surface->UploadTexture(staging_cache.GetBuffer(0));
- surface->MarkAsModified(false, Tick());
+template <class P>
+ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
+ const auto& regs = maxwell3d.regs;
+ if (index >= regs.rt_control.count) {
+ return ImageViewId{};
+ }
+ const auto& rt = regs.rt[index];
+ const GPUVAddr gpu_addr = rt.Address();
+ if (gpu_addr == 0) {
+ return ImageViewId{};
}
+ if (rt.format == Tegra::RenderTargetFormat::NONE) {
+ return ImageViewId{};
+ }
+ const ImageInfo info(regs, index);
+ return FindRenderTargetView(info, gpu_addr, is_clear);
+}
- void FlushSurface(const TSurface& surface) {
- if (!surface->IsModified()) {
- return;
- }
- staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
- surface->DownloadTexture(staging_cache.GetBuffer(0));
- surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
- surface->MarkAsModified(false, Tick());
- }
-
- void RegisterInnerCache(TSurface& surface) {
- const VAddr cpu_addr = surface->GetCpuAddr();
- VAddr start = cpu_addr >> registry_page_bits;
- const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
- l1_cache[cpu_addr] = surface;
- while (start <= end) {
- registry[start].push_back(surface);
- start++;
- }
+template <class P>
+ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
+ const auto& regs = maxwell3d.regs;
+ if (!regs.zeta_enable) {
+ return ImageViewId{};
+ }
+ const GPUVAddr gpu_addr = regs.zeta.Address();
+ if (gpu_addr == 0) {
+ return ImageViewId{};
}
+ const ImageInfo info(regs);
+ return FindRenderTargetView(info, gpu_addr, is_clear);
+}
- void UnregisterInnerCache(TSurface& surface) {
- const VAddr cpu_addr = surface->GetCpuAddr();
- VAddr start = cpu_addr >> registry_page_bits;
- const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
- l1_cache.erase(cpu_addr);
- while (start <= end) {
- auto& reg{registry[start]};
- reg.erase(std::find(reg.begin(), reg.end(), surface));
- start++;
- }
+template <class P>
+ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
+ bool is_clear) {
+ const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
+ const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
+ if (!image_id) {
+ return NULL_IMAGE_VIEW_ID;
+ }
+ Image& image = slot_images[image_id];
+ const ImageViewType view_type = RenderTargetImageViewType(info);
+ SubresourceBase base;
+ if (image.info.type == ImageType::Linear) {
+ base = SubresourceBase{.level = 0, .layer = 0};
+ } else {
+ base = image.TryFindBase(gpu_addr).value();
}
+ const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
+ const SubresourceRange range{
+ .base = base,
+ .extent = {.levels = 1, .layers = layers},
+ };
+ return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
+}
- VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
- if (size == 0) {
- return {};
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+ static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+ boost::container::small_vector<ImageId, 32> images;
+ ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
+ const auto it = page_table.find(page);
+ if (it == page_table.end()) {
+ if constexpr (BOOL_BREAK) {
+ return false;
+ } else {
+ return;
+ }
}
- const VAddr cpu_addr_end = cpu_addr + size;
- const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
- VectorSurface surfaces;
- for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
- const auto it = registry.find(start);
- if (it == registry.end()) {
+ for (const ImageId image_id : it->second) {
+ Image& image = slot_images[image_id];
+ if (True(image.flags & ImageFlagBits::Picked)) {
+ continue;
+ }
+ if (!image.Overlaps(cpu_addr, size)) {
continue;
}
- for (auto& surface : it->second) {
- if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
- continue;
+ image.flags |= ImageFlagBits::Picked;
+ images.push_back(image_id);
+ if constexpr (BOOL_BREAK) {
+ if (func(image_id, image)) {
+ return true;
}
- surface->MarkAsPicked(true);
- surfaces.push_back(surface);
+ } else {
+ func(image_id, image);
}
}
- for (auto& surface : surfaces) {
- surface->MarkAsPicked(false);
+ if constexpr (BOOL_BREAK) {
+ return false;
}
- return surfaces;
+ });
+ for (const ImageId image_id : images) {
+ slot_images[image_id].flags &= ~ImageFlagBits::Picked;
}
+}
- void ReserveSurface(const SurfaceParams& params, TSurface surface) {
- surface_reserve[params].push_back(std::move(surface));
+template <class P>
+ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
+ Image& image = slot_images[image_id];
+ if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
+ return image_view_id;
}
+ const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
+ image.InsertView(info, image_view_id);
+ return image_view_id;
+}
+
+template <class P>
+void TextureCache<P>::RegisterImage(ImageId image_id) {
+ ImageBase& image = slot_images[image_id];
+ ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
+ "Trying to register an already registered image");
+ image.flags |= ImageFlagBits::Registered;
+ ForEachPage(image.cpu_addr, image.guest_size_bytes,
+ [this, image_id](u64 page) { page_table[page].push_back(image_id); });
+}
- TSurface TryGetReservedSurface(const SurfaceParams& params) {
- auto search{surface_reserve.find(params)};
- if (search == surface_reserve.end()) {
- return {};
+template <class P>
+void TextureCache<P>::UnregisterImage(ImageId image_id) {
+ Image& image = slot_images[image_id];
+ ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
+ "Trying to unregister an already registered image");
+ image.flags &= ~ImageFlagBits::Registered;
+ ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+ const auto page_it = page_table.find(page);
+ if (page_it == page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ return;
}
- for (auto& surface : search->second) {
- if (!surface->IsRegistered()) {
- return surface;
- }
+ std::vector<ImageId>& image_ids = page_it->second;
+ const auto vector_it = std::ranges::find(image_ids, image_id);
+ if (vector_it == image_ids.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
+ return;
}
- return {};
- }
+ image_ids.erase(vector_it);
+ });
+}
- /// Try to do an image copy logging when formats are incompatible.
- void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) {
- const SurfaceParams& src_params = src->GetSurfaceParams();
- const SurfaceParams& dst_params = dst->GetSurfaceParams();
- if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) {
- LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}",
- static_cast<int>(dst_params.pixel_format),
- static_cast<int>(src_params.pixel_format));
- return;
+template <class P>
+void TextureCache<P>::TrackImage(ImageBase& image) {
+ ASSERT(False(image.flags & ImageFlagBits::Tracked));
+ image.flags |= ImageFlagBits::Tracked;
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+}
+
+template <class P>
+void TextureCache<P>::UntrackImage(ImageBase& image) {
+ ASSERT(True(image.flags & ImageFlagBits::Tracked));
+ image.flags &= ~ImageFlagBits::Tracked;
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+}
+
+template <class P>
+void TextureCache<P>::DeleteImage(ImageId image_id) {
+ ImageBase& image = slot_images[image_id];
+ const GPUVAddr gpu_addr = image.gpu_addr;
+ const auto alloc_it = image_allocs_table.find(gpu_addr);
+ if (alloc_it == image_allocs_table.end()) {
+ UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
+ gpu_addr);
+ return;
+ }
+ const ImageAllocId alloc_id = alloc_it->second;
+ std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
+ const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
+ if (alloc_image_it == alloc_images.end()) {
+ UNREACHABLE_MSG("Trying to delete an image that does not exist");
+ return;
+ }
+ ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
+ ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
+
+ // Mark render targets as dirty
+ auto& dirty = maxwell3d.dirty.flags;
+ dirty[Dirty::RenderTargets] = true;
+ dirty[Dirty::ZetaBuffer] = true;
+ for (size_t rt = 0; rt < NUM_RT; ++rt) {
+ dirty[Dirty::ColorBuffer0 + rt] = true;
+ }
+ const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
+ for (const ImageViewId image_view_id : image_view_ids) {
+ std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
+ if (render_targets.depth_buffer_id == image_view_id) {
+ render_targets.depth_buffer_id = ImageViewId{};
}
- ImageCopy(src, dst, copy);
}
+ RemoveImageViewReferences(image_view_ids);
+ RemoveFramebuffers(image_view_ids);
+
+ for (const AliasedImage& alias : image.aliased_images) {
+ ImageBase& other_image = slot_images[alias.id];
+ [[maybe_unused]] const size_t num_removed_aliases =
+ std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
+ return other_alias.id == image_id;
+ });
+ ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
+ num_removed_aliases);
+ }
+ for (const ImageViewId image_view_id : image_view_ids) {
+ sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+ slot_image_views.erase(image_view_id);
+ }
+ sentenced_images.Push(std::move(slot_images[image_id]));
+ slot_images.erase(image_id);
- constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
- return siblings_table[static_cast<std::size_t>(format)];
+ alloc_images.erase(alloc_image_it);
+ if (alloc_images.empty()) {
+ image_allocs_table.erase(alloc_it);
+ }
+ if constexpr (ENABLE_VALIDATION) {
+ std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
+ std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
}
+ graphics_image_table.Invalidate();
+ compute_image_table.Invalidate();
+ has_deleted_images = true;
+}
- /// Returns true the shader sampler entry is compatible with the TIC texture type.
- static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
- const VideoCommon::Shader::Sampler& entry) {
- const auto shader_type = entry.type;
- switch (tic_type) {
- case Tegra::Texture::TextureType::Texture1D:
- case Tegra::Texture::TextureType::Texture1DArray:
- return shader_type == Tegra::Shader::TextureType::Texture1D;
- case Tegra::Texture::TextureType::Texture1DBuffer:
- // TODO(Rodrigo): Assume as valid for now
- return true;
- case Tegra::Texture::TextureType::Texture2D:
- case Tegra::Texture::TextureType::Texture2DNoMipmap:
- return shader_type == Tegra::Shader::TextureType::Texture2D;
- case Tegra::Texture::TextureType::Texture2DArray:
- return shader_type == Tegra::Shader::TextureType::Texture2D ||
- shader_type == Tegra::Shader::TextureType::TextureCube;
- case Tegra::Texture::TextureType::Texture3D:
- return shader_type == Tegra::Shader::TextureType::Texture3D;
- case Tegra::Texture::TextureType::TextureCubeArray:
- case Tegra::Texture::TextureType::TextureCubemap:
- if (shader_type == Tegra::Shader::TextureType::TextureCube) {
- return true;
- }
- return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array;
+template <class P>
+void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
+ auto it = image_views.begin();
+ while (it != image_views.end()) {
+ const auto found = std::ranges::find(removed_views, it->second);
+ if (found != removed_views.end()) {
+ it = image_views.erase(it);
+ } else {
+ ++it;
}
- UNREACHABLE();
- return true;
}
+}
- struct FramebufferTargetInfo {
- TSurface target;
- TView view;
- };
-
- void AsyncFlushSurface(TSurface& surface) {
- if (!uncommitted_flushes) {
- uncommitted_flushes = std::make_shared<std::list<TSurface>>();
+template <class P>
+void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
+ auto it = framebuffers.begin();
+ while (it != framebuffers.end()) {
+ if (it->first.Contains(removed_views)) {
+ it = framebuffers.erase(it);
+ } else {
+ ++it;
}
- uncommitted_flushes->push_back(surface);
}
+}
- VideoCore::RasterizerInterface& rasterizer;
-
- FormatLookupTable format_lookup_table;
- FormatCompatibility format_compatibility;
-
- u64 ticks{};
-
- // Guards the cache for protection conflicts.
- bool guard_render_targets{};
- bool guard_samplers{};
-
- // The siblings table is for formats that can inter exchange with one another
- // without causing issues. This is only valid when a conflict occurs on a non
- // rendering use.
- std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;
-
- // The internal Cache is different for the Texture Cache. It's based on buckets
- // of 1MB. This fits better for the purpose of this cache as textures are normaly
- // large in size.
- static constexpr u64 registry_page_bits{20};
- static constexpr u64 registry_page_size{1 << registry_page_bits};
- std::unordered_map<VAddr, std::vector<TSurface>> registry;
+template <class P>
+void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
+ image.flags |= ImageFlagBits::GpuModified;
+ image.modification_tick = ++modification_tick;
+}
- static constexpr u32 DEPTH_RT = 8;
- static constexpr u32 NO_RT = 0xFFFFFFFF;
+template <class P>
+void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
+ boost::container::small_vector<const AliasedImage*, 1> aliased_images;
+ ImageBase& image = slot_images[image_id];
+ u64 most_recent_tick = image.modification_tick;
+ for (const AliasedImage& aliased : image.aliased_images) {
+ ImageBase& aliased_image = slot_images[aliased.id];
+ if (image.modification_tick < aliased_image.modification_tick) {
+ most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
+ aliased_images.push_back(&aliased);
+ }
+ }
+ if (aliased_images.empty()) {
+ return;
+ }
+ image.modification_tick = most_recent_tick;
+ std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
+ const ImageBase& lhs_image = slot_images[lhs->id];
+ const ImageBase& rhs_image = slot_images[rhs->id];
+ return lhs_image.modification_tick < rhs_image.modification_tick;
+ });
+ for (const AliasedImage* const aliased : aliased_images) {
+ CopyImage(image_id, aliased->id, aliased->copies);
+ }
+}
- // The L1 Cache is used for fast texture lookup before checking the overlaps
- // This avoids calculating size and other stuffs.
- std::unordered_map<VAddr, TSurface> l1_cache;
+template <class P>
+void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
+ Image& image = slot_images[image_id];
+ if (invalidate) {
+ image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
+ if (False(image.flags & ImageFlagBits::Tracked)) {
+ TrackImage(image);
+ }
+ } else {
+ RefreshContents(image);
+ SynchronizeAliases(image_id);
+ }
+ if (is_modification) {
+ MarkModification(image);
+ }
+ image.frame_tick = frame_tick;
+}
- /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
- /// previously been used. This is to prevent surfaces from being constantly created and
- /// destroyed when used with different surface parameters.
- std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
- std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
- render_targets;
- FramebufferTargetInfo depth_buffer;
+template <class P>
+void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
+ bool invalidate) {
+ if (!image_view_id) {
+ return;
+ }
+ const ImageViewBase& image_view = slot_image_views[image_view_id];
+ PrepareImage(image_view.image_id, is_modification, invalidate);
+}
- std::vector<TSurface> sampled_textures;
+template <class P>
+void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
+ Image& dst = slot_images[dst_id];
+ Image& src = slot_images[src_id];
+ const auto dst_format_type = GetFormatType(dst.info.format);
+ const auto src_format_type = GetFormatType(src.info.format);
+ if (src_format_type == dst_format_type) {
+ if constexpr (HAS_EMULATED_COPIES) {
+ if (!runtime.CanImageBeCopied(dst, src)) {
+ return runtime.EmulateCopyImage(dst, src, copies);
+ }
+ }
+ return runtime.CopyImage(dst, src, copies);
+ }
+ UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
+ UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
+ for (const ImageCopy& copy : copies) {
+ UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
+ UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
+ UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
+ UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
+
+ const SubresourceBase dst_base{
+ .level = copy.dst_subresource.base_level,
+ .layer = copy.dst_subresource.base_layer,
+ };
+ const SubresourceBase src_base{
+ .level = copy.src_subresource.base_level,
+ .layer = copy.src_subresource.base_layer,
+ };
+ const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
+ const SubresourceExtent src_extent{.levels = 1, .layers = 1};
+ const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
+ const SubresourceRange src_range{.base = src_base, .extent = src_extent};
+ const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
+ const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
+ const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+ Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
+ const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
+ ImageView& dst_view = slot_image_views[dst_view_id];
+ ImageView& src_view = slot_image_views[src_view_id];
+ [[maybe_unused]] const Extent3D expected_size{
+ .width = std::min(dst_view.size.width, src_view.size.width),
+ .height = std::min(dst_view.size.height, src_view.size.height),
+ .depth = std::min(dst_view.size.depth, src_view.size.depth),
+ };
+ UNIMPLEMENTED_IF(copy.extent != expected_size);
- /// This cache stores null surfaces in order to be used as a placeholder
- /// for invalid texture calls.
- std::unordered_map<u32, TSurface> invalid_cache;
- std::vector<u8> invalid_memory;
+ runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
+ }
+}
- std::list<TSurface> marked_for_unregister;
+template <class P>
+void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
+ if (*old_id == new_id) {
+ return;
+ }
+ if (*old_id) {
+ const ImageViewBase& old_view = slot_image_views[*old_id];
+ if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
+ uncommitted_downloads.push_back(old_view.image_id);
+ }
+ }
+ *old_id = new_id;
+}
- std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
- std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;
+template <class P>
+std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
+ ImageId image_id, const ImageViewInfo& view_info) {
+ const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
+ const ImageBase& image = slot_images[image_id];
+ const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
+ const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
+ const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
+ const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
+ const u32 num_samples = image.info.num_samples;
+ const auto [samples_x, samples_y] = SamplesLog2(num_samples);
+ const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
+ .color_buffer_ids = {color_view_id},
+ .depth_buffer_id = depth_view_id,
+ .size = {extent.width >> samples_x, extent.height >> samples_y},
+ });
+ return {framebuffer_id, view_id};
+}
- StagingCache staging_cache;
- std::recursive_mutex mutex;
-};
+template <class P>
+bool TextureCache<P>::IsFullClear(ImageViewId id) {
+ if (!id) {
+ return true;
+ }
+ const ImageViewBase& image_view = slot_image_views[id];
+ const ImageBase& image = slot_images[image_view.image_id];
+ const Extent3D size = image_view.size;
+ const auto& regs = maxwell3d.regs;
+ const auto& scissor = regs.scissor_test[0];
+ if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
+ // Images with multiple resources can't be cleared in a single call
+ return false;
+ }
+ if (regs.clear_flags.scissor == 0) {
+ // If scissor testing is disabled, the clear is always full
+ return true;
+ }
+ // Make sure the clear covers all texels in the subresource
+ return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
+ scissor.max_y >= size.height;
+}
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
new file mode 100644
index 000000000..2ad2d72a6
--- /dev/null
+++ b/src/video_core/texture_cache/types.h
@@ -0,0 +1,140 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/texture_cache/slot_vector.h"
+
+namespace VideoCommon {
+
+constexpr size_t NUM_RT = 8;
+constexpr size_t MAX_MIP_LEVELS = 14;
+
+constexpr SlotId CORRUPT_ID{0xfffffffe};
+
+using ImageId = SlotId;
+using ImageViewId = SlotId;
+using ImageAllocId = SlotId;
+using SamplerId = SlotId;
+using FramebufferId = SlotId;
+
+enum class ImageType : u32 {
+ e1D,
+ e2D,
+ e3D,
+ Linear,
+ Buffer,
+};
+
+enum class ImageViewType : u32 {
+ e1D,
+ e2D,
+ Cube,
+ e3D,
+ e1DArray,
+ e2DArray,
+ CubeArray,
+ Rect,
+ Buffer,
+};
+constexpr size_t NUM_IMAGE_VIEW_TYPES = 9;
+
+enum class RelaxedOptions : u32 {
+ Size = 1 << 0,
+ Format = 1 << 1,
+ Samples = 1 << 2,
+};
+DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
+
+struct Offset2D {
+ constexpr auto operator<=>(const Offset2D&) const noexcept = default;
+
+ s32 x;
+ s32 y;
+};
+
+struct Offset3D {
+ constexpr auto operator<=>(const Offset3D&) const noexcept = default;
+
+ s32 x;
+ s32 y;
+ s32 z;
+};
+
+struct Extent2D {
+ constexpr auto operator<=>(const Extent2D&) const noexcept = default;
+
+ u32 width;
+ u32 height;
+};
+
+struct Extent3D {
+ constexpr auto operator<=>(const Extent3D&) const noexcept = default;
+
+ u32 width;
+ u32 height;
+ u32 depth;
+};
+
+struct SubresourceLayers {
+ s32 base_level = 0;
+ s32 base_layer = 0;
+ s32 num_layers = 1;
+};
+
+struct SubresourceBase {
+ constexpr auto operator<=>(const SubresourceBase&) const noexcept = default;
+
+ s32 level = 0;
+ s32 layer = 0;
+};
+
+struct SubresourceExtent {
+ constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default;
+
+ s32 levels = 1;
+ s32 layers = 1;
+};
+
+struct SubresourceRange {
+ constexpr auto operator<=>(const SubresourceRange&) const noexcept = default;
+
+ SubresourceBase base;
+ SubresourceExtent extent;
+};
+
+struct ImageCopy {
+ SubresourceLayers src_subresource;
+ SubresourceLayers dst_subresource;
+ Offset3D src_offset;
+ Offset3D dst_offset;
+ Extent3D extent;
+};
+
+struct BufferImageCopy {
+ size_t buffer_offset;
+ size_t buffer_size;
+ u32 buffer_row_length;
+ u32 buffer_image_height;
+ SubresourceLayers image_subresource;
+ Offset3D image_offset;
+ Extent3D image_extent;
+};
+
+struct BufferCopy {
+ size_t src_offset;
+ size_t dst_offset;
+ size_t size;
+};
+
+struct SwizzleParameters {
+ Extent3D num_tiles;
+ Extent3D block;
+ size_t buffer_offset;
+ s32 level;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
new file mode 100644
index 000000000..a0bc1f7b6
--- /dev/null
+++ b/src/video_core/texture_cache/util.cpp
@@ -0,0 +1,1210 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This files contains code from Ryujinx
+// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
+// The sections using code from Ryujinx are marked with a link to the original version
+
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#include <algorithm>
+#include <array>
+#include <numeric>
+#include <optional>
+#include <span>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "video_core/compatible_formats.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/decode_bc4.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/samples_helper.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/astc.h"
+#include "video_core/textures/decoders.h"
+
+namespace VideoCommon {
+
+namespace {
+
+using Tegra::Texture::GOB_SIZE;
+using Tegra::Texture::GOB_SIZE_SHIFT;
+using Tegra::Texture::GOB_SIZE_X;
+using Tegra::Texture::GOB_SIZE_X_SHIFT;
+using Tegra::Texture::GOB_SIZE_Y;
+using Tegra::Texture::GOB_SIZE_Y_SHIFT;
+using Tegra::Texture::GOB_SIZE_Z;
+using Tegra::Texture::GOB_SIZE_Z_SHIFT;
+using Tegra::Texture::MsaaMode;
+using Tegra::Texture::SwizzleTexture;
+using Tegra::Texture::TextureFormat;
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::UnswizzleTexture;
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::DefaultBlockHeight;
+using VideoCore::Surface::DefaultBlockWidth;
+using VideoCore::Surface::IsCopyCompatible;
+using VideoCore::Surface::IsPixelFormatASTC;
+using VideoCore::Surface::IsViewCompatible;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using VideoCore::Surface::SurfaceType;
+
+constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
+
+struct LevelInfo {
+ Extent3D size;
+ Extent3D block;
+ Extent2D tile_size;
+ u32 bpp_log2;
+ u32 tile_width_spacing;
+};
+
+[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
+ if (shift == 0) {
+ return 0;
+ }
+ u32 x = unit_factor << (shift - 1);
+ if (x >= dimension) {
+ while (--shift) {
+ x >>= 1;
+ if (x < dimension) {
+ break;
+ }
+ }
+ }
+ return shift;
+}
+
+[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) {
+ return std::max<u32>(size >> level, 1);
+}
+
+[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) {
+ return Extent3D{
+ .width = AdjustMipSize(size.width, level),
+ .height = AdjustMipSize(size.height, level),
+ .depth = AdjustMipSize(size.depth, level),
+ };
+}
+
+[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) {
+ const auto [samples_x, samples_y] = SamplesLog2(num_samples);
+ return Extent3D{
+ .width = size.width >> samples_x,
+ .height = size.height >> samples_y,
+ .depth = size.depth,
+ };
+}
+
+template <u32 GOB_EXTENT>
+[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) {
+ do {
+ while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) {
+ --block_size;
+ }
+ } while (level--);
+ return block_size;
+}
+
+[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
+ u32 level) {
+ return {
+ .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
+ .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
+ .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
+ };
+}
+
+[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) {
+ return {
+ .width = Common::DivCeil(size.width, tile_size.width),
+ .height = Common::DivCeil(size.height, tile_size.height),
+ .depth = size.depth,
+ };
+}
+
+[[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) {
+ return std::countl_zero(bytes_per_block) ^ 0x1F;
+}
+
+[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) {
+ return BytesPerBlockLog2(BytesPerBlock(format));
+}
+
+[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) {
+ const Extent3D num_blocks = AdjustTileSize(size, tile_size);
+ return num_blocks.width * num_blocks.height * num_blocks.depth;
+}
+
+[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) {
+ return Common::DivCeil(AdjustMipSize(size, level), block_size);
+}
+
+[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
+ switch (num_samples) {
+ case 1:
+ return {1, 1};
+ case 2:
+ return {2, 1};
+ case 4:
+ return {2, 2};
+ case 8:
+ return {4, 2};
+ case 16:
+ return {4, 4};
+ }
+ UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
+ return {1, 1};
+}
+
+[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) {
+ return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
+}
+
+[[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) {
+ return Extent3D{
+ .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2,
+ .height = AdjustSize(info.size.height, level, info.tile_size.height),
+ .depth = AdjustMipSize(info.size.depth, level),
+ };
+}
+
+[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
+ const Extent3D blocks = NumLevelBlocks(info, level);
+ return Extent3D{
+ .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
+ .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height),
+ .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth),
+ };
+}
+
+[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) {
+ return Extent2D{
+ .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing,
+ .height = GOB_SIZE_Y_SHIFT + block_height,
+ };
+}
+
+[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob,
+ u32 block_depth) {
+ return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) ||
+ num_tiles.depth < (1U << block_depth);
+}
+
+[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob,
+ u32 bpp_log2) {
+ if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) {
+ return GOB_SIZE_X_SHIFT - bpp_log2;
+ } else {
+ return gob.width;
+ }
+}
+
+[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2,
+ u32 tile_width_spacing) {
+ const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing);
+ return StrideAlignment(num_tiles, block, gob, bpp_log2);
+}
+
+[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) {
+ const Extent3D blocks = NumLevelBlocks(info, level);
+ const Extent2D gobs{
+ .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT),
+ .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT),
+ };
+ const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing);
+ const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth);
+ const u32 alignment = is_small ? 0 : info.tile_width_spacing;
+ return Extent2D{
+ .width = Common::AlignUpLog2(gobs.width, alignment),
+ .height = gobs.height,
+ };
+}
+
+[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) {
+ const Extent3D blocks = NumLevelBlocks(info, level);
+ const Extent3D tile_shift = TileShift(info, level);
+ const Extent2D gobs = NumGobs(info, level);
+ return Extent3D{
+ .width = Common::DivCeilLog2(gobs.width, tile_shift.width),
+ .height = Common::DivCeilLog2(gobs.height, tile_shift.height),
+ .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth),
+ };
+}
+
+[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) {
+ const Extent3D tile_shift = TileShift(info, level);
+ const Extent3D tiles = LevelTiles(info, level);
+ const u32 num_tiles = tiles.width * tiles.height * tiles.depth;
+ const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth;
+ return num_tiles << shift;
+}
+
+[[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info,
+ u32 num_levels) {
+ ASSERT(num_levels <= MAX_MIP_LEVELS);
+ std::array<u32, MAX_MIP_LEVELS> sizes{};
+ for (u32 level = 0; level < num_levels; ++level) {
+ sizes[level] = CalculateLevelSize(info, level);
+ }
+ return sizes;
+}
+
+[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
+ u32 num_samples, u32 tile_width_spacing) {
+ const auto [samples_x, samples_y] = Samples(num_samples);
+ const u32 bytes_per_block = BytesPerBlock(format);
+ return {
+ .size =
+ {
+ .width = size.width * samples_x,
+ .height = size.height * samples_y,
+ .depth = size.depth,
+ },
+ .block = block,
+ .tile_size = DefaultBlockSize(format),
+ .bpp_log2 = BytesPerBlockLog2(bytes_per_block),
+ .tile_width_spacing = tile_width_spacing,
+ };
+}
+
+[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
+ return MakeLevelInfo(info.format, info.size, info.block, info.num_samples,
+ info.tile_width_spacing);
+}
+
+[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
+ u32 num_samples, u32 tile_width_spacing,
+ u32 level) {
+ const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing);
+ u32 offset = 0;
+ for (u32 current_level = 0; current_level < level; ++current_level) {
+ offset += CalculateLevelSize(info, current_level);
+ }
+ return offset;
+}
+
+[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block,
+ u32 tile_size_y, u32 tile_width_spacing) {
+ // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134
+ if (tile_width_spacing > 0) {
+ const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth;
+ return Common::AlignUpLog2(size_bytes, alignment_log2);
+ }
+ const u32 aligned_height = Common::AlignUp(size.height, tile_size_y);
+ while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) {
+ --block.height;
+ }
+ while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) {
+ --block.depth;
+ }
+ const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth;
+ const u32 num_blocks = size_bytes >> block_shift;
+ if (size_bytes != num_blocks << block_shift) {
+ return (num_blocks + 1) << block_shift;
+ }
+ return size_bytes;
+}
+
+[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info,
+ const ImageBase& overlap,
+ bool strict_size) {
+ const ImageInfo& info = overlap.info;
+ if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) {
+ return std::nullopt;
+ }
+ if (new_info.block != info.block) {
+ return std::nullopt;
+ }
+ const SubresourceExtent resources = new_info.resources;
+ return SubresourceExtent{
+ .levels = std::max(resources.levels, info.resources.levels),
+ .layers = std::max(resources.layers, info.resources.layers),
+ };
+}
+
+[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
+ const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
+ const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
+ const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
+ const auto it = std::ranges::find(slice_offsets, diff);
+ if (it == slice_offsets.end()) {
+ return std::nullopt;
+ }
+ const std::vector subresources = CalculateSliceSubresources(new_info);
+ const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
+ const ImageInfo& info = overlap.info;
+ if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
+ return std::nullopt;
+ }
+ const u32 mip_depth = std::max(1U, new_info.size.depth << base.level);
+ if (mip_depth < info.size.depth + base.layer) {
+ return std::nullopt;
+ }
+ if (MipBlockSize(new_info, base.level) != info.block) {
+ return std::nullopt;
+ }
+ return SubresourceExtent{
+ .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
+ .layers = 1,
+ };
+}
+
+[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
+ const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
+ const u32 layer_stride = new_info.layer_stride;
+ const s32 new_size = layer_stride * new_info.resources.layers;
+ const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
+ if (diff > new_size) {
+ return std::nullopt;
+ }
+ const s32 base_layer = diff / layer_stride;
+ const s32 mip_offset = diff % layer_stride;
+ const std::array offsets = CalculateMipLevelOffsets(new_info);
+ const auto end = offsets.begin() + new_info.resources.levels;
+ const auto it = std::find(offsets.begin(), end, mip_offset);
+ if (it == end) {
+ // Mipmap is not aligned to any valid size
+ return std::nullopt;
+ }
+ const SubresourceBase base{
+ .level = static_cast<s32>(std::distance(offsets.begin(), it)),
+ .layer = base_layer,
+ };
+ const ImageInfo& info = overlap.info;
+ if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
+ return std::nullopt;
+ }
+ if (MipBlockSize(new_info, base.level) != info.block) {
+ return std::nullopt;
+ }
+ return SubresourceExtent{
+ .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
+ .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer),
+ };
+}
+
+[[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info,
+ GPUVAddr gpu_addr,
+ VAddr cpu_addr,
+ const ImageBase& overlap,
+ bool strict_size) {
+ std::optional<SubresourceExtent> resources;
+ if (new_info.type != ImageType::e3D) {
+ resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size);
+ } else {
+ resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size);
+ }
+ if (!resources) {
+ return std::nullopt;
+ }
+ return OverlapResult{
+ .gpu_addr = gpu_addr,
+ .cpu_addr = cpu_addr,
+ .resources = *resources,
+ };
+}
+
+[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info,
+ GPUVAddr gpu_addr,
+ VAddr cpu_addr,
+ const ImageBase& overlap,
+ bool strict_size) {
+ const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr);
+ if (!base) {
+ return std::nullopt;
+ }
+ const ImageInfo& info = overlap.info;
+ if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) {
+ return std::nullopt;
+ }
+ if (new_info.block != MipBlockSize(info, base->level)) {
+ return std::nullopt;
+ }
+ const SubresourceExtent resources = new_info.resources;
+ s32 layers = 1;
+ if (info.type != ImageType::e3D) {
+ layers = std::max(resources.layers, info.resources.layers + base->layer);
+ }
+ return OverlapResult{
+ .gpu_addr = overlap.gpu_addr,
+ .cpu_addr = overlap.cpu_addr,
+ .resources =
+ {
+ .levels = std::max(resources.levels + base->level, info.resources.levels),
+ .layers = layers,
+ },
+ };
+}
+
+[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) {
+ // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212
+ static constexpr u32 STRIDE_ALIGNMENT = 32;
+ ASSERT(info.type == ImageType::Linear);
+ const Extent2D num_tiles{
+ .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)),
+ .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)),
+ };
+ const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format);
+ return Extent2D{
+ .width = Common::AlignUp(num_tiles.width, width_alignment),
+ .height = num_tiles.height,
+ };
+}
+
+[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) {
+ // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176
+ ASSERT(info.type != ImageType::Linear);
+ const Extent3D size = AdjustMipSize(info.size, level);
+ const Extent3D num_tiles{
+ .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)),
+ .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)),
+ .depth = size.depth,
+ };
+ const u32 bpp_log2 = BytesPerBlockLog2(info.format);
+ const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
+ const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
+ return Extent3D{
+ .width = Common::AlignUpLog2(num_tiles.width, alignment),
+ .height = Common::AlignUpLog2(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
+ .depth = Common::AlignUpLog2(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth),
+ };
+}
+
+[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept {
+ u32 num_blocks = 0;
+ for (s32 level = 0; level < info.resources.levels; ++level) {
+ const Extent3D mip_size = AdjustMipSize(info.size, level);
+ num_blocks += NumBlocks(mip_size, tile_size);
+ }
+ return num_blocks;
+}
+
+[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept {
+ ASSERT(info.type == ImageType::e3D);
+ u32 num_slices = 0;
+ for (s32 level = 0; level < info.resources.levels; ++level) {
+ num_slices += AdjustMipSize(info.size.depth, level);
+ }
+ return num_slices;
+}
+
+void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
+ const ImageInfo& info, const BufferImageCopy& copy,
+ std::span<const u8> memory) {
+ ASSERT(copy.image_offset.z == 0);
+ ASSERT(copy.image_extent.depth == 1);
+ ASSERT(copy.image_subresource.base_level == 0);
+ ASSERT(copy.image_subresource.base_layer == 0);
+ ASSERT(copy.image_subresource.num_layers == 1);
+
+ const u32 bytes_per_block = BytesPerBlock(info.format);
+ const u32 row_length = copy.image_extent.width * bytes_per_block;
+ const u32 guest_offset_x = copy.image_offset.x * bytes_per_block;
+
+ for (u32 line = 0; line < copy.image_extent.height; ++line) {
+ const u32 host_offset_y = line * info.pitch;
+ const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch;
+ const u32 guest_offset = guest_offset_x + guest_offset_y;
+ gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y,
+ row_length);
+ }
+}
+
+void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
+ const ImageInfo& info, const BufferImageCopy& copy,
+ std::span<const u8> input) {
+ const Extent3D size = info.size;
+ const LevelInfo level_info = MakeLevelInfo(info);
+ const Extent2D tile_size = DefaultBlockSize(info.format);
+ const u32 bytes_per_block = BytesPerBlock(info.format);
+
+ const s32 level = copy.image_subresource.base_level;
+ const Extent3D level_size = AdjustMipSize(size, level);
+ const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
+ const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
+
+ UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
+
+ UNIMPLEMENTED_IF(copy.image_offset.x != 0);
+ UNIMPLEMENTED_IF(copy.image_offset.y != 0);
+ UNIMPLEMENTED_IF(copy.image_offset.z != 0);
+ UNIMPLEMENTED_IF(copy.image_extent != level_size);
+
+ const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
+ const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+
+ size_t host_offset = copy.buffer_offset;
+
+ const u32 num_levels = info.resources.levels;
+ const std::array sizes = CalculateLevelSizes(level_info, num_levels);
+ size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0);
+ const size_t layer_stride =
+ AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size,
+ level_info.block, tile_size.height, info.tile_width_spacing);
+ const size_t subresource_size = sizes[level];
+
+ const auto dst_data = std::make_unique<u8[]>(subresource_size);
+ const std::span<u8> dst(dst_data.get(), subresource_size);
+
+ for (s32 layer = 0; layer < info.resources.layers; ++layer) {
+ const std::span<const u8> src = input.subspan(host_offset);
+ SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
+ num_tiles.depth, block.height, block.depth);
+
+ gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
+
+ host_offset += host_bytes_per_layer;
+ guest_offset += layer_stride;
+ }
+ ASSERT(host_offset - copy.buffer_offset == copy.buffer_size);
+}
+
+} // Anonymous namespace
+
+u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept {
+ if (info.type == ImageType::Buffer) {
+ return info.size.width * BytesPerBlock(info.format);
+ }
+ if (info.type == ImageType::Linear) {
+ return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
+ }
+ if (info.resources.layers > 1) {
+ ASSERT(info.layer_stride != 0);
+ return info.layer_stride * info.resources.layers;
+ } else {
+ return CalculateLayerSize(info);
+ }
+}
+
+u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {
+ if (info.type == ImageType::Buffer) {
+ return info.size.width * BytesPerBlock(info.format);
+ }
+ if (info.num_samples > 1) {
+ // Multisample images can't be uploaded or downloaded to the host
+ return 0;
+ }
+ if (info.type == ImageType::Linear) {
+ return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
+ }
+ const Extent2D tile_size = DefaultBlockSize(info.format);
+ return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format);
+}
+
+u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
+ if (info.type == ImageType::Buffer) {
+ return info.size.width * BytesPerBlock(info.format);
+ }
+ static constexpr Extent2D TILE_SIZE{1, 1};
+ return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
+}
+
+u32 CalculateLayerStride(const ImageInfo& info) noexcept {
+ ASSERT(info.type != ImageType::Linear);
+ const u32 layer_size = CalculateLayerSize(info);
+ const Extent3D size = info.size;
+ const Extent3D block = info.block;
+ const u32 tile_size_y = DefaultBlockHeight(info.format);
+ return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing);
+}
+
+u32 CalculateLayerSize(const ImageInfo& info) noexcept {
+ ASSERT(info.type != ImageType::Linear);
+ return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples,
+ info.tile_width_spacing, info.resources.levels);
+}
+
+std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
+ ASSERT(info.resources.levels <= static_cast<s32>(MAX_MIP_LEVELS));
+ const LevelInfo level_info = MakeLevelInfo(info);
+ std::array<u32, MAX_MIP_LEVELS> offsets{};
+ u32 offset = 0;
+ for (s32 level = 0; level < info.resources.levels; ++level) {
+ offsets[level] = offset;
+ offset += CalculateLevelSize(level_info, level);
+ }
+ return offsets;
+}
+
+std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
+ ASSERT(info.type == ImageType::e3D);
+ std::vector<u32> offsets;
+ offsets.reserve(NumSlices(info));
+
+ const LevelInfo level_info = MakeLevelInfo(info);
+ u32 mip_offset = 0;
+ for (s32 level = 0; level < info.resources.levels; ++level) {
+ const Extent3D tile_shift = TileShift(level_info, level);
+ const Extent3D tiles = LevelTiles(level_info, level);
+ const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT;
+ const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift;
+ const u32 z_mask = (1U << tile_shift.depth) - 1;
+ const u32 depth = AdjustMipSize(info.size.depth, level);
+ for (u32 slice = 0; slice < depth; ++slice) {
+ const u32 z_low = slice & z_mask;
+ const u32 z_high = slice & ~z_mask;
+ offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size));
+ }
+ mip_offset += CalculateLevelSize(level_info, level);
+ }
+ return offsets;
+}
+
+std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
+ ASSERT(info.type == ImageType::e3D);
+ std::vector<SubresourceBase> subresources;
+ subresources.reserve(NumSlices(info));
+ for (s32 level = 0; level < info.resources.levels; ++level) {
+ const s32 depth = AdjustMipSize(info.size.depth, level);
+ for (s32 slice = 0; slice < depth; ++slice) {
+ subresources.emplace_back(SubresourceBase{
+ .level = level,
+ .layer = slice,
+ });
+ }
+ }
+ return subresources;
+}
+
+u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) {
+ const Extent2D tile_size = DefaultBlockSize(info.format);
+ const Extent3D level_size = AdjustMipSize(info.size, level);
+ const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
+ const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level);
+ const u32 bpp_log2 = BytesPerBlockLog2(info.format);
+ return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing);
+}
+
+PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept {
+ return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
+ config.a_type, config.srgb_conversion);
+}
+
+ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
+ switch (info.type) {
+ case ImageType::e2D:
+ return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D;
+ case ImageType::e3D:
+ return ImageViewType::e2DArray;
+ case ImageType::Linear:
+ return ImageViewType::e2D;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type));
+ return ImageViewType{};
+ }
+}
+
+std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
+ SubresourceBase base) {
+ ASSERT(dst.resources.levels >= src.resources.levels);
+ ASSERT(dst.num_samples == src.num_samples);
+
+ const bool is_dst_3d = dst.type == ImageType::e3D;
+ if (is_dst_3d) {
+ ASSERT(src.type == ImageType::e3D);
+ ASSERT(src.resources.levels == 1);
+ }
+
+ std::vector<ImageCopy> copies;
+ copies.reserve(src.resources.levels);
+ for (s32 level = 0; level < src.resources.levels; ++level) {
+ ImageCopy& copy = copies.emplace_back();
+ copy.src_subresource = SubresourceLayers{
+ .base_level = level,
+ .base_layer = 0,
+ .num_layers = src.resources.layers,
+ };
+ copy.dst_subresource = SubresourceLayers{
+ .base_level = base.level + level,
+ .base_layer = is_dst_3d ? 0 : base.layer,
+ .num_layers = is_dst_3d ? 1 : src.resources.layers,
+ };
+ copy.src_offset = Offset3D{
+ .x = 0,
+ .y = 0,
+ .z = 0,
+ };
+ copy.dst_offset = Offset3D{
+ .x = 0,
+ .y = 0,
+ .z = is_dst_3d ? base.layer : 0,
+ };
+ const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level);
+ copy.extent = AdjustSamplesSize(mip_size, dst.num_samples);
+ if (is_dst_3d) {
+ copy.extent.depth = src.size.depth;
+ }
+ }
+ return copies;
+}
+
+bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
+ if (config.Address() == 0) {
+ return false;
+ }
+ if (config.Address() > (u64(1) << 48)) {
+ return false;
+ }
+ return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
+}
+
+std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
+ const ImageInfo& info, std::span<u8> output) {
+ const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
+ const u32 bpp_log2 = BytesPerBlockLog2(info.format);
+ const Extent3D size = info.size;
+
+ if (info.type == ImageType::Linear) {
+ gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
+
+ ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
+ return {{
+ .buffer_offset = 0,
+ .buffer_size = guest_size_bytes,
+ .buffer_row_length = info.pitch >> bpp_log2,
+ .buffer_image_height = size.height,
+ .image_subresource =
+ {
+ .base_level = 0,
+ .base_layer = 0,
+ .num_layers = 1,
+ },
+ .image_offset = {0, 0, 0},
+ .image_extent = size,
+ }};
+ }
+ const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
+ gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
+ const std::span<const u8> input(input_data.get(), guest_size_bytes);
+
+ const LevelInfo level_info = MakeLevelInfo(info);
+ const s32 num_layers = info.resources.layers;
+ const s32 num_levels = info.resources.levels;
+ const Extent2D tile_size = DefaultBlockSize(info.format);
+ const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
+ const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
+ const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0);
+ const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height,
+ info.tile_width_spacing);
+ size_t guest_offset = 0;
+ u32 host_offset = 0;
+ std::vector<BufferImageCopy> copies(num_levels);
+
+ for (s32 level = 0; level < num_levels; ++level) {
+ const Extent3D level_size = AdjustMipSize(size, level);
+ const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
+ const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2;
+ copies[level] = BufferImageCopy{
+ .buffer_offset = host_offset,
+ .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers,
+ .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width),
+ .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height),
+ .image_subresource =
+ {
+ .base_level = level,
+ .base_layer = 0,
+ .num_layers = info.resources.layers,
+ },
+ .image_offset = {0, 0, 0},
+ .image_extent = level_size,
+ };
+ const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
+ const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+ const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
+ size_t guest_layer_offset = 0;
+
+ for (s32 layer = 0; layer < info.resources.layers; ++layer) {
+ const std::span<u8> dst = output.subspan(host_offset);
+ const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset);
+ UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height,
+ num_tiles.depth, block.height, block.depth, stride_alignment);
+ guest_layer_offset += layer_stride;
+ host_offset += host_bytes_per_layer;
+ }
+ guest_offset += level_sizes[level];
+ }
+ return copies;
+}
+
+BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
+ const ImageBase& image, std::span<u8> output) {
+ gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
+ return BufferCopy{
+ .src_offset = 0,
+ .dst_offset = 0,
+ .size = image.guest_size_bytes,
+ };
+}
+
+void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
+ std::span<BufferImageCopy> copies) {
+ u32 output_offset = 0;
+
+ const Extent2D tile_size = DefaultBlockSize(info.format);
+ for (BufferImageCopy& copy : copies) {
+ const u32 level = copy.image_subresource.base_level;
+ const Extent3D mip_size = AdjustMipSize(info.size, level);
+ ASSERT(copy.image_offset == Offset3D{});
+ ASSERT(copy.image_subresource.base_layer == 0);
+ ASSERT(copy.image_extent == mip_size);
+ ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
+ ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
+
+ if (IsPixelFormatASTC(info.format)) {
+ ASSERT(copy.image_extent.depth == 1);
+ Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
+ copy.image_extent.width, copy.image_extent.height,
+ copy.image_subresource.num_layers, tile_size.width,
+ tile_size.height, output.subspan(output_offset));
+ } else {
+ DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
+ output.subspan(output_offset));
+ }
+ copy.buffer_offset = output_offset;
+ copy.buffer_row_length = mip_size.width;
+ copy.buffer_image_height = mip_size.height;
+
+ output_offset += copy.image_extent.width * copy.image_extent.height *
+ copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
+ }
+}
+
+std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
+ const Extent3D size = info.size;
+ const u32 bytes_per_block = BytesPerBlock(info.format);
+ if (info.type == ImageType::Linear) {
+ ASSERT(info.pitch % bytes_per_block == 0);
+ return {{
+ .buffer_offset = 0,
+ .buffer_size = static_cast<size_t>(info.pitch) * size.height,
+ .buffer_row_length = info.pitch / bytes_per_block,
+ .buffer_image_height = size.height,
+ .image_subresource =
+ {
+ .base_level = 0,
+ .base_layer = 0,
+ .num_layers = 1,
+ },
+ .image_offset = {0, 0, 0},
+ .image_extent = size,
+ }};
+ }
+ UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
+
+ const s32 num_layers = info.resources.layers;
+ const s32 num_levels = info.resources.levels;
+ const Extent2D tile_size = DefaultBlockSize(info.format);
+
+ u32 host_offset = 0;
+
+ std::vector<BufferImageCopy> copies(num_levels);
+ for (s32 level = 0; level < num_levels; ++level) {
+ const Extent3D level_size = AdjustMipSize(size, level);
+ const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
+ const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers;
+ copies[level] = BufferImageCopy{
+ .buffer_offset = host_offset,
+ .buffer_size = host_bytes_per_level,
+ .buffer_row_length = level_size.width,
+ .buffer_image_height = level_size.height,
+ .image_subresource =
+ {
+ .base_level = level,
+ .base_layer = 0,
+ .num_layers = info.resources.layers,
+ },
+ .image_offset = {0, 0, 0},
+ .image_extent = level_size,
+ };
+ host_offset += host_bytes_per_level;
+ }
+ return copies;
+}
+
+Extent3D MipSize(Extent3D size, u32 level) {
+ return AdjustMipSize(size, level);
+}
+
+Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
+ const LevelInfo level_info = MakeLevelInfo(info);
+ const Extent2D tile_size = DefaultBlockSize(info.format);
+ const Extent3D level_size = AdjustMipSize(info.size, level);
+ const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
+ return AdjustMipBlockSize(num_tiles, level_info.block, level);
+}
+
+std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
+ const Extent2D tile_size = DefaultBlockSize(info.format);
+ if (info.type == ImageType::Linear) {
+ return std::vector{SwizzleParameters{
+ .num_tiles = AdjustTileSize(info.size, tile_size),
+ .block = {},
+ .buffer_offset = 0,
+ .level = 0,
+ }};
+ }
+ const LevelInfo level_info = MakeLevelInfo(info);
+ const Extent3D size = info.size;
+ const s32 num_levels = info.resources.levels;
+
+ u32 guest_offset = 0;
+ std::vector<SwizzleParameters> params(num_levels);
+ for (s32 level = 0; level < num_levels; ++level) {
+ const Extent3D level_size = AdjustMipSize(size, level);
+ const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
+ const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+ params[level] = SwizzleParameters{
+ .num_tiles = num_tiles,
+ .block = block,
+ .buffer_offset = guest_offset,
+ .level = level,
+ };
+ guest_offset += CalculateLevelSize(level_info, level);
+ }
+ return params;
+}
+
+void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
+ std::span<const BufferImageCopy> copies, std::span<const u8> memory) {
+ const bool is_pitch_linear = info.type == ImageType::Linear;
+ for (const BufferImageCopy& copy : copies) {
+ if (is_pitch_linear) {
+ SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory);
+ } else {
+ SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory);
+ }
+ }
+}
+
+bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level,
+ u32 rhs_level, bool strict_size) noexcept {
+ ASSERT(lhs.type != ImageType::Linear);
+ ASSERT(rhs.type != ImageType::Linear);
+ if (strict_size) {
+ const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level);
+ const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level);
+ return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
+ } else {
+ const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level);
+ const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level);
+ return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
+ }
+}
+
+bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept {
+ ASSERT(lhs.type == ImageType::Linear);
+ ASSERT(rhs.type == ImageType::Linear);
+ if (strict_size) {
+ return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height;
+ } else {
+ const Extent2D lhs_size = PitchLinearAlignedSize(lhs);
+ const Extent2D rhs_size = PitchLinearAlignedSize(rhs);
+ return lhs_size == rhs_size;
+ }
+}
+
+std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
+ VAddr cpu_addr, const ImageBase& overlap,
+ bool strict_size, bool broken_views) {
+ ASSERT(new_info.type != ImageType::Linear);
+ ASSERT(overlap.info.type != ImageType::Linear);
+ if (!IsLayerStrideCompatible(new_info, overlap.info)) {
+ return std::nullopt;
+ }
+ if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) {
+ return std::nullopt;
+ }
+ if (gpu_addr == overlap.gpu_addr) {
+ const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size);
+ if (!solution) {
+ return std::nullopt;
+ }
+ return OverlapResult{
+ .gpu_addr = gpu_addr,
+ .cpu_addr = cpu_addr,
+ .resources = *solution,
+ };
+ }
+ if (overlap.gpu_addr > gpu_addr) {
+ return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
+ }
+ // if overlap.gpu_addr < gpu_addr
+ return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
+}
+
+bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
+ // If either of the layer strides is zero, we can assume they are compatible
+ // These images generally come from rendertargets
+ if (lhs.layer_stride == 0) {
+ return true;
+ }
+ if (rhs.layer_stride == 0) {
+ return true;
+ }
+ // It's definitely compatible if the layer stride matches
+ if (lhs.layer_stride == rhs.layer_stride) {
+ return true;
+ }
+ // Although we also have to compare for cases where it can be unaligned
+ // This can happen if the image doesn't have layers, so the stride is not aligned
+ if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) {
+ return true;
+ }
+ return false;
+}
+
+std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
+ GPUVAddr candidate_addr, RelaxedOptions options,
+ bool broken_views) {
+ const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
+ if (!base) {
+ return std::nullopt;
+ }
+ const ImageInfo& existing = image.info;
+ if (False(options & RelaxedOptions::Format)) {
+ if (!IsViewCompatible(existing.format, candidate.format, broken_views)) {
+ return std::nullopt;
+ }
+ }
+ if (!IsLayerStrideCompatible(existing, candidate)) {
+ return std::nullopt;
+ }
+ if (existing.type != candidate.type) {
+ return std::nullopt;
+ }
+ if (False(options & RelaxedOptions::Samples)) {
+ if (existing.num_samples != candidate.num_samples) {
+ return std::nullopt;
+ }
+ }
+ if (existing.resources.levels < candidate.resources.levels + base->level) {
+ return std::nullopt;
+ }
+ if (existing.type == ImageType::e3D) {
+ const u32 mip_depth = std::max(1U, existing.size.depth << base->level);
+ if (mip_depth < candidate.size.depth + base->layer) {
+ return std::nullopt;
+ }
+ } else {
+ if (existing.resources.layers < candidate.resources.layers + base->layer) {
+ return std::nullopt;
+ }
+ }
+ const bool strict_size = False(options & RelaxedOptions::Size);
+ if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) {
+ return std::nullopt;
+ }
+ // TODO: compare block sizes
+ return base;
+}
+
+bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
+ RelaxedOptions options, bool broken_views) {
+ return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value();
+}
+
+void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
+ const ImageBase* src) {
+ if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
+ src_info.format = src->info.format;
+ }
+ if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
+ dst_info.format = dst->info.format;
+ }
+ if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
+ dst_info.format = src->info.format;
+ }
+ if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
+ src_info.format = src->info.format;
+ }
+}
+
+u32 MapSizeBytes(const ImageBase& image) {
+ if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
+ return image.guest_size_bytes;
+ } else if (True(image.flags & ImageFlagBits::Converted)) {
+ return image.converted_size_bytes;
+ } else {
+ return image.unswizzled_size_bytes;
+ }
+}
+
+static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) ==
+ 0x7f8000);
+static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
+
+static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 1, 0, 7) ==
+ 0x2afc00);
+static_assert(CalculateLevelOffset(PixelFormat::ASTC_2D_12X12_UNORM, {8192, 4096, 1}, {0, 2, 0}, 1,
+ 0, 12) == 0x50d200);
+
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
+ 0) == 0);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
+ 1) == 0x400000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
+ 2) == 0x500000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
+ 3) == 0x540000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
+ 4) == 0x550000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
+ 5) == 0x554000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
+ 6) == 0x555000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
+ 7) == 0x555400);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
+ 8) == 0x555600);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
+ 9) == 0x555800);
+
+constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height,
+ u32 tile_width_spacing, u32 level) {
+ const Extent3D size{width, height, 1};
+ const Extent3D block{0, block_height, 0};
+ const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level);
+ return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing);
+}
+
+static_assert(ValidateLayerSize(PixelFormat::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) ==
+ 0x50d800);
+static_assert(ValidateLayerSize(PixelFormat::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000);
+static_assert(ValidateLayerSize(PixelFormat::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000);
+
+static_assert(ValidateLayerSize(PixelFormat::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000,
+ "Tile width spacing is not working");
+static_assert(ValidateLayerSize(PixelFormat::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000,
+ "Compressed tile width spacing is not working");
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
new file mode 100644
index 000000000..52a9207d6
--- /dev/null
+++ b/src/video_core/texture_cache/util.h
@@ -0,0 +1,109 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+#include <span>
+
+#include "common/common_types.h"
+
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/image_base.h"
+#include "video_core/texture_cache/image_view_base.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::TICEntry;
+
+struct OverlapResult {
+ GPUVAddr gpu_addr;
+ VAddr cpu_addr;
+ SubresourceExtent resources;
+};
+
+[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept;
+
+[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;
+
+[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept;
+
+[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept;
+
+[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept;
+
+[[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(
+ const ImageInfo& info) noexcept;
+
+[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
+
+[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
+
+[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
+
+[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC(
+ const Tegra::Texture::TICEntry& config) noexcept;
+
+[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
+
+[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
+ const ImageInfo& src,
+ SubresourceBase base);
+
+[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
+
+[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
+ GPUVAddr gpu_addr, const ImageInfo& info,
+ std::span<u8> output);
+
+[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
+ const ImageBase& image, std::span<u8> output);
+
+void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
+ std::span<BufferImageCopy> copies);
+
+[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
+
+[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
+
+[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
+
+[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
+
+void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
+ std::span<const BufferImageCopy> copies, std::span<const u8> memory);
+
+[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info,
+ const ImageInfo& overlap_info, u32 new_level,
+ u32 overlap_level, bool strict_size) noexcept;
+
+[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs,
+ bool strict_size) noexcept;
+
+[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
+ GPUVAddr gpu_addr, VAddr cpu_addr,
+ const ImageBase& overlap,
+ bool strict_size, bool broken_views);
+
+[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
+
+[[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate,
+ const ImageBase& image,
+ GPUVAddr candidate_addr,
+ RelaxedOptions options,
+ bool broken_views);
+
+[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
+ GPUVAddr candidate_addr, RelaxedOptions options,
+ bool broken_views);
+
+void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
+ const ImageBase* src);
+
+[[nodiscard]] u32 MapSizeBytes(const ImageBase& image);
+
+} // namespace VideoCommon
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 365bde2f1..3625b666c 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -18,6 +18,7 @@
#include <algorithm>
#include <cassert>
#include <cstring>
+#include <span>
#include <vector>
#include <boost/container/static_vector.hpp>
@@ -41,21 +42,24 @@ constexpr u32 Popcnt(u32 n) {
class InputBitStream {
public:
- constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
- : cur_byte{ptr}, next_bit{start_offset % 8} {}
+ constexpr explicit InputBitStream(std::span<const u8> data, size_t start_offset = 0)
+ : cur_byte{data.data()}, total_bits{data.size()}, next_bit{start_offset % 8} {}
- constexpr std::size_t GetBitsRead() const {
+ constexpr size_t GetBitsRead() const {
return bits_read;
}
constexpr bool ReadBit() {
- const bool bit = (*cur_byte >> next_bit++) & 1;
+ if (bits_read >= total_bits * 8) {
+ return 0;
+ }
+ const bool bit = ((*cur_byte >> next_bit) & 1) != 0;
+ ++next_bit;
while (next_bit >= 8) {
next_bit -= 8;
- cur_byte++;
+ ++cur_byte;
}
-
- bits_read++;
+ ++bits_read;
return bit;
}
@@ -78,8 +82,9 @@ public:
private:
const u8* cur_byte;
- std::size_t next_bit = 0;
- std::size_t bits_read = 0;
+ size_t total_bits = 0;
+ size_t next_bit = 0;
+ size_t bits_read = 0;
};
class OutputBitStream {
@@ -192,15 +197,15 @@ struct IntegerEncodedValue {
};
};
using IntegerEncodedVector = boost::container::static_vector<
- IntegerEncodedValue, 64,
+ IntegerEncodedValue, 256,
boost::container::static_vector_options<
boost::container::inplace_alignment<alignof(IntegerEncodedValue)>,
boost::container::throw_on_overflow<false>>::type>;
static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) {
// Implement the algorithm in section C.2.12
- u32 m[5];
- u32 t[5];
+ std::array<u32, 5> m;
+ std::array<u32, 5> t;
u32 T;
// Read the trit encoded block according to
@@ -600,7 +605,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
return params;
}
-static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth,
+static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
u32 blockHeight) {
// Don't actually care about the void extent, just read the bits...
for (s32 i = 0; i < 4; ++i) {
@@ -623,7 +628,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block
}
}
-static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
+static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
for (u32 j = 0; j < blockHeight; j++) {
for (u32 i = 0; i < blockWidth; i++) {
outBuf[j * blockWidth + i] = 0xFFFF00FF;
@@ -865,7 +870,7 @@ public:
}
};
-static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions,
+static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, const u32 nPartitions,
const u32 nBitsForColorData) {
// First figure out how many color values we have
u32 nValues = 0;
@@ -897,7 +902,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
// We now have enough to decode our integer sequence.
IntegerEncodedVector decodedColorValues;
- InputBitStream colorStream(data);
+ InputBitStream colorStream(data, 0);
DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
// Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1438,8 +1443,8 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
#undef READ_INT_VALUES
}
-static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight,
- u32* outBuf) {
+static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
+ const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
InputBitStream strm(inBuf);
TexelWeightParams weightParams = DecodeBlockInfo(strm);
@@ -1601,8 +1606,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
}
// Read the texel weight data..
- u8 texelWeightData[16];
- memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
+ std::array<u8, 16> texelWeightData;
+ std::ranges::copy(inBuf, texelWeightData.begin());
// Reverse everything
for (u32 i = 0; i < 8; i++) {
@@ -1618,10 +1623,12 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
// Make sure that higher non-texel bits are set to zero
const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
- texelWeightData[clearByteStart - 1] =
- texelWeightData[clearByteStart - 1] &
- static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
- memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
+ if (clearByteStart > 0 && clearByteStart <= texelWeightData.size()) {
+ texelWeightData[clearByteStart - 1] &=
+ static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
+ std::memset(texelWeightData.data() + clearByteStart, 0,
+ std::min(16U - clearByteStart, 16U));
+ }
IntegerEncodedVector texelWeightValues;
@@ -1672,36 +1679,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
namespace Tegra::Texture::ASTC {
-std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width,
- u32 block_height) {
- u32 blockIdx = 0;
+void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
+ u32 block_index = 0;
std::size_t depth_offset = 0;
- std::vector<u8> outData(height * width * depth * 4);
- for (u32 k = 0; k < depth; k++) {
- for (u32 j = 0; j < height; j += block_height) {
- for (u32 i = 0; i < width; i += block_width) {
-
- const u8* blockPtr = data + blockIdx * 16;
+ for (u32 z = 0; z < depth; z++) {
+ for (u32 y = 0; y < height; y += block_height) {
+ for (u32 x = 0; x < width; x += block_width) {
+ const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
// Blocks can be at most 12x12
- u32 uncompData[144];
+ std::array<u32, 12 * 12> uncompData;
ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
- u32 decompWidth = std::min(block_width, width - i);
- u32 decompHeight = std::min(block_height, height - j);
+ u32 decompWidth = std::min(block_width, width - x);
+ u32 decompHeight = std::min(block_height, height - y);
- u8* outRow = depth_offset + outData.data() + (j * width + i) * 4;
+ const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
for (u32 jj = 0; jj < decompHeight; jj++) {
- memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
+ std::memcpy(outRow.data() + jj * width * 4,
+ uncompData.data() + jj * block_width, decompWidth * 4);
}
-
- blockIdx++;
+ ++block_index;
}
}
depth_offset += height * width * 4;
}
-
- return outData;
}
} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 991cdba72..9105119bc 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -5,11 +5,10 @@
#pragma once
#include <cstdint>
-#include <vector>
namespace Tegra::Texture::ASTC {
-std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
- uint32_t depth, uint32_t block_width, uint32_t block_height);
+void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
deleted file mode 100644
index 962921483..000000000
--- a/src/video_core/textures/convert.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <cstring>
-#include <tuple>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/surface.h"
-#include "video_core/textures/astc.h"
-#include "video_core/textures/convert.h"
-
-namespace Tegra::Texture {
-
-using VideoCore::Surface::PixelFormat;
-
-template <bool reverse>
-void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
- union S8Z24 {
- BitField<0, 24, u32> z24;
- BitField<24, 8, u32> s8;
- };
- static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
-
- union Z24S8 {
- BitField<0, 8, u32> s8;
- BitField<8, 24, u32> z24;
- };
- static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
-
- S8Z24 s8z24_pixel{};
- Z24S8 z24s8_pixel{};
- constexpr auto bpp{
- VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)};
- for (std::size_t y = 0; y < height; ++y) {
- for (std::size_t x = 0; x < width; ++x) {
- const std::size_t offset{bpp * (y * width + x)};
- if constexpr (reverse) {
- std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
- s8z24_pixel.s8.Assign(z24s8_pixel.s8);
- s8z24_pixel.z24.Assign(z24s8_pixel.z24);
- std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
- } else {
- std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
- z24s8_pixel.s8.Assign(s8z24_pixel.s8);
- z24s8_pixel.z24.Assign(s8z24_pixel.z24);
- std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
- }
- }
- }
-}
-
-static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
- SwapS8Z24ToZ24S8<false>(data, width, height);
-}
-
-static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
- SwapS8Z24ToZ24S8<true>(data, width, height);
-}
-
-void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width,
- u32 height, u32 depth, bool convert_astc, bool convert_s8z24) {
- if (convert_astc && IsPixelFormatASTC(pixel_format)) {
- // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
- u32 block_width{};
- u32 block_height{};
- std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
- const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress(
- in_data, width, height, depth, block_width, block_height);
- std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
-
- } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
- Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
- }
-}
-
-void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
- bool convert_astc, bool convert_s8z24) {
- if (convert_astc && IsPixelFormatASTC(pixel_format)) {
- LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
- static_cast<u32>(pixel_format));
- UNREACHABLE();
-
- } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
- Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
- }
-}
-
-} // namespace Tegra::Texture
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
deleted file mode 100644
index d5d6c77bb..000000000
--- a/src/video_core/textures/convert.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace VideoCore::Surface {
-enum class PixelFormat;
-}
-
-namespace Tegra::Texture {
-
-void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format,
- u32 width, u32 height, u32 depth, bool convert_astc,
- bool convert_s8z24);
-
-void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
- u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
-
-} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 16d46a018..62685a183 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -2,204 +2,111 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
#include <cmath>
#include <cstring>
+#include <span>
+#include <utility>
+
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
+#include "common/div_ceil.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
namespace Tegra::Texture {
-namespace {
+namespace {
/**
- * This table represents the internal swizzle of a gob,
- * in format 16 bytes x 2 sector packing.
+ * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.
* Calculates the offset of an (x, y) position within a swizzled texture.
* Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188
*/
-template <std::size_t N, std::size_t M, u32 Align>
-struct alignas(64) SwizzleTable {
- static_assert(M * Align == 64, "Swizzle Table does not align to GOB");
- constexpr SwizzleTable() {
- for (u32 y = 0; y < N; ++y) {
- for (u32 x = 0; x < M; ++x) {
- const u32 x2 = x * Align;
- values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
- ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16));
- }
+constexpr SwizzleTable MakeSwizzleTableConst() {
+ SwizzleTable table{};
+ for (u32 y = 0; y < table.size(); ++y) {
+ for (u32 x = 0; x < table[0].size(); ++x) {
+ table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
+ (y % 2) * 16 + (x % 16);
}
}
- const std::array<u16, M>& operator[](std::size_t index) const {
- return values[index];
- }
- std::array<std::array<u16, M>, N> values{};
-};
+ return table;
+}
-constexpr u32 FAST_SWIZZLE_ALIGN = 16;
+constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst();
-constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>();
-constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>();
+template <bool TO_LINEAR>
+void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
+ // The origin of the transformation can be configured here, leave it as zero as the current API
+ // doesn't expose it.
+ static constexpr u32 origin_x = 0;
+ static constexpr u32 origin_y = 0;
+ static constexpr u32 origin_z = 0;
-/**
- * This function manages ALL the GOBs(Group of Bytes) Inside a single block.
- * Instead of going gob by gob, we map the coordinates inside a block and manage from
- * those. Block_Width is assumed to be 1.
- */
-void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
- const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
- const u32 y_end, const u32 z_end, const u32 tile_offset,
- const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
- const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
- std::array<u8*, 2> data_ptrs;
- u32 z_address = tile_offset;
-
- for (u32 z = z_start; z < z_end; z++) {
- u32 y_address = z_address;
- u32 pixel_base = layer_z * z + y_start * stride_x;
- for (u32 y = y_start; y < y_end; y++) {
- const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
- for (u32 x = x_start; x < x_end; x++) {
- const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]};
- const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
- data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
- data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
- std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
- }
- pixel_base += stride_x;
- if ((y + 1) % GOB_SIZE_Y == 0)
- y_address += GOB_SIZE;
- }
- z_address += xy_block_size;
- }
-}
+ // We can configure here a custom pitch
+ // As it's not exposed 'width * bpp' will be the expected pitch.
+ const u32 pitch = width * bytes_per_pixel;
+ const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
-/**
- * This function manages ALL the GOBs(Group of Bytes) Inside a single block.
- * Instead of going gob by gob, we map the coordinates inside a block and manage from
- * those. Block_Width is assumed to be 1.
- */
-void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
- const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
- const u32 y_end, const u32 z_end, const u32 tile_offset,
- const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
- const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
- std::array<u8*, 2> data_ptrs;
- u32 z_address = tile_offset;
- const u32 x_startb = x_start * bytes_per_pixel;
- const u32 x_endb = x_end * bytes_per_pixel;
-
- for (u32 z = z_start; z < z_end; z++) {
- u32 y_address = z_address;
- u32 pixel_base = layer_z * z + y_start * stride_x;
- for (u32 y = y_start; y < y_end; y++) {
- const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y];
- for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) {
- const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]};
- const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
- const u32 pixel_index{out_x + pixel_base};
- data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
- data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
- std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN);
- }
- pixel_base += stride_x;
- if ((y + 1) % GOB_SIZE_Y == 0)
- y_address += GOB_SIZE;
- }
- z_address += xy_block_size;
- }
-}
+ const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
+ const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
+ const u32 slice_size =
+ Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
-/**
- * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue.
- * The body of this function takes care of splitting the swizzled texture into blocks,
- * and managing the extents of it. Once all the parameters of a single block are obtained,
- * the function calls 'ProcessBlock' to process that particular Block.
- *
- * Documentation for the memory layout and decoding can be found at:
- * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
- */
-template <bool fast>
-void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
- const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel,
- const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth,
- const u32 width_spacing) {
- auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
- const u32 stride_x = width * out_bytes_per_pixel;
- const u32 layer_z = height * stride_x;
- const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel;
- constexpr u32 gob_elements_y = GOB_SIZE_Y;
- constexpr u32 gob_elements_z = GOB_SIZE_Z;
- const u32 block_x_elements = gob_elements_x;
- const u32 block_y_elements = gob_elements_y * block_height;
- const u32 block_z_elements = gob_elements_z * block_depth;
- const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing);
- const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements);
- const u32 blocks_on_y = div_ceil(height, block_y_elements);
- const u32 blocks_on_z = div_ceil(depth, block_z_elements);
- const u32 xy_block_size = GOB_SIZE * block_height;
- const u32 block_size = xy_block_size * block_depth;
- u32 tile_offset = 0;
- for (u32 zb = 0; zb < blocks_on_z; zb++) {
- const u32 z_start = zb * block_z_elements;
- const u32 z_end = std::min(depth, z_start + block_z_elements);
- for (u32 yb = 0; yb < blocks_on_y; yb++) {
- const u32 y_start = yb * block_y_elements;
- const u32 y_end = std::min(height, y_start + block_y_elements);
- for (u32 xb = 0; xb < blocks_on_x; xb++) {
- const u32 x_start = xb * block_x_elements;
- const u32 x_end = std::min(width, x_start + block_x_elements);
- if constexpr (fast) {
- FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
- z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
- layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
- } else {
- PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
- z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
- layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
- }
- tile_offset += block_size;
+ const u32 block_height_mask = (1U << block_height) - 1;
+ const u32 block_depth_mask = (1U << block_depth) - 1;
+ const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
+
+ for (u32 slice = 0; slice < depth; ++slice) {
+ const u32 z = slice + origin_z;
+ const u32 offset_z = (z >> block_depth) * slice_size +
+ ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
+ for (u32 line = 0; line < height; ++line) {
+ const u32 y = line + origin_y;
+ const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
+
+ const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
+ const u32 offset_y = (block_y >> block_height) * block_size +
+ ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
+
+ for (u32 column = 0; column < width; ++column) {
+ const u32 x = (column + origin_x) * bytes_per_pixel;
+ const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
+
+ const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
+ const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X];
+
+ const u32 unswizzled_offset =
+ slice * pitch * height + line * pitch + column * bytes_per_pixel;
+
+ u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
+ const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
+ std::memcpy(dst, src, bytes_per_pixel);
}
}
}
}
-
} // Anonymous namespace
-void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
- u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
- bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
- const u32 block_height_size{1U << block_height};
- const u32 block_depth_size{1U << block_depth};
- if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) {
- SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
- bytes_per_pixel, out_bytes_per_pixel, block_height_size,
- block_depth_size, width_spacing);
- } else {
- SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
- bytes_per_pixel, out_bytes_per_pixel, block_height_size,
- block_depth_size, width_spacing);
- }
+SwizzleTable MakeSwizzleTable() {
+ return SWIZZLE_TABLE;
}
-void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
- u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
- u32 block_depth, u32 width_spacing) {
- CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
- (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
- bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
- width_spacing);
+void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+ u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
+ u32 stride_alignment) {
+ Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
+ stride_alignment);
}
-std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
- u32 width, u32 height, u32 depth, u32 block_height,
- u32 block_depth, u32 width_spacing) {
- std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
- UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
- width, height, depth, block_height, block_depth, width_spacing);
- return unswizzled_data;
+void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 depth, u32 block_height, u32 block_depth,
+ u32 stride_alignment) {
+ Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
+ stride_alignment);
}
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
@@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
const u32 gob_address_y =
(dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
- const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
+ const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
for (u32 x = 0; x < subrect_width; ++x) {
const u32 dst_x = x + offset_x;
const u32 gob_address =
@@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
const u32 block_height_mask = (1U << block_height) - 1;
- const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height;
+ const u32 x_shift = GOB_SIZE_SHIFT + block_height;
for (u32 line = 0; line < line_count; ++line) {
const u32 src_y = line + origin_y;
- const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
+ const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
const u32 src_offset_y = (block_y >> block_height) * block_size +
@@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
for (u32 line = 0; line < line_count; ++line) {
- const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];
+ const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
const u32 block_y = line / GOB_SIZE_Y;
const u32 dst_offset_y =
(block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
@@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
const std::size_t gob_address_y =
(y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
- const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
+ const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
const std::size_t gob_address =
gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
@@ -310,9 +217,9 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) {
if (tiled) {
- const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, GOB_SIZE_X_SHIFT);
- const u32 aligned_height = Common::AlignBits(height, GOB_SIZE_Y_SHIFT + block_height);
- const u32 aligned_depth = Common::AlignBits(depth, GOB_SIZE_Z_SHIFT + block_depth);
+ const u32 aligned_width = Common::AlignUpLog2(width * bytes_per_pixel, GOB_SIZE_X_SHIFT);
+ const u32 aligned_height = Common::AlignUpLog2(height, GOB_SIZE_Y_SHIFT + block_height);
+ const u32 aligned_depth = Common::AlignUpLog2(depth, GOB_SIZE_Z_SHIFT + block_depth);
return aligned_width * aligned_height * aligned_depth;
} else {
return width * height * depth * bytes_per_pixel;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 01e156bc8..d7cdc81e8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -4,7 +4,8 @@
#pragma once
-#include <vector>
+#include <span>
+
#include "common/common_types.h"
#include "video_core/textures/texture.h"
@@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8;
constexpr u32 GOB_SIZE_Z = 1;
constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
-constexpr std::size_t GOB_SIZE_X_SHIFT = 6;
-constexpr std::size_t GOB_SIZE_Y_SHIFT = 3;
-constexpr std::size_t GOB_SIZE_Z_SHIFT = 0;
-constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
-
-/// Unswizzles a swizzled texture without changing its format.
-void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
- u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
- u32 block_height = TICEntry::DefaultBlockHeight,
- u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
-
-/// Unswizzles a swizzled texture without changing its format.
-std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
- u32 width, u32 height, u32 depth,
- u32 block_height = TICEntry::DefaultBlockHeight,
- u32 block_depth = TICEntry::DefaultBlockHeight,
- u32 width_spacing = 0);
-
-/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.
-void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
- u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
- bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
+constexpr u32 GOB_SIZE_X_SHIFT = 6;
+constexpr u32 GOB_SIZE_Y_SHIFT = 3;
+constexpr u32 GOB_SIZE_Z_SHIFT = 0;
+constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
+
+using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>;
+
+/// Returns a z-order swizzle table
+SwizzleTable MakeSwizzleTable();
+
+/// Unswizzles a block linear texture into linear memory.
+void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+ u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
+ u32 stride_alignment = 1);
+
+/// Swizzles linear memory into a block linear texture.
+void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 depth, u32 block_height, u32 block_depth,
+ u32 stride_alignment = 1);
/// This function calculates the correct size of a texture depending if it's tiled or not.
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 4171e3ef2..ae5621a7d 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -5,9 +5,13 @@
#include <algorithm>
#include <array>
+#include "common/cityhash.h"
#include "core/settings.h"
#include "video_core/textures/texture.h"
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+
namespace Tegra::Texture {
namespace {
@@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept {
} // Anonymous namespace
-std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
+std::array<float, 4> TSCEntry::BorderColor() const noexcept {
if (!srgb_conversion) {
return border_color;
}
@@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
}
-float TSCEntry::GetMaxAnisotropy() const noexcept {
+float TSCEntry::MaxAnisotropy() const noexcept {
return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
}
} // namespace Tegra::Texture
+
+size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept {
+ return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic);
+}
+
+size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept {
+ return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc);
+}
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 0574fef12..c1d14335e 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -53,27 +53,27 @@ enum class TextureFormat : u32 {
BC4 = 0x27,
BC5 = 0x28,
S8D24 = 0x29,
- X8Z24 = 0x2a,
+ X8D24 = 0x2a,
D24S8 = 0x2b,
- X4V4Z24__COV4R4V = 0x2c,
- X4V4Z24__COV8R8V = 0x2d,
- V8Z24__COV4R12V = 0x2e,
+ X4V4D24__COV4R4V = 0x2c,
+ X4V4D24__COV8R8V = 0x2d,
+ V8D24__COV4R12V = 0x2e,
D32 = 0x2f,
D32S8 = 0x30,
- X8Z24_X20V4S8__COV4R4V = 0x31,
- X8Z24_X20V4S8__COV8R8V = 0x32,
- ZF32_X20V4X8__COV4R4V = 0x33,
- ZF32_X20V4X8__COV8R8V = 0x34,
- ZF32_X20V4S8__COV4R4V = 0x35,
- ZF32_X20V4S8__COV8R8V = 0x36,
- X8Z24_X16V8S8__COV4R12V = 0x37,
- ZF32_X16V8X8__COV4R12V = 0x38,
- ZF32_X16V8S8__COV4R12V = 0x39,
+ X8D24_X20V4S8__COV4R4V = 0x31,
+ X8D24_X20V4S8__COV8R8V = 0x32,
+ D32_X20V4X8__COV4R4V = 0x33,
+ D32_X20V4X8__COV8R8V = 0x34,
+ D32_X20V4S8__COV4R4V = 0x35,
+ D32_X20V4S8__COV8R8V = 0x36,
+ X8D24_X16V8S8__COV4R12V = 0x37,
+ D32_X16V8X8__COV4R12V = 0x38,
+ D32_X16V8S8__COV4R12V = 0x39,
D16 = 0x3a,
- V8Z24__COV8R24V = 0x3b,
- X8Z24_X16V8S8__COV8R24V = 0x3c,
- ZF32_X16V8X8__COV8R24V = 0x3d,
- ZF32_X16V8S8__COV8R24V = 0x3e,
+ V8D24__COV8R24V = 0x3b,
+ X8D24_X16V8S8__COV8R24V = 0x3c,
+ D32_X16V8X8__COV8R24V = 0x3d,
+ D32_X16V8S8__COV8R24V = 0x3e,
ASTC_2D_4X4 = 0x40,
ASTC_2D_5X5 = 0x41,
ASTC_2D_6X6 = 0x42,
@@ -146,7 +146,7 @@ enum class MsaaMode : u32 {
};
union TextureHandle {
- TextureHandle(u32 raw) : raw{raw} {}
+ /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {}
u32 raw;
BitField<0, 20, u32> tic_id;
@@ -155,124 +155,124 @@ union TextureHandle {
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
struct TICEntry {
- static constexpr u32 DefaultBlockHeight = 16;
- static constexpr u32 DefaultBlockDepth = 1;
-
- union {
- u32 raw;
- BitField<0, 7, TextureFormat> format;
- BitField<7, 3, ComponentType> r_type;
- BitField<10, 3, ComponentType> g_type;
- BitField<13, 3, ComponentType> b_type;
- BitField<16, 3, ComponentType> a_type;
-
- BitField<19, 3, SwizzleSource> x_source;
- BitField<22, 3, SwizzleSource> y_source;
- BitField<25, 3, SwizzleSource> z_source;
- BitField<28, 3, SwizzleSource> w_source;
- };
- u32 address_low;
union {
- BitField<0, 16, u32> address_high;
- BitField<21, 3, TICHeaderVersion> header_version;
- };
- union {
- BitField<0, 3, u32> block_width;
- BitField<3, 3, u32> block_height;
- BitField<6, 3, u32> block_depth;
+ struct {
+ union {
+ BitField<0, 7, TextureFormat> format;
+ BitField<7, 3, ComponentType> r_type;
+ BitField<10, 3, ComponentType> g_type;
+ BitField<13, 3, ComponentType> b_type;
+ BitField<16, 3, ComponentType> a_type;
+
+ BitField<19, 3, SwizzleSource> x_source;
+ BitField<22, 3, SwizzleSource> y_source;
+ BitField<25, 3, SwizzleSource> z_source;
+ BitField<28, 3, SwizzleSource> w_source;
+ };
+ u32 address_low;
+ union {
+ BitField<0, 16, u32> address_high;
+ BitField<16, 5, u32> layer_base_3_7;
+ BitField<21, 3, TICHeaderVersion> header_version;
+ BitField<24, 1, u32> load_store_hint;
+ BitField<25, 4, u32> view_coherency_hash;
+ BitField<29, 3, u32> layer_base_8_10;
+ };
+ union {
+ BitField<0, 3, u32> block_width;
+ BitField<3, 3, u32> block_height;
+ BitField<6, 3, u32> block_depth;
- BitField<10, 3, u32> tile_width_spacing;
+ BitField<10, 3, u32> tile_width_spacing;
- // High 16 bits of the pitch value
- BitField<0, 16, u32> pitch_high;
- BitField<26, 1, u32> use_header_opt_control;
- BitField<27, 1, u32> depth_texture;
- BitField<28, 4, u32> max_mip_level;
+ // High 16 bits of the pitch value
+ BitField<0, 16, u32> pitch_high;
+ BitField<26, 1, u32> use_header_opt_control;
+ BitField<27, 1, u32> depth_texture;
+ BitField<28, 4, u32> max_mip_level;
- BitField<0, 16, u32> buffer_high_width_minus_one;
- };
- union {
- BitField<0, 16, u32> width_minus_1;
- BitField<22, 1, u32> srgb_conversion;
- BitField<23, 4, TextureType> texture_type;
- BitField<29, 3, u32> border_size;
+ BitField<0, 16, u32> buffer_high_width_minus_one;
+ };
+ union {
+ BitField<0, 16, u32> width_minus_one;
+ BitField<16, 3, u32> layer_base_0_2;
+ BitField<22, 1, u32> srgb_conversion;
+ BitField<23, 4, TextureType> texture_type;
+ BitField<29, 3, u32> border_size;
- BitField<0, 16, u32> buffer_low_width_minus_one;
- };
- union {
- BitField<0, 16, u32> height_minus_1;
- BitField<16, 14, u32> depth_minus_1;
- };
- union {
- BitField<6, 13, u32> mip_lod_bias;
- BitField<27, 3, u32> max_anisotropy;
+ BitField<0, 16, u32> buffer_low_width_minus_one;
+ };
+ union {
+ BitField<0, 16, u32> height_minus_1;
+ BitField<16, 14, u32> depth_minus_1;
+ BitField<30, 1, u32> is_sparse;
+ BitField<31, 1, u32> normalized_coords;
+ };
+ union {
+ BitField<6, 13, u32> mip_lod_bias;
+ BitField<27, 3, u32> max_anisotropy;
+ };
+ union {
+ BitField<0, 4, u32> res_min_mip_level;
+ BitField<4, 4, u32> res_max_mip_level;
+ BitField<8, 4, MsaaMode> msaa_mode;
+ BitField<12, 12, u32> min_lod_clamp;
+ };
+ };
+ std::array<u64, 4> raw;
};
- union {
- BitField<0, 4, u32> res_min_mip_level;
- BitField<4, 4, u32> res_max_mip_level;
- BitField<8, 4, MsaaMode> msaa_mode;
- BitField<12, 12, u32> min_lod_clamp;
- };
+ constexpr bool operator==(const TICEntry& rhs) const noexcept {
+ return raw == rhs.raw;
+ }
- GPUVAddr Address() const {
+ constexpr bool operator!=(const TICEntry& rhs) const noexcept {
+ return raw != rhs.raw;
+ }
+
+ constexpr GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
}
- u32 Pitch() const {
+ constexpr u32 Pitch() const {
ASSERT(header_version == TICHeaderVersion::Pitch ||
header_version == TICHeaderVersion::PitchColorKey);
// The pitch value is 21 bits, and is 32B aligned.
return pitch_high << 5;
}
- u32 Width() const {
+ constexpr u32 Width() const {
if (header_version != TICHeaderVersion::OneDBuffer) {
- return width_minus_1 + 1;
+ return width_minus_one + 1;
}
- return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1;
+ return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1;
}
- u32 Height() const {
+ constexpr u32 Height() const {
return height_minus_1 + 1;
}
- u32 Depth() const {
+ constexpr u32 Depth() const {
return depth_minus_1 + 1;
}
- u32 BlockWidth() const {
- ASSERT(IsTiled());
- return block_width;
- }
-
- u32 BlockHeight() const {
- ASSERT(IsTiled());
- return block_height;
- }
-
- u32 BlockDepth() const {
- ASSERT(IsTiled());
- return block_depth;
+ constexpr u32 BaseLayer() const {
+ return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8;
}
- bool IsTiled() const {
+ constexpr bool IsBlockLinear() const {
return header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey;
}
- bool IsLineal() const {
+ constexpr bool IsPitchLinear() const {
return header_version == TICHeaderVersion::Pitch ||
header_version == TICHeaderVersion::PitchColorKey;
}
- bool IsBuffer() const {
+ constexpr bool IsBuffer() const {
return header_version == TICHeaderVersion::OneDBuffer;
}
-
- bool IsSrgbConversionEnabled() const {
- return srgb_conversion != 0;
- }
};
static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
@@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 {
Linear = 3,
};
+enum class SamplerReduction : u32 {
+ WeightedAverage = 0,
+ Min = 1,
+ Max = 2,
+};
+
enum class Anisotropy {
Default,
Filter2x,
@@ -333,8 +339,12 @@ struct TSCEntry {
BitField<0, 2, TextureFilter> mag_filter;
BitField<4, 2, TextureFilter> min_filter;
BitField<6, 2, TextureMipmapFilter> mipmap_filter;
+ BitField<8, 1, u32> cubemap_anisotropy;
BitField<9, 1, u32> cubemap_interface_filtering;
+ BitField<10, 2, SamplerReduction> reduction_filter;
BitField<12, 13, u32> mip_lod_bias;
+ BitField<25, 1, u32> float_coord_normalization;
+ BitField<26, 5, u32> trilin_opt;
};
union {
BitField<0, 12, u32> min_lod_clamp;
@@ -347,32 +357,45 @@ struct TSCEntry {
};
std::array<f32, 4> border_color;
};
- std::array<u8, 0x20> raw;
+ std::array<u64, 4> raw;
};
- std::array<float, 4> GetBorderColor() const noexcept;
+ constexpr bool operator==(const TSCEntry& rhs) const noexcept {
+ return raw == rhs.raw;
+ }
+
+ constexpr bool operator!=(const TSCEntry& rhs) const noexcept {
+ return raw != rhs.raw;
+ }
+
+ std::array<float, 4> BorderColor() const noexcept;
- float GetMaxAnisotropy() const noexcept;
+ float MaxAnisotropy() const noexcept;
- float GetMinLod() const {
+ float MinLod() const {
return static_cast<float>(min_lod_clamp) / 256.0f;
}
- float GetMaxLod() const {
+ float MaxLod() const {
return static_cast<float>(max_lod_clamp) / 256.0f;
}
- float GetLodBias() const {
+ float LodBias() const {
// Sign extend the 13-bit value.
- constexpr u32 mask = 1U << (13 - 1);
+ static constexpr u32 mask = 1U << (13 - 1);
return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
}
};
static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
-struct FullTextureInfo {
- TICEntry tic;
- TSCEntry tsc;
+} // namespace Tegra::Texture
+
+template <>
+struct std::hash<Tegra::Texture::TICEntry> {
+ size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept;
};
-} // namespace Tegra::Texture
+template <>
+struct std::hash<Tegra::Texture::TSCEntry> {
+ size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept;
+};
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 4e3a092c7..e1b38c6ac 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -7,13 +7,9 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/settings.h"
-#include "video_core/gpu_asynch.h"
-#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
-#ifdef HAS_VULKAN
#include "video_core/renderer_vulkan/renderer_vulkan.h"
-#endif
#include "video_core/video_core.h"
namespace {
@@ -21,15 +17,16 @@ namespace {
std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
std::unique_ptr<Core::Frontend::GraphicsContext> context) {
+ auto& telemetry_session = system.TelemetrySession();
+ auto& cpu_memory = system.Memory();
+
switch (Settings::values.renderer_backend.GetValue()) {
case Settings::RendererBackend::OpenGL:
- return std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, gpu,
- std::move(context));
-#ifdef HAS_VULKAN
+ return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory,
+ gpu, std::move(context));
case Settings::RendererBackend::Vulkan:
- return std::make_unique<Vulkan::RendererVulkan>(system, emu_window, gpu,
- std::move(context));
-#endif
+ return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory,
+ gpu, std::move(context));
default:
return nullptr;
}
@@ -40,23 +37,19 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
- std::unique_ptr<Tegra::GPU> gpu;
- if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- gpu = std::make_unique<VideoCommon::GPUAsynch>(system);
- } else {
- gpu = std::make_unique<VideoCommon::GPUSynch>(system);
- }
-
+ const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
+ const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
+ auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
auto context = emu_window.CreateSharedContext();
- const auto scope = context->Acquire();
-
- auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
- if (!renderer->Init()) {
+ auto scope = context->Acquire();
+ try {
+ auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
+ gpu->BindRenderer(std::move(renderer));
+ return gpu;
+ } catch (const std::runtime_error& exception) {
+ LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
return nullptr;
}
-
- gpu->BindRenderer(std::move(renderer));
- return gpu;
}
u16 GetResolutionScaleFactor(const RendererBase& renderer) {
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
index 5b01020ec..7a9d00d4f 100644
--- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
@@ -12,40 +12,22 @@
#include <fmt/format.h>
-#define VK_NO_PROTOTYPES
-#include <vulkan/vulkan.h>
-
-#include <GFSDK_Aftermath.h>
-#include <GFSDK_Aftermath_Defines.h>
-#include <GFSDK_Aftermath_GpuCrashDump.h>
-#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
-
#include "common/common_paths.h"
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
-
-#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
+#include "video_core/vulkan_common/nsight_aftermath_tracker.h"
namespace Vulkan {
static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll";
-NsightAftermathTracker::NsightAftermathTracker() = default;
-
-NsightAftermathTracker::~NsightAftermathTracker() {
- if (initialized) {
- (void)GFSDK_Aftermath_DisableGpuCrashDumps();
- }
-}
-
-bool NsightAftermathTracker::Initialize() {
+NsightAftermathTracker::NsightAftermathTracker() {
if (!dl.Open(AFTERMATH_LIB_NAME)) {
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL");
- return false;
+ return;
}
-
if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps",
&GFSDK_Aftermath_DisableGpuCrashDumps) ||
!dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps",
@@ -62,29 +44,30 @@ bool NsightAftermathTracker::Initialize() {
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON",
&GFSDK_Aftermath_GpuCrashDump_GetJSON)) {
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers");
- return false;
+ return;
}
-
dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash";
- (void)Common::FS::DeleteDirRecursively(dump_dir);
+ void(Common::FS::DeleteDirRecursively(dump_dir));
if (!Common::FS::CreateDir(dump_dir)) {
LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
- return false;
+ return;
}
-
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps(
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback,
ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) {
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed");
- return false;
+ return;
}
-
LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir);
-
initialized = true;
- return true;
+}
+
+NsightAftermathTracker::~NsightAftermathTracker() {
+ if (initialized) {
+ (void)GFSDK_Aftermath_DisableGpuCrashDumps();
+ }
}
void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
index afe7ae99e..1ce8d4e8e 100644
--- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
@@ -8,8 +8,9 @@
#include <string>
#include <vector>
-#define VK_NO_PROTOTYPES
-#include <vulkan/vulkan.h>
+#include "common/common_types.h"
+#include "common/dynamic_library.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
#ifdef HAS_NSIGHT_AFTERMATH
#include <GFSDK_Aftermath_Defines.h>
@@ -17,9 +18,6 @@
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
#endif
-#include "common/common_types.h"
-#include "common/dynamic_library.h"
-
namespace Vulkan {
class NsightAftermathTracker {
@@ -34,8 +32,6 @@ public:
NsightAftermathTracker(NsightAftermathTracker&&) = delete;
NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
- bool Initialize();
-
void SaveShader(const std::vector<u32>& spirv) const;
private:
@@ -78,9 +74,6 @@ private:
#ifndef HAS_NSIGHT_AFTERMATH
inline NsightAftermathTracker::NsightAftermathTracker() = default;
inline NsightAftermathTracker::~NsightAftermathTracker() = default;
-inline bool NsightAftermathTracker::Initialize() {
- return false;
-}
inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
#endif
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
new file mode 100644
index 000000000..5c64c9bf7
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
@@ -0,0 +1,46 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+#include "common/logging/log.h"
+#include "video_core/vulkan_common/vulkan_debug_callback.h"
+
+namespace Vulkan {
+namespace {
+VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
+ VkDebugUtilsMessageTypeFlagsEXT type,
+ const VkDebugUtilsMessengerCallbackDataEXT* data,
+ [[maybe_unused]] void* user_data) {
+ const std::string_view message{data->pMessage};
+ if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
+ LOG_CRITICAL(Render_Vulkan, "{}", message);
+ } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
+ LOG_WARNING(Render_Vulkan, "{}", message);
+ } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
+ LOG_INFO(Render_Vulkan, "{}", message);
+ } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
+ LOG_DEBUG(Render_Vulkan, "{}", message);
+ }
+ return VK_FALSE;
+}
+} // Anonymous namespace
+
+vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) {
+ return instance.CreateDebugUtilsMessenger(VkDebugUtilsMessengerCreateInfoEXT{
+ .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .flags = 0,
+ .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
+ .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
+ .pfnUserCallback = Callback,
+ .pUserData = nullptr,
+ });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h
new file mode 100644
index 000000000..b0519f132
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.h
@@ -0,0 +1,13 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance);
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 030b4dbd3..34d396434 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -13,39 +13,47 @@
#include "common/assert.h"
#include "core/settings.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/nsight_aftermath_tracker.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
namespace {
-
namespace Alternatives {
-
-constexpr std::array Depth24UnormS8_UINT{
+constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_D16_UNORM_S8_UINT,
- VkFormat{},
+ VK_FORMAT_UNDEFINED,
};
-constexpr std::array Depth16UnormS8_UINT{
+constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT_S8_UINT,
- VkFormat{},
+ VK_FORMAT_UNDEFINED,
};
-
} // namespace Alternatives
constexpr std::array REQUIRED_EXTENSIONS{
- VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+ VK_KHR_MAINTENANCE1_EXTENSION_NAME,
+ VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
+ VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
+ VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
+ VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
+ VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
+#ifdef _WIN32
+ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
+#endif
+#ifdef __linux__
+ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
+#endif
};
template <typename T>
@@ -57,9 +65,9 @@ void SetNext(void**& next, T& data) {
constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
switch (format) {
case VK_FORMAT_D24_UNORM_S8_UINT:
- return Alternatives::Depth24UnormS8_UINT.data();
+ return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();
case VK_FORMAT_D16_UNORM_S8_UINT:
- return Alternatives::Depth16UnormS8_UINT.data();
+ return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data();
default:
return nullptr;
}
@@ -78,8 +86,7 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
}
}
-std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
- vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
+std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::PhysicalDevice physical) {
static constexpr std::array formats{
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_A8B8G8R8_UINT_PACK32,
@@ -103,6 +110,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_R16G16_UNORM,
VK_FORMAT_R16G16_SNORM,
VK_FORMAT_R16G16_SFLOAT,
+ VK_FORMAT_R16G16_SINT,
VK_FORMAT_R16_UNORM,
VK_FORMAT_R16_UINT,
VK_FORMAT_R8G8B8A8_SRGB,
@@ -142,18 +150,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_BC2_SRGB_BLOCK,
VK_FORMAT_BC3_SRGB_BLOCK,
VK_FORMAT_BC7_SRGB_BLOCK,
+ VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
+ VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
+ VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
+ VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
+ VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
@@ -171,84 +193,87 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
} // Anonymous namespace
-VKDevice::VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
- const vk::InstanceDispatch& dld)
- : dld{dld}, physical{physical}, properties{physical.GetProperties()},
- format_properties{GetFormatProperties(physical, dld)} {
+Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
+ const vk::InstanceDispatch& dld_)
+ : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
+ format_properties{GetFormatProperties(physical)} {
+ CheckSuitability(surface != nullptr);
SetupFamilies(surface);
SetupFeatures();
-}
-
-VKDevice::~VKDevice() = default;
-bool VKDevice::Create() {
const auto queue_cis = GetDeviceQueueCreateInfos();
- const std::vector extensions = LoadExtensions();
+ const std::vector extensions = LoadExtensions(surface != nullptr);
VkPhysicalDeviceFeatures2 features2{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = nullptr,
+ .features{
+ .robustBufferAccess = true,
+ .fullDrawIndexUint32 = false,
+ .imageCubeArray = true,
+ .independentBlend = true,
+ .geometryShader = true,
+ .tessellationShader = true,
+ .sampleRateShading = false,
+ .dualSrcBlend = false,
+ .logicOp = false,
+ .multiDrawIndirect = false,
+ .drawIndirectFirstInstance = false,
+ .depthClamp = true,
+ .depthBiasClamp = true,
+ .fillModeNonSolid = false,
+ .depthBounds = false,
+ .wideLines = false,
+ .largePoints = true,
+ .alphaToOne = false,
+ .multiViewport = true,
+ .samplerAnisotropy = true,
+ .textureCompressionETC2 = false,
+ .textureCompressionASTC_LDR = is_optimal_astc_supported,
+ .textureCompressionBC = false,
+ .occlusionQueryPrecise = true,
+ .pipelineStatisticsQuery = false,
+ .vertexPipelineStoresAndAtomics = true,
+ .fragmentStoresAndAtomics = true,
+ .shaderTessellationAndGeometryPointSize = false,
+ .shaderImageGatherExtended = true,
+ .shaderStorageImageExtendedFormats = false,
+ .shaderStorageImageMultisample = is_shader_storage_image_multisample,
+ .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
+ .shaderStorageImageWriteWithoutFormat = true,
+ .shaderUniformBufferArrayDynamicIndexing = false,
+ .shaderSampledImageArrayDynamicIndexing = false,
+ .shaderStorageBufferArrayDynamicIndexing = false,
+ .shaderStorageImageArrayDynamicIndexing = false,
+ .shaderClipDistance = false,
+ .shaderCullDistance = false,
+ .shaderFloat64 = false,
+ .shaderInt64 = false,
+ .shaderInt16 = false,
+ .shaderResourceResidency = false,
+ .shaderResourceMinLod = false,
+ .sparseBinding = false,
+ .sparseResidencyBuffer = false,
+ .sparseResidencyImage2D = false,
+ .sparseResidencyImage3D = false,
+ .sparseResidency2Samples = false,
+ .sparseResidency4Samples = false,
+ .sparseResidency8Samples = false,
+ .sparseResidency16Samples = false,
+ .sparseResidencyAliased = false,
+ .variableMultisampleRate = false,
+ .inheritedQueries = false,
+ },
};
const void* first_next = &features2;
void** next = &features2.pNext;
- features2.features = {
- .robustBufferAccess = false,
- .fullDrawIndexUint32 = false,
- .imageCubeArray = false,
- .independentBlend = true,
- .geometryShader = true,
- .tessellationShader = true,
- .sampleRateShading = false,
- .dualSrcBlend = false,
- .logicOp = false,
- .multiDrawIndirect = false,
- .drawIndirectFirstInstance = false,
- .depthClamp = true,
- .depthBiasClamp = true,
- .fillModeNonSolid = false,
- .depthBounds = false,
- .wideLines = false,
- .largePoints = true,
- .alphaToOne = false,
- .multiViewport = true,
- .samplerAnisotropy = true,
- .textureCompressionETC2 = false,
- .textureCompressionASTC_LDR = is_optimal_astc_supported,
- .textureCompressionBC = false,
- .occlusionQueryPrecise = true,
- .pipelineStatisticsQuery = false,
- .vertexPipelineStoresAndAtomics = true,
- .fragmentStoresAndAtomics = true,
- .shaderTessellationAndGeometryPointSize = false,
- .shaderImageGatherExtended = true,
- .shaderStorageImageExtendedFormats = false,
- .shaderStorageImageMultisample = false,
- .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
- .shaderStorageImageWriteWithoutFormat = true,
- .shaderUniformBufferArrayDynamicIndexing = false,
- .shaderSampledImageArrayDynamicIndexing = false,
- .shaderStorageBufferArrayDynamicIndexing = false,
- .shaderStorageImageArrayDynamicIndexing = false,
- .shaderClipDistance = false,
- .shaderCullDistance = false,
- .shaderFloat64 = false,
- .shaderInt64 = false,
- .shaderInt16 = false,
- .shaderResourceResidency = false,
- .shaderResourceMinLod = false,
- .sparseBinding = false,
- .sparseResidencyBuffer = false,
- .sparseResidencyImage2D = false,
- .sparseResidencyImage3D = false,
- .sparseResidency2Samples = false,
- .sparseResidency4Samples = false,
- .sparseResidency8Samples = false,
- .sparseResidency16Samples = false,
- .sparseResidencyAliased = false,
- .variableMultisampleRate = false,
- .inheritedQueries = false,
+ VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
+ .pNext = nullptr,
+ .timelineSemaphore = true,
};
+ SetNext(next, timeline_semaphore);
VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
@@ -271,6 +296,7 @@ bool VKDevice::Create() {
VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT,
+ .pNext = nullptr,
.hostQueryReset = true,
};
SetNext(next, host_query_reset);
@@ -360,7 +386,7 @@ bool VKDevice::Create() {
VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
if (nv_device_diagnostics_config) {
- nsight_aftermath_tracker.Initialize();
+ nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>();
diagnostics_nv = {
.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV,
@@ -371,24 +397,33 @@ bool VKDevice::Create() {
};
first_next = &diagnostics_nv;
}
-
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
- if (!logical) {
- LOG_ERROR(Render_Vulkan, "Failed to create logical device");
- return false;
- }
CollectTelemetryParameters();
+ CollectToolingInfo();
+
+ if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
+ LOG_WARNING(
+ Render_Vulkan,
+ "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu");
+ ext_extended_dynamic_state = false;
+ }
+ if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
+ LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
+ is_float16_supported = false;
+ }
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
- return true;
}
-VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
- FormatType format_type) const {
+Device::~Device() = default;
+
+VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
+ FormatType format_type) const {
if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
return wanted_format;
}
@@ -419,18 +454,20 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla
return wanted_format;
}
-void VKDevice::ReportLoss() const {
+void Device::ReportLoss() const {
LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
// Wait for the log to flush and for Nsight Aftermath to dump the results
- std::this_thread::sleep_for(std::chrono::seconds{3});
+ std::this_thread::sleep_for(std::chrono::seconds{15});
}
-void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
- nsight_aftermath_tracker.SaveShader(spirv);
+void Device::SaveShader(const std::vector<u32>& spirv) const {
+ if (nsight_aftermath_tracker) {
+ nsight_aftermath_tracker->SaveShader(spirv);
+ }
}
-bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
+bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
// Disable for now to avoid converting ASTC twice.
static constexpr std::array astc_formats = {
VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
@@ -456,16 +493,26 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features)
VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
for (const auto format : astc_formats) {
- const auto format_properties{physical.GetFormatProperties(format)};
- if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
+ const auto physical_format_properties{physical.GetFormatProperties(format)};
+ if ((physical_format_properties.optimalTilingFeatures & format_feature_usage) == 0) {
return false;
}
}
return true;
}
-bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
- FormatType format_type) const {
+bool Device::TestDepthStencilBlits() const {
+ static constexpr VkFormatFeatureFlags required_features =
+ VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ const auto test_features = [](VkFormatProperties props) {
+ return (props.optimalTilingFeatures & required_features) == required_features;
+ };
+ return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) &&
+ test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT));
+}
+
+bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
+ FormatType format_type) const {
const auto it = format_properties.find(wanted_format);
if (it == format_properties.end()) {
UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format);
@@ -475,65 +522,65 @@ bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wa
return (supported_usage & wanted_usage) == wanted_usage;
}
-bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
- bool is_suitable = true;
+void Device::CheckSuitability(bool requires_swapchain) const {
std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
-
- for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) {
- for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
+ bool has_swapchain = false;
+ for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) {
+ const std::string_view name{property.extensionName};
+ for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
if (available_extensions[i]) {
continue;
}
- const std::string_view name{prop.extensionName};
available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
}
+ has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME;
}
- if (!available_extensions.all()) {
- for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
- if (available_extensions[i]) {
- continue;
- }
- LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
- is_suitable = false;
- }
- }
-
- bool has_graphics{}, has_present{};
- const std::vector queue_family_properties = physical.GetQueueFamilyProperties();
- for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
- const auto& family = queue_family_properties[i];
- if (family.queueCount == 0) {
+ for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
+ if (available_extensions[i]) {
continue;
}
- has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT;
- has_present |= physical.GetSurfaceSupportKHR(i, surface);
+ LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
+ throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
}
- if (!has_graphics || !has_present) {
- LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
- is_suitable = false;
+ if (requires_swapchain && !has_swapchain) {
+ LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain");
+ throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
}
- // TODO(Rodrigo): Check if the device matches all requeriments.
- const auto properties{physical.GetProperties()};
- const auto& limits{properties.limits};
-
- constexpr u32 required_ubo_size = 65536;
- if (limits.maxUniformBufferRange < required_ubo_size) {
- LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required",
- limits.maxUniformBufferRange, required_ubo_size);
- is_suitable = false;
+ struct LimitTuple {
+ u32 minimum;
+ u32 value;
+ const char* name;
+ };
+ const VkPhysicalDeviceLimits& limits{properties.limits};
+ const std::array limits_report{
+ LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
+ LimitTuple{16, limits.maxViewports, "maxViewports"},
+ LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"},
+ LimitTuple{8, limits.maxClipDistances, "maxClipDistances"},
+ };
+ for (const auto& tuple : limits_report) {
+ if (tuple.value < tuple.minimum) {
+ LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name,
+ tuple.minimum, tuple.value);
+ throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
+ }
}
+ VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
+ robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
- constexpr u32 required_num_viewports = 16;
- if (limits.maxViewports < required_num_viewports) {
- LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required",
- limits.maxViewports, required_num_viewports);
- is_suitable = false;
- }
+ VkPhysicalDeviceFeatures2 features2{};
+ features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+ features2.pNext = &robustness2;
+
+ physical.GetFeatures2KHR(features2);
- const auto features{physical.GetFeatures()};
- const std::array feature_report = {
+ const VkPhysicalDeviceFeatures& features{features2.features};
+ const std::array feature_report{
+ std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
+ std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
+ std::make_pair(features.imageCubeArray, "imageCubeArray"),
std::make_pair(features.independentBlend, "independentBlend"),
std::make_pair(features.depthClamp, "depthClamp"),
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
@@ -547,76 +594,70 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
"shaderStorageImageWriteWithoutFormat"),
+ std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
+ std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
+ std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
};
- for (const auto& [supported, name] : feature_report) {
- if (supported) {
+ for (const auto& [is_supported, name] : feature_report) {
+ if (is_supported) {
continue;
}
LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
- is_suitable = false;
+ throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
}
-
- if (!is_suitable) {
- LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName);
- }
-
- return is_suitable;
}
-std::vector<const char*> VKDevice::LoadExtensions() {
+std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
std::vector<const char*> extensions;
- const auto Test = [&](const VkExtensionProperties& extension,
- std::optional<std::reference_wrapper<bool>> status, const char* name,
- bool push) {
- if (extension.extensionName != std::string_view(name)) {
- return;
- }
- if (push) {
- extensions.push_back(name);
- }
- if (status) {
- status->get() = true;
- }
- };
-
- extensions.reserve(7 + REQUIRED_EXTENSIONS.size());
+ extensions.reserve(8 + REQUIRED_EXTENSIONS.size());
extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
+ if (requires_surface) {
+ extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+ }
bool has_khr_shader_float16_int8{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
bool has_ext_extended_dynamic_state{};
- for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) {
- Test(extension, nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
- Test(extension, khr_uniform_buffer_standard_layout,
+ for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
+ const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
+ bool push) {
+ if (extension.extensionName != std::string_view(name)) {
+ return;
+ }
+ if (push) {
+ extensions.push_back(name);
+ }
+ if (status) {
+ status->get() = true;
+ }
+ };
+ test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
+ test(khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
- Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
- false);
- Test(extension, ext_depth_range_unrestricted,
- VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
- Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
- Test(extension, ext_shader_viewport_index_layer,
- VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
- Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
- false);
- Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
- false);
- Test(extension, has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME,
- false);
- Test(extension, has_ext_extended_dynamic_state,
- VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
+ test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
+ test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
+ test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
+ test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
+ test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
+ true);
+ test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
+ test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
+ test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
+ test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
+ test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
+ test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
if (Settings::values.renderer_debug) {
- Test(extension, nv_device_diagnostics_config,
- VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true);
+ test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
+ true);
}
}
-
VkPhysicalDeviceFeatures2KHR features;
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
- VkPhysicalDeviceProperties2KHR properties;
- properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+ VkPhysicalDeviceProperties2KHR physical_properties;
+ physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
if (has_khr_shader_float16_int8) {
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features;
@@ -628,7 +669,6 @@ std::vector<const char*> VKDevice::LoadExtensions() {
is_float16_supported = float16_int8_features.shaderFloat16;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
-
if (has_ext_subgroup_size_control) {
VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features;
subgroup_features.sType =
@@ -641,8 +681,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
subgroup_properties.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
subgroup_properties.pNext = nullptr;
- properties.pNext = &subgroup_properties;
- physical.GetProperties2KHR(properties);
+ physical_properties.pNext = &subgroup_properties;
+ physical.GetProperties2KHR(physical_properties);
is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize;
@@ -655,7 +695,6 @@ std::vector<const char*> VKDevice::LoadExtensions() {
} else {
is_warp_potentially_bigger = true;
}
-
if (has_ext_transform_feedback) {
VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
@@ -666,8 +705,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties;
tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
tfb_properties.pNext = nullptr;
- properties.pNext = &tfb_properties;
- physical.GetProperties2KHR(properties);
+ physical_properties.pNext = &tfb_properties;
+ physical.GetProperties2KHR(physical_properties);
if (tfb_features.transformFeedback && tfb_features.geometryStreams &&
tfb_properties.maxTransformFeedbackStreams >= 4 &&
@@ -677,7 +716,6 @@ std::vector<const char*> VKDevice::LoadExtensions() {
ext_transform_feedback = true;
}
}
-
if (has_ext_custom_border_color) {
VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features;
border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
@@ -690,13 +728,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {
ext_custom_border_color = true;
}
}
-
- if (has_ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_AMD_PROPRIETARY) {
- // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but the <stride> field
- // seems to be bugged. Blacklisting it for now.
- LOG_WARNING(Render_Vulkan,
- "Blacklisting AMD proprietary from VK_EXT_extended_dynamic_state");
- } else if (has_ext_extended_dynamic_state) {
+ if (has_ext_extended_dynamic_state) {
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
dynamic_state.pNext = nullptr;
@@ -708,52 +740,64 @@ std::vector<const char*> VKDevice::LoadExtensions() {
ext_extended_dynamic_state = true;
}
}
-
return extensions;
}
-void VKDevice::SetupFamilies(VkSurfaceKHR surface) {
- std::optional<u32> graphics_family_, present_family_;
-
+void Device::SetupFamilies(VkSurfaceKHR surface) {
const std::vector queue_family_properties = physical.GetQueueFamilyProperties();
- for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
- if (graphics_family_ && present_family_)
+ std::optional<u32> graphics;
+ std::optional<u32> present;
+ for (u32 index = 0; index < static_cast<u32>(queue_family_properties.size()); ++index) {
+ if (graphics && (present || !surface)) {
break;
-
- const auto& queue_family = queue_family_properties[i];
- if (queue_family.queueCount == 0)
+ }
+ const VkQueueFamilyProperties& queue_family = queue_family_properties[index];
+ if (queue_family.queueCount == 0) {
continue;
-
+ }
if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
- graphics_family_ = i;
+ graphics = index;
}
- if (physical.GetSurfaceSupportKHR(i, surface)) {
- present_family_ = i;
+ if (surface && physical.GetSurfaceSupportKHR(index, surface)) {
+ present = index;
}
}
- ASSERT(graphics_family_ && present_family_);
-
- graphics_family = *graphics_family_;
- present_family = *present_family_;
+ if (!graphics) {
+ LOG_ERROR(Render_Vulkan, "Device lacks a graphics queue");
+ throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
+ }
+ if (surface && !present) {
+ LOG_ERROR(Render_Vulkan, "Device lacks a present queue");
+ throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
+ }
+ graphics_family = *graphics;
+ present_family = *present;
}
-void VKDevice::SetupFeatures() {
+void Device::SetupFeatures() {
const auto supported_features{physical.GetFeatures()};
is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
+ is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample;
+ is_blit_depth_stencil_supported = TestDepthStencilBlits();
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
}
-void VKDevice::CollectTelemetryParameters() {
+void Device::CollectTelemetryParameters() {
VkPhysicalDeviceDriverPropertiesKHR driver{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
.pNext = nullptr,
+ .driverID = {},
+ .driverName = {},
+ .driverInfo = {},
+ .conformanceVersion = {},
};
- VkPhysicalDeviceProperties2KHR properties{
+ VkPhysicalDeviceProperties2KHR device_properties{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
.pNext = &driver,
+ .properties = {},
};
- physical.GetProperties2KHR(properties);
+ physical.GetProperties2KHR(device_properties);
driver_id = driver.driverID;
vendor_name = driver.driverName;
@@ -765,7 +809,33 @@ void VKDevice::CollectTelemetryParameters() {
}
}
-std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
+void Device::CollectToolingInfo() {
+ if (!ext_tooling_info) {
+ return;
+ }
+ const auto vkGetPhysicalDeviceToolPropertiesEXT =
+ reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>(
+ dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT"));
+ if (!vkGetPhysicalDeviceToolPropertiesEXT) {
+ return;
+ }
+ u32 tool_count = 0;
+ if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) {
+ return;
+ }
+ std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count);
+ if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) {
+ return;
+ }
+ for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
+ const std::string_view name = tool.name;
+ LOG_INFO(Render_Vulkan, "{}", name);
+ has_renderdoc = has_renderdoc || name == "RenderDoc";
+ has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics";
+ }
+}
+
+std::vector<VkDeviceQueueCreateInfo> Device::GetDeviceQueueCreateInfos() const {
static constexpr float QUEUE_PRIORITY = 1.0f;
std::unordered_set<u32> unique_queue_families{graphics_family, present_family};
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 26a233db1..67d70cd22 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -10,11 +10,12 @@
#include <vector>
#include "common/common_types.h"
-#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
+class NsightAftermathTracker;
+
/// Format usage descriptor.
enum class FormatType { Linear, Optimal, Buffer };
@@ -22,14 +23,11 @@ enum class FormatType { Linear, Optimal, Buffer };
const u32 GuestWarpSize = 32;
/// Handles data specific to a physical device.
-class VKDevice final {
+class Device {
public:
- explicit VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
- const vk::InstanceDispatch& dld);
- ~VKDevice();
-
- /// Initializes the device. Returns true on success.
- bool Create();
+ explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
+ const vk::InstanceDispatch& dld);
+ ~Device();
/**
* Returns a format supported by the device for the passed requeriments.
@@ -83,7 +81,7 @@ public:
}
/// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
- u32 GetApiVersion() const {
+ u32 ApiVersion() const {
return properties.apiVersion;
}
@@ -152,6 +150,11 @@ public:
return is_formatless_image_load_supported;
}
+ /// Returns true when blitting from and to depth stencil images is supported.
+ bool IsBlitDepthStencilSupported() const {
+ return is_blit_depth_stencil_supported;
+ }
+
/// Returns true if the device supports VK_NV_viewport_swizzle.
bool IsNvViewportSwizzleSupported() const {
return nv_viewport_swizzle;
@@ -167,6 +170,11 @@ public:
return ext_index_type_uint8;
}
+ /// Returns true if the device supports VK_EXT_sampler_filter_minmax.
+ bool IsExtSamplerFilterMinmaxSupported() const {
+ return ext_sampler_filter_minmax;
+ }
+
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
bool IsExtDepthRangeUnrestrictedSupported() const {
return ext_depth_range_unrestricted;
@@ -192,6 +200,16 @@ public:
return ext_extended_dynamic_state;
}
+ /// Returns true if the device supports VK_EXT_shader_stencil_export.
+ bool IsExtShaderStencilExportSupported() const {
+ return ext_shader_stencil_export;
+ }
+
+ /// Returns true when a known debugging tool is attached.
+ bool HasDebuggingToolAttached() const {
+ return has_renderdoc || has_nsight_graphics;
+ }
+
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
@@ -207,12 +225,12 @@ public:
return use_asynchronous_shaders;
}
+private:
/// Checks if the physical device is suitable.
- static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
+ void CheckSuitability(bool requires_swapchain) const;
-private:
/// Loads extensions into a vector and stores available ones in this object.
- std::vector<const char*> LoadExtensions();
+ std::vector<const char*> LoadExtensions(bool requires_surface);
/// Sets up queue families.
void SetupFamilies(VkSurfaceKHR surface);
@@ -223,22 +241,30 @@ private:
/// Collects telemetry information from the device.
void CollectTelemetryParameters();
+ /// Collects information about attached tools.
+ void CollectToolingInfo();
+
/// Returns a list of queue initialization descriptors.
std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
/// Returns true if ASTC textures are natively supported.
bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const;
+ /// Returns true if the device natively supports blitting depth stencil images.
+ bool TestDepthStencilBlits() const;
+
/// Returns true if a format is supported.
bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const;
+ VkInstance instance; ///< Vulkan instance.
vk::DeviceDispatch dld; ///< Device function pointers.
vk::PhysicalDevice physical; ///< Physical device.
VkPhysicalDeviceProperties properties; ///< Device properties.
vk::Device logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue.
+ u32 instance_version{}; ///< Vulkan onstance version.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
VkDriverIdKHR driver_id{}; ///< Driver ID.
@@ -246,16 +272,23 @@ private:
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
- bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
- bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
- bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
- bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
- bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
- bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
- bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
- bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
- bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
- bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
+ bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
+ bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
+ bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
+ bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
+ bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
+ bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
+ bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
+ bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
+ bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
+ bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
+ bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
+ bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
+ bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
+ bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
+ bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
+ bool has_renderdoc{}; ///< Has RenderDoc attached
+ bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
// Asynchronous Graphics Pipeline setting
bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
@@ -268,7 +301,7 @@ private:
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
/// Nsight Aftermath GPU crash tracker
- NsightAftermathTracker nsight_aftermath_tracker;
+ std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker;
};
} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
new file mode 100644
index 000000000..bfd6e6add
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -0,0 +1,155 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <future>
+#include <optional>
+#include <span>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/dynamic_library.h"
+#include "common/logging/log.h"
+#include "core/frontend/emu_window.h"
+#include "video_core/vulkan_common/vulkan_instance.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+// Include these late to avoid polluting previous headers
+#ifdef _WIN32
+#include <windows.h>
+// ensure include order
+#include <vulkan/vulkan_win32.h>
+#endif
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+#include <X11/Xlib.h>
+#include <vulkan/vulkan_wayland.h>
+#include <vulkan/vulkan_xlib.h>
+#endif
+
+namespace Vulkan {
+namespace {
+[[nodiscard]] std::vector<const char*> RequiredExtensions(
+ Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) {
+ std::vector<const char*> extensions;
+ extensions.reserve(6);
+ switch (window_type) {
+ case Core::Frontend::WindowSystemType::Headless:
+ break;
+#ifdef _WIN32
+ case Core::Frontend::WindowSystemType::Windows:
+ extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
+ break;
+#endif
+#if !defined(_WIN32) && !defined(__APPLE__)
+ case Core::Frontend::WindowSystemType::X11:
+ extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
+ break;
+ case Core::Frontend::WindowSystemType::Wayland:
+ extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
+ break;
+#endif
+ default:
+ LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
+ break;
+ }
+ if (window_type != Core::Frontend::WindowSystemType::Headless) {
+ extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
+ }
+ if (enable_debug_utils) {
+ extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
+ }
+ extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
+ return extensions;
+}
+
+[[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld,
+ std::span<const char* const> extensions) {
+ const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
+ if (!properties) {
+ LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
+ return false;
+ }
+ for (const char* extension : extensions) {
+ const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) {
+ return std::strcmp(extension, prop.extensionName) == 0;
+ });
+ if (it == properties->end()) {
+ LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
+ return false;
+ }
+ }
+ return true;
+}
+
+[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) {
+ std::vector<const char*> layers;
+ if (enable_layers) {
+ layers.push_back("VK_LAYER_KHRONOS_validation");
+ }
+ return layers;
+}
+
+void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const char*>& layers) {
+ const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
+ if (!layer_properties) {
+ LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
+ layers.clear();
+ }
+ std::erase_if(layers, [&layer_properties](const char* layer) {
+ const auto comp = [layer](const VkLayerProperties& layer_property) {
+ return std::strcmp(layer, layer_property.layerName) == 0;
+ };
+ const auto it = std::ranges::find_if(*layer_properties, comp);
+ if (it == layer_properties->end()) {
+ LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
+ return true;
+ }
+ return false;
+ });
+}
+} // Anonymous namespace
+
+vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
+ u32 required_version, Core::Frontend::WindowSystemType window_type,
+ bool enable_debug_utils, bool enable_layers) {
+ if (!library.IsOpen()) {
+ LOG_ERROR(Render_Vulkan, "Vulkan library not available");
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) {
+ LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ if (!vk::Load(dld)) {
+ LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils);
+ if (!AreExtensionsSupported(dld, extensions)) {
+ throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
+ }
+ std::vector<const char*> layers = Layers(enable_layers);
+ RemoveUnavailableLayers(dld, layers);
+
+ const u32 available_version = vk::AvailableVersion(dld);
+ if (available_version < required_version) {
+ LOG_ERROR(Render_Vulkan, "Vulkan {}.{} is not supported, {}.{} is required",
+ VK_VERSION_MAJOR(available_version), VK_VERSION_MINOR(available_version),
+ VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version));
+ throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
+ }
+ vk::Instance instance =
+ std::async([&] {
+ return vk::Instance::Create(required_version, layers, extensions, dld);
+ }).get();
+ if (!vk::Load(*instance, dld)) {
+ LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ return instance;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h
new file mode 100644
index 000000000..e5e3a7144
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_instance.h
@@ -0,0 +1,32 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "common/dynamic_library.h"
+#include "core/frontend/emu_window.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+/**
+ * Create a Vulkan instance
+ *
+ * @param library Dynamic library to load the Vulkan instance from
+ * @param dld Dispatch table to load function pointers into
+ * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1)
+ * @param window_type Window system type's enabled extension
+ * @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not
+ * @param enable_layers Whether to enable Vulkan validation layers or not
+ *
+ * @return A new Vulkan instance
+ * @throw vk::Exception on failure
+ */
+[[nodiscard]] vk::Instance CreateInstance(
+ const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version,
+ Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless,
+ bool enable_debug_utils = false, bool enable_layers = false);
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp
new file mode 100644
index 000000000..557871d81
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_library.cpp
@@ -0,0 +1,36 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstdlib>
+#include <string>
+
+#include "common/dynamic_library.h"
+#include "common/file_util.h"
+#include "video_core/vulkan_common/vulkan_library.h"
+
+namespace Vulkan {
+
+Common::DynamicLibrary OpenLibrary() {
+ Common::DynamicLibrary library;
+#ifdef __APPLE__
+ // Check if a path to a specific Vulkan library has been specified.
+ char* const libvulkan_env = std::getenv("LIBVULKAN_PATH");
+ if (!libvulkan_env || !library.Open(libvulkan_env)) {
+ // Use the libvulkan.dylib from the application bundle.
+ const std::string filename =
+ Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+ void(library.Open(filename.c_str()));
+ }
+#else
+ std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
+ if (!library.Open(filename.c_str())) {
+ // Android devices may not have libvulkan.so.1, only libvulkan.so.
+ filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
+ void(library.Open(filename.c_str()));
+ }
+#endif
+ return library;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_library.h b/src/video_core/vulkan_common/vulkan_library.h
new file mode 100644
index 000000000..8b28b0e17
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_library.h
@@ -0,0 +1,13 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/dynamic_library.h"
+
+namespace Vulkan {
+
+Common::DynamicLibrary OpenLibrary();
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
new file mode 100644
index 000000000..2a8b7a907
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -0,0 +1,326 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <bit>
+#include <optional>
+#include <vector>
+
+#include <glad/glad.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+namespace {
+struct Range {
+ u64 begin;
+ u64 end;
+
+ [[nodiscard]] bool Contains(u64 iterator, u64 size) const noexcept {
+ return iterator < end && begin < iterator + size;
+ }
+};
+
+[[nodiscard]] u64 AllocationChunkSize(u64 required_size) {
+ static constexpr std::array sizes{
+ 0x1000ULL << 10, 0x1400ULL << 10, 0x1800ULL << 10, 0x1c00ULL << 10, 0x2000ULL << 10,
+ 0x3200ULL << 10, 0x4000ULL << 10, 0x6000ULL << 10, 0x8000ULL << 10, 0xA000ULL << 10,
+ 0x10000ULL << 10, 0x18000ULL << 10, 0x20000ULL << 10,
+ };
+ static_assert(std::is_sorted(sizes.begin(), sizes.end()));
+
+ const auto it = std::ranges::lower_bound(sizes, required_size);
+ return it != sizes.end() ? *it : Common::AlignUp(required_size, 4ULL << 20);
+}
+
+[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) {
+ switch (usage) {
+ case MemoryUsage::DeviceLocal:
+ return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ case MemoryUsage::Upload:
+ return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+ case MemoryUsage::Download:
+ return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ }
+ UNREACHABLE_MSG("Invalid memory usage={}", usage);
+ return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+}
+} // Anonymous namespace
+
+class MemoryAllocation {
+public:
+ explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties,
+ u64 allocation_size_, u32 type)
+ : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties},
+ shifted_memory_type{1U << type} {}
+
+#if defined(_WIN32) || defined(__linux__)
+ ~MemoryAllocation() {
+ if (owning_opengl_handle != 0) {
+ glDeleteMemoryObjectsEXT(1, &owning_opengl_handle);
+ }
+ }
+#endif
+
+ MemoryAllocation& operator=(const MemoryAllocation&) = delete;
+ MemoryAllocation(const MemoryAllocation&) = delete;
+
+ MemoryAllocation& operator=(MemoryAllocation&&) = delete;
+ MemoryAllocation(MemoryAllocation&&) = delete;
+
+ [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) {
+ const std::optional<u64> alloc = FindFreeRegion(size, alignment);
+ if (!alloc) {
+ // Signal out of memory, it'll try to do more allocations.
+ return std::nullopt;
+ }
+ const Range range{
+ .begin = *alloc,
+ .end = *alloc + size,
+ };
+ commits.insert(std::ranges::upper_bound(commits, *alloc, {}, &Range::begin), range);
+ return std::make_optional<MemoryCommit>(this, *memory, *alloc, *alloc + size);
+ }
+
+ void Free(u64 begin) {
+ const auto it = std::ranges::find(commits, begin, &Range::begin);
+ ASSERT_MSG(it != commits.end(), "Invalid commit");
+ commits.erase(it);
+ }
+
+ [[nodiscard]] std::span<u8> Map() {
+ if (memory_mapped_span.empty()) {
+ u8* const raw_pointer = memory.Map(0, allocation_size);
+ memory_mapped_span = std::span<u8>(raw_pointer, allocation_size);
+ }
+ return memory_mapped_span;
+ }
+
+#ifdef _WIN32
+ [[nodiscard]] u32 ExportOpenGLHandle() {
+ if (!owning_opengl_handle) {
+ glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
+ glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size,
+ GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
+ memory.GetMemoryWin32HandleKHR());
+ }
+ return owning_opengl_handle;
+ }
+#elif __linux__
+ [[nodiscard]] u32 ExportOpenGLHandle() {
+ if (!owning_opengl_handle) {
+ glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
+ glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT,
+ memory.GetMemoryFdKHR());
+ }
+ return owning_opengl_handle;
+ }
+#else
+ [[nodiscard]] u32 ExportOpenGLHandle() {
+ return 0;
+ }
+#endif
+
+ /// Returns whether this allocation is compatible with the arguments.
+ [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
+ return (flags & property_flags) && (type_mask & shifted_memory_type) != 0;
+ }
+
+private:
+ [[nodiscard]] static constexpr u32 ShiftType(u32 type) {
+ return 1U << type;
+ }
+
+ [[nodiscard]] std::optional<u64> FindFreeRegion(u64 size, u64 alignment) noexcept {
+ ASSERT(std::has_single_bit(alignment));
+ const u64 alignment_log2 = std::countr_zero(alignment);
+ std::optional<u64> candidate;
+ u64 iterator = 0;
+ auto commit = commits.begin();
+ while (iterator + size <= allocation_size) {
+ candidate = candidate.value_or(iterator);
+ if (commit == commits.end()) {
+ break;
+ }
+ if (commit->Contains(*candidate, size)) {
+ candidate = std::nullopt;
+ }
+ iterator = Common::AlignUpLog2(commit->end, alignment_log2);
+ ++commit;
+ }
+ return candidate;
+ }
+
+ const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
+ const u64 allocation_size; ///< Size of this allocation.
+ const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
+ const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
+ std::vector<Range> commits; ///< All commit ranges done from this allocation.
+ std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
+#if defined(_WIN32) || defined(__linux__)
+ u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle.
+#endif
+};
+
+MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
+ u64 end_) noexcept
+ : allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {}
+
+MemoryCommit::~MemoryCommit() {
+ Release();
+}
+
+MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept {
+ Release();
+ allocation = std::exchange(rhs.allocation, nullptr);
+ memory = rhs.memory;
+ begin = rhs.begin;
+ end = rhs.end;
+ span = std::exchange(rhs.span, std::span<u8>{});
+ return *this;
+}
+
+MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept
+ : allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin},
+ end{rhs.end}, span{std::exchange(rhs.span, std::span<u8>{})} {}
+
+std::span<u8> MemoryCommit::Map() {
+ if (span.empty()) {
+ span = allocation->Map().subspan(begin, end - begin);
+ }
+ return span;
+}
+
+u32 MemoryCommit::ExportOpenGLHandle() const {
+ return allocation->ExportOpenGLHandle();
+}
+
+void MemoryCommit::Release() {
+ if (allocation) {
+ allocation->Free(begin);
+ }
+}
+
+MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
+ : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
+ export_allocations{export_allocations_} {}
+
+MemoryAllocator::~MemoryAllocator() = default;
+
+MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
+ // Find the fastest memory flags we can afford with the current requirements
+ const VkMemoryPropertyFlags flags = MemoryPropertyFlags(requirements.memoryTypeBits, usage);
+ if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
+ return std::move(*commit);
+ }
+ // Commit has failed, allocate more memory.
+ // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
+ AllocMemory(flags, requirements.memoryTypeBits, AllocationChunkSize(requirements.size));
+
+ // Commit again, this time it won't fail since there's a fresh allocation above.
+ // If it does, there's a bug.
+ return TryCommit(requirements, flags).value();
+}
+
+MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage) {
+ auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), usage);
+ buffer.BindMemory(commit.Memory(), commit.Offset());
+ return commit;
+}
+
+MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) {
+ auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), usage);
+ image.BindMemory(commit.Memory(), commit.Offset());
+ return commit;
+}
+
+void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
+ const u32 type = FindType(flags, type_mask).value();
+ const VkExportMemoryAllocateInfo export_allocate_info{
+ .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
+ .pNext = nullptr,
+#ifdef _WIN32
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
+#elif __linux__
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+#else
+ .handleTypes = 0,
+#endif
+ };
+ vk::DeviceMemory memory = device.GetLogical().AllocateMemory({
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = export_allocations ? &export_allocate_info : nullptr,
+ .allocationSize = size,
+ .memoryTypeIndex = type,
+ });
+ allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
+}
+
+std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
+ VkMemoryPropertyFlags flags) {
+ for (auto& allocation : allocations) {
+ if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) {
+ continue;
+ }
+ if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
+ return commit;
+ }
+ }
+ return std::nullopt;
+}
+
+VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask, MemoryUsage usage) const {
+ return MemoryPropertyFlags(type_mask, MemoryUsagePropertyFlags(usage));
+}
+
+VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
+ VkMemoryPropertyFlags flags) const {
+ if (FindType(flags, type_mask)) {
+ // Found a memory type with those requirements
+ return flags;
+ }
+ if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
+ // Remove host cached bit in case it's not supported
+ return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+ }
+ if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
+ // Remove device local, if it's not supported by the requested resource
+ return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+ }
+ UNREACHABLE_MSG("No compatible memory types found");
+ return 0;
+}
+
+std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const {
+ for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
+ const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags;
+ if ((type_mask & (1U << type_index)) && (type_flags & flags)) {
+ // The type matches in type and in the wanted properties.
+ return type_index;
+ }
+ }
+ // Failed to find index
+ return std::nullopt;
+}
+
+bool IsHostVisible(MemoryUsage usage) noexcept {
+ switch (usage) {
+ case MemoryUsage::DeviceLocal:
+ return false;
+ case MemoryUsage::Upload:
+ case MemoryUsage::Download:
+ return true;
+ }
+ UNREACHABLE_MSG("Invalid memory usage={}", usage);
+ return false;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
new file mode 100644
index 000000000..d1ce29450
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -0,0 +1,129 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <span>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Device;
+class MemoryMap;
+class MemoryAllocation;
+
+/// Hints and requirements for the backing memory type of a commit
+enum class MemoryUsage {
+ DeviceLocal, ///< Hints device local usages, fastest memory type to read and write from the GPU
+ Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads
+ Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks
+};
+
+/// Ownership handle of a memory commitment.
+/// Points to a subregion of a memory allocation.
+class MemoryCommit {
+public:
+ explicit MemoryCommit() noexcept = default;
+ explicit MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
+ u64 end_) noexcept;
+ ~MemoryCommit();
+
+ MemoryCommit& operator=(MemoryCommit&&) noexcept;
+ MemoryCommit(MemoryCommit&&) noexcept;
+
+ MemoryCommit& operator=(const MemoryCommit&) = delete;
+ MemoryCommit(const MemoryCommit&) = delete;
+
+ /// Returns a host visible memory map.
+ /// It will map the backing allocation if it hasn't been mapped before.
+ std::span<u8> Map();
+
+ /// Returns an non-owning OpenGL handle, creating one if it doesn't exist.
+ u32 ExportOpenGLHandle() const;
+
+ /// Returns the Vulkan memory handler.
+ VkDeviceMemory Memory() const {
+ return memory;
+ }
+
+ /// Returns the start position of the commit relative to the allocation.
+ VkDeviceSize Offset() const {
+ return static_cast<VkDeviceSize>(begin);
+ }
+
+private:
+ void Release();
+
+ MemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
+ VkDeviceMemory memory{}; ///< Vulkan device memory handler.
+ u64 begin{}; ///< Beginning offset in bytes to where the commit exists.
+ u64 end{}; ///< Offset in bytes where the commit ends.
+ std::span<u8> span; ///< Host visible memory span. Empty if not queried before.
+};
+
+/// Memory allocator container.
+/// Allocates and releases memory allocations on demand.
+class MemoryAllocator {
+public:
+ /**
+ * Construct memory allocator
+ *
+ * @param device_ Device to allocate from
+ * @param export_allocations_ True when allocations have to be exported
+ *
+ * @throw vk::Exception on failure
+ */
+ explicit MemoryAllocator(const Device& device_, bool export_allocations_);
+ ~MemoryAllocator();
+
+ MemoryAllocator& operator=(const MemoryAllocator&) = delete;
+ MemoryAllocator(const MemoryAllocator&) = delete;
+
+ /**
+ * Commits a memory with the specified requirements.
+ *
+ * @param requirements Requirements returned from a Vulkan call.
+ * @param usage Indicates how the memory will be used.
+ *
+ * @returns A memory commit.
+ */
+ MemoryCommit Commit(const VkMemoryRequirements& requirements, MemoryUsage usage);
+
+ /// Commits memory required by the buffer and binds it.
+ MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
+
+ /// Commits memory required by the image and binds it.
+ MemoryCommit Commit(const vk::Image& image, MemoryUsage usage);
+
+private:
+ /// Allocates a chunk of memory.
+ void AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
+
+ /// Tries to allocate a memory commit.
+ std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
+ VkMemoryPropertyFlags flags);
+
+ /// Returns the fastest compatible memory property flags from a wanted usage.
+ VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, MemoryUsage usage) const;
+
+ /// Returns the fastest compatible memory property flags from the wanted flags.
+ VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, VkMemoryPropertyFlags flags) const;
+
+ /// Returns index to the fastest memory type compatible with the passed requirements.
+ std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
+
+ const Device& device; ///< Device handle.
+ const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
+ const bool export_allocations; ///< True when memory allocations have to be exported.
+ std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
+};
+
+/// Returns true when a memory usage is guaranteed to be host visible.
+bool IsHostVisible(MemoryUsage usage) noexcept;
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp
new file mode 100644
index 000000000..3c3238f96
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_surface.cpp
@@ -0,0 +1,81 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "core/frontend/emu_window.h"
+#include "video_core/vulkan_common/vulkan_surface.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+// Include these late to avoid polluting previous headers
+#ifdef _WIN32
+#include <windows.h>
+// ensure include order
+#include <vulkan/vulkan_win32.h>
+#endif
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+#include <X11/Xlib.h>
+#include <vulkan/vulkan_wayland.h>
+#include <vulkan/vulkan_xlib.h>
+#endif
+
+namespace Vulkan {
+
+vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
+ const Core::Frontend::EmuWindow& emu_window) {
+ [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch();
+ [[maybe_unused]] const auto& window_info = emu_window.GetWindowInfo();
+ VkSurfaceKHR unsafe_surface = nullptr;
+
+#ifdef _WIN32
+ if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
+ const HWND hWnd = static_cast<HWND>(window_info.render_surface);
+ const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
+ nullptr, 0, nullptr, hWnd};
+ const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
+ dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
+ if (!vkCreateWin32SurfaceKHR ||
+ vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
+ LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ }
+#endif
+#if !defined(_WIN32) && !defined(__APPLE__)
+ if (window_info.type == Core::Frontend::WindowSystemType::X11) {
+ const VkXlibSurfaceCreateInfoKHR xlib_ci{
+ VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
+ static_cast<Display*>(window_info.display_connection),
+ reinterpret_cast<Window>(window_info.render_surface)};
+ const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
+ dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
+ if (!vkCreateXlibSurfaceKHR ||
+ vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
+ LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ }
+ if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
+ const VkWaylandSurfaceCreateInfoKHR wayland_ci{
+ VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
+ static_cast<wl_display*>(window_info.display_connection),
+ static_cast<wl_surface*>(window_info.render_surface)};
+ const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
+ dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
+ if (!vkCreateWaylandSurfaceKHR ||
+ vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) !=
+ VK_SUCCESS) {
+ LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ }
+#endif
+ if (!unsafe_surface) {
+ LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ return vk::SurfaceKHR(unsafe_surface, *instance, dld);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_surface.h b/src/video_core/vulkan_common/vulkan_surface.h
new file mode 100644
index 000000000..05a169e32
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_surface.h
@@ -0,0 +1,18 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Core::Frontend {
+class EmuWindow;
+}
+
+namespace Vulkan {
+
+[[nodiscard]] vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
+ const Core::Frontend::EmuWindow& emu_window);
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 013865aa4..2aa0ffbe6 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -6,32 +6,55 @@
#include <exception>
#include <memory>
#include <optional>
+#include <string_view>
#include <utility>
#include <vector>
#include "common/common_types.h"
+#include "common/logging/log.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan::vk {
namespace {
+template <typename Func>
+void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld,
+ Func&& func) {
+ // Calling GetProperties calls Vulkan more than needed. But they are supposed to be cheap
+ // functions.
+ std::stable_sort(devices.begin(), devices.end(),
+ [&dld, &func](VkPhysicalDevice lhs, VkPhysicalDevice rhs) {
+ return func(vk::PhysicalDevice(lhs, dld).GetProperties(),
+ vk::PhysicalDevice(rhs, dld).GetProperties());
+ });
+}
+
+void SortPhysicalDevicesPerVendor(std::vector<VkPhysicalDevice>& devices,
+ const InstanceDispatch& dld,
+ std::initializer_list<u32> vendor_ids) {
+ for (auto it = vendor_ids.end(); it != vendor_ids.begin();) {
+ --it;
+ SortPhysicalDevices(devices, dld, [id = *it](const auto& lhs, const auto& rhs) {
+ return lhs.vendorID == id && rhs.vendorID != id;
+ });
+ }
+}
+
void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld) {
- std::stable_sort(devices.begin(), devices.end(), [&](auto lhs, auto rhs) {
- // This will call Vulkan more than needed, but these calls are cheap.
- const auto lhs_properties = vk::PhysicalDevice(lhs, dld).GetProperties();
- const auto rhs_properties = vk::PhysicalDevice(rhs, dld).GetProperties();
-
- // Prefer discrete GPUs, Nvidia over AMD, AMD over Intel, Intel over the rest.
- const bool preferred =
- (lhs_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU &&
- rhs_properties.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) ||
- (lhs_properties.vendorID == 0x10DE && rhs_properties.vendorID != 0x10DE) ||
- (lhs_properties.vendorID == 0x1002 && rhs_properties.vendorID != 0x1002) ||
- (lhs_properties.vendorID == 0x8086 && rhs_properties.vendorID != 0x8086);
- return !preferred;
+ // Sort by name, this will set a base and make GPUs with higher numbers appear first
+ // (e.g. GTX 1650 will intentionally be listed before a GTX 1080).
+ SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) {
+ return std::string_view{lhs.deviceName} > std::string_view{rhs.deviceName};
});
+ // Prefer discrete over non-discrete
+ SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) {
+ return lhs.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU &&
+ rhs.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
+ });
+ // Prefer Nvidia over AMD, AMD over Intel, Intel over the rest.
+ SortPhysicalDevicesPerVendor(devices, dld, {0x10DE, 0x1002, 0x8086});
}
template <typename T>
@@ -58,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdBeginQuery);
X(vkCmdBeginRenderPass);
X(vkCmdBeginTransformFeedbackEXT);
+ X(vkCmdBeginDebugUtilsLabelEXT);
X(vkCmdBindDescriptorSets);
X(vkCmdBindIndexBuffer);
X(vkCmdBindPipeline);
@@ -75,6 +99,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdEndQuery);
X(vkCmdEndRenderPass);
X(vkCmdEndTransformFeedbackEXT);
+ X(vkCmdEndDebugUtilsLabelEXT);
X(vkCmdFillBuffer);
X(vkCmdPipelineBarrier);
X(vkCmdPushConstants);
@@ -98,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdSetPrimitiveTopologyEXT);
X(vkCmdSetStencilOpEXT);
X(vkCmdSetStencilTestEnableEXT);
+ X(vkCmdResolveImage);
X(vkCreateBuffer);
X(vkCreateBufferView);
X(vkCreateCommandPool);
@@ -142,23 +168,44 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkFreeCommandBuffers);
X(vkFreeDescriptorSets);
X(vkFreeMemory);
- X(vkGetBufferMemoryRequirements);
+ X(vkGetBufferMemoryRequirements2);
X(vkGetDeviceQueue);
X(vkGetEventStatus);
X(vkGetFenceStatus);
X(vkGetImageMemoryRequirements);
+ X(vkGetMemoryFdKHR);
+#ifdef _WIN32
+ X(vkGetMemoryWin32HandleKHR);
+#endif
X(vkGetQueryPoolResults);
+ X(vkGetSemaphoreCounterValueKHR);
X(vkMapMemory);
X(vkQueueSubmit);
X(vkResetFences);
X(vkResetQueryPoolEXT);
+ X(vkSetDebugUtilsObjectNameEXT);
+ X(vkSetDebugUtilsObjectTagEXT);
X(vkUnmapMemory);
X(vkUpdateDescriptorSetWithTemplateKHR);
X(vkUpdateDescriptorSets);
X(vkWaitForFences);
+ X(vkWaitSemaphoresKHR);
#undef X
}
+template <typename T>
+void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type,
+ const char* name) {
+ const VkDebugUtilsObjectNameInfoEXT name_info{
+ .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
+ .pNext = nullptr,
+ .objectType = VK_OBJECT_TYPE_IMAGE,
+ .objectHandle = reinterpret_cast<u64>(handle),
+ .pObjectName = name,
+ };
+ Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info));
+}
+
} // Anonymous namespace
bool Load(InstanceDispatch& dld) noexcept {
@@ -262,6 +309,22 @@ const char* ToString(VkResult result) noexcept {
return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT";
case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT:
return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
+ case VkResult::VK_ERROR_UNKNOWN:
+ return "VK_ERROR_UNKNOWN";
+ case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR:
+ return "VK_ERROR_INCOMPATIBLE_VERSION_KHR";
+ case VkResult::VK_THREAD_IDLE_KHR:
+ return "VK_THREAD_IDLE_KHR";
+ case VkResult::VK_THREAD_DONE_KHR:
+ return "VK_THREAD_DONE_KHR";
+ case VkResult::VK_OPERATION_DEFERRED_KHR:
+ return "VK_OPERATION_DEFERRED_KHR";
+ case VkResult::VK_OPERATION_NOT_DEFERRED_KHR:
+ return "VK_OPERATION_NOT_DEFERRED_KHR";
+ case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT:
+ return "VK_PIPELINE_COMPILE_REQUIRED_EXT";
+ case VkResult::VK_RESULT_MAX_ENUM:
+ return "VK_RESULT_MAX_ENUM";
}
return "Unknown";
}
@@ -375,18 +438,17 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe
return VK_SUCCESS;
}
-Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions,
- InstanceDispatch& dld) noexcept {
- static constexpr VkApplicationInfo application_info{
+Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions,
+ InstanceDispatch& dispatch) {
+ const VkApplicationInfo application_info{
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
.pNext = nullptr,
.pApplicationName = "yuzu Emulator",
.applicationVersion = VK_MAKE_VERSION(0, 1, 0),
.pEngineName = "yuzu Emulator",
.engineVersion = VK_MAKE_VERSION(0, 1, 0),
- .apiVersion = VK_API_VERSION_1_1,
+ .apiVersion = version,
};
-
const VkInstanceCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
.pNext = nullptr,
@@ -397,66 +459,94 @@ Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions
.enabledExtensionCount = extensions.size(),
.ppEnabledExtensionNames = extensions.data(),
};
-
VkInstance instance;
- if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) {
- // Failed to create the instance.
- return {};
- }
- if (!Proc(dld.vkDestroyInstance, dld, "vkDestroyInstance", instance)) {
+ Check(dispatch.vkCreateInstance(&ci, nullptr, &instance));
+ if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) {
// We successfully created an instance but the destroy function couldn't be loaded.
// This is a good moment to panic.
- return {};
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
}
-
- return Instance(instance, dld);
+ return Instance(instance, dispatch);
}
-std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() {
+std::vector<VkPhysicalDevice> Instance::EnumeratePhysicalDevices() const {
u32 num;
- if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) {
- return std::nullopt;
- }
+ Check(dld->vkEnumeratePhysicalDevices(handle, &num, nullptr));
std::vector<VkPhysicalDevice> physical_devices(num);
- if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) {
- return std::nullopt;
- }
+ Check(dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()));
SortPhysicalDevices(physical_devices, *dld);
- return std::make_optional(std::move(physical_devices));
+ return physical_devices;
}
-DebugCallback Instance::TryCreateDebugCallback(
- PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept {
- const VkDebugUtilsMessengerCreateInfoEXT ci{
- .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
- .pNext = nullptr,
- .flags = 0,
- .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
- .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
- VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
- .pfnUserCallback = callback,
- .pUserData = nullptr,
- };
-
- VkDebugUtilsMessengerEXT messenger;
- if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) {
- return {};
- }
- return DebugCallback(messenger, handle, *dld);
+DebugUtilsMessenger Instance::CreateDebugUtilsMessenger(
+ const VkDebugUtilsMessengerCreateInfoEXT& create_info) const {
+ VkDebugUtilsMessengerEXT object;
+ Check(dld->vkCreateDebugUtilsMessengerEXT(handle, &create_info, nullptr, &object));
+ return DebugUtilsMessenger(object, handle, *dld);
}
void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
}
+void Buffer::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name);
+}
+
+void BufferView::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name);
+}
+
void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
Check(dld->vkBindImageMemory(owner, handle, memory, offset));
}
+void Image::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
+}
+
+void ImageView::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
+}
+
+int DeviceMemory::GetMemoryFdKHR() const {
+ const VkMemoryGetFdInfoKHR get_fd_info{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
+ .pNext = nullptr,
+ .memory = handle,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
+ };
+ int fd;
+ Check(dld->vkGetMemoryFdKHR(owner, &get_fd_info, &fd));
+ return fd;
+}
+
+#ifdef _WIN32
+HANDLE DeviceMemory::GetMemoryWin32HandleKHR() const {
+ const VkMemoryGetWin32HandleInfoKHR get_win32_handle_info{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
+ .pNext = nullptr,
+ .memory = handle,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
+ };
+ HANDLE win32_handle;
+ Check(dld->vkGetMemoryWin32HandleKHR(owner, &get_win32_handle_info, &win32_handle));
+ return win32_handle;
+}
+#endif
+
+void DeviceMemory::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name);
+}
+
+void Fence::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name);
+}
+
+void Framebuffer::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name);
+}
+
DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const {
const std::size_t num = ai.descriptorSetCount;
std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num);
@@ -470,6 +560,10 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c
}
}
+void DescriptorPool::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name);
+}
+
CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
const VkCommandBufferAllocateInfo ai{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
@@ -490,6 +584,10 @@ CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLev
}
}
+void CommandPool::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name);
+}
+
std::vector<VkImage> SwapchainKHR::GetImages() const {
u32 num;
Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr));
@@ -498,9 +596,21 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {
return images;
}
+void Event::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name);
+}
+
+void ShaderModule::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name);
+}
+
+void Semaphore::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name);
+}
+
Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
Span<const char*> enabled_extensions, const void* next,
- DeviceDispatch& dld) noexcept {
+ DeviceDispatch& dispatch) {
const VkDeviceCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
.pNext = next,
@@ -513,13 +623,10 @@ Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreate
.ppEnabledExtensionNames = enabled_extensions.data(),
.pEnabledFeatures = nullptr,
};
-
VkDevice device;
- if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) {
- return {};
- }
- Load(device, dld);
- return Device(device, dld);
+ Check(dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device));
+ Load(device, dispatch);
+ return Device(device, dispatch);
}
Queue Device::GetQueue(u32 family_index) const noexcept {
@@ -558,7 +665,10 @@ Semaphore Device::CreateSemaphore() const {
.pNext = nullptr,
.flags = 0,
};
+ return CreateSemaphore(ci);
+}
+Semaphore Device::CreateSemaphore(const VkSemaphoreCreateInfo& ci) const {
VkSemaphore object;
Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object));
return Semaphore(object, handle, *dld);
@@ -644,7 +754,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
return ShaderModule(object, handle, *dld);
}
-Event Device::CreateNewEvent() const {
+Event Device::CreateEvent() const {
static constexpr VkEventCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
.pNext = nullptr,
@@ -676,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
return DeviceMemory(memory, handle, *dld);
}
-VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept {
- VkMemoryRequirements requirements;
- dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements);
- return requirements;
+VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer,
+ void* pnext) const noexcept {
+ const VkBufferMemoryRequirementsInfo2 info{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
+ .pNext = nullptr,
+ .buffer = buffer,
+ };
+ VkMemoryRequirements2 requirements{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+ .pNext = pnext,
+ .memoryRequirements{},
+ };
+ dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements);
+ return requirements.memoryRequirements;
}
VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
@@ -775,6 +895,21 @@ VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noe
return properties;
}
+u32 AvailableVersion(const InstanceDispatch& dld) noexcept {
+ PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion;
+ if (!Proc(vkEnumerateInstanceVersion, dld, "vkEnumerateInstanceVersion")) {
+ // If the procedure is not found, Vulkan 1.0 is assumed
+ return VK_API_VERSION_1_0;
+ }
+ u32 version;
+ if (const VkResult result = vkEnumerateInstanceVersion(&version); result != VK_SUCCESS) {
+ LOG_ERROR(Render_Vulkan, "vkEnumerateInstanceVersion returned {}, assuming Vulkan 1.1",
+ ToString(result));
+ return VK_API_VERSION_1_1;
+ }
+ return version;
+}
+
std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
const InstanceDispatch& dld) {
u32 num;
@@ -786,7 +921,7 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp
VK_SUCCESS) {
return std::nullopt;
}
- return std::move(properties);
+ return properties;
}
std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index b9d3fedc1..3e36d356a 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -9,15 +9,31 @@
#include <limits>
#include <memory>
#include <optional>
+#include <span>
#include <type_traits>
#include <utility>
#include <vector>
#define VK_NO_PROTOTYPES
+#ifdef _WIN32
+#define VK_USE_PLATFORM_WIN32_KHR
+#endif
#include <vulkan/vulkan.h>
+// Sanitize macros
+#ifdef CreateEvent
+#undef CreateEvent
+#endif
+#ifdef CreateSemaphore
+#undef CreateSemaphore
+#endif
+
#include "common/common_types.h"
+#ifdef _MSC_VER
+#pragma warning(disable : 26812) // Disable prefer enum class over enum
+#endif
+
namespace Vulkan::vk {
/**
@@ -41,6 +57,9 @@ public:
/// Construct an empty span.
constexpr Span() noexcept = default;
+ /// Construct an empty span
+ constexpr Span(std::nullptr_t) noexcept {}
+
/// Construct a span from a single element.
constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {}
@@ -52,7 +71,7 @@ public:
/// Construct a span from a pointer and a size.
/// This is inteded for subranges.
- constexpr Span(const T* ptr, std::size_t num) noexcept : ptr{ptr}, num{num} {}
+ constexpr Span(const T* ptr_, std::size_t num_) noexcept : ptr{ptr_}, num{num_} {}
/// Returns the data pointer by the span.
constexpr const T* data() const noexcept {
@@ -136,145 +155,156 @@ inline VkResult Filter(VkResult result) {
/// Table holding Vulkan instance function pointers.
struct InstanceDispatch {
- PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
-
- PFN_vkCreateInstance vkCreateInstance;
- PFN_vkDestroyInstance vkDestroyInstance;
- PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties;
- PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties;
-
- PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT;
- PFN_vkCreateDevice vkCreateDevice;
- PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT;
- PFN_vkDestroyDevice vkDestroyDevice;
- PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
- PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties;
- PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices;
- PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr;
- PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
- PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties;
- PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties;
- PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties;
- PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
- PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties;
- PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR;
- PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR;
- PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR;
- PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR;
- PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR;
- PFN_vkQueuePresentKHR vkQueuePresentKHR;
+ PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
+
+ PFN_vkCreateInstance vkCreateInstance{};
+ PFN_vkDestroyInstance vkDestroyInstance{};
+ PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties{};
+ PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties{};
+
+ PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT{};
+ PFN_vkCreateDevice vkCreateDevice{};
+ PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT{};
+ PFN_vkDestroyDevice vkDestroyDevice{};
+ PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR{};
+ PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties{};
+ PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices{};
+ PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr{};
+ PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR{};
+ PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{};
+ PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{};
+ PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{};
+ PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR{};
+ PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{};
+ PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR{};
+ PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR{};
+ PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR{};
+ PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR{};
+ PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR{};
+ PFN_vkQueuePresentKHR vkQueuePresentKHR{};
};
/// Table holding Vulkan device function pointers.
-struct DeviceDispatch : public InstanceDispatch {
- PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR;
- PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers;
- PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets;
- PFN_vkAllocateMemory vkAllocateMemory;
- PFN_vkBeginCommandBuffer vkBeginCommandBuffer;
- PFN_vkBindBufferMemory vkBindBufferMemory;
- PFN_vkBindImageMemory vkBindImageMemory;
- PFN_vkCmdBeginQuery vkCmdBeginQuery;
- PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass;
- PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT;
- PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets;
- PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer;
- PFN_vkCmdBindPipeline vkCmdBindPipeline;
- PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT;
- PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers;
- PFN_vkCmdBlitImage vkCmdBlitImage;
- PFN_vkCmdClearAttachments vkCmdClearAttachments;
- PFN_vkCmdCopyBuffer vkCmdCopyBuffer;
- PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage;
- PFN_vkCmdCopyImage vkCmdCopyImage;
- PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer;
- PFN_vkCmdDispatch vkCmdDispatch;
- PFN_vkCmdDraw vkCmdDraw;
- PFN_vkCmdDrawIndexed vkCmdDrawIndexed;
- PFN_vkCmdEndQuery vkCmdEndQuery;
- PFN_vkCmdEndRenderPass vkCmdEndRenderPass;
- PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT;
- PFN_vkCmdFillBuffer vkCmdFillBuffer;
- PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;
- PFN_vkCmdPushConstants vkCmdPushConstants;
- PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
- PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
- PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
- PFN_vkCmdSetEvent vkCmdSetEvent;
- PFN_vkCmdSetScissor vkCmdSetScissor;
- PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
- PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
- PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask;
- PFN_vkCmdSetViewport vkCmdSetViewport;
- PFN_vkCmdWaitEvents vkCmdWaitEvents;
- PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT;
- PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT;
- PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT;
- PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT;
- PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT;
- PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT;
- PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT;
- PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT;
- PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT;
- PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT;
- PFN_vkCreateBuffer vkCreateBuffer;
- PFN_vkCreateBufferView vkCreateBufferView;
- PFN_vkCreateCommandPool vkCreateCommandPool;
- PFN_vkCreateComputePipelines vkCreateComputePipelines;
- PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
- PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
- PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
- PFN_vkCreateEvent vkCreateEvent;
- PFN_vkCreateFence vkCreateFence;
- PFN_vkCreateFramebuffer vkCreateFramebuffer;
- PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
- PFN_vkCreateImage vkCreateImage;
- PFN_vkCreateImageView vkCreateImageView;
- PFN_vkCreatePipelineLayout vkCreatePipelineLayout;
- PFN_vkCreateQueryPool vkCreateQueryPool;
- PFN_vkCreateRenderPass vkCreateRenderPass;
- PFN_vkCreateSampler vkCreateSampler;
- PFN_vkCreateSemaphore vkCreateSemaphore;
- PFN_vkCreateShaderModule vkCreateShaderModule;
- PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR;
- PFN_vkDestroyBuffer vkDestroyBuffer;
- PFN_vkDestroyBufferView vkDestroyBufferView;
- PFN_vkDestroyCommandPool vkDestroyCommandPool;
- PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
- PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
- PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
- PFN_vkDestroyEvent vkDestroyEvent;
- PFN_vkDestroyFence vkDestroyFence;
- PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
- PFN_vkDestroyImage vkDestroyImage;
- PFN_vkDestroyImageView vkDestroyImageView;
- PFN_vkDestroyPipeline vkDestroyPipeline;
- PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout;
- PFN_vkDestroyQueryPool vkDestroyQueryPool;
- PFN_vkDestroyRenderPass vkDestroyRenderPass;
- PFN_vkDestroySampler vkDestroySampler;
- PFN_vkDestroySemaphore vkDestroySemaphore;
- PFN_vkDestroyShaderModule vkDestroyShaderModule;
- PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR;
- PFN_vkDeviceWaitIdle vkDeviceWaitIdle;
- PFN_vkEndCommandBuffer vkEndCommandBuffer;
- PFN_vkFreeCommandBuffers vkFreeCommandBuffers;
- PFN_vkFreeDescriptorSets vkFreeDescriptorSets;
- PFN_vkFreeMemory vkFreeMemory;
- PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
- PFN_vkGetDeviceQueue vkGetDeviceQueue;
- PFN_vkGetEventStatus vkGetEventStatus;
- PFN_vkGetFenceStatus vkGetFenceStatus;
- PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
- PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
- PFN_vkMapMemory vkMapMemory;
- PFN_vkQueueSubmit vkQueueSubmit;
- PFN_vkResetFences vkResetFences;
- PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT;
- PFN_vkUnmapMemory vkUnmapMemory;
- PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
- PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets;
- PFN_vkWaitForFences vkWaitForFences;
+struct DeviceDispatch : InstanceDispatch {
+ PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{};
+ PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{};
+ PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{};
+ PFN_vkAllocateMemory vkAllocateMemory{};
+ PFN_vkBeginCommandBuffer vkBeginCommandBuffer{};
+ PFN_vkBindBufferMemory vkBindBufferMemory{};
+ PFN_vkBindImageMemory vkBindImageMemory{};
+ PFN_vkCmdBeginQuery vkCmdBeginQuery{};
+ PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{};
+ PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{};
+ PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
+ PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{};
+ PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{};
+ PFN_vkCmdBindPipeline vkCmdBindPipeline{};
+ PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{};
+ PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{};
+ PFN_vkCmdBlitImage vkCmdBlitImage{};
+ PFN_vkCmdClearAttachments vkCmdClearAttachments{};
+ PFN_vkCmdCopyBuffer vkCmdCopyBuffer{};
+ PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage{};
+ PFN_vkCmdCopyImage vkCmdCopyImage{};
+ PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{};
+ PFN_vkCmdDispatch vkCmdDispatch{};
+ PFN_vkCmdDraw vkCmdDraw{};
+ PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
+ PFN_vkCmdEndQuery vkCmdEndQuery{};
+ PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
+ PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};
+ PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
+ PFN_vkCmdFillBuffer vkCmdFillBuffer{};
+ PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{};
+ PFN_vkCmdPushConstants vkCmdPushConstants{};
+ PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{};
+ PFN_vkCmdSetDepthBias vkCmdSetDepthBias{};
+ PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{};
+ PFN_vkCmdSetEvent vkCmdSetEvent{};
+ PFN_vkCmdSetScissor vkCmdSetScissor{};
+ PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
+ PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
+ PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
+ PFN_vkCmdSetViewport vkCmdSetViewport{};
+ PFN_vkCmdWaitEvents vkCmdWaitEvents{};
+ PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
+ PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
+ PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{};
+ PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{};
+ PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{};
+ PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
+ PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{};
+ PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{};
+ PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{};
+ PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{};
+ PFN_vkCmdResolveImage vkCmdResolveImage{};
+ PFN_vkCreateBuffer vkCreateBuffer{};
+ PFN_vkCreateBufferView vkCreateBufferView{};
+ PFN_vkCreateCommandPool vkCreateCommandPool{};
+ PFN_vkCreateComputePipelines vkCreateComputePipelines{};
+ PFN_vkCreateDescriptorPool vkCreateDescriptorPool{};
+ PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout{};
+ PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR{};
+ PFN_vkCreateEvent vkCreateEvent{};
+ PFN_vkCreateFence vkCreateFence{};
+ PFN_vkCreateFramebuffer vkCreateFramebuffer{};
+ PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines{};
+ PFN_vkCreateImage vkCreateImage{};
+ PFN_vkCreateImageView vkCreateImageView{};
+ PFN_vkCreatePipelineLayout vkCreatePipelineLayout{};
+ PFN_vkCreateQueryPool vkCreateQueryPool{};
+ PFN_vkCreateRenderPass vkCreateRenderPass{};
+ PFN_vkCreateSampler vkCreateSampler{};
+ PFN_vkCreateSemaphore vkCreateSemaphore{};
+ PFN_vkCreateShaderModule vkCreateShaderModule{};
+ PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR{};
+ PFN_vkDestroyBuffer vkDestroyBuffer{};
+ PFN_vkDestroyBufferView vkDestroyBufferView{};
+ PFN_vkDestroyCommandPool vkDestroyCommandPool{};
+ PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool{};
+ PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout{};
+ PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR{};
+ PFN_vkDestroyEvent vkDestroyEvent{};
+ PFN_vkDestroyFence vkDestroyFence{};
+ PFN_vkDestroyFramebuffer vkDestroyFramebuffer{};
+ PFN_vkDestroyImage vkDestroyImage{};
+ PFN_vkDestroyImageView vkDestroyImageView{};
+ PFN_vkDestroyPipeline vkDestroyPipeline{};
+ PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout{};
+ PFN_vkDestroyQueryPool vkDestroyQueryPool{};
+ PFN_vkDestroyRenderPass vkDestroyRenderPass{};
+ PFN_vkDestroySampler vkDestroySampler{};
+ PFN_vkDestroySemaphore vkDestroySemaphore{};
+ PFN_vkDestroyShaderModule vkDestroyShaderModule{};
+ PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR{};
+ PFN_vkDeviceWaitIdle vkDeviceWaitIdle{};
+ PFN_vkEndCommandBuffer vkEndCommandBuffer{};
+ PFN_vkFreeCommandBuffers vkFreeCommandBuffers{};
+ PFN_vkFreeDescriptorSets vkFreeDescriptorSets{};
+ PFN_vkFreeMemory vkFreeMemory{};
+ PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{};
+ PFN_vkGetDeviceQueue vkGetDeviceQueue{};
+ PFN_vkGetEventStatus vkGetEventStatus{};
+ PFN_vkGetFenceStatus vkGetFenceStatus{};
+ PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{};
+ PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{};
+#ifdef _WIN32
+ PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
+#endif
+ PFN_vkGetQueryPoolResults vkGetQueryPoolResults{};
+ PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{};
+ PFN_vkMapMemory vkMapMemory{};
+ PFN_vkQueueSubmit vkQueueSubmit{};
+ PFN_vkResetFences vkResetFences{};
+ PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT{};
+ PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT{};
+ PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT{};
+ PFN_vkUnmapMemory vkUnmapMemory{};
+ PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR{};
+ PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets{};
+ PFN_vkWaitForFences vkWaitForFences{};
+ PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR{};
};
/// Loads instance agnostic function pointers.
@@ -329,6 +359,9 @@ public:
/// Construct an empty handle.
Handle() = default;
+ /// Construct an empty handle.
+ Handle(std::nullptr_t) {}
+
/// Copying Vulkan objects is not supported and will never be.
Handle(const Handle&) = delete;
Handle& operator=(const Handle&) = delete;
@@ -467,9 +500,10 @@ public:
PoolAllocations() = default;
/// Construct an allocation. Errors are reported through IsOutOfPoolMemory().
- explicit PoolAllocations(std::unique_ptr<AllocationType[]> allocations, std::size_t num,
- VkDevice device, PoolType pool, const DeviceDispatch& dld) noexcept
- : allocations{std::move(allocations)}, num{num}, device{device}, pool{pool}, dld{&dld} {}
+ explicit PoolAllocations(std::unique_ptr<AllocationType[]> allocations_, std::size_t num_,
+ VkDevice device_, PoolType pool_, const DeviceDispatch& dld_) noexcept
+ : allocations{std::move(allocations_)}, num{num_}, device{device_}, pool{pool_},
+ dld{&dld_} {}
/// Copying Vulkan allocations is not supported and will never be.
PoolAllocations(const PoolAllocations&) = delete;
@@ -539,19 +573,14 @@ private:
const DeviceDispatch* dld = nullptr;
};
-using BufferView = Handle<VkBufferView, VkDevice, DeviceDispatch>;
-using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
+using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>;
using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>;
-using Framebuffer = Handle<VkFramebuffer, VkDevice, DeviceDispatch>;
-using ImageView = Handle<VkImageView, VkDevice, DeviceDispatch>;
using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>;
using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>;
using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>;
using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>;
using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>;
-using Semaphore = Handle<VkSemaphore, VkDevice, DeviceDispatch>;
-using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>;
using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>;
using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>;
@@ -562,16 +591,25 @@ class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> {
using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle;
public:
- /// Creates a Vulkan instance. Use "operator bool" for error handling.
- static Instance Create(Span<const char*> layers, Span<const char*> extensions,
- InstanceDispatch& dld) noexcept;
+ /// Creates a Vulkan instance.
+ /// @throw Exception on initialization error.
+ static Instance Create(u32 version, Span<const char*> layers, Span<const char*> extensions,
+ InstanceDispatch& dispatch);
/// Enumerates physical devices.
/// @return Physical devices and an empty handle on failure.
- std::optional<std::vector<VkPhysicalDevice>> EnumeratePhysicalDevices();
+ /// @throw Exception on Vulkan error.
+ std::vector<VkPhysicalDevice> EnumeratePhysicalDevices() const;
+
+ /// Creates a debug callback messenger.
+ /// @throw Exception on creation failure.
+ DebugUtilsMessenger CreateDebugUtilsMessenger(
+ const VkDebugUtilsMessengerCreateInfoEXT& create_info) const;
- /// Tries to create a debug callback messenger. Returns an empty handle on failure.
- DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept;
+ /// Returns dispatch table.
+ const InstanceDispatch& Dispatch() const noexcept {
+ return *dld;
+ }
};
class Queue {
@@ -580,9 +618,11 @@ public:
constexpr Queue() noexcept = default;
/// Construct a queue handle.
- constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {}
+ constexpr Queue(VkQueue queue_, const DeviceDispatch& dld_) noexcept
+ : queue{queue_}, dld{&dld_} {}
- VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept {
+ VkResult Submit(Span<VkSubmitInfo> submit_infos,
+ VkFence fence = VK_NULL_HANDLE) const noexcept {
return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence);
}
@@ -601,6 +641,17 @@ class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> {
public:
/// Attaches a memory allocation.
void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
+
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
+};
+
+class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> {
+ using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
};
class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
@@ -609,12 +660,32 @@ class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
public:
/// Attaches a memory allocation.
void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
+
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
+};
+
+class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> {
+ using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
};
class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;
public:
+ int GetMemoryFdKHR() const;
+
+#ifdef _WIN32
+ HANDLE GetMemoryWin32HandleKHR() const;
+#endif
+
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
+
u8* Map(VkDeviceSize offset, VkDeviceSize size) const {
void* data;
Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data));
@@ -630,6 +701,9 @@ class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> {
using Handle<VkFence, VkDevice, DeviceDispatch>::Handle;
public:
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
+
VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept {
return dld->vkWaitForFences(owner, 1, &handle, true, timeout);
}
@@ -643,11 +717,22 @@ public:
}
};
+class Framebuffer : public Handle<VkFramebuffer, VkDevice, DeviceDispatch> {
+ using Handle<VkFramebuffer, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
+};
+
class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> {
using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle;
public:
DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const;
+
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
};
class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {
@@ -656,6 +741,9 @@ class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {
public:
CommandBuffers Allocate(std::size_t num_buffers,
VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const;
+
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
};
class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> {
@@ -669,18 +757,70 @@ class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
public:
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
+
VkResult GetStatus() const noexcept {
return dld->vkGetEventStatus(owner, handle);
}
};
+class ShaderModule : public Handle<VkShaderModule, VkDevice, DeviceDispatch> {
+ using Handle<VkShaderModule, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
+};
+
+class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {
+ using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
+
+ [[nodiscard]] u64 GetCounter() const {
+ u64 value;
+ Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value));
+ return value;
+ }
+
+ /**
+ * Waits for a timeline semaphore on the host.
+ *
+ * @param value Value to wait
+ * @param timeout Time in nanoseconds to timeout
+ * @return True on successful wait, false on timeout
+ */
+ bool Wait(u64 value, u64 timeout = std::numeric_limits<u64>::max()) const {
+ const VkSemaphoreWaitInfoKHR wait_info{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .semaphoreCount = 1,
+ .pSemaphores = &handle,
+ .pValues = &value,
+ };
+ const VkResult result = dld->vkWaitSemaphoresKHR(owner, &wait_info, timeout);
+ switch (result) {
+ case VK_SUCCESS:
+ return true;
+ case VK_TIMEOUT:
+ return false;
+ default:
+ throw Exception(result);
+ }
+ }
+};
+
class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
public:
static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
Span<const char*> enabled_extensions, const void* next,
- DeviceDispatch& dld) noexcept;
+ DeviceDispatch& dispatch);
Queue GetQueue(u32 family_index) const noexcept;
@@ -694,6 +834,8 @@ public:
Semaphore CreateSemaphore() const;
+ Semaphore CreateSemaphore(const VkSemaphoreCreateInfo& ci) const;
+
Fence CreateFence(const VkFenceCreateInfo& ci) const;
DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const;
@@ -721,7 +863,7 @@ public:
ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
- Event CreateNewEvent() const;
+ Event CreateEvent() const;
SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
@@ -729,7 +871,8 @@ public:
DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const;
- VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept;
+ VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer,
+ void* pnext = nullptr) const noexcept;
VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
@@ -767,8 +910,9 @@ class PhysicalDevice {
public:
constexpr PhysicalDevice() noexcept = default;
- constexpr PhysicalDevice(VkPhysicalDevice physical_device, const InstanceDispatch& dld) noexcept
- : physical_device{physical_device}, dld{&dld} {}
+ constexpr PhysicalDevice(VkPhysicalDevice physical_device_,
+ const InstanceDispatch& dld_) noexcept
+ : physical_device{physical_device_}, dld{&dld_} {}
constexpr operator VkPhysicalDevice() const noexcept {
return physical_device;
@@ -807,8 +951,8 @@ class CommandBuffer {
public:
CommandBuffer() noexcept = default;
- explicit CommandBuffer(VkCommandBuffer handle, const DeviceDispatch& dld) noexcept
- : handle{handle}, dld{&dld} {}
+ explicit CommandBuffer(VkCommandBuffer handle_, const DeviceDispatch& dld_) noexcept
+ : handle{handle_}, dld{&dld_} {}
const VkCommandBuffer* address() const noexcept {
return &handle;
@@ -887,6 +1031,12 @@ public:
regions.data(), filter);
}
+ void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
+ VkImageLayout dst_layout, Span<VkImageResolve> regions) {
+ dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
+ regions.data());
+ }
+
void Dispatch(u32 x, u32 y, u32 z) const noexcept {
dld->vkCmdDispatch(handle, x, y, z);
}
@@ -901,6 +1051,29 @@ public:
image_barriers.size(), image_barriers.data());
}
+ void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
+ VkDependencyFlags dependency_flags = 0) const noexcept {
+ PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {});
+ }
+
+ void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
+ VkDependencyFlags dependency_flags,
+ const VkMemoryBarrier& memory_barrier) const noexcept {
+ PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, memory_barrier, {}, {});
+ }
+
+ void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
+ VkDependencyFlags dependency_flags,
+ const VkBufferMemoryBarrier& buffer_barrier) const noexcept {
+ PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {});
+ }
+
+ void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
+ VkDependencyFlags dependency_flags,
+ const VkImageMemoryBarrier& image_barrier) const noexcept {
+ PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier);
+ }
+
void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout,
Span<VkBufferImageCopy> regions) const noexcept {
dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(),
@@ -934,6 +1107,13 @@ public:
dld->vkCmdPushConstants(handle, layout, flags, offset, size, values);
}
+ template <typename T>
+ void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags,
+ const T& data) const noexcept {
+ static_assert(std::is_trivially_copyable_v<T>, "<data> is not trivially copyable");
+ dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast<u32>(sizeof(T)), &data);
+ }
+
void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept {
dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data());
}
@@ -1043,11 +1223,27 @@ public:
counter_buffers, counter_buffer_offsets);
}
+ void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept {
+ const VkDebugUtilsLabelEXT label_info{
+ .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+ .pNext = nullptr,
+ .pLabelName = label,
+ .color{color[0], color[1], color[2], color[3]},
+ };
+ dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info);
+ }
+
+ void EndDebugUtilsLabelEXT() const noexcept {
+ dld->vkCmdEndDebugUtilsLabelEXT(handle);
+ }
+
private:
VkCommandBuffer handle;
const DeviceDispatch* dld;
};
+u32 AvailableVersion(const InstanceDispatch& dld) noexcept;
+
std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
const InstanceDispatch& dld);