summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
authorFernando Sahmkow <fsahmkow27@gmail.com>2023-08-20 17:53:08 +0200
committerFernando Sahmkow <fsahmkow27@gmail.com>2023-09-23 23:05:30 +0200
commitc8237d5c312485394389b2520451ef720604ea9a (patch)
tree1a1064ed38a7a53bd61e4c04bf4571cdebfce2ec /src/video_core/renderer_vulkan
parentQuery Cache: Fix guest side sample counting (diff)
downloadyuzu-c8237d5c312485394389b2520451ef720604ea9a.tar
yuzu-c8237d5c312485394389b2520451ef720604ea9a.tar.gz
yuzu-c8237d5c312485394389b2520451ef720604ea9a.tar.bz2
yuzu-c8237d5c312485394389b2520451ef720604ea9a.tar.lz
yuzu-c8237d5c312485394389b2520451ef720604ea9a.tar.xz
yuzu-c8237d5c312485394389b2520451ef720604ea9a.tar.zst
yuzu-c8237d5c312485394389b2520451ef720604ea9a.zip
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp110
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h14
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp147
3 files changed, 223 insertions, 48 deletions
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 039dc95e1..a1af08cda 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -12,6 +12,7 @@
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "video_core/host_shaders/astc_decoder_comp_spv.h"
+#include "video_core/host_shaders/queries_prefix_scan_sum_comp_spv.h"
#include "video_core/host_shaders/resolve_conditional_render_comp_spv.h"
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
@@ -58,6 +59,30 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SE
},
}};
+constexpr std::array<VkDescriptorSetLayoutBinding, 3> QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS{{
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = 2,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+}};
+
constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
.uniform_buffers = 0,
.storage_buffers = 2,
@@ -68,6 +93,16 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
.score = 2,
};
+constexpr DescriptorBankInfo QUERIES_SCAN_BANK_INFO{
+ .uniform_buffers = 0,
+ .storage_buffers = 3,
+ .texture_buffers = 0,
+ .image_buffers = 0,
+ .textures = 0,
+ .images = 0,
+ .score = 3,
+};
+
constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{
{
.binding = ASTC_BINDING_INPUT_BUFFER,
@@ -104,6 +139,15 @@ constexpr VkDescriptorUpdateTemplateEntry INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLAT
.stride = sizeof(DescriptorUpdateEntry),
};
+constexpr VkDescriptorUpdateTemplateEntry QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE{
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 3,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .offset = 0,
+ .stride = sizeof(DescriptorUpdateEntry),
+};
+
constexpr std::array<VkDescriptorUpdateTemplateEntry, ASTC_NUM_BINDINGS>
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
{
@@ -132,6 +176,11 @@ struct AstcPushConstants {
u32 block_height;
u32 block_height_mask;
};
+
+struct QueriesPrefixScanPushConstants {
+ u32 max_accumulation_base;
+ u32 accumulation_limit;
+};
} // Anonymous namespace
ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
@@ -313,8 +362,6 @@ ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(
void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer,
u32 src_offset, bool compare_to_zero) {
- scheduler.RequestOutsideRenderPassOperationContext();
-
const size_t compare_size = compare_to_zero ? 8 : 24;
compute_pass_descriptor_queue.Acquire();
@@ -327,7 +374,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
static constexpr VkMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
- .srcAccessMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
};
static constexpr VkMemoryBarrier write_barrier{
@@ -349,6 +396,63 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
});
}
+QueriesPrefixScanPass::QueriesPrefixScanPass(
+ const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
+ : ComputePass(device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS,
+ QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO,
+ COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>,
+ QUERIES_PREFIX_SCAN_SUM_COMP_SPV),
+ scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
+
+void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer,
+ VkBuffer src_buffer, size_t number_of_sums,
+ size_t max_accumulation_limit) {
+ size_t aligned_runs = Common::AlignUp(number_of_sums, 32);
+
+ compute_pass_descriptor_queue.Acquire();
+ compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, aligned_runs * sizeof(u64));
+ compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, aligned_runs * sizeof(u64));
+ compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64));
+ const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
+
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([this, descriptor_data, max_accumulation_limit, number_of_sums,
+ aligned_runs](vk::CommandBuffer cmdbuf) {
+ static constexpr VkMemoryBarrier read_barrier{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+ };
+ static constexpr VkMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT |
+ VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
+ VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT |
+ VK_ACCESS_UNIFORM_READ_BIT |
+ VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT,
+ };
+ const QueriesPrefixScanPushConstants uniforms{
+ .max_accumulation_base = static_cast<u32>(max_accumulation_limit),
+ .accumulation_limit = static_cast<u32>(number_of_sums - 1),
+ };
+ const VkDescriptorSet set = descriptor_allocator.Commit();
+ device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
+
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
+ cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
+ cmdbuf.Dispatch(static_cast<u32>(aligned_runs / 32U), 1, 1);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+ VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier);
+ });
+}
+
ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index c62f30d30..e6ff86e9a 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -95,6 +95,20 @@ private:
ComputePassDescriptorQueue& compute_pass_descriptor_queue;
};
+class QueriesPrefixScanPass final : public ComputePass {
+public:
+ explicit QueriesPrefixScanPass(const Device& device_, Scheduler& scheduler_,
+ DescriptorPool& descriptor_pool_,
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue_);
+
+ void Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer, VkBuffer src_buffer,
+ size_t number_of_sums, size_t max_accumulation_limit);
+
+private:
+ Scheduler& scheduler;
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue;
+};
+
class ASTCDecoderPass final : public ComputePass {
public:
explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 2147776f8..ded190ae0 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -11,6 +11,7 @@
#include <utility>
#include <vector>
+#include "common/bit_util.h"
#include "common/common_types.h"
#include "core/memory.h"
#include "video_core/engines/draw_manager.h"
@@ -112,14 +113,34 @@ class SamplesStreamer : public BaseStreamer {
public:
explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_,
VideoCore::RasterizerInterface* rasterizer_, const Device& device_,
- Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
+ Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue,
+ DescriptorPool& descriptor_pool)
: BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_},
scheduler{scheduler_}, memory_allocator{memory_allocator_} {
- BuildResolveBuffer();
current_bank = nullptr;
current_query = nullptr;
ammend_value = 0;
acumulation_value = 0;
+ queries_prefix_scan_pass = std::make_unique<QueriesPrefixScanPass>(
+ device, scheduler, descriptor_pool, compute_pass_descriptor_queue);
+
+ const VkBufferCreateInfo buffer_ci = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = 8,
+ .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ };
+ accumulation_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([buffer = *accumulation_buffer](vk::CommandBuffer cmdbuf) {
+ cmdbuf.FillBuffer(buffer, 0, 8, 0);
+ });
}
~SamplesStreamer() = default;
@@ -159,6 +180,8 @@ public:
acumulation_value = 0;
});
rasterizer->SyncOperation(std::move(func));
+ accumulation_since_last_sync = false;
+ last_accumulation_checkpoint = std::min(last_accumulation_checkpoint, num_slots_used);
}
void CloseCounter() override {
@@ -175,7 +198,8 @@ public:
}
for (size_t i = 0; i < sync_values_stash.size(); i++) {
- runtime.template SyncValues<HostSyncValues>(sync_values_stash[i], *resolve_buffers[i]);
+ runtime.template SyncValues<HostSyncValues>(sync_values_stash[i],
+ *buffers[resolve_buffers[i]]);
}
sync_values_stash.clear();
@@ -189,36 +213,21 @@ public:
sync_values_stash.clear();
sync_values_stash.emplace_back();
std::vector<HostSyncValues>* sync_values = &sync_values_stash.back();
- sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE);
+ sync_values->reserve(num_slots_used);
std::unordered_map<size_t, std::pair<size_t, size_t>> offsets;
- size_t this_bank_slot = std::numeric_limits<size_t>::max();
- size_t resolve_slots_remaining = resolve_slots;
- size_t resolve_buffer_index = 0;
+ resolve_buffers.clear();
+ size_t resolve_buffer_index = ObtainBuffer<true>(num_slots_used);
+ resolve_buffers.push_back(resolve_buffer_index);
+ size_t base_offset = 0;
+
ApplyBanksWideOp<true>(pending_sync, [&](SamplesQueryBank* bank, size_t start,
size_t amount) {
size_t bank_id = bank->GetIndex();
- if (this_bank_slot != bank_id) {
- this_bank_slot = bank_id;
- if (resolve_slots_remaining == 0) {
- resolve_buffer_index++;
- if (resolve_buffer_index >= resolve_buffers.size()) {
- BuildResolveBuffer();
- }
- resolve_slots_remaining = resolve_slots;
- sync_values_stash.emplace_back();
- sync_values = &sync_values_stash.back();
- sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE);
- }
- resolve_slots_remaining--;
- }
- auto& resolve_buffer = resolve_buffers[resolve_buffer_index];
- const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE *
- (resolve_slots - resolve_slots_remaining - 1);
+ auto& resolve_buffer = buffers[resolve_buffer_index];
VkQueryPool query_pool = bank->GetInnerPool();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([start, amount, base_offset, query_pool,
buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) {
- size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE;
const VkBufferMemoryBarrier copy_query_pool_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
@@ -227,39 +236,60 @@ public:
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = buffer,
- .offset = final_offset,
+ .offset = base_offset,
.size = amount * SamplesQueryBank::QUERY_SIZE,
};
cmdbuf.CopyQueryPoolResults(
query_pool, static_cast<u32>(start), static_cast<u32>(amount), buffer,
- static_cast<u32>(final_offset), SamplesQueryBank::QUERY_SIZE,
+ static_cast<u32>(base_offset), SamplesQueryBank::QUERY_SIZE,
VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, copy_query_pool_barrier);
});
- offsets[bank_id] = {sync_values_stash.size() - 1, base_offset};
+ offsets[bank_id] = {start, base_offset};
+ base_offset += amount * SamplesQueryBank::QUERY_SIZE;
});
// Convert queries
+ bool has_multi_queries = false;
for (auto q : pending_sync) {
auto* query = GetQuery(q);
+ size_t sync_value_slot = 0;
if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) {
continue;
}
if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) {
continue;
}
- if (query->size_slots > 1) {
- // This is problematic.
- // UNIMPLEMENTED();
+ if (accumulation_since_last_sync || query->size_slots > 1) {
+ if (!has_multi_queries) {
+ has_multi_queries = true;
+ sync_values_stash.emplace_back();
+ }
+ sync_value_slot = 1;
}
query->flags |= VideoCommon::QueryFlagBits::IsHostSynced;
auto loc_data = offsets[query->start_bank_id];
- sync_values_stash[loc_data.first].emplace_back(HostSyncValues{
+ sync_values_stash[sync_value_slot].emplace_back(HostSyncValues{
.address = query->guest_address,
.size = SamplesQueryBank::QUERY_SIZE,
- .offset = loc_data.second + query->start_slot * SamplesQueryBank::QUERY_SIZE,
+ .offset =
+ loc_data.second + (query->start_slot - loc_data.first + query->size_slots - 1) *
+ SamplesQueryBank::QUERY_SIZE,
+ });
+ }
+
+ if (has_multi_queries) {
+ size_t intermediary_buffer_index = ObtainBuffer<false>(num_slots_used);
+ resolve_buffers.push_back(intermediary_buffer_index);
+ queries_prefix_scan_pass->Run(*accumulation_buffer, *buffers[intermediary_buffer_index],
+ *buffers[resolve_buffer_index], num_slots_used,
+ std::min(last_accumulation_checkpoint, num_slots_used));
+ } else {
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([buffer = *accumulation_buffer](vk::CommandBuffer cmdbuf) {
+ cmdbuf.FillBuffer(buffer, 0, 8, 0);
});
}
@@ -267,6 +297,9 @@ public:
std::function<void()> func([this] { ammend_value = acumulation_value; });
rasterizer->SyncOperation(std::move(func));
AbandonCurrentQuery();
+ num_slots_used = 0;
+ last_accumulation_checkpoint = std::numeric_limits<size_t>::max();
+ accumulation_since_last_sync = has_multi_queries;
pending_sync.clear();
}
@@ -400,6 +433,7 @@ private:
void ReserveHostQuery() {
size_t new_slot = ReserveBankSlot();
current_bank->AddReference(1);
+ num_slots_used++;
if (current_query) {
size_t bank_id = current_query->start_bank_id;
size_t banks_set = current_query->size_banks - 1;
@@ -470,32 +504,50 @@ private:
});
}
- void BuildResolveBuffer() {
+ template <bool is_resolve>
+ size_t ObtainBuffer(size_t num_needed) {
+ const size_t log_2 = std::max<size_t>(6U, Common::Log2Ceil64(num_needed));
+ if constexpr (is_resolve) {
+ if (resolve_table[log_2] != 0) {
+ return resolve_table[log_2] - 1;
+ }
+ } else {
+ if (intermediary_table[log_2] != 0) {
+ return intermediary_table[log_2] - 1;
+ }
+ }
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots,
+ .size = SamplesQueryBank::QUERY_SIZE * (1ULL << log_2),
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
- resolve_buffers.emplace_back(
- memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal));
+ buffers.emplace_back(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal));
+ if constexpr (is_resolve) {
+ resolve_table[log_2] = buffers.size();
+ } else {
+ intermediary_table[log_2] = buffers.size();
+ }
+ return buffers.size() - 1;
}
- static constexpr size_t resolve_slots = 8;
-
QueryCacheRuntime& runtime;
VideoCore::RasterizerInterface* rasterizer;
const Device& device;
Scheduler& scheduler;
const MemoryAllocator& memory_allocator;
VideoCommon::BankPool<SamplesQueryBank> bank_pool;
- std::deque<vk::Buffer> resolve_buffers;
+ std::deque<vk::Buffer> buffers;
+ std::array<size_t, 32> resolve_table{};
+ std::array<size_t, 32> intermediary_table{};
+ vk::Buffer accumulation_buffer;
std::deque<std::vector<HostSyncValues>> sync_values_stash;
+ std::vector<size_t> resolve_buffers;
// syncing queue
std::vector<size_t> pending_sync;
@@ -510,10 +562,14 @@ private:
SamplesQueryBank* current_bank;
VkQueryPool current_query_pool;
size_t current_query_id;
+ size_t num_slots_used{};
+ size_t last_accumulation_checkpoint{};
+ bool accumulation_since_last_sync{};
VideoCommon::HostQueryBase* current_query;
bool has_started{};
- bool current_unset{};
std::mutex flush_guard;
+
+ std::unique_ptr<QueriesPrefixScanPass> queries_prefix_scan_pass;
};
// Transform feedback queries
@@ -1090,7 +1146,8 @@ struct QueryCacheRuntimeImpl {
memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
guest_streamer(0, runtime),
sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
- device, scheduler, memory_allocator),
+ device, scheduler, memory_allocator, compute_pass_descriptor_queue,
+ descriptor_pool),
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
scheduler, memory_allocator, staging_pool),
primitives_succeeded_streamer(
@@ -1319,10 +1376,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
return true;
}
}
- if (!is_in_bc[0] && !is_in_bc[1]) {
+ /*if (!is_in_bc[0] && !is_in_bc[1]) {
// Both queries are in query cache, it's best to just flush.
- return false;
- }
+ return true;
+ }*/
HostConditionalRenderingCompareBCImpl(object_1.address, equal_check);
return true;
}