summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/command_classes/vic.cpp31
-rw-r--r--src/video_core/query_cache.h8
-rw-r--r--src/video_core/rasterizer_accelerated.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h17
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp3
5 files changed, 35 insertions, 26 deletions
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index dc768b952..051616124 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -32,7 +32,7 @@ enum class VideoPixelFormat : u64_le {
RGBA8 = 0x1f,
BGRA8 = 0x20,
RGBX8 = 0x23,
- Yuv420 = 0x44,
+ YUV420 = 0x44,
};
} // Anonymous namespace
@@ -88,12 +88,10 @@ void Vic::Execute() {
const u64 surface_width = config.surface_width_minus1 + 1;
const u64 surface_height = config.surface_height_minus1 + 1;
if (static_cast<u64>(frame->width) != surface_width ||
- static_cast<u64>(frame->height) > surface_height) {
+ static_cast<u64>(frame->height) != surface_height) {
// TODO: Properly support multiple video streams with differing frame dimensions
- LOG_WARNING(Debug,
- "Frame dimensions {}x{} can't be safely decoded into surface dimensions {}x{}",
+ LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}",
frame->width, frame->height, surface_width, surface_height);
- return;
}
switch (config.pixel_format) {
case VideoPixelFormat::RGBA8:
@@ -101,7 +99,7 @@ void Vic::Execute() {
case VideoPixelFormat::RGBX8:
WriteRGBFrame(frame, config);
break;
- case VideoPixelFormat::Yuv420:
+ case VideoPixelFormat::YUV420:
WriteYUVFrame(frame, config);
break;
default:
@@ -136,21 +134,20 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
scaler_height = frame->height;
converted_frame_buffer.reset();
}
- // Get Converted frame
- const u32 width = static_cast<u32>(frame->width);
- const u32 height = static_cast<u32>(frame->height);
- const std::size_t linear_size = width * height * 4;
-
- // Only allocate frame_buffer once per stream, as the size is not expected to change
if (!converted_frame_buffer) {
- converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
+ const size_t frame_size = frame->width * frame->height * 4;
+ converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free};
}
const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
-
sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr,
converted_stride.data());
+ // Use the minimum of surface/frame dimensions to avoid buffer overflow.
+ const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1;
+ const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1;
+ const u32 width = std::min(surface_width, static_cast<u32>(frame->width));
+ const u32 height = std::min(surface_height, static_cast<u32>(frame->height));
const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
if (blk_kind != 0) {
// swizzle pitch linear to block linear
@@ -158,11 +155,12 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
luma_buffer.resize(size);
Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
- converted_frame_buffer.get(), block_height, 0, 0);
+ converted_frame_buf_addr, block_height, 0, 0);
gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
} else {
// send pitch linear frame
+ const size_t linear_size = width * height * 4;
gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
linear_size);
}
@@ -173,9 +171,10 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
const std::size_t surface_width = config.surface_width_minus1 + 1;
const std::size_t surface_height = config.surface_height_minus1 + 1;
+ const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
+ // Use the minimum of surface/frame dimensions to avoid buffer overflow.
const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
- const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
const auto stride = static_cast<size_t>(frame->linesize[0]);
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 73231061a..392f82eb7 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -258,9 +258,9 @@ private:
void AsyncFlushQuery(VAddr addr) {
if (!uncommitted_flushes) {
- uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>();
+ uncommitted_flushes = std::make_shared<std::vector<VAddr>>();
}
- uncommitted_flushes->insert(addr);
+ uncommitted_flushes->push_back(addr);
}
static constexpr std::uintptr_t PAGE_SIZE = 4096;
@@ -276,8 +276,8 @@ private:
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
- std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{};
- std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes;
+ std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{};
+ std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes;
};
template <class QueryCache, class HostCounter>
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h
index ea879bfdd..249644e50 100644
--- a/src/video_core/rasterizer_accelerated.h
+++ b/src/video_core/rasterizer_accelerated.h
@@ -42,7 +42,7 @@ private:
};
static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!");
- std::array<CacheEntry, 0x1000000> cached_pages;
+ std::array<CacheEntry, 0x2000000> cached_pages;
Core::Memory::Memory& cpu_memory;
};
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 4f8688118..0886b7da8 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -21,12 +21,12 @@ public:
/// Returns the current logical tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
- return current_tick.load(std::memory_order_relaxed);
+ return current_tick.load(std::memory_order_acquire);
}
/// Returns the last known GPU tick.
[[nodiscard]] u64 KnownGpuTick() const noexcept {
- return gpu_tick.load(std::memory_order_relaxed);
+ return gpu_tick.load(std::memory_order_acquire);
}
/// Returns the timeline semaphore handle.
@@ -41,12 +41,21 @@ public:
/// Advance to the logical tick and return the old one
[[nodiscard]] u64 NextTick() noexcept {
- return current_tick.fetch_add(1, std::memory_order::relaxed);
+ return current_tick.fetch_add(1, std::memory_order_release);
}
/// Refresh the known GPU tick
void Refresh() {
- gpu_tick.store(semaphore.GetCounter(), std::memory_order_relaxed);
+ u64 this_tick{};
+ u64 counter{};
+ do {
+ this_tick = gpu_tick.load(std::memory_order_acquire);
+ counter = semaphore.GetCounter();
+ if (counter < this_tick) {
+ return;
+ }
+ } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
+ std::memory_order_relaxed));
}
/// Waits for a tick to be hit on the GPU
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index c9cb32d71..259cba156 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -117,7 +117,8 @@ u64 HostCounter::BlockingQuery() const {
cache.GetScheduler().Wait(tick);
u64 data;
const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
- query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT);
+ query.first, query.second, 1, sizeof(data), &data, sizeof(data),
+ VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
switch (query_result) {
case VK_SUCCESS: