From 2b58652f0897053d4da04deb586490220ab5a774 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Jul 2019 19:40:10 -0300 Subject: maxwell_3d: Slow implementation of passed samples (query 21) Implements GL_SAMPLES_PASSED by waiting immediately for queries. --- src/video_core/engines/maxwell_3d.cpp | 39 +++++++++++++++++++++++------------ src/video_core/engines/maxwell_3d.h | 38 ++++++++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 17 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 0b3e8749b..fe91ff6a0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -400,6 +400,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessQueryCondition(); break; } + case MAXWELL3D_REG_INDEX(counter_reset): { + ProcessCounterReset(); + break; + } case MAXWELL3D_REG_INDEX(sync_info): { ProcessSyncPoint(); break; @@ -544,23 +548,23 @@ void Maxwell3D::ProcessQueryGet() { "Units other than CROP are unimplemented"); switch (regs.query.query_get.operation) { - case Regs::QueryOperation::Release: { - const u64 result = regs.query.query_sequence; - StampQueryResult(result, regs.query.query_get.short_query == 0); + case Regs::QueryOperation::Release: + StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); break; - } - case Regs::QueryOperation::Acquire: { - // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU - // to write a value that matches the current payload. + case Regs::QueryOperation::Acquire: + // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that + // matches the current payload. 
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); break; - } case Regs::QueryOperation::Counter: { - u64 result{}; + u64 result; switch (regs.query.query_get.select) { case Regs::QuerySelect::Zero: result = 0; break; + case Regs::QuerySelect::SamplesPassed: + result = rasterizer.Query(VideoCore::QueryType::SamplesPassed); + break; default: result = 1; UNIMPLEMENTED_MSG("Unimplemented query select type {}", @@ -569,15 +573,13 @@ void Maxwell3D::ProcessQueryGet() { StampQueryResult(result, regs.query.query_get.short_query == 0); break; } - case Regs::QueryOperation::Trap: { + case Regs::QueryOperation::Trap: UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); break; - } - default: { + default: UNIMPLEMENTED_MSG("Unknown query operation"); break; } - } } void Maxwell3D::ProcessQueryCondition() { @@ -619,6 +621,17 @@ void Maxwell3D::ProcessQueryCondition() { } } +void Maxwell3D::ProcessCounterReset() { + switch (regs.counter_reset) { + case Regs::CounterReset::SampleCnt: + rasterizer.ResetCounter(VideoCore::QueryType::SamplesPassed); + break; + default: + UNIMPLEMENTED_MSG("counter_reset={}", static_cast(regs.counter_reset)); + break; + } +} + void Maxwell3D::ProcessSyncPoint() { const u32 sync_point = regs.sync_info.sync_point.Value(); const u32 increment = regs.sync_info.increment.Value(); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0a2af54e5..d21f678ed 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -409,6 +409,27 @@ public: Linear = 1, }; + enum class CounterReset : u32 { + SampleCnt = 0x01, + Unk02 = 0x02, + Unk03 = 0x03, + Unk04 = 0x04, + EmittedPrimitives = 0x10, // Not tested + Unk11 = 0x11, + Unk12 = 0x12, + Unk13 = 0x13, + Unk15 = 0x15, + Unk16 = 0x16, + Unk17 = 0x17, + Unk18 = 0x18, + Unk1A = 0x1A, + Unk1B = 0x1B, + Unk1C = 0x1C, + Unk1D = 0x1D, + Unk1E = 0x1E, + GeneratedPrimitives = 0x1F, + }; + struct Cull { enum class FrontFace : u32 { ClockWise 
= 0x0900, @@ -857,7 +878,7 @@ public: BitField<7, 1, u32> c7; } clip_distance_enabled; - INSERT_UNION_PADDING_WORDS(0x1); + u32 samplecnt_enable; float point_size; @@ -865,7 +886,11 @@ public: u32 point_sprite_enable; - INSERT_UNION_PADDING_WORDS(0x5); + INSERT_UNION_PADDING_WORDS(0x3); + + CounterReset counter_reset; + + INSERT_UNION_PADDING_WORDS(0x1); u32 zeta_enable; @@ -1412,12 +1437,15 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); - // Writes the query result accordingly + /// Writes the query result accordingly. void StampQueryResult(u64 payload, bool long_query); - // Handles Conditional Rendering + /// Handles conditional rendering. void ProcessQueryCondition(); + /// Handles counter resets. + void ProcessCounterReset(); + /// Handles writes to syncing register. void ProcessSyncPoint(); @@ -1499,8 +1527,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); +ASSERT_REG_POSITION(samplecnt_enable, 0x545); ASSERT_REG_POSITION(point_size, 0x546); ASSERT_REG_POSITION(point_sprite_enable, 0x548); +ASSERT_REG_POSITION(counter_reset, 0x54C); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); -- cgit v1.2.3 From aae8c180cbbf91ba12f53c37e81a97d4b3cc4ccd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 26 Nov 2019 18:52:15 -0300 Subject: gl_query_cache: Implement host queries using a deferred cache Instead of waiting immediately for executed commands, defer the query until the guest CPU reads it. This way we get closer to what the guest program is doing. To achieve this we have to build a dependency queue, because host APIs (like OpenGL and Vulkan) use ranged queries instead of counters like NVN.
Waiting for queries implicitly uses fences and this requires a command being queued, otherwise the driver will lock waiting until a timeout. To fix this when there are no commands queued, we explicitly call glFlush. --- src/video_core/engines/maxwell_3d.cpp | 41 ++++++++++++++++++++--------------- src/video_core/engines/maxwell_3d.h | 4 ++++ 2 files changed, 27 insertions(+), 18 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index fe91ff6a0..9add2bc94 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -556,23 +556,13 @@ void Maxwell3D::ProcessQueryGet() { // matches the current payload. UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); break; - case Regs::QueryOperation::Counter: { - u64 result; - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - result = 0; - break; - case Regs::QuerySelect::SamplesPassed: - result = rasterizer.Query(VideoCore::QueryType::SamplesPassed); - break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast(regs.query.query_get.select.Value())); + case Regs::QueryOperation::Counter: + if (const std::optional result = GetQueryResult()) { + // If the query returns an empty optional it means it's cached and deferred. + // In this case we have a non-empty result, so we stamp it immediately. 
+ StampQueryResult(*result, regs.query.query_get.short_query == 0); } - StampQueryResult(result, regs.query.query_get.short_query == 0); break; - } case Regs::QueryOperation::Trap: UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); break; @@ -595,20 +585,20 @@ void Maxwell3D::ProcessQueryCondition() { } case Regs::ConditionMode::ResNonZero: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; break; } case Regs::ConditionMode::Equal: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; break; } case Regs::ConditionMode::NotEqual: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; break; @@ -674,6 +664,21 @@ void Maxwell3D::DrawArrays() { } } +std::optional Maxwell3D::GetQueryResult() { + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + return 0; + case Regs::QuerySelect::SamplesPassed: + // Deferred. + rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed); + return {}; + default: + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast(regs.query.query_get.select.Value())); + return 1; + } +} + void Maxwell3D::ProcessCBBind(std::size_t stage_index) { // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. 
auto& shader = state.shader_stages[stage_index]; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d21f678ed..26939be3f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -1462,6 +1463,9 @@ private: // Handles a instance drawcall from MME void StepInstance(MMEDrawMode expected_mode, u32 count); + + /// Returns a query's value or an empty object if the value will be deferred through a cache. + std::optional GetQueryResult(); }; #define ASSERT_REG_POSITION(field_name, position) \ -- cgit v1.2.3 From 73d2d3342dc8867d32f08f89b2ca36ff071598dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 28 Nov 2019 02:15:34 -0300 Subject: gl_query_cache: Optimize query cache Use a custom cache instead of relying on a ranged cache. --- src/video_core/engines/maxwell_3d.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9add2bc94..842cdcbcf 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,6 +4,7 @@ #include #include +#include #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" @@ -16,6 +17,8 @@ namespace Tegra::Engines { +using VideoCore::QueryType; + /// First register id that is actually a Macro call. 
constexpr u32 MacroRegistersStart = 0xE00; @@ -614,10 +617,11 @@ void Maxwell3D::ProcessQueryCondition() { void Maxwell3D::ProcessCounterReset() { switch (regs.counter_reset) { case Regs::CounterReset::SampleCnt: - rasterizer.ResetCounter(VideoCore::QueryType::SamplesPassed); + rasterizer.ResetCounter(QueryType::SamplesPassed); break; default: - UNIMPLEMENTED_MSG("counter_reset={}", static_cast(regs.counter_reset)); + LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", + static_cast(regs.counter_reset)); break; } } @@ -670,7 +674,8 @@ std::optional Maxwell3D::GetQueryResult() { return 0; case Regs::QuerySelect::SamplesPassed: // Deferred. - rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed); + rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + system.GPU().GetTicks()); return {}; default: UNIMPLEMENTED_MSG("Unimplemented query select type {}", -- cgit v1.2.3