diff options
Diffstat (limited to '')
22 files changed, 385 insertions, 376 deletions
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 0ddf9b83e..87b3d63a4 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -37,12 +37,12 @@ constexpr Xbyak::Reg IndexToReg(size_t reg_index) { } } -inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { - std::bitset<32> bits; +constexpr std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { + size_t bits = 0; for (const Xbyak::Reg& reg : regs) { - bits[RegToIndex(reg)] = true; + bits |= size_t{1} << RegToIndex(reg); } - return bits; + return {bits}; } constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF); @@ -57,7 +57,7 @@ constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; -const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ +constexpr inline std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ // GPRs Xbyak::util::rcx, Xbyak::util::rdx, @@ -74,7 +74,7 @@ const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ Xbyak::util::xmm5, }); -const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ +constexpr inline std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ // GPRs Xbyak::util::rbx, Xbyak::util::rsi, @@ -108,7 +108,7 @@ constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; -const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ +constexpr inline std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ // GPRs Xbyak::util::rcx, Xbyak::util::rdx, @@ -137,7 +137,7 @@ const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ Xbyak::util::xmm15, }); -const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ +constexpr inline std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ // GPRs Xbyak::util::rbx, Xbyak::util::rbp, diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp index d12037b11..a7cdf45e6 100644 --- a/src/core/hid/emulated_controller.cpp +++ b/src/core/hid/emulated_controller.cpp @@ -749,6 +749,7 @@ void EmulatedController::SetMotion(const Common::Input::CallbackStatus& callback raw_status.gyro.y.value, raw_status.gyro.z.value, }); + emulated.SetGyroThreshold(raw_status.gyro.x.properties.threshold); emulated.UpdateRotation(raw_status.delta_timestamp); emulated.UpdateOrientation(raw_status.delta_timestamp); force_update_motion = raw_status.force_update; diff --git a/src/core/hid/motion_input.cpp b/src/core/hid/motion_input.cpp index 6e126be19..05042fd99 100644 --- a/src/core/hid/motion_input.cpp +++ b/src/core/hid/motion_input.cpp @@ -10,7 +10,7 @@ namespace Core::HID { MotionInput::MotionInput() { // Initialize PID constants with default values SetPID(0.3f, 0.005f, 0.0f); - SetGyroThreshold(0.00005f); + SetGyroThreshold(0.007f); } void MotionInput::SetPID(f32 new_kp, f32 new_ki, f32 new_kd) { @@ -31,7 +31,7 @@ void MotionInput::SetGyroscope(const Common::Vec3f& gyroscope) { gyro_bias = (gyro_bias * 0.9999f) + (gyroscope * 0.0001f); } - if (gyro.Length2() < gyro_threshold) { + if (gyro.Length() < gyro_threshold) { gyro = {}; } else { only_accelerometer = false; diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp index b650ea31d..2ebbc0819 100644 --- a/src/core/hle/kernel/k_page_table.cpp +++ b/src/core/hle/kernel/k_page_table.cpp @@ -276,22 +276,23 @@ ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemoryState state, KMemoryPermission perm) { - std::lock_guard lock{page_table_lock}; - const u64 size{num_pages * PageSize}; - if (!CanContain(addr, size, state)) { - return ResultInvalidCurrentMemory; - } + // Validate the mapping request. + R_UNLESS(this->CanContain(addr, size, state), ResultInvalidCurrentMemory); - if (IsRegionMapped(addr, size)) { - return ResultInvalidCurrentMemory; - } + // Lock the table. + std::lock_guard lock{page_table_lock}; + + // Verify that the destination memory is unmapped. + R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free, + KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::None, KMemoryAttribute::None)); KPageLinkedList page_linked_list; - CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool, - allocation_option)); - CASCADE_CODE(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup)); + R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool, + allocation_option)); + R_TRY(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup)); block_manager->Update(addr, num_pages, state, perm); @@ -395,39 +396,12 @@ ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size, return ResultSuccess; } -void KPageTable::MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end) { - auto node{page_linked_list.Nodes().begin()}; - PAddr map_addr{node->GetAddress()}; - std::size_t src_num_pages{node->GetNumPages()}; - - block_manager->IterateForRange(start, end, [&](const KMemoryInfo& info) { - if (info.state != KMemoryState::Free) { - return; - } - - std::size_t dst_num_pages{GetSizeInRange(info, start, end) / PageSize}; - VAddr dst_addr{GetAddressInRange(info, start)}; - - while (dst_num_pages) { - if (!src_num_pages) { - node = std::next(node); - map_addr = node->GetAddress(); - src_num_pages = node->GetNumPages(); - } - - const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)}; - Operate(dst_addr, num_pages, KMemoryPermission::UserReadWrite, OperationType::Map, - map_addr); - - dst_addr += num_pages * PageSize; - map_addr += num_pages * PageSize; - src_num_pages -= num_pages; - dst_num_pages -= num_pages; - } - }); -} ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { + // Lock the physical memory lock. + std::lock_guard phys_lk(map_physical_memory_lock); + + // Lock the table. std::lock_guard lock{page_table_lock}; std::size_t mapped_size{}; @@ -463,7 +437,35 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { // We succeeded, so commit the memory reservation. memory_reservation.Commit(); - MapPhysicalMemory(page_linked_list, addr, end_addr); + // Map the memory. + auto node{page_linked_list.Nodes().begin()}; + PAddr map_addr{node->GetAddress()}; + std::size_t src_num_pages{node->GetNumPages()}; + block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) { + if (info.state != KMemoryState::Free) { + return; + } + + std::size_t dst_num_pages{GetSizeInRange(info, addr, end_addr) / PageSize}; + VAddr dst_addr{GetAddressInRange(info, addr)}; + + while (dst_num_pages) { + if (!src_num_pages) { + node = std::next(node); + map_addr = node->GetAddress(); + src_num_pages = node->GetNumPages(); + } + + const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)}; + Operate(dst_addr, num_pages, KMemoryPermission::UserReadWrite, OperationType::Map, + map_addr); + + dst_addr += num_pages * PageSize; + map_addr += num_pages * PageSize; + src_num_pages -= num_pages; + dst_num_pages -= num_pages; + } + }); mapped_physical_memory_size += remaining_size; @@ -503,23 +505,8 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) { return ResultSuccess; } - CASCADE_CODE(UnmapMemory(addr, size)); - - auto process{system.Kernel().CurrentProcess()}; - process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size); - mapped_physical_memory_size -= mapped_size; - - return ResultSuccess; -} - -ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; - - const VAddr end_addr{addr + size}; - ResultCode result{ResultSuccess}; - KPageLinkedList page_linked_list; - // Unmap each region within the range + KPageLinkedList page_linked_list; block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) { if (info.state == KMemoryState::Normal) { const std::size_t block_size{GetSizeInRange(info, addr, end_addr)}; @@ -535,7 +522,6 @@ ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) { } } }); - if (result.IsError()) { return result; } @@ -546,10 +532,14 @@ ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) { block_manager->Update(addr, num_pages, KMemoryState::Free); + auto process{system.Kernel().CurrentProcess()}; + process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size); + mapped_physical_memory_size -= mapped_size; + return ResultSuccess; } -ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) { +ResultCode KPageTable::MapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { std::lock_guard lock{page_table_lock}; KMemoryState src_state{}; @@ -588,7 +578,7 @@ ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) { return ResultSuccess; } -ResultCode KPageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) { +ResultCode KPageTable::UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { std::lock_guard lock{page_table_lock}; KMemoryState src_state{}; @@ -652,24 +642,26 @@ ResultCode KPageTable::MapPages(VAddr addr, const KPageLinkedList& page_linked_l return ResultSuccess; } -ResultCode KPageTable::MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state, - KMemoryPermission perm) { - std::lock_guard lock{page_table_lock}; - +ResultCode KPageTable::MapPages(VAddr address, KPageLinkedList& page_linked_list, + KMemoryState state, KMemoryPermission perm) { + // Check that the map is in range. const std::size_t num_pages{page_linked_list.GetNumPages()}; const std::size_t size{num_pages * PageSize}; + R_UNLESS(this->CanContain(address, size, state), ResultInvalidCurrentMemory); - if (!CanContain(addr, size, state)) { - return ResultInvalidCurrentMemory; - } + // Lock the table. + std::lock_guard lock{page_table_lock}; - if (IsRegionMapped(addr, num_pages * PageSize)) { - return ResultInvalidCurrentMemory; - } + // Check the memory state. + R_TRY(this->CheckMemoryState(address, size, KMemoryState::All, KMemoryState::Free, + KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::None, KMemoryAttribute::None)); - CASCADE_CODE(MapPages(addr, page_linked_list, perm)); + // Map the pages. + R_TRY(MapPages(address, page_linked_list, perm)); - block_manager->Update(addr, num_pages, state, perm); + // Update the blocks. + block_manager->Update(address, num_pages, state, perm); return ResultSuccess; } @@ -693,21 +685,23 @@ ResultCode KPageTable::UnmapPages(VAddr addr, const KPageLinkedList& page_linked ResultCode KPageTable::UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state) { - std::lock_guard lock{page_table_lock}; - + // Check that the unmap is in range. const std::size_t num_pages{page_linked_list.GetNumPages()}; const std::size_t size{num_pages * PageSize}; + R_UNLESS(this->Contains(addr, size), ResultInvalidCurrentMemory); - if (!CanContain(addr, size, state)) { - return ResultInvalidCurrentMemory; - } + // Lock the table. + std::lock_guard lock{page_table_lock}; - if (IsRegionMapped(addr, num_pages * PageSize)) { - return ResultInvalidCurrentMemory; - } + // Check the memory state. + R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, state, KMemoryPermission::None, + KMemoryPermission::None, KMemoryAttribute::All, + KMemoryAttribute::None)); - CASCADE_CODE(UnmapPages(addr, page_linked_list)); + // Perform the unmap. + R_TRY(UnmapPages(addr, page_linked_list)); + // Update the blocks. block_manager->Update(addr, num_pages, state, KMemoryPermission::None); return ResultSuccess; @@ -765,7 +759,6 @@ ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size, // Ensure cache coherency, if we're setting pages as executable. if (is_x) { - // Memory execution state is changing, invalidate CPU cache range system.InvalidateCpuInstructionCacheRange(addr, size); } @@ -793,12 +786,12 @@ ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemo KMemoryState state{}; KMemoryAttribute attribute{}; - CASCADE_CODE(CheckMemoryState( - &state, nullptr, &attribute, nullptr, addr, size, - KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, - KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::All, - KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask, KMemoryAttribute::None, - KMemoryAttribute::IpcAndDeviceMapped)); + R_TRY(CheckMemoryState(&state, nullptr, &attribute, nullptr, addr, size, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryPermission::All, KMemoryPermission::UserReadWrite, + KMemoryAttribute::Mask, KMemoryAttribute::None, + KMemoryAttribute::IpcAndDeviceMapped)); block_manager->Update(addr, size / PageSize, state, perm, attribute | KMemoryAttribute::Locked); @@ -810,12 +803,11 @@ ResultCode KPageTable::ResetTransferMemory(VAddr addr, std::size_t size) { KMemoryState state{}; - CASCADE_CODE( - CheckMemoryState(&state, nullptr, nullptr, nullptr, addr, size, - KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, - KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, - KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::Mask, - KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped)); + R_TRY(CheckMemoryState(&state, nullptr, nullptr, nullptr, addr, size, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::Mask, + KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped)); block_manager->Update(addr, size / PageSize, state, KMemoryPermission::UserReadWrite); return ResultSuccess; @@ -871,8 +863,9 @@ ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, u32 mask AttributeTestMask, KMemoryAttribute::None, ~AttributeTestMask)); // Determine the new attribute. - const auto new_attr = ((old_attr & static_cast<KMemoryAttribute>(~mask)) | - static_cast<KMemoryAttribute>(attr & mask)); + const KMemoryAttribute new_attr = + static_cast<KMemoryAttribute>(((old_attr & static_cast<KMemoryAttribute>(~mask)) | + static_cast<KMemoryAttribute>(attr & mask))); // Perform operation. this->Operate(addr, num_pages, old_perm, OperationType::ChangePermissionsAndRefresh); @@ -896,6 +889,9 @@ ResultCode KPageTable::SetMaxHeapSize(std::size_t size) { } ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) { + // Lock the physical memory lock. + std::lock_guard phys_lk(map_physical_memory_lock); + // Try to perform a reduction in heap, instead of an extension. VAddr cur_address{}; std::size_t allocation_size{}; @@ -1025,12 +1021,12 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, } if (is_map_only) { - CASCADE_CODE(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); + R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); } else { KPageLinkedList page_group; - CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, - memory_pool, allocation_option)); - CASCADE_CODE(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); + R_TRY(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool, + allocation_option)); + R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); } block_manager->Update(addr, needed_num_pages, state, perm); @@ -1186,7 +1182,7 @@ VAddr KPageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_page ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLinkedList& page_group, OperationType operation) { - std::lock_guard lock{page_table_lock}; + ASSERT(this->IsLockedByCurrentThread()); ASSERT(Common::IsAligned(addr, PageSize)); ASSERT(num_pages > 0); @@ -1211,7 +1207,7 @@ ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLin ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm, OperationType operation, PAddr map_addr) { - std::lock_guard lock{page_table_lock}; + ASSERT(this->IsLockedByCurrentThread()); ASSERT(num_pages > 0); ASSERT(Common::IsAligned(addr, PageSize)); diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h index f67986e91..60ae9b9e8 100644 --- a/src/core/hle/kernel/k_page_table.h +++ b/src/core/hle/kernel/k_page_table.h @@ -37,9 +37,8 @@ public: VAddr src_addr); ResultCode MapPhysicalMemory(VAddr addr, std::size_t size); ResultCode UnmapPhysicalMemory(VAddr addr, std::size_t size); - ResultCode UnmapMemory(VAddr addr, std::size_t size); - ResultCode Map(VAddr dst_addr, VAddr src_addr, std::size_t size); - ResultCode Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size); + ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); + ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state, KMemoryPermission perm); ResultCode UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state); @@ -88,7 +87,6 @@ private: ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list, KMemoryPermission perm); ResultCode UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list); - void MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end); bool IsRegionMapped(VAddr address, u64 size); bool IsRegionContiguous(VAddr addr, u64 size) const; void AddRegionToPages(VAddr start, std::size_t num_pages, KPageLinkedList& page_linked_list); @@ -148,6 +146,7 @@ private: } std::recursive_mutex page_table_lock; + std::mutex map_physical_memory_lock; std::unique_ptr<KMemoryBlockManager> block_manager; public: @@ -249,7 +248,9 @@ public: return !IsOutsideASLRRegion(address, size); } constexpr PAddr GetPhysicalAddr(VAddr addr) { - return page_table_impl.backing_addr[addr >> PageBits] + addr; + const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits]; + ASSERT(backing_addr); + return backing_addr + addr; } constexpr bool Contains(VAddr addr) const { return address_space_start <= addr && addr <= address_space_end - 1; diff --git a/src/core/hle/kernel/k_priority_queue.h b/src/core/hle/kernel/k_priority_queue.h index 0b894c8cf..bd779739d 100644 --- a/src/core/hle/kernel/k_priority_queue.h +++ b/src/core/hle/kernel/k_priority_queue.h @@ -258,7 +258,7 @@ private: private: constexpr void ClearAffinityBit(u64& affinity, s32 core) { - affinity &= ~(u64(1) << core); + affinity &= ~(UINT64_C(1) << core); } constexpr s32 GetNextCore(u64& affinity) { diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index b32d4f285..c96520828 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -710,23 +710,19 @@ void KScheduler::Unload(KThread* thread) { } void KScheduler::Reload(KThread* thread) { - LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread ? thread->GetName() : "nullptr"); + LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread->GetName()); - if (thread) { - ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable."); - - Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); - cpu_core.LoadContext(thread->GetContext32()); - cpu_core.LoadContext(thread->GetContext64()); - cpu_core.SetTlsAddress(thread->GetTLSAddress()); - cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0()); - cpu_core.ClearExclusiveState(); - } + Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); + cpu_core.LoadContext(thread->GetContext32()); + cpu_core.LoadContext(thread->GetContext64()); + cpu_core.SetTlsAddress(thread->GetTLSAddress()); + cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0()); + cpu_core.ClearExclusiveState(); } void KScheduler::SwitchContextStep2() { // Load context of new thread - Reload(current_thread.load()); + Reload(GetCurrentThread()); RescheduleCurrentCore(); } @@ -735,13 +731,17 @@ void KScheduler::ScheduleImpl() { KThread* previous_thread = GetCurrentThread(); KThread* next_thread = state.highest_priority_thread; - state.needs_scheduling = false; + state.needs_scheduling.store(false); // We never want to schedule a null thread, so use the idle thread if we don't have a next. if (next_thread == nullptr) { next_thread = idle_thread; } + if (next_thread->GetCurrentCore() != core_id) { + next_thread->SetCurrentCore(core_id); + } + // We never want to schedule a dummy thread, as these are only used by host threads for locking. if (next_thread->GetThreadType() == ThreadType::Dummy) { ASSERT_MSG(false, "Dummy threads should never be scheduled!"); @@ -755,14 +755,8 @@ void KScheduler::ScheduleImpl() { return; } - if (next_thread->GetCurrentCore() != core_id) { - next_thread->SetCurrentCore(core_id); - } - - current_thread.store(next_thread); - + // Update the CPU time tracking variables. KProcess* const previous_process = system.Kernel().CurrentProcess(); - UpdateLastContextSwitchTime(previous_thread, previous_process); // Save context for previous thread @@ -770,6 +764,10 @@ void KScheduler::ScheduleImpl() { std::shared_ptr<Common::Fiber>* old_context; old_context = &previous_thread->GetHostContext(); + + // Set the new thread. + current_thread.store(next_thread); + guard.Unlock(); Common::Fiber::YieldTo(*old_context, *switch_fiber); @@ -797,8 +795,8 @@ void KScheduler::SwitchToCurrent() { do { auto next_thread = current_thread.load(); if (next_thread != nullptr) { - next_thread->context_guard.Lock(); - if (next_thread->GetRawState() != ThreadState::Runnable) { + const auto locked = next_thread->context_guard.TryLock(); + if (state.needs_scheduling.load()) { next_thread->context_guard.Unlock(); break; } @@ -806,6 +804,9 @@ void KScheduler::SwitchToCurrent() { next_thread->context_guard.Unlock(); break; } + if (!locked) { + continue; + } } auto thread = next_thread ? next_thread : idle_thread; Common::Fiber::YieldTo(switch_fiber, *thread->GetHostContext()); diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index c7f5140f4..40bb893ac 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -230,7 +230,7 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr return result; } - return page_table.Map(dst_addr, src_addr, size); + return page_table.MapMemory(dst_addr, src_addr, size); } static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { @@ -249,7 +249,7 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad return result; } - return page_table.Unmap(dst_addr, src_addr, size); + return page_table.UnmapMemory(dst_addr, src_addr, size); } static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { diff --git a/src/input_common/input_poller.cpp b/src/input_common/input_poller.cpp index 7b370335f..2f3c0735a 100644 --- a/src/input_common/input_poller.cpp +++ b/src/input_common/input_poller.cpp @@ -504,9 +504,10 @@ private: class InputFromMotion final : public Common::Input::InputDevice { public: - explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_, + explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_, float gyro_threshold_, InputEngine* input_engine_) - : identifier(identifier_), motion_sensor(motion_sensor_), input_engine(input_engine_) { + : identifier(identifier_), motion_sensor(motion_sensor_), gyro_threshold(gyro_threshold_), + input_engine(input_engine_) { UpdateCallback engine_callback{[this]() { OnChange(); }}; const InputIdentifier input_identifier{ .identifier = identifier, @@ -525,8 +526,9 @@ public: const auto basic_motion = input_engine->GetMotion(identifier, motion_sensor); Common::Input::MotionStatus status{}; const Common::Input::AnalogProperties properties = { - .deadzone = 0.001f, + .deadzone = 0.0f, .range = 1.0f, + .threshold = gyro_threshold, .offset = 0.0f, }; status.accel.x = {.raw_value = basic_motion.accel_x, .properties = properties}; @@ -551,6 +553,7 @@ public: private: const PadIdentifier identifier; const int motion_sensor; + const float gyro_threshold; int callback_key; InputEngine* input_engine; }; @@ -873,9 +876,11 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateMotionDevice( if (params.Has("motion")) { const auto motion_sensor = params.Get("motion", 0); + const auto gyro_threshold = params.Get("threshold", 0.007f); input_engine->PreSetController(identifier); input_engine->PreSetMotion(identifier, motion_sensor); - return std::make_unique<InputFromMotion>(identifier, motion_sensor, input_engine.get()); + return std::make_unique<InputFromMotion>(identifier, motion_sensor, gyro_threshold, + input_engine.get()); } const auto deadzone = std::clamp(params.Get("deadzone", 0.15f), 0.0f, 1.0f); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 50918317f..08b3a81ce 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -387,6 +387,14 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr } } +void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { + if (ctx.runtime_info.xfb_varyings.empty()) { + return; + } + ctx.AddCapability(spv::Capability::TransformFeedback); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::Xfb); +} + void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { if (info.uses_sampled_1d) { ctx.AddCapability(spv::Capability::Sampled1D); @@ -442,9 +450,6 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_sample_id) { ctx.AddCapability(spv::Capability::SampleRateShading); } - if (!ctx.runtime_info.xfb_varyings.empty()) { - ctx.AddCapability(spv::Capability::TransformFeedback); - } if (info.uses_derivatives) { ctx.AddCapability(spv::Capability::DerivativeControl); } @@ -484,6 +489,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in SetupSignedNanCapabilities(profile, program, ctx, main); } SetupCapabilities(profile, program.info, ctx); + SetupTransformFeedbackCapabilities(ctx, main); PatchPhiNodes(program, ctx); return ctx.Assemble(); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp index 78869601f..4851b0b8d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp @@ -57,16 +57,6 @@ void TranslatorVisitor::VMNMX(u64 insn) { if (vmnmx.sat != 0) { throw NotImplementedException("VMNMX SAT"); } - // Selectors were shown to default to 2 in unit tests - if (vmnmx.src_a_selector != 2) { - throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value()); - } - if (vmnmx.src_b_selector != 2) { - throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value()); - } - if (vmnmx.src_a_width != VideoWidth::Word) { - throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value()); - } const bool is_b_imm{vmnmx.is_src_b_reg == 0}; const IR::U32 src_a{GetReg8(insn)}; @@ -76,10 +66,14 @@ void TranslatorVisitor::VMNMX(u64 insn) { const VideoWidth a_width{vmnmx.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)}; + const u32 a_selector{static_cast<u32>(vmnmx.src_a_selector)}; + // Immediate values can't have a selector + const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmnmx.src_b_selector)}; + const bool src_a_signed{vmnmx.src_a_sign != 0}; const bool src_b_signed{vmnmx.src_b_sign != 0}; - const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)}; - const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; // First operation's sign is only dependent on operand b's sign const bool op_1_signed{src_b_signed}; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 43bed63ac..048dba4f3 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1474,6 +1474,8 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu // When this memory region has been joined a bunch of times, we assume it's being used // as a stream buffer. Increase the size to skip constantly recreating buffers. has_stream_leap = true; + begin -= PAGE_SIZE * 256; + cpu_addr = begin; end += PAGE_SIZE * 256; } } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b18b8a02a..c38ebd670 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -240,7 +240,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); // Execute the current macro. - macro_engine->Execute(*this, macro_positions[entry], parameters); + macro_engine->Execute(macro_positions[entry], parameters); if (mme_draw.current_mode != MMEDrawMode::Undefined) { FlushMMEInlineDraw(); } diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index d7fabe605..0aeda4ce8 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,12 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cstring> #include <optional> + #include <boost/container_hash/hash.hpp> + #include "common/assert.h" -#include "common/logging/log.h" #include "common/settings.h" -#include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" #include "video_core/macro/macro_interpreter.h" @@ -24,8 +25,7 @@ void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); } -void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, - const std::vector<u32>& parameters) { +void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { auto compiled_macro = macro_cache.find(method); if (compiled_macro != macro_cache.end()) { const auto& cache_info = compiled_macro->second; @@ -66,10 +66,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, cache_info.lle_program = Compile(code); } - auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); - if (hle_program.has_value()) { + if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { cache_info.has_hle_program = true; - cache_info.hle_program = std::move(hle_program.value()); + cache_info.hle_program = std::move(hle_program); cache_info.hle_program->Execute(parameters, method); } else { cache_info.lle_program->Execute(parameters, method); diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 31ee3440a..7aaa49286 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -119,7 +119,7 @@ public: void AddCode(u32 method, u32 data); // Compiles the macro if its not in the cache, and executes the compiled macro - void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); + void Execute(u32 method, const std::vector<u32>& parameters); protected: virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 70ac7c620..900ad23c9 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -5,12 +5,15 @@ #include <array> #include <vector> #include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" #include "video_core/rasterizer_interface.h" namespace Tegra { - namespace { + +using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); + // HLE'd functions void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); @@ -77,7 +80,6 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.CallMethodFromMME(0x8e5, 0x0); maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } -} // Anonymous namespace constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, @@ -85,25 +87,31 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ {0x0217920100488FF7, &HLE_0217920100488FF7}, }}; +class HLEMacroImpl final : public CachedMacro { +public: + explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) + : maxwell3d{maxwell3d_}, func{func_} {} + + void Execute(const std::vector<u32>& parameters, u32 method) override { + func(maxwell3d, parameters); + } + +private: + Engines::Maxwell3D& maxwell3d; + HLEFunction func; +}; +} // Anonymous namespace + HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} HLEMacro::~HLEMacro() = default; -std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { +std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const { const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), [hash](const auto& pair) { return pair.first == hash; }); if (it == hle_funcs.end()) { - return std::nullopt; + return nullptr; } return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); } -HLEMacroImpl::~HLEMacroImpl() = default; - -HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) - : maxwell3d{maxwell3d_}, func{func_} {} - -void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { - func(maxwell3d, parameters); -} - } // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h index cb3bd1600..b86ba84a1 100644 --- a/src/video_core/macro/macro_hle.h +++ b/src/video_core/macro/macro_hle.h @@ -5,10 +5,7 @@ #pragma once #include <memory> -#include <optional> -#include <vector> #include "common/common_types.h" -#include "video_core/macro/macro.h" namespace Tegra { @@ -16,29 +13,17 @@ namespace Engines { class Maxwell3D; } -using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); - class HLEMacro { public: explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); ~HLEMacro(); - std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; - -private: - Engines::Maxwell3D& maxwell3d; -}; - -class HLEMacroImpl : public CachedMacro { -public: - explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); - ~HLEMacroImpl(); - - void Execute(const std::vector<u32>& parameters, u32 method) override; + // Allocates and returns a cached macro if the hash matches a known function. + // Returns nullptr otherwise. + [[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const; private: Engines::Maxwell3D& maxwell3d; - HLEFunction func; }; } // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 8da26fd59..fba755448 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -2,6 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> +#include <optional> + #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" @@ -11,16 +14,81 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} +namespace { +class MacroInterpreterImpl final : public CachedMacro { +public: + explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) + : maxwell3d{maxwell3d_}, code{code_} {} -std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { - return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); -} + void Execute(const std::vector<u32>& params, u32 method) override; + +private: + /// Resets the execution engine state, zeroing registers, etc. + void Reset(); + + /** + * Executes a single macro instruction located at the current program counter. Returns whether + * the interpreter should keep running. + * + * @param is_delay_slot Whether the current step is being executed due to a delay slot in a + * previous instruction. + */ + bool Step(bool is_delay_slot); + + /// Calculates the result of an ALU operation. src_a OP src_b; + u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); + + /// Performs the result operation on the input result and stores it in the specified register + /// (if necessary). + void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); + + /// Evaluates the branch condition and returns whether the branch should be taken or not. + bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; + + /// Reads an opcode at the current program counter location. + Macro::Opcode GetOpcode() const; + + /// Returns the specified register's value. Register 0 is hardcoded to always return 0. + u32 GetRegister(u32 register_id) const; + + /// Sets the register to the input value. + void SetRegister(u32 register_id, u32 value); + + /// Sets the method address to use for the next Send instruction. + void SetMethodAddress(u32 address); -MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, - const std::vector<u32>& code_) - : maxwell3d{maxwell3d_}, code{code_} {} + /// Calls a GPU Engine method with the input parameter. + void Send(u32 value); + + /// Reads a GPU register located at the method address. + u32 Read(u32 method) const; + + /// Returns the next parameter in the parameter queue. + u32 FetchParameter(); + + Engines::Maxwell3D& maxwell3d; + + /// Current program counter + u32 pc{}; + /// Program counter to execute at after the delay slot is executed. + std::optional<u32> delayed_pc; + + /// General purpose macro registers. + std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; + + /// Method address to use for the next Send instruction. + Macro::MethodAddress method_address = {}; + + /// Input parameters of the current macro. + std::unique_ptr<u32[]> parameters; + std::size_t num_parameters = 0; + std::size_t parameters_capacity = 0; + /// Index of the next parameter that will be fetched by the 'parm' instruction. + u32 next_parameter_index = 0; + + bool carry_flag = false; + const std::vector<u32>& code; +}; void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { MICROPROFILE_SCOPE(MacroInterp); @@ -283,5 +351,13 @@ u32 MacroInterpreterImpl::FetchParameter() { ASSERT(next_parameter_index < num_parameters); return parameters[next_parameter_index++]; } +} // Anonymous namespace + +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} + +std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { + return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); +} } // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h index d50c619ce..8a9648e46 100644 --- a/src/video_core/macro/macro_interpreter.h +++ b/src/video_core/macro/macro_interpreter.h @@ -3,10 +3,9 @@ // Refer to the license.txt file included. #pragma once -#include <array> -#include <optional> + #include <vector> -#include "common/bit_field.h" + #include "common/common_types.h" #include "video_core/macro/macro.h" @@ -26,77 +25,4 @@ private: Engines::Maxwell3D& maxwell3d; }; -class MacroInterpreterImpl : public CachedMacro { -public: - explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); - void Execute(const std::vector<u32>& params, u32 method) override; - -private: - /// Resets the execution engine state, zeroing registers, etc. - void Reset(); - - /** - * Executes a single macro instruction located at the current program counter. Returns whether - * the interpreter should keep running. - * - * @param is_delay_slot Whether the current step is being executed due to a delay slot in a - * previous instruction. - */ - bool Step(bool is_delay_slot); - - /// Calculates the result of an ALU operation. src_a OP src_b; - u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); - - /// Performs the result operation on the input result and stores it in the specified register - /// (if necessary). - void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); - - /// Evaluates the branch condition and returns whether the branch should be taken or not. - bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; - - /// Reads an opcode at the current program counter location. - Macro::Opcode GetOpcode() const; - - /// Returns the specified register's value. Register 0 is hardcoded to always return 0. - u32 GetRegister(u32 register_id) const; - - /// Sets the register to the input value. - void SetRegister(u32 register_id, u32 value); - - /// Sets the method address to use for the next Send instruction. - void SetMethodAddress(u32 address); - - /// Calls a GPU Engine method with the input parameter. - void Send(u32 value); - - /// Reads a GPU register located at the method address. - u32 Read(u32 method) const; - - /// Returns the next parameter in the parameter queue. - u32 FetchParameter(); - - Engines::Maxwell3D& maxwell3d; - - /// Current program counter - u32 pc; - /// Program counter to execute at after the delay slot is executed. - std::optional<u32> delayed_pc; - - /// General purpose macro registers. - std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; - - /// Method address to use for the next Send instruction. - Macro::MethodAddress method_address = {}; - - /// Input parameters of the current macro. - std::unique_ptr<u32[]> parameters; - std::size_t num_parameters = 0; - std::size_t parameters_capacity = 0; - /// Index of the next parameter that will be fetched by the 'parm' instruction. - u32 next_parameter_index = 0; - - bool carry_flag = false; - const std::vector<u32>& code; -}; - } // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index c6b2b2109..47b28ad16 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -2,9 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> +#include <bitset> +#include <optional> + +#include <xbyak/xbyak.h> + #include "common/assert.h" +#include "common/bit_field.h" #include "common/logging/log.h" #include "common/microprofile.h" +#include "common/x64/xbyak_abi.h" #include "common/x64/xbyak_util.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro_interpreter.h" @@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { +namespace { constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; -static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ +constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ STATE, RESULT, PARAMETERS, @@ -28,19 +37,75 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} +// Arbitrarily chosen based on current booting games. +constexpr size_t MAX_CODE_SIZE = 0x10000; -std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { - return std::make_unique<MacroJITx64Impl>(maxwell3d, code); +std::bitset<32> PersistentCallerSavedRegs() { + return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; } -MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) - : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { - Compile(); -} +class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { +public: + explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) + : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { + Compile(); + } + + void Execute(const std::vector<u32>& parameters, u32 method) override; + + void Compile_ALU(Macro::Opcode opcode); + void Compile_AddImmediate(Macro::Opcode opcode); + void Compile_ExtractInsert(Macro::Opcode opcode); + void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); + void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); + void Compile_Read(Macro::Opcode opcode); + void Compile_Branch(Macro::Opcode opcode); + +private: + void Optimizer_ScanFlags(); + + void Compile(); + bool Compile_NextInstruction(); + + Xbyak::Reg32 Compile_FetchParameter(); + Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); + + void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); + void Compile_Send(Xbyak::Reg32 value); -MacroJITx64Impl::~MacroJITx64Impl() = default; + Macro::Opcode GetOpCode() const; + + struct JITState { + Engines::Maxwell3D* maxwell3d{}; + std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; + u32 carry_flag{}; + }; + static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); + using ProgramType = void (*)(JITState*, const u32*); + + struct OptimizerState { + bool can_skip_carry{}; + bool has_delayed_pc{}; + bool zero_reg_skip{}; + bool skip_dummy_addimmediate{}; + bool optimize_for_method_move{}; + bool enable_asserts{}; + }; + OptimizerState optimizer{}; + + std::optional<Macro::Opcode> next_opcode{}; + ProgramType program{nullptr}; + + std::array<Xbyak::Label, MAX_CODE_SIZE> labels; + std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; + Xbyak::Label end_of_code{}; + + bool is_delay_slot{}; + u32 pc{}; + + const std::vector<u32>& code; + Engines::Maxwell3D& maxwell3d; +}; void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { MICROPROFILE_SCOPE(MacroJitExecute); @@ -307,11 +372,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { Compile_ProcessResult(opcode.result_operation, opcode.dst); } -static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { +void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { maxwell3d->CallMethodFromMME(method_address.address, value); } -void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { +void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); @@ -338,7 +403,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { L(dont_process); } -void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { +void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); const s32 jump_address = static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); @@ -392,7 +457,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { L(end); } -void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { +void MacroJITx64Impl::Optimizer_ScanFlags() { optimizer.can_skip_carry = true; optimizer.has_delayed_pc = false; for (auto raw_op : code) { @@ -534,7 +599,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() { return true; } -Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { +Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { mov(eax, dword[PARAMETERS]); add(PARAMETERS, sizeof(u32)); return eax; @@ -611,9 +676,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const { ASSERT(pc < code.size()); return {code[pc]}; } +} // Anonymous namespace -std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { - return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; -} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} +std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { + return std::make_unique<MacroJITx64Impl>(maxwell3d, code); +} } // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index d03d480b4..773b037ae 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -4,12 +4,7 @@ #pragma once -#include <array> -#include <bitset> -#include <xbyak/xbyak.h> -#include "common/bit_field.h" #include "common/common_types.h" -#include "common/x64/xbyak_abi.h" #include "video_core/macro/macro.h" namespace Tegra { @@ -18,9 +13,6 @@ namespace Engines { class Maxwell3D; } -/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games -constexpr size_t MAX_CODE_SIZE = 0x10000; - class MacroJITx64 final : public MacroEngine { public: explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); @@ -32,67 +24,4 @@ private: Engines::Maxwell3D& maxwell3d; }; -class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { -public: - explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); - ~MacroJITx64Impl(); - - void Execute(const std::vector<u32>& parameters, u32 method) override; - - void Compile_ALU(Macro::Opcode opcode); - void Compile_AddImmediate(Macro::Opcode opcode); - void Compile_ExtractInsert(Macro::Opcode opcode); - void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); - void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); - void Compile_Read(Macro::Opcode opcode); - void Compile_Branch(Macro::Opcode opcode); - -private: - void Optimizer_ScanFlags(); - - void Compile(); - bool Compile_NextInstruction(); - - Xbyak::Reg32 Compile_FetchParameter(); - Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - - void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); - void Compile_Send(Xbyak::Reg32 value); - - Macro::Opcode GetOpCode() const; - std::bitset<32> PersistentCallerSavedRegs() const; - - struct JITState { - Engines::Maxwell3D* maxwell3d{}; - std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; - u32 carry_flag{}; - }; - static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); - using ProgramType = void (*)(JITState*, const u32*); - - struct OptimizerState { - bool can_skip_carry{}; - bool has_delayed_pc{}; - bool zero_reg_skip{}; - bool skip_dummy_addimmediate{}; - bool optimize_for_method_move{}; - bool enable_asserts{}; - }; - OptimizerState optimizer{}; - - std::optional<Macro::Opcode> next_opcode{}; - ProgramType program{nullptr}; - - std::array<Xbyak::Label, MAX_CODE_SIZE> labels; - std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; - Xbyak::Label end_of_code{}; - - bool is_delay_slot{}; - u32 pc{}; - std::optional<u32> delayed_pc; - - const std::vector<u32>& code; - Engines::Maxwell3D& maxwell3d; -}; - } // namespace Tegra diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 7029287a9..752504236 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -403,10 +403,22 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i connect(button, &QPushButton::customContextMenuRequested, [=, this](const QPoint& menu_location) { QMenu context_menu; + Common::ParamPackage param = emulated_controller->GetMotionParam(motion_id); context_menu.addAction(tr("Clear"), [&] { emulated_controller->SetMotionParam(motion_id, {}); motion_map[motion_id]->setText(tr("[not set]")); }); + if (param.Has("motion")) { + context_menu.addAction(tr("Set gyro threshold"), [&] { + const int gyro_threshold = + static_cast<int>(param.Get("threshold", 0.007f) * 1000.0f); + const int new_threshold = QInputDialog::getInt( + this, tr("Set threshold"), tr("Choose a value between 0% and 100%"), + gyro_threshold, 0, 100); + param.Set("threshold", new_threshold / 1000.0f); + emulated_controller->SetMotionParam(motion_id, param); + }); + } context_menu.exec(motion_map[motion_id]->mapToGlobal(menu_location)); }); } |