summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/CMakeLists.txt2
-rw-r--r--src/common/x64/xbyak_abi.h95
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.cpp21
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.h5
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.cpp81
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.h126
-rw-r--r--src/core/settings.h2
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp76
-rw-r--r--src/video_core/macro/macro_jit_x64.h5
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2074
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.h29
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/yuzu/configuration/config.cpp10
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp45
-rw-r--r--src/yuzu/configuration/configure_graphics.ui40
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp3
-rw-r--r--src/yuzu/main.cpp5
-rw-r--r--src/yuzu_cmd/config.cpp2
-rw-r--r--src/yuzu_cmd/default_ini.h5
-rw-r--r--src/yuzu_tester/config.cpp2
-rw-r--r--src/yuzu_tester/default_ini.h5
24 files changed, 2261 insertions, 384 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 24b7a083c..0a3e2f4d1 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
DEPENDS
# WARNING! It was too much work to try and make a common location for this list,
# so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
+ "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
+ "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 794da8a52..a5f5d4fc1 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -11,7 +11,7 @@
namespace Common::X64 {
-inline int RegToIndex(const Xbyak::Reg& reg) {
+inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
using Kind = Xbyak::Reg::Kind;
ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
"RegSet only support GPRs and XMM registers.");
@@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) {
return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
}
-inline Xbyak::Reg64 IndexToReg64(int reg_index) {
+inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
ASSERT(reg_index < 16);
- return Xbyak::Reg64(reg_index);
+ return Xbyak::Reg64(static_cast<int>(reg_index));
}
-inline Xbyak::Xmm IndexToXmm(int reg_index) {
+inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
ASSERT(reg_index >= 16 && reg_index < 32);
- return Xbyak::Xmm(reg_index - 16);
+ return Xbyak::Xmm(static_cast<int>(reg_index - 16));
}
-inline Xbyak::Reg IndexToReg(int reg_index) {
+inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
if (reg_index < 16) {
return IndexToReg64(reg_index);
} else {
@@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0;
#endif
-inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
- size_t needed_frame_size, s32* out_subtraction,
- s32* out_xmm_offset) {
+struct ABIFrameInfo {
+ s32 subtraction;
+ s32 xmm_offset;
+};
+
+inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
+ size_t needed_frame_size) {
const auto count = (regs & ABI_ALL_GPRS).count();
rsp_alignment -= count * 8;
size_t subtraction = 0;
@@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
rsp_alignment -= subtraction;
subtraction += rsp_alignment & 0xF;
- *out_subtraction = (s32)subtraction;
- *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
+ return ABIFrameInfo{static_cast<s32>(subtraction),
+ static_cast<s32>(subtraction - xmm_base_subtraction)};
}
inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
size_t rsp_alignment, size_t needed_frame_size = 0) {
- s32 subtraction, xmm_offset;
- ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+ auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
+
for (std::size_t i = 0; i < regs.size(); ++i) {
if (regs[i] && ABI_ALL_GPRS[i]) {
- code.push(IndexToReg64(static_cast<int>(i)));
+ code.push(IndexToReg64(i));
}
}
- if (subtraction != 0) {
- code.sub(code.rsp, subtraction);
- }
- for (int i = 0; i < regs.count(); i++) {
- if (regs.test(i) & ABI_ALL_GPRS.test(i)) {
- code.push(IndexToReg64(i));
- }
+ if (frame_info.subtraction != 0) {
+ code.sub(code.rsp, frame_info.subtraction);
}
for (std::size_t i = 0; i < regs.size(); ++i) {
if (regs[i] && ABI_ALL_XMMS[i]) {
- code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
- xmm_offset += 0x10;
+ code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
+ frame_info.xmm_offset += 0x10;
}
}
@@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
size_t rsp_alignment, size_t needed_frame_size = 0) {
- s32 subtraction, xmm_offset;
- ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+ auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
for (std::size_t i = 0; i < regs.size(); ++i) {
if (regs[i] && ABI_ALL_XMMS[i]) {
- code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
- xmm_offset += 0x10;
+ code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
+ frame_info.xmm_offset += 0x10;
}
}
- if (subtraction != 0) {
- code.add(code.rsp, subtraction);
+ if (frame_info.subtraction != 0) {
+ code.add(code.rsp, frame_info.subtraction);
}
// GPRs need to be popped in reverse order
- for (int i = 15; i >= 0; i--) {
- if (regs[i]) {
- code.pop(IndexToReg64(i));
- }
- }
-}
-
-inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
- size_t rsp_alignment,
- size_t needed_frame_size = 0) {
- s32 subtraction, xmm_offset;
- ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
-
- for (std::size_t i = 0; i < regs.size(); ++i) {
+ for (std::size_t j = 0; j < regs.size(); ++j) {
+ const std::size_t i = regs.size() - j - 1;
if (regs[i] && ABI_ALL_GPRS[i]) {
- code.push(IndexToReg64(static_cast<int>(i)));
- }
- }
-
- if (subtraction != 0) {
- code.sub(code.rsp, subtraction);
- }
-
- return ABI_SHADOW_SPACE;
-}
-
-inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
- size_t rsp_alignment, size_t needed_frame_size = 0) {
- s32 subtraction, xmm_offset;
- ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
-
- if (subtraction != 0) {
- code.add(code.rsp, subtraction);
- }
-
- // GPRs need to be popped in reverse order
- for (int i = 15; i >= 0; i--) {
- if (regs[i]) {
code.pop(IndexToReg64(i));
}
}
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 9bc86e3b9..e7456a8c3 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -50,7 +50,8 @@ public:
}
void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
- UNIMPLEMENTED();
+ UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc,
+ MemoryReadCode(pc));
}
void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
@@ -89,8 +90,6 @@ public:
ARM_Dynarmic_32& parent;
std::size_t num_interpreted_instructions{};
- u64 tpidrro_el0{};
- u64 tpidr_el0{};
};
std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
@@ -99,7 +98,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
config.callbacks = cb.get();
// TODO(bunnei): Implement page table for 32-bit
// config.page_table = &page_table.pointers;
- config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]);
+ config.coprocessors[15] = cp15;
config.define_unpredictable_behaviour = true;
return std::make_unique<Dynarmic::A32::Jit>(config);
}
@@ -112,13 +111,13 @@ void ARM_Dynarmic_32::Run() {
}
void ARM_Dynarmic_32::Step() {
- cb->InterpreterFallback(jit->Regs()[15], 1);
+ jit->Step();
}
ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index)
- : ARM_Interface{system},
- cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
+ : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks32>(*this)),
+ cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
@@ -154,19 +153,19 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
}
u64 ARM_Dynarmic_32::GetTlsAddress() const {
- return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
+ return cp15->uro;
}
void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
- CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address);
+ cp15->uro = static_cast<u32>(address);
}
u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
- return cb->tpidr_el0;
+ return cp15->uprw;
}
void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
- cb->tpidr_el0 = value;
+ cp15->uprw = value;
}
void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 8ba9cea8f..e5b92d7bb 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -22,6 +22,7 @@ class Memory;
namespace Core {
class DynarmicCallbacks32;
+class DynarmicCP15;
class DynarmicExclusiveMonitor;
class System;
@@ -66,12 +67,14 @@ private:
std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
friend class DynarmicCallbacks32;
+ friend class DynarmicCP15;
+
std::unique_ptr<DynarmicCallbacks32> cb;
JitCacheType jit_cache;
std::shared_ptr<Dynarmic::A32::Jit> jit;
+ std::shared_ptr<DynarmicCP15> cp15;
std::size_t core_index;
DynarmicExclusiveMonitor& exclusive_monitor;
- std::array<u32, 84> CP15_regs{};
};
} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
index 3fdcdebde..d43e4dd70 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
@@ -2,79 +2,132 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <fmt/format.h>
+#include "common/logging/log.h"
+#include "core/arm/dynarmic/arm_dynarmic_32.h"
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/core_timing_util.h"
using Callback = Dynarmic::A32::Coprocessor::Callback;
using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
+template <>
+struct fmt::formatter<Dynarmic::A32::CoprocReg> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) {
+ return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
+ }
+};
+
+namespace Core {
+
+static u32 dummy_value;
+
std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
CoprocReg CRd, CoprocReg CRn,
CoprocReg CRm, unsigned opc2) {
+ LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn,
+ CRm, opc2);
return {};
}
CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
CoprocReg CRm, unsigned opc2) {
- // TODO(merry): Privileged CP15 registers
-
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
+ // CP15_FLUSH_PREFETCH_BUFFER
// This is a dummy write, we ignore the value written here.
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)];
+ return &dummy_value;
}
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
switch (opc2) {
case 4:
+ // CP15_DATA_SYNC_BARRIER
// This is a dummy write, we ignore the value written here.
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)];
+ return &dummy_value;
case 5:
+ // CP15_DATA_MEMORY_BARRIER
// This is a dummy write, we ignore the value written here.
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)];
- default:
- return {};
+ return &dummy_value;
}
}
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
+ // CP15_THREAD_UPRW
+ return &uprw;
}
+ LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
+ opc2);
return {};
}
CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
+ LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
return {};
}
CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
CoprocReg CRm, unsigned opc2) {
- // TODO(merry): Privileged CP15 registers
-
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
switch (opc2) {
case 2:
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
+ // CP15_THREAD_UPRW
+ return &uprw;
case 3:
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
- default:
- return {};
+ // CP15_THREAD_URO
+ return &uro;
}
}
+ LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
+ opc2);
return {};
}
CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
+ if (!two && opc == 0 && CRm == CoprocReg::C14) {
+ // CNTPCT
+ const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>(
+ [](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 {
+ ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg;
+ return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
+ });
+ return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent};
+ }
+
+ LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
return {};
}
std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
std::optional<u8> option) {
+ if (option) {
+ LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
+ long_transfer ? "l" : "", CRd, *option);
+ } else {
+ LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
+ long_transfer ? "l" : "", CRd);
+ }
return {};
}
std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
std::optional<u8> option) {
+ if (option) {
+ LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
+ long_transfer ? "l" : "", CRd, *option);
+ } else {
+ LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
+ long_transfer ? "l" : "", CRd);
+ }
return {};
}
+
+} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
index 07bcde5f9..7356d252e 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
@@ -10,128 +10,15 @@
#include <dynarmic/A32/coprocessor.h>
#include "common/common_types.h"
-enum class CP15Register {
- // c0 - Information registers
- CP15_MAIN_ID,
- CP15_CACHE_TYPE,
- CP15_TCM_STATUS,
- CP15_TLB_TYPE,
- CP15_CPU_ID,
- CP15_PROCESSOR_FEATURE_0,
- CP15_PROCESSOR_FEATURE_1,
- CP15_DEBUG_FEATURE_0,
- CP15_AUXILIARY_FEATURE_0,
- CP15_MEMORY_MODEL_FEATURE_0,
- CP15_MEMORY_MODEL_FEATURE_1,
- CP15_MEMORY_MODEL_FEATURE_2,
- CP15_MEMORY_MODEL_FEATURE_3,
- CP15_ISA_FEATURE_0,
- CP15_ISA_FEATURE_1,
- CP15_ISA_FEATURE_2,
- CP15_ISA_FEATURE_3,
- CP15_ISA_FEATURE_4,
+namespace Core {
- // c1 - Control registers
- CP15_CONTROL,
- CP15_AUXILIARY_CONTROL,
- CP15_COPROCESSOR_ACCESS_CONTROL,
-
- // c2 - Translation table registers
- CP15_TRANSLATION_BASE_TABLE_0,
- CP15_TRANSLATION_BASE_TABLE_1,
- CP15_TRANSLATION_BASE_CONTROL,
- CP15_DOMAIN_ACCESS_CONTROL,
- CP15_RESERVED,
-
- // c5 - Fault status registers
- CP15_FAULT_STATUS,
- CP15_INSTR_FAULT_STATUS,
- CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
- CP15_INST_FSR,
-
- // c6 - Fault Address registers
- CP15_FAULT_ADDRESS,
- CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
- CP15_WFAR,
- CP15_IFAR,
-
- // c7 - Cache operation registers
- CP15_WAIT_FOR_INTERRUPT,
- CP15_PHYS_ADDRESS,
- CP15_INVALIDATE_INSTR_CACHE,
- CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
- CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
- CP15_FLUSH_PREFETCH_BUFFER,
- CP15_FLUSH_BRANCH_TARGET_CACHE,
- CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
- CP15_INVALIDATE_DATA_CACHE,
- CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
- CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
- CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
- CP15_CLEAN_DATA_CACHE,
- CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
- CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
- CP15_DATA_SYNC_BARRIER,
- CP15_DATA_MEMORY_BARRIER,
- CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
- CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
- CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
-
- // c8 - TLB operations
- CP15_INVALIDATE_ITLB,
- CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
- CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
- CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
- CP15_INVALIDATE_DTLB,
- CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
- CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
- CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
- CP15_INVALIDATE_UTLB,
- CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
- CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
- CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
-
- // c9 - Data cache lockdown register
- CP15_DATA_CACHE_LOCKDOWN,
-
- // c10 - TLB/Memory map registers
- CP15_TLB_LOCKDOWN,
- CP15_PRIMARY_REGION_REMAP,
- CP15_NORMAL_REGION_REMAP,
-
- // c13 - Thread related registers
- CP15_PID,
- CP15_CONTEXT_ID,
- CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
- CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W)
- CP15_THREAD_PRW, // Thread ID register - Privileged R/W only.
-
- // c15 - Performance and TLB lockdown registers
- CP15_PERFORMANCE_MONITOR_CONTROL,
- CP15_CYCLE_COUNTER,
- CP15_COUNT_0,
- CP15_COUNT_1,
- CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
- CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
- CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
- CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
- CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
- CP15_TLB_DEBUG_CONTROL,
-
- // Skyeye defined
- CP15_TLB_FAULT_ADDR,
- CP15_TLB_FAULT_STATUS,
-
- // Not an actual register.
- // All registers should be defined above this.
- CP15_REGISTER_COUNT,
-};
+class ARM_Dynarmic_32;
class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
public:
using CoprocReg = Dynarmic::A32::CoprocReg;
- explicit DynarmicCP15(u32* cp15) : CP15(cp15){};
+ explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {}
std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
CoprocReg CRn, CoprocReg CRm,
@@ -147,6 +34,9 @@ public:
std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
std::optional<u8> option) override;
-private:
- u32* CP15{};
+ ARM_Dynarmic_32& parent;
+ u32 uprw;
+ u32 uro;
};
+
+} // namespace Core
diff --git a/src/core/settings.h b/src/core/settings.h
index 36cd66fd4..33e1e06cd 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -437,7 +437,7 @@ struct Values {
bool renderer_debug;
int vulkan_device;
- float resolution_factor;
+ u16 resolution_factor{1};
int aspect_ratio;
int max_anisotropy;
bool use_frame_limit;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 39d5d8401..099bb446e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -52,6 +52,8 @@ add_library(video_core STATIC
rasterizer_interface.h
renderer_base.cpp
renderer_base.h
+ renderer_opengl/gl_arb_decompiler.cpp
+ renderer_opengl/gl_arb_decompiler.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_device.cpp
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 11c1cc3be..d4a97ec7b 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -14,22 +14,16 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
namespace Tegra {
-static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9;
-static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10;
-static const Xbyak::Reg64 STATE = Xbyak::util::r11;
-static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12;
-static const Xbyak::Reg32 RESULT = Xbyak::util::r13d;
-static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13;
+static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
+static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
+static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
-static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14;
static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
- PARAMETERS,
- REGISTERS,
STATE,
- NEXT_PARAMETER,
RESULT,
+ PARAMETERS,
METHOD_ADDRESS,
BRANCH_HOLDER,
});
@@ -53,8 +47,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
JITState state{};
state.maxwell3d = &maxwell3d;
state.registers = {};
- state.parameters = parameters.data();
- program(&state);
+ program(&state, parameters.data());
}
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
@@ -64,18 +57,18 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
const bool is_move_operation = !is_a_zero && is_b_zero;
const bool has_zero_register = is_a_zero || is_b_zero;
- Xbyak::Reg64 src_a;
+ Xbyak::Reg32 src_a;
Xbyak::Reg32 src_b;
if (!optimizer.zero_reg_skip) {
- src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
- src_b = Compile_GetRegister(opcode.src_b, ebx);
+ src_a = Compile_GetRegister(opcode.src_a, RESULT);
+ src_b = Compile_GetRegister(opcode.src_b, eax);
} else {
if (!is_a_zero) {
- src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
+ src_a = Compile_GetRegister(opcode.src_a, RESULT);
}
if (!is_b_zero) {
- src_b = Compile_GetRegister(opcode.src_b, ebx);
+ src_b = Compile_GetRegister(opcode.src_b, eax);
}
}
Xbyak::Label skip_carry{};
@@ -302,22 +295,22 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
sub(result, opcode.immediate * -1);
}
}
- Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
+ Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(Common::X64::ABI_PARAM1, qword[STATE]);
mov(Common::X64::ABI_PARAM2, RESULT);
Common::X64::CallFarFunction(*this, &Read);
- Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
+ Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(RESULT, Common::X64::ABI_RETURN.cvt32());
Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
- Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
+ Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(Common::X64::ABI_PARAM1, qword[STATE]);
mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
mov(Common::X64::ABI_PARAM3, value);
Common::X64::CallFarFunction(*this, &Send);
- Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
+ Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
Xbyak::Label dont_process{};
// Get increment
@@ -329,7 +322,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
and_(METHOD_ADDRESS, 0xfff);
shr(ecx, 12);
and_(ecx, 0x3f);
- lea(eax, ptr[rcx + METHOD_ADDRESS_64]);
+ lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
sal(ecx, 12);
or_(eax, ecx);
@@ -421,19 +414,15 @@ void MacroJITx64Impl::Compile() {
bool keep_executing = true;
labels.fill(Xbyak::Label());
- Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
+ Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
// JIT state
mov(STATE, Common::X64::ABI_PARAM1);
- mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 +
- static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]);
- mov(REGISTERS, Common::X64::ABI_PARAM1);
- add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers)));
+ mov(PARAMETERS, Common::X64::ABI_PARAM2);
xor_(RESULT, RESULT);
xor_(METHOD_ADDRESS, METHOD_ADDRESS);
- xor_(NEXT_PARAMETER, NEXT_PARAMETER);
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
- mov(dword[REGISTERS + 4], Compile_FetchParameter());
+ mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
// Track get register for zero registers and mark it as no-op
optimizer.zero_reg_skip = true;
@@ -463,7 +452,7 @@ void MacroJITx64Impl::Compile() {
L(end_of_code);
- Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
+ Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
ret();
ready();
program = getCode<ProgramType>();
@@ -537,8 +526,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
}
Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
- mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]);
- inc(NEXT_PARAMETER);
+ mov(eax, dword[PARAMETERS]);
+ add(PARAMETERS, sizeof(u32));
return eax;
}
@@ -547,31 +536,12 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
// Register 0 is always zero
xor_(dst, dst);
} else {
- mov(dst, dword[REGISTERS + index * sizeof(u32)]);
+ mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
}
return dst;
}
-Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) {
- if (index == 0) {
- // Register 0 is always zero
- xor_(dst, dst);
- } else {
- mov(dst, dword[REGISTERS + index * sizeof(u32)]);
- }
-
- return dst;
-}
-
-void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) {
- Xbyak::Label zero{}, end{};
- xor_(ecx, ecx);
- shr(dst, 32);
- setne(cl);
- mov(dword[STATE + offsetof(JITState, carry_flag)], ecx);
-}
-
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) {
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
@@ -579,7 +549,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
if (reg == 0) {
return;
}
- mov(dword[REGISTERS + reg * sizeof(u32)], result);
+ mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
};
auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); };
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 71f738b9a..51ec090b8 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -55,8 +55,6 @@ private:
Xbyak::Reg32 Compile_FetchParameter();
Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
- Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst);
- void Compile_WriteCarry(Xbyak::Reg64 dst);
void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
void Compile_Send(Xbyak::Reg32 value);
@@ -67,11 +65,10 @@ private:
struct JITState {
Engines::Maxwell3D* maxwell3d{};
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
- const u32* parameters{};
u32 carry_flag{};
};
static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
- using ProgramType = void (*)(JITState*);
+ using ProgramType = void (*)(JITState*, const u32*);
struct OptimizerState {
bool can_skip_carry{};
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
new file mode 100644
index 000000000..1e96b0310
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -0,0 +1,2074 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <variant>
+
+#include <fmt/format.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_arb_decompiler.h"
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/shader/registry.h"
+#include "video_core/shader/shader_ir.h"
+
+// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
+// GLASM lacks booleans, so they have to be implemented as integers.
+// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
+// select between two values, because -1 will be evaluated as true and 0 as false.
+
+namespace OpenGL {
+
+namespace {
+
+using Tegra::Engines::ShaderType;
+using Tegra::Shader::Attribute;
+using Tegra::Shader::PixelImap;
+using Tegra::Shader::Register;
+using namespace VideoCommon::Shader;
+using Operation = const OperationNode&;
+
+constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
+
+char Swizzle(std::size_t component) {
+ ASSERT(component < 4);
+ return component["xyzw"];
+}
+
+constexpr bool IsGenericAttribute(Attribute::Index index) {
+ return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
+}
+
+u32 GetGenericAttributeIndex(Attribute::Index index) {
+ ASSERT(IsGenericAttribute(index));
+ return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
+}
+
+std::string_view Modifiers(Operation operation) {
+ const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
+ if (meta && meta->precise) {
+ return ".PREC";
+ }
+ return "";
+}
+
+std::string_view GetInputFlags(PixelImap attribute) {
+ switch (attribute) {
+ case PixelImap::Perspective:
+ return "";
+ case PixelImap::Constant:
+ return "FLAT ";
+ case PixelImap::ScreenLinear:
+ return "NOPERSPECTIVE ";
+ case PixelImap::Unused:
+ break;
+ }
+ UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
+ return {};
+}
+
+std::string_view ImageType(Tegra::Shader::ImageType image_type) {
+ switch (image_type) {
+ case Tegra::Shader::ImageType::Texture1D:
+ return "1D";
+ case Tegra::Shader::ImageType::TextureBuffer:
+ return "BUFFER";
+ case Tegra::Shader::ImageType::Texture1DArray:
+ return "ARRAY1D";
+ case Tegra::Shader::ImageType::Texture2D:
+ return "2D";
+ case Tegra::Shader::ImageType::Texture2DArray:
+ return "ARRAY2D";
+ case Tegra::Shader::ImageType::Texture3D:
+ return "3D";
+ }
+ UNREACHABLE();
+ return {};
+}
+
+std::string_view StackName(MetaStackClass stack) {
+ switch (stack) {
+ case MetaStackClass::Ssy:
+ return "SSY";
+ case MetaStackClass::Pbk:
+ return "PBK";
+ }
+ UNREACHABLE();
+ return "";
+};
+
+std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
+ switch (topology) {
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
+ return "POINTS";
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
+ return "LINES";
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
+ return "LINES_ADJACENCY";
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
+ return "TRIANGLES";
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
+ case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
+ return "TRIANGLES_ADJACENCY";
+ default:
+ UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
+ return "POINTS";
+ }
+}
+
+std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
+ switch (topology) {
+ case Tegra::Shader::OutputTopology::PointList:
+ return "POINTS";
+ case Tegra::Shader::OutputTopology::LineStrip:
+ return "LINE_STRIP";
+ case Tegra::Shader::OutputTopology::TriangleStrip:
+ return "TRIANGLE_STRIP";
+ default:
+ UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
+ return "points";
+ }
+}
+
+std::string_view StageInputName(ShaderType stage) {
+ switch (stage) {
+ case ShaderType::Vertex:
+ case ShaderType::Geometry:
+ return "vertex";
+ case ShaderType::Fragment:
+ return "fragment";
+ case ShaderType::Compute:
+ return "invocation";
+ default:
+ UNREACHABLE();
+ return "";
+ }
+}
+
+std::string TextureType(const MetaTexture& meta) {
+ if (meta.sampler.is_buffer) {
+ return "BUFFER";
+ }
+ std::string type;
+ if (meta.sampler.is_shadow) {
+ type += "SHADOW";
+ }
+ if (meta.sampler.is_array) {
+ type += "ARRAY";
+ }
+ type += [&meta] {
+ switch (meta.sampler.type) {
+ case Tegra::Shader::TextureType::Texture1D:
+ return "1D";
+ case Tegra::Shader::TextureType::Texture2D:
+ return "2D";
+ case Tegra::Shader::TextureType::Texture3D:
+ return "3D";
+ case Tegra::Shader::TextureType::TextureCube:
+ return "CUBE";
+ }
+ UNREACHABLE();
+ return "2D";
+ }();
+ return type;
+}
+
+std::string GlobalMemoryName(const GlobalMemoryBase& base) {
+ return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
+}
+
+class ARBDecompiler final {
+public:
+ explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier);
+
+ std::string Code() const {
+ return shader_source;
+ }
+
+private:
+ void DeclareHeader();
+ void DeclareVertex();
+ void DeclareGeometry();
+ void DeclareFragment();
+ void DeclareCompute();
+ void DeclareInputAttributes();
+ void DeclareOutputAttributes();
+ void DeclareLocalMemory();
+ void DeclareGlobalMemory();
+ void DeclareConstantBuffers();
+ void DeclareRegisters();
+ void DeclareTemporaries();
+ void DeclarePredicates();
+ void DeclareInternalFlags();
+
+ void InitializeVariables();
+
+ void DecompileAST();
+ void DecompileBranchMode();
+
+ void VisitAST(const ASTNode& node);
+ std::string VisitExpression(const Expr& node);
+
+ void VisitBlock(const NodeBlock& bb);
+
+ std::string Visit(const Node& node);
+
+ std::pair<std::string, std::size_t> BuildCoords(Operation);
+ std::string BuildAoffi(Operation);
+ void Exit();
+
+ std::string Assign(Operation);
+ std::string Select(Operation);
+ std::string FClamp(Operation);
+ std::string FCastHalf0(Operation);
+ std::string FCastHalf1(Operation);
+ std::string FSqrt(Operation);
+ std::string FSwizzleAdd(Operation);
+ std::string HAdd2(Operation);
+ std::string HMul2(Operation);
+ std::string HFma2(Operation);
+ std::string HAbsolute(Operation);
+ std::string HNegate(Operation);
+ std::string HClamp(Operation);
+ std::string HCastFloat(Operation);
+ std::string HUnpack(Operation);
+ std::string HMergeF32(Operation);
+ std::string HMergeH0(Operation);
+ std::string HMergeH1(Operation);
+ std::string HPack2(Operation);
+ std::string LogicalAssign(Operation);
+ std::string LogicalPick2(Operation);
+ std::string LogicalAnd2(Operation);
+ std::string FloatOrdered(Operation);
+ std::string FloatUnordered(Operation);
+ std::string LogicalAddCarry(Operation);
+ std::string Texture(Operation);
+ std::string TextureGather(Operation);
+ std::string TextureQueryDimensions(Operation);
+ std::string TextureQueryLod(Operation);
+ std::string TexelFetch(Operation);
+ std::string TextureGradient(Operation);
+ std::string ImageLoad(Operation);
+ std::string ImageStore(Operation);
+ std::string Branch(Operation);
+ std::string BranchIndirect(Operation);
+ std::string PushFlowStack(Operation);
+ std::string PopFlowStack(Operation);
+ std::string Exit(Operation);
+ std::string Discard(Operation);
+ std::string EmitVertex(Operation);
+ std::string EndPrimitive(Operation);
+ std::string InvocationId(Operation);
+ std::string YNegate(Operation);
+ std::string ThreadId(Operation);
+ std::string ShuffleIndexed(Operation);
+ std::string Barrier(Operation);
+ std::string MemoryBarrierGroup(Operation);
+ std::string MemoryBarrierGlobal(Operation);
+
+ template <const std::string_view& op>
+ std::string Unary(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
+ return temporary;
+ }
+
+ template <const std::string_view& op>
+ std::string Binary(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
+ Visit(operation[1]));
+ return temporary;
+ }
+
+ template <const std::string_view& op>
+ std::string Trinary(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
+ Visit(operation[1]), Visit(operation[2]));
+ return temporary;
+ }
+
+ template <const std::string_view& op, bool unordered>
+ std::string FloatComparison(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
+ AddLine("MOV.S {}, 0;", temporary);
+ AddLine("MOV.S {} (NE.x), -1;", temporary);
+
+ const std::string op_a = Visit(operation[0]);
+ const std::string op_b = Visit(operation[1]);
+ if constexpr (unordered) {
+ AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), -1;", temporary);
+ AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), -1;", temporary);
+ } else if (op == SNE_F) {
+ AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), 0;", temporary);
+ AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), 0;", temporary);
+ }
+ return temporary;
+ }
+
+ template <const std::string_view& op, bool is_nan>
+ std::string HalfComparison(Operation operation) {
+ const std::string tmp1 = AllocVectorTemporary();
+ const std::string tmp2 = AllocVectorTemporary();
+ const std::string op_a = Visit(operation[0]);
+ const std::string op_b = Visit(operation[1]);
+ AddLine("UP2H.F {}, {};", tmp1, op_a);
+ AddLine("UP2H.F {}, {};", tmp2, op_b);
+ AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
+ AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
+ AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
+ AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
+ AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
+ if constexpr (is_nan) {
+ AddLine("MOVC.F RC.x, {};", op_a);
+ AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
+ AddLine("MOVC.F RC.x, {};", op_b);
+ AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
+ }
+ return tmp1;
+ }
+
+ template <const std::string_view& op, const std::string_view& type>
+ std::string AtomicImage(Operation operation) {
+ const auto& meta = std::get<MetaImage>(operation.GetMeta());
+ const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
+ const std::size_t num_coords = operation.GetOperandsCount();
+ const std::size_t num_values = meta.values.size();
+
+ const std::string coord = AllocVectorTemporary();
+ const std::string value = AllocVectorTemporary();
+ for (std::size_t i = 0; i < num_coords; ++i) {
+ AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
+ }
+ for (std::size_t i = 0; i < num_values; ++i) {
+ AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
+ }
+
+ const std::string result = coord;
+ AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, result, value, coord,
+ image_id, ImageType(meta.image.type));
+ return fmt::format("{}.x", result);
+ }
+
+ template <const std::string_view& op, const std::string_view& type>
+ std::string Atomic(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ std::string address;
+ std::string_view opname;
+ if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
+ AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
+ Visit(gmem->GetBaseAddress()));
+ address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
+ opname = "ATOMB";
+ } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
+ address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
+ opname = "ATOMS";
+ } else {
+ UNREACHABLE();
+ return "{0, 0, 0, 0}";
+ }
+ AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
+ return temporary;
+ }
+
+ template <char type>
+ std::string Negate(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ if constexpr (type == 'F') {
+ AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
+ } else {
+ AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
+ }
+ return temporary;
+ }
+
+ template <char type>
+ std::string Absolute(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
+ return temporary;
+ }
+
+ template <char type>
+ std::string BitfieldInsert(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
+ AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
+ AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
+ Visit(operation[0]));
+ return fmt::format("{}.x", temporary);
+ }
+
+ template <char type>
+ std::string BitfieldExtract(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
+ AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
+ AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
+ return fmt::format("{}.x", temporary);
+ }
+
+ template <char swizzle>
+ std::string LocalInvocationId(Operation) {
+ return fmt::format("invocation.localid.{}", swizzle);
+ }
+
+ template <char swizzle>
+ std::string WorkGroupId(Operation) {
+ return fmt::format("invocation.groupid.{}", swizzle);
+ }
+
+ template <char c1, char c2>
+ std::string ThreadMask(Operation) {
+ return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
+ }
+
+ template <typename... Args>
+ void AddExpression(std::string_view text, Args&&... args) {
+ shader_source += fmt::format(text, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ void AddLine(std::string_view text, Args&&... args) {
+ AddExpression(text, std::forward<Args>(args)...);
+ shader_source += '\n';
+ }
+
+ std::string AllocTemporary() {
+ max_temporaries = std::max(max_temporaries, num_temporaries + 1);
+ return fmt::format("T{}.x", num_temporaries++);
+ }
+
+ std::string AllocVectorTemporary() {
+ max_temporaries = std::max(max_temporaries, num_temporaries + 1);
+ return fmt::format("T{}", num_temporaries++);
+ }
+
+ void ResetTemporaries() noexcept {
+ num_temporaries = 0;
+ }
+
+ const Device& device;
+ const ShaderIR& ir;
+ const Registry& registry;
+ const ShaderType stage;
+
+ std::size_t num_temporaries = 0;
+ std::size_t max_temporaries = 0;
+
+ std::string shader_source;
+
+ static constexpr std::string_view ADD_F32 = "ADD.F32";
+ static constexpr std::string_view ADD_S = "ADD.S";
+ static constexpr std::string_view ADD_U = "ADD.U";
+ static constexpr std::string_view MUL_F32 = "MUL.F32";
+ static constexpr std::string_view MUL_S = "MUL.S";
+ static constexpr std::string_view MUL_U = "MUL.U";
+ static constexpr std::string_view DIV_F32 = "DIV.F32";
+ static constexpr std::string_view DIV_S = "DIV.S";
+ static constexpr std::string_view DIV_U = "DIV.U";
+ static constexpr std::string_view MAD_F32 = "MAD.F32";
+ static constexpr std::string_view RSQ_F32 = "RSQ.F32";
+ static constexpr std::string_view COS_F32 = "COS.F32";
+ static constexpr std::string_view SIN_F32 = "SIN.F32";
+ static constexpr std::string_view EX2_F32 = "EX2.F32";
+ static constexpr std::string_view LG2_F32 = "LG2.F32";
+ static constexpr std::string_view SLT_F = "SLT.F32";
+ static constexpr std::string_view SLT_S = "SLT.S";
+ static constexpr std::string_view SLT_U = "SLT.U";
+ static constexpr std::string_view SEQ_F = "SEQ.F32";
+ static constexpr std::string_view SEQ_S = "SEQ.S";
+ static constexpr std::string_view SEQ_U = "SEQ.U";
+ static constexpr std::string_view SLE_F = "SLE.F32";
+ static constexpr std::string_view SLE_S = "SLE.S";
+ static constexpr std::string_view SLE_U = "SLE.U";
+ static constexpr std::string_view SGT_F = "SGT.F32";
+ static constexpr std::string_view SGT_S = "SGT.S";
+ static constexpr std::string_view SGT_U = "SGT.U";
+ static constexpr std::string_view SNE_F = "SNE.F32";
+ static constexpr std::string_view SNE_S = "SNE.S";
+ static constexpr std::string_view SNE_U = "SNE.U";
+ static constexpr std::string_view SGE_F = "SGE.F32";
+ static constexpr std::string_view SGE_S = "SGE.S";
+ static constexpr std::string_view SGE_U = "SGE.U";
+ static constexpr std::string_view AND_S = "AND.S";
+ static constexpr std::string_view AND_U = "AND.U";
+ static constexpr std::string_view TRUNC_F = "TRUNC.F";
+ static constexpr std::string_view TRUNC_S = "TRUNC.S";
+ static constexpr std::string_view TRUNC_U = "TRUNC.U";
+ static constexpr std::string_view SHL_S = "SHL.S";
+ static constexpr std::string_view SHL_U = "SHL.U";
+ static constexpr std::string_view SHR_S = "SHR.S";
+ static constexpr std::string_view SHR_U = "SHR.U";
+ static constexpr std::string_view OR_S = "OR.S";
+ static constexpr std::string_view OR_U = "OR.U";
+ static constexpr std::string_view XOR_S = "XOR.S";
+ static constexpr std::string_view XOR_U = "XOR.U";
+ static constexpr std::string_view NOT_S = "NOT.S";
+ static constexpr std::string_view NOT_U = "NOT.U";
+ static constexpr std::string_view BTC_S = "BTC.S";
+ static constexpr std::string_view BTC_U = "BTC.U";
+ static constexpr std::string_view BTFM_S = "BTFM.S";
+ static constexpr std::string_view BTFM_U = "BTFM.U";
+ static constexpr std::string_view ROUND_F = "ROUND.F";
+ static constexpr std::string_view CEIL_F = "CEIL.F";
+ static constexpr std::string_view FLR_F = "FLR.F";
+ static constexpr std::string_view I2F_S = "I2F.S";
+ static constexpr std::string_view I2F_U = "I2F.U";
+ static constexpr std::string_view MIN_F = "MIN.F";
+ static constexpr std::string_view MIN_S = "MIN.S";
+ static constexpr std::string_view MIN_U = "MIN.U";
+ static constexpr std::string_view MAX_F = "MAX.F";
+ static constexpr std::string_view MAX_S = "MAX.S";
+ static constexpr std::string_view MAX_U = "MAX.U";
+ static constexpr std::string_view MOV_U = "MOV.U";
+ static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
+ static constexpr std::string_view TGALL_U = "TGALL.U";
+ static constexpr std::string_view TGANY_U = "TGANY.U";
+ static constexpr std::string_view TGEQ_U = "TGEQ.U";
+ static constexpr std::string_view EXCH = "EXCH";
+ static constexpr std::string_view ADD = "ADD";
+ static constexpr std::string_view MIN = "MIN";
+ static constexpr std::string_view MAX = "MAX";
+ static constexpr std::string_view AND = "AND";
+ static constexpr std::string_view OR = "OR";
+ static constexpr std::string_view XOR = "XOR";
+ static constexpr std::string_view U32 = "U32";
+ static constexpr std::string_view S32 = "S32";
+
+ static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
+ using DecompilerType = std::string (ARBDecompiler::*)(Operation);
+ static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
+ &ARBDecompiler::Assign,
+
+ &ARBDecompiler::Select,
+
+ &ARBDecompiler::Binary<ADD_F32>,
+ &ARBDecompiler::Binary<MUL_F32>,
+ &ARBDecompiler::Binary<DIV_F32>,
+ &ARBDecompiler::Trinary<MAD_F32>,
+ &ARBDecompiler::Negate<'F'>,
+ &ARBDecompiler::Absolute<'F'>,
+ &ARBDecompiler::FClamp,
+ &ARBDecompiler::FCastHalf0,
+ &ARBDecompiler::FCastHalf1,
+ &ARBDecompiler::Binary<MIN_F>,
+ &ARBDecompiler::Binary<MAX_F>,
+ &ARBDecompiler::Unary<COS_F32>,
+ &ARBDecompiler::Unary<SIN_F32>,
+ &ARBDecompiler::Unary<EX2_F32>,
+ &ARBDecompiler::Unary<LG2_F32>,
+ &ARBDecompiler::Unary<RSQ_F32>,
+ &ARBDecompiler::FSqrt,
+ &ARBDecompiler::Unary<ROUND_F>,
+ &ARBDecompiler::Unary<FLR_F>,
+ &ARBDecompiler::Unary<CEIL_F>,
+ &ARBDecompiler::Unary<TRUNC_F>,
+ &ARBDecompiler::Unary<I2F_S>,
+ &ARBDecompiler::Unary<I2F_U>,
+ &ARBDecompiler::FSwizzleAdd,
+
+ &ARBDecompiler::Binary<ADD_S>,
+ &ARBDecompiler::Binary<MUL_S>,
+ &ARBDecompiler::Binary<DIV_S>,
+ &ARBDecompiler::Negate<'S'>,
+ &ARBDecompiler::Absolute<'S'>,
+ &ARBDecompiler::Binary<MIN_S>,
+ &ARBDecompiler::Binary<MAX_S>,
+
+ &ARBDecompiler::Unary<TRUNC_S>,
+ &ARBDecompiler::Unary<MOV_U>,
+ &ARBDecompiler::Binary<SHL_S>,
+ &ARBDecompiler::Binary<SHR_U>,
+ &ARBDecompiler::Binary<SHR_S>,
+ &ARBDecompiler::Binary<AND_S>,
+ &ARBDecompiler::Binary<OR_S>,
+ &ARBDecompiler::Binary<XOR_S>,
+ &ARBDecompiler::Unary<NOT_S>,
+ &ARBDecompiler::BitfieldInsert<'S'>,
+ &ARBDecompiler::BitfieldExtract<'S'>,
+ &ARBDecompiler::Unary<BTC_S>,
+ &ARBDecompiler::Unary<BTFM_S>,
+
+ &ARBDecompiler::Binary<ADD_U>,
+ &ARBDecompiler::Binary<MUL_U>,
+ &ARBDecompiler::Binary<DIV_U>,
+ &ARBDecompiler::Binary<MIN_U>,
+ &ARBDecompiler::Binary<MAX_U>,
+ &ARBDecompiler::Unary<TRUNC_U>,
+ &ARBDecompiler::Unary<MOV_U>,
+ &ARBDecompiler::Binary<SHL_U>,
+ &ARBDecompiler::Binary<SHR_U>,
+ &ARBDecompiler::Binary<SHR_U>,
+ &ARBDecompiler::Binary<AND_U>,
+ &ARBDecompiler::Binary<OR_U>,
+ &ARBDecompiler::Binary<XOR_U>,
+ &ARBDecompiler::Unary<NOT_U>,
+ &ARBDecompiler::BitfieldInsert<'U'>,
+ &ARBDecompiler::BitfieldExtract<'U'>,
+ &ARBDecompiler::Unary<BTC_U>,
+ &ARBDecompiler::Unary<BTFM_U>,
+
+ &ARBDecompiler::HAdd2,
+ &ARBDecompiler::HMul2,
+ &ARBDecompiler::HFma2,
+ &ARBDecompiler::HAbsolute,
+ &ARBDecompiler::HNegate,
+ &ARBDecompiler::HClamp,
+ &ARBDecompiler::HCastFloat,
+ &ARBDecompiler::HUnpack,
+ &ARBDecompiler::HMergeF32,
+ &ARBDecompiler::HMergeH0,
+ &ARBDecompiler::HMergeH1,
+ &ARBDecompiler::HPack2,
+
+ &ARBDecompiler::LogicalAssign,
+ &ARBDecompiler::Binary<AND_U>,
+ &ARBDecompiler::Binary<OR_U>,
+ &ARBDecompiler::Binary<XOR_U>,
+ &ARBDecompiler::Unary<NOT_U>,
+ &ARBDecompiler::LogicalPick2,
+ &ARBDecompiler::LogicalAnd2,
+
+ &ARBDecompiler::FloatComparison<SLT_F, false>,
+ &ARBDecompiler::FloatComparison<SEQ_F, false>,
+ &ARBDecompiler::FloatComparison<SLE_F, false>,
+ &ARBDecompiler::FloatComparison<SGT_F, false>,
+ &ARBDecompiler::FloatComparison<SNE_F, false>,
+ &ARBDecompiler::FloatComparison<SGE_F, false>,
+ &ARBDecompiler::FloatOrdered,
+ &ARBDecompiler::FloatUnordered,
+ &ARBDecompiler::FloatComparison<SLT_F, true>,
+ &ARBDecompiler::FloatComparison<SEQ_F, true>,
+ &ARBDecompiler::FloatComparison<SLE_F, true>,
+ &ARBDecompiler::FloatComparison<SGT_F, true>,
+ &ARBDecompiler::FloatComparison<SNE_F, true>,
+ &ARBDecompiler::FloatComparison<SGE_F, true>,
+
+ &ARBDecompiler::Binary<SLT_S>,
+ &ARBDecompiler::Binary<SEQ_S>,
+ &ARBDecompiler::Binary<SLE_S>,
+ &ARBDecompiler::Binary<SGT_S>,
+ &ARBDecompiler::Binary<SNE_S>,
+ &ARBDecompiler::Binary<SGE_S>,
+
+ &ARBDecompiler::Binary<SLT_U>,
+ &ARBDecompiler::Binary<SEQ_U>,
+ &ARBDecompiler::Binary<SLE_U>,
+ &ARBDecompiler::Binary<SGT_U>,
+ &ARBDecompiler::Binary<SNE_U>,
+ &ARBDecompiler::Binary<SGE_U>,
+
+ &ARBDecompiler::LogicalAddCarry,
+
+ &ARBDecompiler::HalfComparison<SLT_F, false>,
+ &ARBDecompiler::HalfComparison<SEQ_F, false>,
+ &ARBDecompiler::HalfComparison<SLE_F, false>,
+ &ARBDecompiler::HalfComparison<SGT_F, false>,
+ &ARBDecompiler::HalfComparison<SNE_F, false>,
+ &ARBDecompiler::HalfComparison<SGE_F, false>,
+ &ARBDecompiler::HalfComparison<SLT_F, true>,
+ &ARBDecompiler::HalfComparison<SEQ_F, true>,
+ &ARBDecompiler::HalfComparison<SLE_F, true>,
+ &ARBDecompiler::HalfComparison<SGT_F, true>,
+ &ARBDecompiler::HalfComparison<SNE_F, true>,
+ &ARBDecompiler::HalfComparison<SGE_F, true>,
+
+ &ARBDecompiler::Texture,
+ &ARBDecompiler::Texture,
+ &ARBDecompiler::TextureGather,
+ &ARBDecompiler::TextureQueryDimensions,
+ &ARBDecompiler::TextureQueryLod,
+ &ARBDecompiler::TexelFetch,
+ &ARBDecompiler::TextureGradient,
+
+ &ARBDecompiler::ImageLoad,
+ &ARBDecompiler::ImageStore,
+
+ &ARBDecompiler::AtomicImage<ADD, U32>,
+ &ARBDecompiler::AtomicImage<AND, U32>,
+ &ARBDecompiler::AtomicImage<OR, U32>,
+ &ARBDecompiler::AtomicImage<XOR, U32>,
+ &ARBDecompiler::AtomicImage<EXCH, U32>,
+
+ &ARBDecompiler::Atomic<EXCH, U32>,
+ &ARBDecompiler::Atomic<ADD, U32>,
+ &ARBDecompiler::Atomic<MIN, U32>,
+ &ARBDecompiler::Atomic<MAX, U32>,
+ &ARBDecompiler::Atomic<AND, U32>,
+ &ARBDecompiler::Atomic<OR, U32>,
+ &ARBDecompiler::Atomic<XOR, U32>,
+
+ &ARBDecompiler::Atomic<EXCH, S32>,
+ &ARBDecompiler::Atomic<ADD, S32>,
+ &ARBDecompiler::Atomic<MIN, S32>,
+ &ARBDecompiler::Atomic<MAX, S32>,
+ &ARBDecompiler::Atomic<AND, S32>,
+ &ARBDecompiler::Atomic<OR, S32>,
+ &ARBDecompiler::Atomic<XOR, S32>,
+
+ &ARBDecompiler::Atomic<ADD, U32>,
+ &ARBDecompiler::Atomic<MIN, U32>,
+ &ARBDecompiler::Atomic<MAX, U32>,
+ &ARBDecompiler::Atomic<AND, U32>,
+ &ARBDecompiler::Atomic<OR, U32>,
+ &ARBDecompiler::Atomic<XOR, U32>,
+
+ &ARBDecompiler::Atomic<ADD, S32>,
+ &ARBDecompiler::Atomic<MIN, S32>,
+ &ARBDecompiler::Atomic<MAX, S32>,
+ &ARBDecompiler::Atomic<AND, S32>,
+ &ARBDecompiler::Atomic<OR, S32>,
+ &ARBDecompiler::Atomic<XOR, S32>,
+
+ &ARBDecompiler::Branch,
+ &ARBDecompiler::BranchIndirect,
+ &ARBDecompiler::PushFlowStack,
+ &ARBDecompiler::PopFlowStack,
+ &ARBDecompiler::Exit,
+ &ARBDecompiler::Discard,
+
+ &ARBDecompiler::EmitVertex,
+ &ARBDecompiler::EndPrimitive,
+
+ &ARBDecompiler::InvocationId,
+ &ARBDecompiler::YNegate,
+ &ARBDecompiler::LocalInvocationId<'x'>,
+ &ARBDecompiler::LocalInvocationId<'y'>,
+ &ARBDecompiler::LocalInvocationId<'z'>,
+ &ARBDecompiler::WorkGroupId<'x'>,
+ &ARBDecompiler::WorkGroupId<'y'>,
+ &ARBDecompiler::WorkGroupId<'z'>,
+
+ &ARBDecompiler::Unary<TGBALLOT_U>,
+ &ARBDecompiler::Unary<TGALL_U>,
+ &ARBDecompiler::Unary<TGANY_U>,
+ &ARBDecompiler::Unary<TGEQ_U>,
+
+ &ARBDecompiler::ThreadId,
+ &ARBDecompiler::ThreadMask<'e', 'q'>,
+ &ARBDecompiler::ThreadMask<'g', 'e'>,
+ &ARBDecompiler::ThreadMask<'g', 't'>,
+ &ARBDecompiler::ThreadMask<'l', 'e'>,
+ &ARBDecompiler::ThreadMask<'l', 't'>,
+ &ARBDecompiler::ShuffleIndexed,
+
+ &ARBDecompiler::Barrier,
+ &ARBDecompiler::MemoryBarrierGroup,
+ &ARBDecompiler::MemoryBarrierGlobal,
+ };
+};
+
+ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier)
+ : device{device}, ir{ir}, registry{registry}, stage{stage} {
+ AddLine("TEMP RC;");
+ AddLine("TEMP FSWZA[4];");
+ AddLine("TEMP FSWZB[4];");
+ if (ir.IsDecompiled()) {
+ DecompileAST();
+ } else {
+ DecompileBranchMode();
+ }
+ AddLine("END");
+
+ const std::string code = std::move(shader_source);
+ DeclareHeader();
+ DeclareVertex();
+ DeclareGeometry();
+ DeclareFragment();
+ DeclareCompute();
+ DeclareInputAttributes();
+ DeclareOutputAttributes();
+ DeclareLocalMemory();
+ DeclareGlobalMemory();
+ DeclareConstantBuffers();
+ DeclareRegisters();
+ DeclareTemporaries();
+ DeclarePredicates();
+ DeclareInternalFlags();
+
+ shader_source += code;
+}
+
+std::string_view HeaderStageName(ShaderType stage) {
+ switch (stage) {
+ case ShaderType::Vertex:
+ return "vp";
+ case ShaderType::Geometry:
+ return "gp";
+ case ShaderType::Fragment:
+ return "fp";
+ case ShaderType::Compute:
+ return "cp";
+ default:
+ UNREACHABLE();
+ return "";
+ }
+}
+
+void ARBDecompiler::DeclareHeader() {
+ AddLine("!!NV{}5.0", HeaderStageName(stage));
+ // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
+ AddLine("OPTION NV_internal;");
+ AddLine("OPTION NV_gpu_program_fp64;");
+ AddLine("OPTION NV_shader_storage_buffer;");
+ AddLine("OPTION NV_shader_thread_group;");
+ if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
+ AddLine("OPTION NV_shader_thread_shuffle;");
+ }
+ if (stage == ShaderType::Vertex) {
+ if (device.HasNvViewportArray2()) {
+ AddLine("OPTION NV_viewport_array2;");
+ }
+ }
+ if (stage == ShaderType::Fragment) {
+ AddLine("OPTION ARB_draw_buffers;");
+ }
+ if (device.HasImageLoadFormatted()) {
+ AddLine("OPTION EXT_shader_image_load_formatted;");
+ }
+}
+
+void ARBDecompiler::DeclareVertex() {
+ if (stage != ShaderType::Vertex) {
+ return;
+ }
+ AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
+}
+
+void ARBDecompiler::DeclareGeometry() {
+ if (stage != ShaderType::Geometry) {
+ return;
+ }
+ const auto& info = registry.GetGraphicsInfo();
+ const auto& header = ir.GetHeader();
+ AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
+ AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
+ AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
+ AddLine("ATTRIB vertex_position = vertex.position;");
+}
+
+void ARBDecompiler::DeclareFragment() {
+ if (stage != ShaderType::Fragment) {
+ return;
+ }
+ AddLine("OUTPUT result_color7 = result.color[7];");
+ AddLine("OUTPUT result_color6 = result.color[6];");
+ AddLine("OUTPUT result_color5 = result.color[5];");
+ AddLine("OUTPUT result_color4 = result.color[4];");
+ AddLine("OUTPUT result_color3 = result.color[3];");
+ AddLine("OUTPUT result_color2 = result.color[2];");
+ AddLine("OUTPUT result_color1 = result.color[1];");
+ AddLine("OUTPUT result_color0 = result.color;");
+}
+
+void ARBDecompiler::DeclareCompute() {
+ if (stage != ShaderType::Compute) {
+ return;
+ }
+ const ComputeInfo& info = registry.GetComputeInfo();
+ AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
+ info.workgroup_size[2]);
+ if (info.shared_memory_size_in_words > 0) {
+ const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
+ AddLine("SHARED_MEMORY {};", size_in_bytes);
+ AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
+ }
+}
+
+void ARBDecompiler::DeclareInputAttributes() {
+ if (stage == ShaderType::Compute) {
+ return;
+ }
+ const std::string_view stage_name = StageInputName(stage);
+ for (const auto attribute : ir.GetInputAttributes()) {
+ if (!IsGenericAttribute(attribute)) {
+ continue;
+ }
+ const u32 index = GetGenericAttributeIndex(attribute);
+
+ std::string_view suffix;
+ if (stage == ShaderType::Fragment) {
+ const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
+ if (input_mode == PixelImap::Unused) {
+ return;
+ }
+ suffix = GetInputFlags(input_mode);
+ }
+ AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
+ index);
+ }
+}
+
+void ARBDecompiler::DeclareOutputAttributes() {
+ if (stage == ShaderType::Compute) {
+ return;
+ }
+ for (const auto attribute : ir.GetOutputAttributes()) {
+ if (!IsGenericAttribute(attribute)) {
+ continue;
+ }
+ const u32 index = GetGenericAttributeIndex(attribute);
+ AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
+ }
+}
+
+void ARBDecompiler::DeclareLocalMemory() {
+ u64 size = 0;
+ if (stage == ShaderType::Compute) {
+ size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
+ } else {
+ size = ir.GetHeader().GetLocalMemorySize();
+ }
+ if (size == 0) {
+ return;
+ }
+ const u64 element_count = Common::AlignUp(size, 4) / 4;
+ AddLine("TEMP lmem[{}];", element_count);
+}
+
+void ARBDecompiler::DeclareGlobalMemory() {
+ u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
+ for (const auto& pair : ir.GetGlobalMemory()) {
+ const auto& base = pair.first;
+ AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding);
+ ++binding;
+ }
+}
+
+void ARBDecompiler::DeclareConstantBuffers() {
+ u32 binding = 0;
+ for (const auto& cbuf : ir.GetConstantBuffers()) {
+ AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
+ ++binding;
+ }
+}
+
+void ARBDecompiler::DeclareRegisters() {
+ for (const u32 gpr : ir.GetRegisters()) {
+ AddLine("TEMP R{};", gpr);
+ }
+}
+
+void ARBDecompiler::DeclareTemporaries() {
+ for (std::size_t i = 0; i < max_temporaries; ++i) {
+ AddLine("TEMP T{};", i);
+ }
+}
+
+void ARBDecompiler::DeclarePredicates() {
+ for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
+ AddLine("TEMP P{};", static_cast<u64>(pred));
+ }
+}
+
+void ARBDecompiler::DeclareInternalFlags() {
+ for (const char* name : INTERNAL_FLAG_NAMES) {
+ AddLine("TEMP {};", name);
+ }
+}
+
+void ARBDecompiler::InitializeVariables() {
+ AddLine("MOV.F32 FSWZA[0], -1;");
+ AddLine("MOV.F32 FSWZA[1], 1;");
+ AddLine("MOV.F32 FSWZA[2], -1;");
+ AddLine("MOV.F32 FSWZA[3], 0;");
+ AddLine("MOV.F32 FSWZB[0], -1;");
+ AddLine("MOV.F32 FSWZB[1], -1;");
+ AddLine("MOV.F32 FSWZB[2], 1;");
+ AddLine("MOV.F32 FSWZB[3], -1;");
+
+ if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
+ AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
+ }
+ for (const auto attribute : ir.GetOutputAttributes()) {
+ if (!IsGenericAttribute(attribute)) {
+ continue;
+ }
+ const u32 index = GetGenericAttributeIndex(attribute);
+ AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
+ }
+ for (const u32 gpr : ir.GetRegisters()) {
+ AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
+ }
+ for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
+ AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
+ }
+}
+
+void ARBDecompiler::DecompileAST() {
+ const u32 num_flow_variables = ir.GetASTNumVariables();
+ for (u32 i = 0; i < num_flow_variables; ++i) {
+ AddLine("TEMP F{};", i);
+ }
+ for (u32 i = 0; i < num_flow_variables; ++i) {
+ AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
+ }
+
+ InitializeVariables();
+
+ VisitAST(ir.GetASTProgram());
+}
+
+void ARBDecompiler::DecompileBranchMode() {
+ static constexpr u32 FLOW_STACK_SIZE = 20;
+ if (!ir.IsFlowStackDisabled()) {
+ AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
+ AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
+ AddLine("TEMP SSY_TOP;");
+ AddLine("TEMP PBK_TOP;");
+ }
+
+ AddLine("TEMP PC;");
+
+ if (!ir.IsFlowStackDisabled()) {
+ AddLine("MOV.U SSY_TOP.x, 0;");
+ AddLine("MOV.U PBK_TOP.x, 0;");
+ }
+
+ InitializeVariables();
+
+ const auto basic_block_end = ir.GetBasicBlocks().end();
+ auto basic_block_it = ir.GetBasicBlocks().begin();
+ const u32 first_address = basic_block_it->first;
+ AddLine("MOV.U PC.x, {};", first_address);
+
+ AddLine("REP;");
+
+ std::size_t num_blocks = 0;
+ while (basic_block_it != basic_block_end) {
+ const auto& [address, bb] = *basic_block_it;
+ ++num_blocks;
+
+ AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
+ AddLine("IF NE.x;");
+
+ VisitBlock(bb);
+
+ ++basic_block_it;
+
+ if (basic_block_it != basic_block_end) {
+ const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
+ if (!op || op->GetCode() != OperationCode::Branch) {
+ const u32 next_address = basic_block_it->first;
+ AddLine("MOV.U PC.x, {};", next_address);
+ AddLine("CONT;");
+ }
+ }
+
+ AddLine("ELSE;");
+ }
+ AddLine("RET;");
+ while (num_blocks--) {
+ AddLine("ENDIF;");
+ }
+
+ AddLine("ENDREP;");
+}
+
+void ARBDecompiler::VisitAST(const ASTNode& node) {
+ if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
+ for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ VisitAST(current);
+ }
+ } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
+ const std::string condition = VisitExpression(ast->condition);
+ ResetTemporaries();
+
+ AddLine("MOVC.U RC.x, {};", condition);
+ AddLine("IF NE.x;");
+ for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ VisitAST(current);
+ }
+ AddLine("ENDIF;");
+ } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
+ AddLine("ELSE;");
+ for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ VisitAST(current);
+ }
+ } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
+ VisitBlock(ast->nodes);
+ } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
+ AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
+ ResetTemporaries();
+ } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
+ const std::string condition = VisitExpression(ast->condition);
+ ResetTemporaries();
+ AddLine("REP;");
+ for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+ VisitAST(current);
+ }
+ AddLine("MOVC.U RC.x, {};", condition);
+ AddLine("BRK (NE.x);");
+ AddLine("ENDREP;");
+ } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
+ const bool is_true = ExprIsTrue(ast->condition);
+ if (!is_true) {
+ AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+ AddLine("IF NE.x;");
+ ResetTemporaries();
+ }
+ if (ast->kills) {
+ AddLine("KIL TR;");
+ } else {
+ Exit();
+ }
+ if (!is_true) {
+ AddLine("ENDIF;");
+ }
+ } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
+ if (ExprIsTrue(ast->condition)) {
+ AddLine("BRK;");
+ } else {
+ AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+ AddLine("BRK (NE.x);");
+ ResetTemporaries();
+ }
+ } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
+ // Nothing to do
+ } else {
+ UNREACHABLE();
+ }
+}
+
+std::string ARBDecompiler::VisitExpression(const Expr& node) {
+ const std::string result = AllocTemporary();
+ if (const auto expr = std::get_if<ExprAnd>(&*node)) {
+ AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
+ VisitExpression(expr->operand2));
+ return result;
+ }
+ if (const auto expr = std::get_if<ExprOr>(&*node)) {
+ const std::string result = AllocTemporary();
+ AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
+ VisitExpression(expr->operand2));
+ return result;
+ }
+ if (const auto expr = std::get_if<ExprNot>(&*node)) {
+ const std::string result = AllocTemporary();
+ AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
+ return result;
+ }
+ if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
+ return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
+ }
+ if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
+ return Visit(ir.GetConditionCode(expr->cc));
+ }
+ if (const auto expr = std::get_if<ExprVar>(&*node)) {
+ return fmt::format("F{}.x", expr->var_index);
+ }
+ if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
+ return expr->value ? "0xffffffff" : "0";
+ }
+ if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
+ const std::string result = AllocTemporary();
+ AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
+ return result;
+ }
+ UNREACHABLE();
+ return "0";
+}
+
+void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
+ for (const auto& node : bb) {
+ Visit(node);
+ }
+}
+
+std::string ARBDecompiler::Visit(const Node& node) {
+ if (const auto operation = std::get_if<OperationNode>(&*node)) {
+ if (const auto amend_index = operation->GetAmendIndex()) {
+ Visit(ir.GetAmendNode(*amend_index));
+ }
+ const std::size_t index = static_cast<std::size_t>(operation->GetCode());
+ if (index >= OPERATION_DECOMPILERS.size()) {
+ UNREACHABLE_MSG("Out of bounds operation: {}", index);
+ return {};
+ }
+ const auto decompiler = OPERATION_DECOMPILERS[index];
+ if (decompiler == nullptr) {
+ UNREACHABLE_MSG("Undefined operation: {}", index);
+ return {};
+ }
+ return (this->*decompiler)(*operation);
+ }
+
+ if (const auto gpr = std::get_if<GprNode>(&*node)) {
+ const u32 index = gpr->GetIndex();
+ if (index == Register::ZeroIndex) {
+ return "{0, 0, 0, 0}.x";
+ }
+ return fmt::format("R{}.x", index);
+ }
+
+ if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
+ return fmt::format("CV{}.x", cv->GetIndex());
+ }
+
+ if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
+ const std::string temporary = AllocTemporary();
+ AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
+ return temporary;
+ }
+
+ if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
+ const std::string temporary = AllocTemporary();
+ switch (const auto index = predicate->GetIndex(); index) {
+ case Tegra::Shader::Pred::UnusedIndex:
+ AddLine("MOV.S {}, -1;", temporary);
+ break;
+ case Tegra::Shader::Pred::NeverExecute:
+ AddLine("MOV.S {}, 0;", temporary);
+ break;
+ default:
+ AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
+ break;
+ }
+ if (predicate->IsNegated()) {
+ AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
+ }
+ return temporary;
+ }
+
+ if (const auto abuf = std::get_if<AbufNode>(&*node)) {
+ if (abuf->IsPhysicalBuffer()) {
+ UNIMPLEMENTED_MSG("Physical buffers are not implemented");
+ return "{0, 0, 0, 0}.x";
+ }
+
+ const auto buffer_index = [this, &abuf]() -> std::string {
+ if (stage != ShaderType::Geometry) {
+ return "";
+ }
+ return fmt::format("[{}]", Visit(abuf->GetBuffer()));
+ };
+
+ const Attribute::Index index = abuf->GetIndex();
+ const u32 element = abuf->GetElement();
+ const char swizzle = Swizzle(element);
+ switch (index) {
+ case Attribute::Index::Position: {
+ if (stage == ShaderType::Geometry) {
+ return fmt::format("{}_position[{}].{}", StageInputName(stage),
+ Visit(abuf->GetBuffer()), swizzle);
+ } else {
+ return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
+ }
+ }
+ case Attribute::Index::TessCoordInstanceIDVertexID:
+ ASSERT(stage == ShaderType::Vertex);
+ switch (element) {
+ case 2:
+ return "vertex.instance";
+ case 3:
+ return "vertex.id";
+ }
+ UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
+ break;
+ case Attribute::Index::PointCoord:
+ switch (element) {
+ case 0:
+ return "fragment.pointcoord.x";
+ case 1:
+ return "fragment.pointcoord.y";
+ }
+ UNIMPLEMENTED();
+ break;
+ case Attribute::Index::FrontFacing: {
+ ASSERT(stage == ShaderType::Fragment);
+ ASSERT(element == 3);
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
+ AddLine("MOV.U.CC RC.x, -RC;");
+ AddLine("MOV.S {}.x, 0;", temporary);
+ AddLine("MOV.S {}.x (NE.x), -1;", temporary);
+ return fmt::format("{}.x", temporary);
+ }
+ default:
+ if (IsGenericAttribute(index)) {
+ if (stage == ShaderType::Geometry) {
+ return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
+ Visit(abuf->GetBuffer()), swizzle);
+ } else {
+ return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
+ GetGenericAttributeIndex(index), swizzle);
+ }
+ }
+ UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
+ break;
+ }
+ return "{0, 0, 0, 0}.x";
+ }
+
+ if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
+ std::string offset_string;
+ const auto& offset = cbuf->GetOffset();
+ if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
+ offset_string = std::to_string(imm->GetValue());
+ } else {
+ offset_string = Visit(offset);
+ }
+ const std::string temporary = AllocTemporary();
+ AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
+ return temporary;
+ }
+
+ if (const auto gmem = std::get_if<GmemNode>(&*node)) {
+ const std::string temporary = AllocTemporary();
+ AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
+ Visit(gmem->GetBaseAddress()));
+ AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
+ temporary);
+ return temporary;
+ }
+
+ if (const auto lmem = std::get_if<LmemNode>(&*node)) {
+ const std::string temporary = Visit(lmem->GetAddress());
+ AddLine("SHR.U {}, {}, 2;", temporary, temporary);
+ AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
+ return temporary;
+ }
+
+ if (const auto smem = std::get_if<SmemNode>(&*node)) {
+ const std::string temporary = Visit(smem->GetAddress());
+ AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
+ return temporary;
+ }
+
+ if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
+ const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
+ return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
+ }
+
+ if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+ if (const auto amend_index = conditional->GetAmendIndex()) {
+ Visit(ir.GetAmendNode(*amend_index));
+ }
+ AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
+ AddLine("IF NE.x;");
+ VisitBlock(conditional->GetCode());
+ AddLine("ENDIF;");
+ return {};
+ }
+
+ if (const auto cmt = std::get_if<CommentNode>(&*node)) {
+ // Uncommenting this will generate invalid code. GLASM lacks comments.
+ // AddLine("// {}", cmt->GetText());
+ return {};
+ }
+
+ UNIMPLEMENTED();
+ return {};
+}
+
+std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ UNIMPLEMENTED_IF(meta.sampler.is_indexed);
+ UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
+ meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
+
+ const std::size_t count = operation.GetOperandsCount();
+ std::string temporary = AllocVectorTemporary();
+ std::size_t i = 0;
+ for (; i < count; ++i) {
+ AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
+ }
+ if (meta.sampler.is_array) {
+ AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
+ }
+ if (meta.sampler.is_shadow) {
+ AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
+ }
+ return {std::move(temporary), i};
+}
+
+std::string ARBDecompiler::BuildAoffi(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ if (meta.aoffi.empty()) {
+ return {};
+ }
+ const std::string temporary = AllocVectorTemporary();
+ std::size_t i = 0;
+ for (auto& node : meta.aoffi) {
+ AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
+ }
+ return fmt::format(", offset({})", temporary);
+}
+
+void ARBDecompiler::Exit() {
+ if (stage != ShaderType::Fragment) {
+ AddLine("RET;");
+ return;
+ }
+
+ const auto safe_get_register = [this](u32 reg) -> std::string {
+ // TODO(Rodrigo): Replace with contains once C++20 releases
+ const auto& used_registers = ir.GetRegisters();
+ if (used_registers.find(reg) != used_registers.end()) {
+ return fmt::format("R{}.x", reg);
+ }
+ return "{0, 0, 0, 0}.x";
+ };
+
+ const auto& header = ir.GetHeader();
+ u32 current_reg = 0;
+ for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
+ for (u32 component = 0; component < 4; ++component) {
+ if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
+ continue;
+ }
+ AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
+ safe_get_register(current_reg));
+ ++current_reg;
+ }
+ }
+ if (header.ps.omap.depth) {
+ AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
+ }
+
+ AddLine("RET;");
+}
+
+std::string ARBDecompiler::Assign(Operation operation) {
+ const Node& dest = operation[0];
+ const Node& src = operation[1];
+
+ std::string dest_name;
+ if (const auto gpr = std::get_if<GprNode>(&*dest)) {
+ if (gpr->GetIndex() == Register::ZeroIndex) {
+ // Writing to Register::ZeroIndex is a no op
+ return {};
+ }
+ dest_name = fmt::format("R{}.x", gpr->GetIndex());
+ } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
+ const u32 element = abuf->GetElement();
+ const char swizzle = Swizzle(element);
+ switch (const Attribute::Index index = abuf->GetIndex()) {
+ case Attribute::Index::Position:
+ dest_name = fmt::format("result.position.{}", swizzle);
+ break;
+ case Attribute::Index::LayerViewportPointSize:
+ switch (element) {
+ case 0:
+ UNIMPLEMENTED();
+ return {};
+ case 1:
+ case 2:
+ if (!device.HasNvViewportArray2()) {
+ LOG_ERROR(
+ Render_OpenGL,
+ "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
+ return {};
+ }
+ dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
+ break;
+ case 3:
+ dest_name = "result.pointsize.x";
+ break;
+ }
+ break;
+ case Attribute::Index::ClipDistances0123:
+ dest_name = fmt::format("result.clip[{}].x", element);
+ break;
+ case Attribute::Index::ClipDistances4567:
+ dest_name = fmt::format("result.clip[{}].x", element + 4);
+ break;
+ default:
+ if (!IsGenericAttribute(index)) {
+ UNREACHABLE();
+ return {};
+ }
+ dest_name =
+ fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
+ break;
+ }
+ } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
+ const std::string address = Visit(lmem->GetAddress());
+ AddLine("SHR.U {}, {}, 2;", address, address);
+ dest_name = fmt::format("lmem[{}].x", address);
+ } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
+ AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
+ ResetTemporaries();
+ return {};
+ } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
+ const std::string temporary = AllocTemporary();
+ AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
+ Visit(gmem->GetBaseAddress()));
+ AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
+ temporary);
+ ResetTemporaries();
+ return {};
+ } else {
+ UNREACHABLE();
+ ResetTemporaries();
+ return {};
+ }
+
+ AddLine("MOV.U {}, {};", dest_name, Visit(src));
+ ResetTemporaries();
+ return {};
+}
+
+std::string ARBDecompiler::Select(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
+ Visit(operation[2]));
+ return temporary;
+}
+
+std::string ARBDecompiler::FClamp(Operation operation) {
+ // 1.0f in hex, replace with std::bit_cast on C++20
+ static constexpr u32 POSITIVE_ONE = 0x3f800000;
+
+ const std::string temporary = AllocTemporary();
+ const Node& value = operation[0];
+ const Node& low = operation[1];
+ const Node& high = operation[2];
+ const auto imm_low = std::get_if<ImmediateNode>(&*low);
+ const auto imm_high = std::get_if<ImmediateNode>(&*high);
+ if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
+ AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
+ } else {
+ AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
+ AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
+ }
+ return temporary;
+}
+
+std::string ARBDecompiler::FCastHalf0(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::FCastHalf1(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
+ AddLine("MOV {}.x, {}.y;", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::FSqrt(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
+ AddLine("RCP.F32 {}, {};", temporary, temporary);
+ return temporary;
+}
+
+std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ if (!device.HasWarpIntrinsics()) {
+ LOG_ERROR(Render_OpenGL,
+ "NV_shader_thread_shuffle is missing. Kepler or better is required.");
+ AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
+ return fmt::format("{}.x", temporary);
+ }
+ const std::string lut = AllocVectorTemporary();
+ AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
+ AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
+ AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
+ AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
+ AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
+ AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
+ AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HAdd2(Operation operation) {
+ const std::string tmp1 = AllocVectorTemporary();
+ const std::string tmp2 = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
+ AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
+ AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
+ AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
+ return fmt::format("{}.x", tmp1);
+}
+
+std::string ARBDecompiler::HMul2(Operation operation) {
+ const std::string tmp1 = AllocVectorTemporary();
+ const std::string tmp2 = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
+ AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
+ AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
+ AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
+ return fmt::format("{}.x", tmp1);
+}
+
+std::string ARBDecompiler::HFma2(Operation operation) {
+ const std::string tmp1 = AllocVectorTemporary();
+ const std::string tmp2 = AllocVectorTemporary();
+ const std::string tmp3 = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
+ AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
+ AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
+ AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
+ AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
+ return fmt::format("{}.x", tmp1);
+}
+
+std::string ARBDecompiler::HAbsolute(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
+ AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HNegate(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
+ AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
+ AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
+ AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
+ AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HClamp(Operation operation) {
+ const std::string tmp1 = AllocVectorTemporary();
+ const std::string tmp2 = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
+ AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
+ AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
+ AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
+ AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
+ AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
+ AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
+ AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
+ return fmt::format("{}.x", tmp1);
+}
+
+std::string ARBDecompiler::HCastFloat(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
+ AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HUnpack(Operation operation) {
+ const std::string operand = Visit(operation[0]);
+ switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
+ case Tegra::Shader::HalfType::H0_H1:
+ return operand;
+ case Tegra::Shader::HalfType::F32: {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.U {}.x, {};", temporary, operand);
+ AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+ }
+ case Tegra::Shader::HalfType::H0_H0: {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, operand);
+ AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+ }
+ case Tegra::Shader::HalfType::H1_H1: {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, operand);
+ AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+ }
+ }
+ UNREACHABLE();
+ return "{0, 0, 0, 0}.x";
+}
+
+std::string ARBDecompiler::HMergeF32(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HMergeH0(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
+ AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
+ AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HMergeH1(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
+ AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
+ AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::HPack2(Operation operation) {
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
+ AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
+ AddLine("PK2H.F {}.x, {};", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::LogicalAssign(Operation operation) {
+ const Node& dest = operation[0];
+ const Node& src = operation[1];
+
+ std::string target;
+
+ if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
+ ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
+
+ const Tegra::Shader::Pred index = pred->GetIndex();
+ switch (index) {
+ case Tegra::Shader::Pred::NeverExecute:
+ case Tegra::Shader::Pred::UnusedIndex:
+ // Writing to these predicates is a no-op
+ return {};
+ }
+ target = fmt::format("P{}.x", static_cast<u64>(index));
+ } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
+ const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
+ target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
+ } else {
+ UNREACHABLE();
+ ResetTemporaries();
+ return {};
+ }
+
+ AddLine("MOV.U {}, {};", target, Visit(src));
+ ResetTemporaries();
+ return {};
+}
+
+std::string ARBDecompiler::LogicalPick2(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
+ AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
+ return temporary;
+}
+
+std::string ARBDecompiler::LogicalAnd2(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ const std::string op = Visit(operation[0]);
+ AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
+ return temporary;
+}
+
+std::string ARBDecompiler::FloatOrdered(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
+ AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
+ AddLine("MOV.S {}, -1;", temporary);
+ AddLine("MOV.S {} (NAN.x), 0;", temporary);
+ AddLine("MOV.S {} (NAN.y), 0;", temporary);
+ return temporary;
+}
+
+std::string ARBDecompiler::FloatUnordered(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
+ AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
+ AddLine("MOV.S {}, 0;", temporary);
+ AddLine("MOV.S {} (NAN.x), -1;", temporary);
+ AddLine("MOV.S {} (NAN.y), -1;", temporary);
+ return temporary;
+}
+
+std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
+ const std::string temporary = AllocTemporary();
+ AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
+ AddLine("MOV.S {}, 0;", temporary);
+ AddLine("IF CF.x;");
+ AddLine("MOV.S {}, -1;", temporary);
+ AddLine("ENDIF;");
+ return temporary;
+}
+
+std::string ARBDecompiler::Texture(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+ const auto [temporary, swizzle] = BuildCoords(operation);
+
+ std::string_view opcode = "TEX";
+ std::string extra;
+ if (meta.bias) {
+ ASSERT(!meta.lod);
+ opcode = "TXB";
+
+ if (swizzle < 4) {
+ AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
+ } else {
+ const std::string bias = AllocTemporary();
+ AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
+ extra = fmt::format(" {},", bias);
+ }
+ }
+ if (meta.lod) {
+ ASSERT(!meta.bias);
+ opcode = "TXL";
+
+ if (swizzle < 4) {
+ AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
+ } else {
+ const std::string lod = AllocTemporary();
+ AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
+ extra = fmt::format(" {},", lod);
+ }
+ }
+
+ AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
+ TextureType(meta), BuildAoffi(operation));
+ AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::TextureGather(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+ const auto [temporary, swizzle] = BuildCoords(operation);
+
+ std::string comp;
+ if (!meta.sampler.is_shadow) {
+ const auto& immediate = std::get<ImmediateNode>(*meta.component);
+ comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
+ }
+
+ AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
+ TextureType(meta), BuildAoffi(operation));
+ AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const std::string temporary = AllocVectorTemporary();
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+
+ ASSERT(!meta.sampler.is_array);
+
+ const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
+ AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
+ AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::TextureQueryLod(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const std::string temporary = AllocVectorTemporary();
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+
+ ASSERT(!meta.sampler.is_array);
+
+ const std::size_t count = operation.GetOperandsCount();
+ for (std::size_t i = 0; i < count; ++i) {
+ AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
+ }
+ AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
+ AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
+ AddLine("TRUNC.S {}, {};", temporary, temporary);
+ AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::TexelFetch(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+ const auto [temporary, swizzle] = BuildCoords(operation);
+
+ if (!meta.sampler.is_buffer) {
+ ASSERT(swizzle < 4);
+ AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
+ }
+ AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
+ BuildAoffi(operation));
+ AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::TextureGradient(Operation operation) {
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+ const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+ const std::string ddx = AllocVectorTemporary();
+ const std::string ddy = AllocVectorTemporary();
+ const std::string coord = BuildCoords(operation).first;
+
+ const std::size_t num_components = meta.derivates.size() / 2;
+ for (std::size_t index = 0; index < num_components; ++index) {
+ const char swizzle = Swizzle(index);
+ AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
+ AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
+ }
+
+ const std::string_view result = coord;
+ AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
+ TextureType(meta), BuildAoffi(operation));
+ AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
+ return fmt::format("{}.x", result);
+}
+
+std::string ARBDecompiler::ImageLoad(Operation operation) {
+ const auto& meta = std::get<MetaImage>(operation.GetMeta());
+ const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
+ const std::size_t count = operation.GetOperandsCount();
+ const std::string_view type = ImageType(meta.image.type);
+
+ const std::string temporary = AllocVectorTemporary();
+ for (std::size_t i = 0; i < count; ++i) {
+ AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
+ }
+ AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
+ AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::ImageStore(Operation operation) {
+ const auto& meta = std::get<MetaImage>(operation.GetMeta());
+ const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
+ const std::size_t num_coords = operation.GetOperandsCount();
+ const std::size_t num_values = meta.values.size();
+ const std::string_view type = ImageType(meta.image.type);
+
+ const std::string coord = AllocVectorTemporary();
+ const std::string value = AllocVectorTemporary();
+ for (std::size_t i = 0; i < num_coords; ++i) {
+ AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
+ }
+ for (std::size_t i = 0; i < num_values; ++i) {
+ AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
+ }
+ AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
+ return {};
+}
+
+std::string ARBDecompiler::Branch(Operation operation) {
+ const auto target = std::get<ImmediateNode>(*operation[0]);
+ AddLine("MOV.U PC.x, {};", target.GetValue());
+ AddLine("CONT;");
+ return {};
+}
+
+std::string ARBDecompiler::BranchIndirect(Operation operation) {
+ AddLine("MOV.U PC.x, {};", Visit(operation[0]));
+ AddLine("CONT;");
+ return {};
+}
+
+std::string ARBDecompiler::PushFlowStack(Operation operation) {
+ const auto stack = std::get<MetaStackClass>(operation.GetMeta());
+ const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
+ const std::string_view stack_name = StackName(stack);
+ AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
+ AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
+ return {};
+}
+
+std::string ARBDecompiler::PopFlowStack(Operation operation) {
+ const auto stack = std::get<MetaStackClass>(operation.GetMeta());
+ const std::string_view stack_name = StackName(stack);
+ AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
+ AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
+ AddLine("CONT;");
+ return {};
+}
+
+std::string ARBDecompiler::Exit(Operation) {
+ Exit();
+ return {};
+}
+
+std::string ARBDecompiler::Discard(Operation) {
+ AddLine("KIL TR;");
+ return {};
+}
+
+std::string ARBDecompiler::EmitVertex(Operation) {
+ AddLine("EMIT;");
+ return {};
+}
+
+std::string ARBDecompiler::EndPrimitive(Operation) {
+ AddLine("ENDPRIM;");
+ return {};
+}
+
+std::string ARBDecompiler::InvocationId(Operation) {
+ return "primitive.invocation";
+}
+
+std::string ARBDecompiler::YNegate(Operation) {
+ LOG_WARNING(Render_OpenGL, "(STUBBED)");
+ const std::string temporary = AllocTemporary();
+ AddLine("MOV.F {}, 1;", temporary);
+ return temporary;
+}
+
+std::string ARBDecompiler::ThreadId(Operation) {
+ return fmt::format("{}.threadid", StageInputName(stage));
+}
+
+std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
+ if (!device.HasWarpIntrinsics()) {
+ LOG_ERROR(Render_OpenGL,
+ "NV_shader_thread_shuffle is missing. Kepler or better is required.");
+ return Visit(operation[0]);
+ }
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
+ Visit(operation[1]));
+ AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
+ return fmt::format("{}.x", temporary);
+}
+
+std::string ARBDecompiler::Barrier(Operation) {
+ if (!ir.IsDecompiled()) {
+ LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled");
+ return {};
+ }
+ AddLine("BAR;");
+ return {};
+}
+
+std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
+ AddLine("MEMBAR.CTA;");
+ return {};
+}
+
+std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
+ AddLine("MEMBAR;");
+ return {};
+}
+
+} // Anonymous namespace
+
+std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ const VideoCommon::Shader::Registry& registry,
+ Tegra::Engines::ShaderType stage, std::string_view identifier) {
+ return ARBDecompiler(device, ir, registry, stage, identifier).Code();
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
new file mode 100644
index 000000000..6afc87220
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h
@@ -0,0 +1,29 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <string_view>
+
+#include "common/common_types.h"
+
+namespace Tegra::Engines {
+enum class ShaderType : u32;
+}
+
+namespace VideoCommon::Shader {
+class ShaderIR;
+class Registry;
+} // namespace VideoCommon::Shader
+
+namespace OpenGL {
+
+class Device;
+
+std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ const VideoCommon::Shader::Registry& registry,
+ Tegra::Engines::ShaderType stage, std::string_view identifier);
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 21a4f4def..b31d604e4 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -221,6 +221,7 @@ Device::Device()
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
+ has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
GLAD_GL_NV_transform_feedback2;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 98cca0254..145347943 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -88,6 +88,10 @@ public:
return has_fast_buffer_sub_data;
}
+ bool HasNvViewportArray2() const {
+ return has_nv_viewport_array2;
+ }
+
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
@@ -111,6 +115,7 @@ private:
bool has_component_indexing_bug{};
bool has_precise_bug{};
bool has_fast_buffer_sub_data{};
+ bool has_nv_viewport_array2{};
bool use_assembly_shaders{};
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c28486b1d..46e780a06 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,6 +20,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -148,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
auto program = std::make_shared<ProgramHandle>();
if (device.UseAssemblyShaders()) {
- const std::string arb = "Not implemented";
+ const std::string arb =
+ DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
GLuint& arb_prog = program->assembly_program.handle;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 7e9073cc3..32c81dc70 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -631,13 +631,11 @@ void Config::ReadRendererValues() {
static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt());
Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool();
Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
- Settings::values.resolution_factor =
- ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt();
Settings::values.use_frame_limit =
ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
- Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
+ Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt();
Settings::values.use_disk_shader_cache =
ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
@@ -722,8 +720,6 @@ void Config::ReadUIValues() {
.toString();
UISettings::values.enable_discord_presence =
ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool();
- UISettings::values.screenshot_resolution_factor =
- static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt());
UISettings::values.select_user_on_boot =
ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool();
@@ -1082,8 +1078,6 @@ void Config::SaveRendererValues() {
WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0);
WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false);
WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
- WriteSetting(QStringLiteral("resolution_factor"),
- static_cast<double>(Settings::values.resolution_factor), 1.0);
WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
@@ -1159,8 +1153,6 @@ void Config::SaveUIValues() {
QString::fromUtf8(UISettings::themes[0].second));
WriteSetting(QStringLiteral("enable_discord_presence"),
UISettings::values.enable_discord_presence, true);
- WriteSetting(QStringLiteral("screenshot_resolution_factor"),
- UISettings::values.screenshot_resolution_factor, 0);
WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot,
false);
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index ea667caef..304625cd7 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -19,47 +19,6 @@
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#endif
-namespace {
-enum class Resolution : int {
- Auto,
- Scale1x,
- Scale2x,
- Scale3x,
- Scale4x,
-};
-
-float ToResolutionFactor(Resolution option) {
- switch (option) {
- case Resolution::Auto:
- return 0.f;
- case Resolution::Scale1x:
- return 1.f;
- case Resolution::Scale2x:
- return 2.f;
- case Resolution::Scale3x:
- return 3.f;
- case Resolution::Scale4x:
- return 4.f;
- }
- return 0.f;
-}
-
-Resolution FromResolutionFactor(float factor) {
- if (factor == 0.f) {
- return Resolution::Auto;
- } else if (factor == 1.f) {
- return Resolution::Scale1x;
- } else if (factor == 2.f) {
- return Resolution::Scale2x;
- } else if (factor == 3.f) {
- return Resolution::Scale3x;
- } else if (factor == 4.f) {
- return Resolution::Scale4x;
- }
- return Resolution::Auto;
-}
-} // Anonymous namespace
-
ConfigureGraphics::ConfigureGraphics(QWidget* parent)
: QWidget(parent), ui(new Ui::ConfigureGraphics) {
vulkan_device = Settings::values.vulkan_device;
@@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() {
ui->api->setEnabled(runtime_lock);
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
- ui->resolution_factor_combobox->setCurrentIndex(
- static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
ui->use_disk_shader_cache->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
@@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() {
void ConfigureGraphics::ApplyConfiguration() {
Settings::values.renderer_backend = GetCurrentGraphicsBackend();
Settings::values.vulkan_device = vulkan_device;
- Settings::values.resolution_factor =
- ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
Settings::values.use_asynchronous_gpu_emulation =
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index c816d6108..6e75447a5 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -85,46 +85,6 @@
</widget>
</item>
<item>
- <layout class="QHBoxLayout" name="horizontalLayout_2">
- <item>
- <widget class="QLabel" name="label">
- <property name="text">
- <string>Internal Resolution:</string>
- </property>
- </widget>
- </item>
- <item>
- <widget class="QComboBox" name="resolution_factor_combobox">
- <item>
- <property name="text">
- <string>Auto (Window Size)</string>
- </property>
- </item>
- <item>
- <property name="text">
- <string>Native (1280x720)</string>
- </property>
- </item>
- <item>
- <property name="text">
- <string>2x Native (2560x1440)</string>
- </property>
- </item>
- <item>
- <property name="text">
- <string>3x Native (3840x2160)</string>
- </property>
- </item>
- <item>
- <property name="text">
- <string>4x Native (5120x2880)</string>
- </property>
- </item>
- </widget>
- </item>
- </layout>
- </item>
- <item>
<layout class="QHBoxLayout" name="horizontalLayout_6">
<item>
<widget class="QLabel" name="ar_label">
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 37aadf7f8..be5006ad3 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
ui->setupUi(this);
- // TODO: Remove this after assembly shaders are fully integrated
- ui->use_assembly_shaders->setVisible(false);
-
SetConfiguration();
}
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 270cccc77..4119d7907 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -689,10 +689,7 @@ void GMainWindow::InitializeHotkeys() {
Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
UpdateStatusBar();
});
- // TODO: Remove this comment/static whenever the next major release of
- // MSVC occurs and we make it a requirement (see:
- // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
- static constexpr u16 SPEED_LIMIT_STEP = 5;
+ constexpr u16 SPEED_LIMIT_STEP = 5;
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this),
&QShortcut::activated, this, [&] {
if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 7240270f5..659b9f701 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -380,8 +380,6 @@ void Config::ReadValues() {
Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false);
Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0);
- Settings::values.resolution_factor =
- static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
Settings::values.aspect_ratio =
static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
Settings::values.max_anisotropy =
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 6f53e9659..45c07ed5d 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -117,11 +117,6 @@ use_hw_renderer =
# 0: Interpreter (slow), 1 (default): JIT (fast)
use_shader_jit =
-# Resolution scale factor
-# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
-# factor for the Switch resolution
-resolution_factor =
-
# Aspect ratio
# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
aspect_ratio =
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 3be58b15d..1566c2e3f 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -116,8 +116,6 @@ void Config::ReadValues() {
Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
// Renderer
- Settings::values.resolution_factor =
- static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
Settings::values.aspect_ratio =
static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
Settings::values.max_anisotropy =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index ca203b64d..41bbbbf60 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -21,11 +21,6 @@ use_hw_renderer =
# 0: Interpreter (slow), 1 (default): JIT (fast)
use_shader_jit =
-# Resolution scale factor
-# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
-# factor for the Switch resolution
-resolution_factor =
-
# Aspect ratio
# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
aspect_ratio =