diff options
25 files changed, 478 insertions, 60 deletions
diff --git a/src/android/app/build.gradle.kts b/src/android/app/build.gradle.kts index 13bb227ff..fe613d339 100644 --- a/src/android/app/build.gradle.kts +++ b/src/android/app/build.gradle.kts @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later import android.annotation.SuppressLint +import org.jetbrains.kotlin.konan.properties.Properties plugins { id("com.android.application") @@ -57,9 +58,22 @@ android { applicationId = "org.yuzu.yuzu_emu" minSdk = 30 targetSdk = 33 - versionCode = 1 versionName = getGitVersion() + // If you want to use autoVersion for the versionCode, create a property in local.properties + // named "autoVersioned" and set it to "true" + val properties = Properties() + val versionProperty = try { + properties.load(project.rootProject.file("local.properties").inputStream()) + properties.getProperty("autoVersioned") ?: "" + } catch (e: Exception) { "" } + + versionCode = if (versionProperty == "true") { + autoVersion + } else { + 1 + } + ndk { @SuppressLint("ChromeOsAbiSupport") abiFilters += listOf("arm64-v8a") @@ -74,16 +88,7 @@ android { // Signed by release key, allowing for upload to Play Store. release { - signingConfig = signingConfigs.getByName("debug") - isMinifyEnabled = true - isDebuggable = false - proguardFiles( - getDefaultProguardFile("proguard-android.txt"), - "proguard-rules.pro" - ) - } - - register("relWithVersionCode") { + resValue("string", "app_name_suffixed", "yuzu") signingConfig = signingConfigs.getByName("debug") isMinifyEnabled = true isDebuggable = false @@ -96,6 +101,7 @@ android { // builds a release build that doesn't need signing // Attaches 'debug' suffix to version and package name, allowing installation alongside the release build. 
register("relWithDebInfo") { + resValue("string", "app_name_suffixed", "yuzu Debug Release") signingConfig = signingConfigs.getByName("debug") isMinifyEnabled = true isDebuggable = true @@ -103,16 +109,19 @@ android { getDefaultProguardFile("proguard-android.txt"), "proguard-rules.pro" ) - versionNameSuffix = "-debug" + versionNameSuffix = "-relWithDebInfo" + applicationIdSuffix = ".relWithDebInfo" isJniDebuggable = true } // Signed by debug key disallowing distribution on Play Store. // Attaches 'debug' suffix to version and package name, allowing installation alongside the release build. debug { + resValue("string", "app_name_suffixed", "yuzu Debug") isDebuggable = true isJniDebuggable = true versionNameSuffix = "-debug" + applicationIdSuffix = ".debug" } } @@ -162,19 +171,19 @@ dependencies { implementation("androidx.appcompat:appcompat:1.6.1") implementation("androidx.recyclerview:recyclerview:1.3.0") implementation("androidx.constraintlayout:constraintlayout:2.1.4") - implementation("androidx.fragment:fragment-ktx:1.5.7") + implementation("androidx.fragment:fragment-ktx:1.6.0") implementation("androidx.documentfile:documentfile:1.0.1") implementation("com.google.android.material:material:1.9.0") implementation("androidx.preference:preference:1.2.0") implementation("androidx.lifecycle:lifecycle-viewmodel-ktx:2.6.1") implementation("io.coil-kt:coil:2.2.2") implementation("androidx.core:core-splashscreen:1.0.1") - implementation("androidx.window:window:1.0.0") + implementation("androidx.window:window:1.1.0") implementation("org.ini4j:ini4j:0.5.4") implementation("androidx.constraintlayout:constraintlayout:2.1.4") implementation("androidx.swiperefreshlayout:swiperefreshlayout:1.1.0") - implementation("androidx.navigation:navigation-fragment-ktx:2.5.3") - implementation("androidx.navigation:navigation-ui-ktx:2.5.3") + implementation("androidx.navigation:navigation-fragment-ktx:2.6.0") + implementation("androidx.navigation:navigation-ui-ktx:2.6.0") 
/**
 * Two games are equal only when every identifying field matches.
 *
 * The fields are compared directly rather than through [hashCode]: distinct games can
 * share the same 32-bit hash, and a hash-only comparison would report them as equal.
 */
override fun equals(other: Any?): Boolean {
    if (other !is Game) {
        return false
    }

    return title == other.title &&
        description == other.description &&
        regions == other.regions &&
        path == other.path &&
        gameId == other.gameId &&
        company == other.company &&
        isHomebrew == other.isHomebrew
}

/**
 * Combines the hashes of the same fields that [equals] compares, so equal games always
 * produce equal hash codes.
 */
override fun hashCode(): Int {
    var result = title.hashCode()
    result = 31 * result + description.hashCode()
    result = 31 * result + regions.hashCode()
    result = 31 * result + path.hashCode()
    result = 31 * result + gameId.hashCode()
    result = 31 * result + company.hashCode()
    result = 31 * result + isHomebrew.hashCode()
    return result
}
/**
 * Checks whether the text after the last '.' in [path] contains [extension].
 *
 * This is a substring match on the trailing segment, not an exact comparison. When
 * [path] contains no '.', the whole string is used as the trailing segment.
 */
fun hasExtension(path: String, extension: String): Boolean =
    path.substring(path.lastIndexOf(".") + 1).contains(extension)

/**
 * Checks the extension of the document behind [uri], using the display name reported by
 * its content provider.
 *
 * @return false when the query yields no cursor, no row, no display-name column or a
 * null name; otherwise the result of the [String] overload applied to the display name.
 */
fun hasExtension(uri: Uri, extension: String): Boolean {
    val fileName = YuzuApplication.appContext.contentResolver
        .query(uri, null, null, null, null)
        // use {} closes the cursor on every path, including when reading the row throws.
        ?.use { cursor ->
            val nameIndex = cursor.getColumnIndex(OpenableColumns.DISPLAY_NAME)
            // getColumnIndex reports a missing column as -1 and moveToFirst returns false
            // for an empty result; reading the row in either case would throw.
            if (nameIndex >= 0 && cursor.moveToFirst()) cursor.getString(nameIndex) else null
        }
        ?: return false

    return hasExtension(fileName, extension)
}
Please redump your keys.</string> diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 525b2363c..07e75f9d8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -216,6 +216,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate_program.h host_translate_info.h ir_opt/collect_shader_info_pass.cpp + ir_opt/conditional_barrier_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp ir_opt/dual_vertex_pass.cpp @@ -223,6 +224,7 @@ add_library(shader_recompiler STATIC ir_opt/identity_removal_pass.cpp ir_opt/layer_pass.cpp ir_opt/lower_fp16_to_fp32.cpp + ir_opt/lower_fp64_to_fp32.cpp ir_opt/lower_int64_to_int32.cpp ir_opt/passes.h ir_opt/position_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 17a6d4888..928b35561 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -280,12 +280,18 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo RemoveUnreachableBlocks(program); // Replace instructions before the SSA rewrite + if (!host_info.support_float64) { + Optimization::LowerFp64ToFp32(program); + } if (!host_info.support_float16) { Optimization::LowerFp16ToFp32(program); } if (!host_info.support_int64) { Optimization::LowerInt64ToInt32(program); } + if (!host_info.support_conditional_barrier) { + Optimization::ConditionalBarrierPass(program); + } Optimization::SsaRewritePass(program); Optimization::ConstantPropagationPass(env, program); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 2aaa6c5ea..7d2ded907 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -10,6 +10,7 @@ namespace Shader { /// Misc information about 
the host struct HostTranslateInfo { + bool support_float64{}; ///< True when the device supports 64-bit floats bool support_float16{}; ///< True when the device supports 16-bit floats bool support_int64{}; ///< True when the device supports 64-bit integers bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered @@ -17,6 +18,8 @@ struct HostTranslateInfo { bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry ///< passthrough shaders + bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional + ///< control flow }; } // namespace Shader diff --git a/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp new file mode 100644 index 000000000..c3ed27f4f --- /dev/null +++ b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void ConditionalBarrierPass(IR::Program& program) { + s32 conditional_control_flow_count{0}; + s32 conditional_return_count{0}; + for (IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::If: + case IR::AbstractSyntaxNode::Type::Loop: + conditional_control_flow_count++; + break; + case IR::AbstractSyntaxNode::Type::EndIf: + case IR::AbstractSyntaxNode::Type::Repeat: + conditional_control_flow_count--; + break; + case IR::AbstractSyntaxNode::Type::Unreachable: + case IR::AbstractSyntaxNode::Type::Return: + if (conditional_control_flow_count > 0) { + conditional_return_count++; + } + break; + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : 
node.data.block->Instructions()) { + if ((conditional_control_flow_count > 0 || conditional_return_count > 0) && + inst.GetOpcode() == IR::Opcode::Barrier) { + LOG_WARNING(Shader, "Barrier within conditional control flow"); + inst.ReplaceOpcode(IR::Opcode::Identity); + } + } + break; + default: + break; + } + } + ASSERT(conditional_control_flow_count == 0); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp new file mode 100644 index 000000000..5db7a38ad --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp @@ -0,0 +1,185 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { + +constexpr s32 F64ToF32Exp = +1023 - 127; +constexpr s32 F32ToF64Exp = +127 - 1023; + +IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) { + const IR::U32 lo{ir.CompositeExtract(packed, 0)}; + const IR::U32 hi{ir.CompositeExtract(packed, 1)}; + const IR::U32 sign{ir.BitFieldExtract(hi, ir.Imm32(31), ir.Imm32(1))}; + const IR::U32 exp{ir.BitFieldExtract(hi, ir.Imm32(20), ir.Imm32(11))}; + const IR::U32 mantissa_hi{ir.BitFieldExtract(hi, ir.Imm32(0), ir.Imm32(20))}; + const IR::U32 mantissa_lo{ir.BitFieldExtract(lo, ir.Imm32(29), ir.Imm32(3))}; + const IR::U32 mantissa{ + ir.BitwiseOr(ir.ShiftLeftLogical(mantissa_hi, ir.Imm32(3)), mantissa_lo)}; + const IR::U32 exp_if_subnorm{ + ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F64ToF32Exp)))}; + const IR::U32 exp_if_infnan{ + ir.Select(ir.IEqual(exp, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_if_subnorm)}; + const IR::U32 result{ + ir.BitwiseOr(ir.ShiftLeftLogical(sign, 
ir.Imm32(31)), + ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(23)), mantissa))}; + return ir.BitCast<IR::F32>(result); +} + +IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) { + const IR::U32 value{ir.BitCast<IR::U32>(IR::F32(raw))}; + const IR::U32 sign{ir.BitFieldExtract(value, ir.Imm32(31), ir.Imm32(1))}; + const IR::U32 exp{ir.BitFieldExtract(value, ir.Imm32(23), ir.Imm32(8))}; + const IR::U32 mantissa{ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(23))}; + const IR::U32 mantissa_hi{ir.BitFieldExtract(mantissa, ir.Imm32(3), ir.Imm32(20))}; + const IR::U32 mantissa_lo{ir.BitFieldExtract(mantissa, ir.Imm32(0), ir.Imm32(3))}; + const IR::U32 exp_if_subnorm{ + ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F32ToF64Exp)))}; + const IR::U32 exp_if_infnan{ + ir.Select(ir.IEqual(exp, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_if_subnorm)}; + const IR::U32 lo{ir.ShiftLeftLogical(mantissa_lo, ir.Imm32(29))}; + const IR::U32 hi{ + ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)), + ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(20)), mantissa_hi))}; + return ir.CompositeConstruct(lo, hi); +} + +IR::Opcode Replace(IR::Opcode op) { + switch (op) { + case IR::Opcode::FPAbs64: + return IR::Opcode::FPAbs32; + case IR::Opcode::FPAdd64: + return IR::Opcode::FPAdd32; + case IR::Opcode::FPCeil64: + return IR::Opcode::FPCeil32; + case IR::Opcode::FPFloor64: + return IR::Opcode::FPFloor32; + case IR::Opcode::FPFma64: + return IR::Opcode::FPFma32; + case IR::Opcode::FPMul64: + return IR::Opcode::FPMul32; + case IR::Opcode::FPNeg64: + return IR::Opcode::FPNeg32; + case IR::Opcode::FPRoundEven64: + return IR::Opcode::FPRoundEven32; + case IR::Opcode::FPSaturate64: + return IR::Opcode::FPSaturate32; + case IR::Opcode::FPClamp64: + return IR::Opcode::FPClamp32; + case IR::Opcode::FPTrunc64: + return IR::Opcode::FPTrunc32; + case IR::Opcode::CompositeConstructF64x2: + return IR::Opcode::CompositeConstructF32x2; + case 
IR::Opcode::CompositeConstructF64x3: + return IR::Opcode::CompositeConstructF32x3; + case IR::Opcode::CompositeConstructF64x4: + return IR::Opcode::CompositeConstructF32x4; + case IR::Opcode::CompositeExtractF64x2: + return IR::Opcode::CompositeExtractF32x2; + case IR::Opcode::CompositeExtractF64x3: + return IR::Opcode::CompositeExtractF32x3; + case IR::Opcode::CompositeExtractF64x4: + return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::CompositeInsertF64x2: + return IR::Opcode::CompositeInsertF32x2; + case IR::Opcode::CompositeInsertF64x3: + return IR::Opcode::CompositeInsertF32x3; + case IR::Opcode::CompositeInsertF64x4: + return IR::Opcode::CompositeInsertF32x4; + case IR::Opcode::FPOrdEqual64: + return IR::Opcode::FPOrdEqual32; + case IR::Opcode::FPUnordEqual64: + return IR::Opcode::FPUnordEqual32; + case IR::Opcode::FPOrdNotEqual64: + return IR::Opcode::FPOrdNotEqual32; + case IR::Opcode::FPUnordNotEqual64: + return IR::Opcode::FPUnordNotEqual32; + case IR::Opcode::FPOrdLessThan64: + return IR::Opcode::FPOrdLessThan32; + case IR::Opcode::FPUnordLessThan64: + return IR::Opcode::FPUnordLessThan32; + case IR::Opcode::FPOrdGreaterThan64: + return IR::Opcode::FPOrdGreaterThan32; + case IR::Opcode::FPUnordGreaterThan64: + return IR::Opcode::FPUnordGreaterThan32; + case IR::Opcode::FPOrdLessThanEqual64: + return IR::Opcode::FPOrdLessThanEqual32; + case IR::Opcode::FPUnordLessThanEqual64: + return IR::Opcode::FPUnordLessThanEqual32; + case IR::Opcode::FPOrdGreaterThanEqual64: + return IR::Opcode::FPOrdGreaterThanEqual32; + case IR::Opcode::FPUnordGreaterThanEqual64: + return IR::Opcode::FPUnordGreaterThanEqual32; + case IR::Opcode::FPIsNan64: + return IR::Opcode::FPIsNan32; + case IR::Opcode::ConvertS16F64: + return IR::Opcode::ConvertS16F32; + case IR::Opcode::ConvertS32F64: + return IR::Opcode::ConvertS32F32; + case IR::Opcode::ConvertS64F64: + return IR::Opcode::ConvertS64F32; + case IR::Opcode::ConvertU16F64: + return IR::Opcode::ConvertU16F32; + case 
IR::Opcode::ConvertU32F64: + return IR::Opcode::ConvertU32F32; + case IR::Opcode::ConvertU64F64: + return IR::Opcode::ConvertU64F32; + case IR::Opcode::ConvertF32F64: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF64F32: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF64S8: + return IR::Opcode::ConvertF32S8; + case IR::Opcode::ConvertF64S16: + return IR::Opcode::ConvertF32S16; + case IR::Opcode::ConvertF64S32: + return IR::Opcode::ConvertF32S32; + case IR::Opcode::ConvertF64S64: + return IR::Opcode::ConvertF32S64; + case IR::Opcode::ConvertF64U8: + return IR::Opcode::ConvertF32U8; + case IR::Opcode::ConvertF64U16: + return IR::Opcode::ConvertF32U16; + case IR::Opcode::ConvertF64U32: + return IR::Opcode::ConvertF32U32; + case IR::Opcode::ConvertF64U64: + return IR::Opcode::ConvertF32U64; + default: + return op; + } +} + +void Lower(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::PackDouble2x32: { + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0))); + break; + } + case IR::Opcode::UnpackDouble2x32: { + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0))); + break; + } + default: + inst.ReplaceOpcode(Replace(inst.GetOpcode())); + break; + } +} + +} // Anonymous namespace + +void LowerFp64ToFp32(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + Lower(*block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 1f8f2ba95..629d18fa1 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -13,10 +13,12 @@ struct HostTranslateInfo; namespace Shader::Optimization { void CollectShaderInfoPass(Environment& env, IR::Program& program); +void 
ConditionalBarrierPass(IR::Program& program); void ConstantPropagationPass(Environment& env, IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Program& program); +void LowerFp64ToFp32(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); void LowerInt64ToInt32(IR::Program& program); void RescalingPass(IR::Program& program); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2f281b370..251a4a880 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -715,20 +715,38 @@ void BufferCache<P>::BindHostIndexBuffer() { template <class P> void BufferCache<P>::BindHostVertexBuffers() { + HostBindings host_bindings; + bool any_valid{false}; auto& flags = maxwell3d->dirty.flags; for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { - const Binding& binding = channel_state->vertex_buffers[index]; - Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer, binding.buffer_id); - SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; } - flags[Dirty::VertexBuffer0 + index] = false; + host_bindings.min_index = std::min(host_bindings.min_index, index); + host_bindings.max_index = std::max(host_bindings.max_index, index); + any_valid = true; + } - const u32 stride = maxwell3d->regs.vertex_streams[index].stride; - const u32 offset = buffer.Offset(binding.cpu_addr); - runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride); + if (any_valid) { + host_bindings.max_index++; + for (u32 index = host_bindings.min_index; index < host_bindings.max_index; index++) { + flags[Dirty::VertexBuffer0 + index] = false; + + const Binding& binding = channel_state->vertex_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + + TouchBuffer(buffer, binding.buffer_id); + 
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); + + const u32 stride = maxwell3d->regs.vertex_streams[index].stride; + const u32 offset = buffer.Offset(binding.cpu_addr); + + host_bindings.buffers.push_back(reinterpret_cast<void*>(&buffer)); + host_bindings.offsets.push_back(offset); + host_bindings.sizes.push_back(binding.size); + host_bindings.strides.push_back(stride); + } + runtime.BindVertexBuffers(host_bindings); } } @@ -882,15 +900,25 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { if (maxwell3d->regs.transform_feedback_enabled == 0) { return; } + HostBindings host_bindings; for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = channel_state->transform_feedback_buffers[index]; + if (maxwell3d->regs.transform_feedback.controls[index].varying_count == 0 && + maxwell3d->regs.transform_feedback.controls[index].stride == 0) { + break; + } Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); - runtime.BindTransformFeedbackBuffer(index, buffer, offset, size); + host_bindings.buffers.push_back(reinterpret_cast<void*>(&buffer)); + host_bindings.offsets.push_back(offset); + host_bindings.sizes.push_back(binding.size); + } + if (host_bindings.buffers.size() > 0) { + runtime.BindTransformFeedbackBuffers(host_bindings); } } @@ -1616,6 +1644,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si template <class P> void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { + bool dirty_index{false}; + boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> dirty_vertex_buffers; const auto scalar_replace = [buffer_id](Binding& binding) { if (binding.buffer_id == buffer_id) { binding.buffer_id = BufferId{}; @@ -1624,8 +1654,19 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool 
do_not_mark) { const auto replace = [scalar_replace](std::span<Binding> bindings) { std::ranges::for_each(bindings, scalar_replace); }; - scalar_replace(channel_state->index_buffer); - replace(channel_state->vertex_buffers); + + if (channel_state->index_buffer.buffer_id == buffer_id) { + channel_state->index_buffer.buffer_id = BufferId{}; + dirty_index = true; + } + + for (u32 index = 0; index < channel_state->vertex_buffers.size(); index++) { + auto& binding = channel_state->vertex_buffers[index]; + if (binding.buffer_id == buffer_id) { + binding.buffer_id = BufferId{}; + dirty_vertex_buffers.push_back(index); + } + } std::ranges::for_each(channel_state->uniform_buffers, replace); std::ranges::for_each(channel_state->storage_buffers, replace); replace(channel_state->transform_feedback_buffers); @@ -1642,20 +1683,21 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); slot_buffers.erase(buffer_id); - NotifyBufferDeletion(); -} - -template <class P> -void BufferCache<P>::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { channel_state->dirty_uniform_buffers.fill(~u32{0}); channel_state->uniform_buffer_binding_sizes.fill({}); } + auto& flags = maxwell3d->dirty.flags; - flags[Dirty::IndexBuffer] = true; - flags[Dirty::VertexBuffers] = true; - for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { - flags[Dirty::VertexBuffer0 + index] = true; + if (dirty_index) { + flags[Dirty::IndexBuffer] = true; + } + + if (dirty_vertex_buffers.size() > 0) { + flags[Dirty::VertexBuffers] = true; + for (auto index : dirty_vertex_buffers) { + flags[Dirty::VertexBuffer0 + index] = true; + } } channel_state->has_deleted_buffers = true; } diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 60a1f285e..cf359e241 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ 
b/src/video_core/buffer_cache/buffer_cache_base.h @@ -105,6 +105,15 @@ static constexpr Binding NULL_BINDING{ .buffer_id = NULL_BUFFER_ID, }; +struct HostBindings { + boost::container::small_vector<void*, NUM_VERTEX_BUFFERS> buffers; + boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> offsets; + boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> sizes; + boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> strides; + u32 min_index{NUM_VERTEX_BUFFERS}; + u32 max_index{0}; +}; + class BufferCacheChannelInfo : public ChannelInfo { public: BufferCacheChannelInfo() = delete; @@ -519,8 +528,6 @@ private: void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); - void NotifyBufferDeletion(); - [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, bool is_written) const; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c419714d4..0cc546a3a 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -232,6 +232,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, } } +void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) { + for (u32 index = 0; index < bindings.buffers.size(); index++) { + BindVertexBuffer( + bindings.min_index + index, *reinterpret_cast<Buffer*>(bindings.buffers[index]), + static_cast<u32>(bindings.offsets[index]), static_cast<u32>(bindings.sizes[index]), + static_cast<u32>(bindings.strides[index])); + } +} + void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size) { if (use_assembly_shaders) { @@ -320,6 +329,15 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } +void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings) { + for (u32 
index = 0; index < bindings.buffers.size(); index++) { + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, + reinterpret_cast<Buffer*>(bindings.buffers[index])->Handle(), + static_cast<GLintptr>(bindings.offsets[index]), + static_cast<GLsizeiptr>(bindings.sizes[index])); + } +} + void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { *texture_handles++ = buffer.View(offset, size, format); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a24991585..e4e000284 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -7,7 +7,7 @@ #include <span> #include "common/common_types.h" -#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_device.h" @@ -87,6 +87,7 @@ public: void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride); + void BindVertexBuffers(VideoCommon::HostBindings& bindings); void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size); @@ -99,6 +100,7 @@ public: bool is_written); void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings); void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, VideoCore::Surface::PixelFormat format); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 400c21981..03d234f2f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -201,6 +201,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { use_asynchronous_shaders 
= Settings::values.use_asynchronous_shaders.GetValue() && !(is_amd || (is_intel && !is_linux)) && !strict_context_required; use_driver_cache = is_nvidia; + supports_conditional_barriers = !is_intel; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index cc0b95f1a..ad27264e5 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -188,6 +188,10 @@ public: return strict_context_required; } + bool SupportsConditionalBarriers() const { + return supports_conditional_barriers; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -233,6 +237,7 @@ private: bool has_bool_ref_bug{}; bool can_report_memory{}; bool strict_context_required{}; + bool supports_conditional_barriers{}; std::string vendor_name; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6ecda2984..3f077311e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -232,12 +232,14 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), }, host_info{ + .support_float64 = true, .support_float16 = false, .support_int64 = device.HasShaderInt64(), .needs_demote_reorder = device.IsAmd(), .support_snorm_render_buffer = false, .support_viewport_index_layer = device.HasVertexViewportLayer(), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), + .support_conditional_barrier = device.SupportsConditionalBarriers(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp 
b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index daa128399..d72d99899 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -7,7 +7,6 @@ #include <span> #include <vector> -#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -502,6 +501,40 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset } } +void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) { + boost::container::small_vector<VkBuffer, 32> buffer_handles; + for (u32 index = 0; index < bindings.buffers.size(); index++) { + auto& buffer = *reinterpret_cast<Buffer*>(bindings.buffers[index]); + auto handle = buffer.Handle(); + if (handle == VK_NULL_HANDLE) { + bindings.offsets[index] = 0; + bindings.sizes[index] = VK_WHOLE_SIZE; + if (!device.HasNullDescriptor()) { + ReserveNullBuffer(); + handle = *null_buffer; + } + } + buffer_handles.push_back(handle); + } + if (device.IsExtExtendedDynamicStateSupported()) { + scheduler.Record([bindings = bindings, + buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers2EXT( + bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), + reinterpret_cast<const VkDeviceSize*>(bindings.offsets.data()), + reinterpret_cast<const VkDeviceSize*>(bindings.sizes.data()), + reinterpret_cast<const VkDeviceSize*>(bindings.strides.data())); + }); + } else { + scheduler.Record([bindings = bindings, + buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers( + bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), + reinterpret_cast<const VkDeviceSize*>(bindings.offsets.data())); + }); + } +} + void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 
offset, u32 size) { if (!device.IsExtTransformFeedbackSupported()) { @@ -523,6 +556,25 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, }); } +void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings) { + if (!device.IsExtTransformFeedbackSupported()) { + // Already logged in the rasterizer + return; + } + boost::container::small_vector<VkBuffer, 4> buffer_handles; + for (u32 index = 0; index < bindings.buffers.size(); index++) { + auto& buffer = *reinterpret_cast<Buffer*>(bindings.buffers[index]); + buffer_handles.push_back(buffer.Handle()); + } + scheduler.Record( + [bindings = bindings, buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + cmdbuf.BindTransformFeedbackBuffersEXT( + 0, static_cast<u32>(buffer_handles.size()), buffer_handles.data(), + reinterpret_cast<const VkDeviceSize*>(bindings.offsets.data()), + reinterpret_cast<const VkDeviceSize*>(bindings.sizes.data())); + }); +} + void BufferCacheRuntime::ReserveNullBuffer() { if (null_buffer) { return; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 92b4f7859..92d3e9f32 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -18,6 +18,7 @@ namespace Vulkan { class Device; class DescriptorPool; class Scheduler; +struct HostVertexBinding; class BufferCacheRuntime; @@ -96,8 +97,10 @@ public: void BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count); void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); + void BindVertexBuffers(VideoCommon::HostBindings& bindings); void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); + void BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings); std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage, [[maybe_unused]] u32 binding_index, u32 size) { diff --git 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9482e91b0..5734f51e5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -350,6 +350,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .has_broken_spirv_subgroup_mask_vector_extract_dynamic = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; host_info = Shader::HostTranslateInfo{ + .support_float64 = device.IsFloat64Supported(), .support_float16 = device.IsFloat16Supported(), .support_int64 = device.IsShaderInt64Supported(), .needs_demote_reorder = diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0158b6b0d..a46f9beed 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -386,6 +386,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); + supports_conditional_barriers = !(is_intel_anv || is_intel_windows); + CollectPhysicalMemoryInfo(); CollectToolingInfo(); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index d62a103a1..f314d0ffe 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -300,6 +300,11 @@ public: return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; } + /// Returns true if the device supports float64 natively. + bool IsFloat64Supported() const { + return features.features.shaderFloat64; + } + /// Returns true if the device supports float16 natively. 
bool IsFloat16Supported() const { return features.shader_float16_int8.shaderFloat16; @@ -580,6 +585,10 @@ public: return properties.properties.limits.maxVertexInputBindings; } + bool SupportsConditionalBarriers() const { + return supports_conditional_barriers; + } + private: /// Checks if the physical device is suitable and configures the object state /// with all necessary info about its properties. @@ -683,6 +692,7 @@ private: bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. + bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. u64 device_access_memory{}; ///< Total size of device local memory in bytes. u32 sets_per_pool{}; ///< Sets per Description Pool diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 9d06b21b6..013715b44 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -3067,7 +3067,7 @@ InstallResult GMainWindow::InstallNSPXCI(const QString& filename) { return false; } - std::array<u8, 0x1000> buffer{}; + std::vector<u8> buffer(1_MiB); for (std::size_t i = 0; i < src->GetSize(); i += buffer.size()) { if (install_progress->wasCanceled()) { |