diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 10 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 | ||||
-rw-r--r-- | src/video_core/shader_cache.cpp | 5 | ||||
-rw-r--r-- | src/video_core/shader_cache.h | 2 | ||||
-rw-r--r-- | src/video_core/shader_environment.cpp | 31 | ||||
-rw-r--r-- | src/video_core/shader_environment.h | 6 |
6 files changed, 41 insertions, 25 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 618cb6354..2888e0238 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -445,7 +445,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, std::span<Shader::Environment* const> envs, bool use_shader_workers, bool force_context_flush) try { - LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + auto hash = key.Hash(); + LOG_INFO(Render_OpenGL, "0x{:016x}", hash); size_t env_index{}; u32 total_storage_buffers{}; std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; @@ -474,7 +475,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (Settings::values.dump_shaders) { - env.Dump(key.unique_hashes[index]); + env.Dump(hash, key.unique_hashes[index]); } if (!uses_vertex_a || index != 1) { @@ -566,12 +567,13 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env, bool force_context_flush) try { - LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + auto hash = key.Hash(); + LOG_INFO(Render_OpenGL, "0x{:016x}", hash); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; if (Settings::values.dump_shaders) { - env.Dump(key.Hash()); + env.Dump(hash, key.unique_hash); } auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4f84d8497..c1314ca99 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -584,7 +584,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, std::span<Shader::Environment* const> envs, PipelineStatistics* statistics, bool build_in_parallel) try { - LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + auto hash = key.Hash(); + LOG_INFO(Render_Vulkan, "0x{:016x}", hash); size_t env_index{0}; std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; const bool uses_vertex_a{key.unique_hashes[0] != 0}; @@ -611,7 +612,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (Settings::values.dump_shaders) { - env.Dump(key.unique_hashes[index]); + env.Dump(hash, key.unique_hashes[index]); } if (!uses_vertex_a || index != 1) { // Normal path @@ -712,18 +713,19 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, PipelineStatistics* statistics, bool build_in_parallel) try { + auto hash = key.Hash(); if (device.HasBrokenCompute()) { - LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); + LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", hash); return nullptr; } - LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + LOG_INFO(Render_Vulkan, "0x{:016x}", hash); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; // Dump it before error. if (Settings::values.dump_shaders) { - env.Dump(key.Hash()); + env.Dump(hash, key.unique_hash); } auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index 01701201d..e81cd031b 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -51,6 +51,11 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { } const auto& shader_config{maxwell3d->regs.pipelines[index]}; const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderType>(index)}; + if (program == Tegra::Engines::Maxwell3D::Regs::ShaderType::Pixel && + !maxwell3d->regs.rasterize_enable) { + unique_hashes[index] = 0; + continue; + } const GPUVAddr shader_addr{base_addr + shader_config.offset}; const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)}; if (!cpu_shader_addr) { diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index de8e08002..a76896620 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -70,7 +70,7 @@ public: protected: struct GraphicsEnvironments { std::array<GraphicsEnvironment, NUM_PROGRAMS> envs; - std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs; + std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs{}; std::span<Shader::Environment* const> Span() const noexcept { return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index c7cb56243..4edbe5700 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -102,7 +102,8 @@ static std::string_view StageToPrefix(Shader::Stage stage) { } } -static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest, +static void DumpImpl(u64 pipeline_hash, u64 shader_hash, std::span<const u64> code, + [[maybe_unused]] u32 read_highest, [[maybe_unused]] u32 read_lowest, u32 initial_offset, Shader::Stage stage) { const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)}; const auto base_dir{shader_dir / "shaders"}; @@ -111,13 +112,18 @@ static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowes return; } const auto prefix = StageToPrefix(stage); - const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)}; - const size_t real_size = read_highest - read_lowest + initial_offset; - const size_t padding_needed = ((32 - (real_size % 32)) % 32); + const auto name{base_dir / + fmt::format("{:016x}_{}_{:016x}.ash", pipeline_hash, prefix, shader_hash)}; std::fstream shader_file(name, std::ios::out | std::ios::binary); + ASSERT(initial_offset % sizeof(u64) == 0); const size_t jump_index = initial_offset / sizeof(u64); - shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size); - for (size_t i = 0; i < padding_needed; i++) { + const size_t code_size = code.size_bytes() - initial_offset; + shader_file.write(reinterpret_cast<const char*>(&code[jump_index]), code_size); + + // + 1 instruction, due to the fact that we skip the final self branch instruction in the code, + // but we need to consider it for padding, otherwise nvdisasm rages. + const size_t padding_needed = (32 - ((code_size + INST_SIZE) % 32)) % 32; + for (size_t i = 0; i < INST_SIZE + padding_needed; i++) { shader_file.put(0); } } @@ -197,8 +203,8 @@ u64 GenericEnvironment::CalculateHash() const { return Common::CityHash64(data.get(), size); } -void GenericEnvironment::Dump(u64 hash) { - DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage); +void GenericEnvironment::Dump(u64 pipeline_hash, u64 shader_hash) { + DumpImpl(pipeline_hash, shader_hash, code, read_highest, read_lowest, initial_offset, stage); } void GenericEnvironment::Serialize(std::ofstream& file) const { @@ -282,6 +288,7 @@ std::optional<u64> GenericEnvironment::TryFindSize() { Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, u32 raw) { const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; + ASSERT(handle.first <= tic_limit); const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); @@ -465,8 +472,8 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) .read(reinterpret_cast<char*>(&viewport_transform_state), sizeof(viewport_transform_state)) .read(reinterpret_cast<char*>(&stage), sizeof(stage)); - code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); - file.read(reinterpret_cast<char*>(code.get()), code_size); + code.resize(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast<char*>(code.data()), code_size); for (size_t i = 0; i < num_texture_types; ++i) { u32 key; Shader::TextureType type; @@ -509,8 +516,8 @@ void FileEnvironment::Deserialize(std::ifstream& file) { is_propietary_driver = texture_bound == 2; } -void FileEnvironment::Dump(u64 hash) { - DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage); +void FileEnvironment::Dump(u64 pipeline_hash, u64 shader_hash) { + DumpImpl(pipeline_hash, shader_hash, code, read_highest, read_lowest, initial_offset, stage); } u64 FileEnvironment::ReadInstruction(u32 address) { diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index a0f61cbda..b90f3d44e 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -58,7 +58,7 @@ public: [[nodiscard]] u64 CalculateHash() const; - void Dump(u64 hash) override; + void Dump(u64 pipeline_hash, u64 shader_hash) override; void Serialize(std::ofstream& file) const; @@ -188,10 +188,10 @@ public: return cbuf_replacements.size() != 0; } - void Dump(u64 hash) override; + void Dump(u64 pipeline_hash, u64 shader_hash) override; private: - std::unique_ptr<u64[]> code; + std::vector<u64> code; std::unordered_map<u32, Shader::TextureType> texture_types; std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; std::unordered_map<u64, u32> cbuf_values; |