summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 43
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 82
2 files changed, 97 insertions, 28 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index ae7152bd3..801d45144 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -331,8 +331,8 @@ static bool IsFormatBCn(PixelFormat format) {
}
template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size,
- VAddr addr) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
+ std::size_t gl_buffer_size, VAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
@@ -341,7 +341,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::si
// pixel values.
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
- addr, tile_size, bytes_per_pixel, stride, height, block_height);
+ addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth);
const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
memcpy(gl_buffer, data.data(), size_to_copy);
} else {
@@ -353,7 +353,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::si
}
}
-static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
// clang-format off
@@ -413,7 +413,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
// clang-format on
};
-static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
// clang-format off
@@ -841,36 +841,23 @@ void CachedSurface::LoadGLBuffer() {
if (params.is_tiled) {
gl_buffer.resize(total_size);
+ u32 depth = params.depth;
+ u32 block_depth = params.block_depth;
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
- ASSERT_MSG(params.block_depth == 1, "Block depth is defined as {} on texture type {}",
- params.block_depth, static_cast<u32>(params.target));
- // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do
- // this for 3D textures, etc.
- switch (params.target) {
- case SurfaceParams::SurfaceTarget::Texture2D:
- // Pass impl. to the fallback code below
- break;
- case SurfaceParams::SurfaceTarget::Texture2DArray:
- case SurfaceParams::SurfaceTarget::TextureCubemap:
- for (std::size_t index = 0; index < params.depth; ++index) {
- const std::size_t offset{index * copy_size};
- morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, gl_buffer.data() + offset,
- copy_size, params.addr + offset);
- }
- break;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}",
- static_cast<u32>(params.target));
- UNREACHABLE();
+ if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
+ // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
+ depth = 1U;
+ block_depth = 1U;
}
+ const std::size_t size = copy_size * depth;
+
morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
- params.addr);
+ params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(),
+ size, params.addr);
} else {
const u8* const texture_src_data_end{texture_src_data + total_size};
gl_buffer.assign(texture_src_data, texture_src_data_end);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c82a0dcfa..8dfb49507 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2953,6 +2953,88 @@ private:
LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
+ case OpCode::Id::VMAD: {
+ const bool signed_a = instr.vmad.signed_a == 1;
+ const bool signed_b = instr.vmad.signed_b == 1;
+ const bool result_signed = signed_a || signed_b;
+ boost::optional<std::string> forced_result;
+
+ auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed,
+ Tegra::Shader::VmadType type, u64 byte_height) {
+ const std::string value = [&]() {
+ if (!is_chunk) {
+ const auto offset = static_cast<u32>(byte_height * 8);
+ return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
+ }
+ const std::string zero = "0";
+
+ switch (type) {
+ case Tegra::Shader::VmadType::Size16_Low:
+ return '(' + op + " & 0xffff)";
+ case Tegra::Shader::VmadType::Size16_High:
+ return '(' + op + " >> 16)";
+ case Tegra::Shader::VmadType::Size32:
+ // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
+ // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
+ // explanation is found: assert.
+ UNREACHABLE_MSG("Unimplemented");
+ return zero;
+ case Tegra::Shader::VmadType::Invalid:
+ // Note(Rodrigo): This flag is invalid according to nvdisasm. From my
+ // testing (even though it's invalid) this makes the whole instruction
+ // assign zero to target register.
+ forced_result = boost::make_optional(zero);
+ return zero;
+ default:
+ UNREACHABLE();
+ return zero;
+ }
+ }();
+
+ if (is_signed) {
+ return "int(" + value + ')';
+ }
+ return value;
+ };
+
+ const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
+ instr.vmad.is_byte_chunk_a != 0, signed_a,
+ instr.vmad.type_a, instr.vmad.byte_height_a);
+
+ std::string op_b;
+ if (instr.vmad.use_register_b) {
+ op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
+ instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b,
+ instr.vmad.byte_height_b);
+ } else {
+ op_b = '(' +
+ std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
+ : instr.alu.GetImm20_16()) +
+ ')';
+ }
+
+ const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
+
+ std::string result;
+ if (forced_result) {
+ result = *forced_result;
+ } else {
+ result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
+
+ switch (instr.vmad.shr) {
+ case Tegra::Shader::VmadShr::Shr7:
+ result = '(' + result + " >> 7)";
+ break;
+ case Tegra::Shader::VmadShr::Shr15:
+ result = '(' + result + " >> 15)";
+ break;
+ }
+ }
+ regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
+ instr.vmad.saturate == 1, 0, Register::Size::Word,
+ instr.vmad.cc);
+ break;
+ }
default: {
LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
UNREACHABLE();