summaryrefslogtreecommitdiffstats
path: root/src/video_core/host_shaders
diff options
context:
space:
mode:
authorAmeer J <52414509+ameerj@users.noreply.github.com>2023-08-09 23:21:33 +0200
committerAmeer J <52414509+ameerj@users.noreply.github.com>2023-08-09 23:45:39 +0200
commit70f8ffb787a8bf394dfdc33420da3e6632472e5f (patch)
tree9ec0aac926d621f47e5bf0b6b6c6b88228b6a9e1 /src/video_core/host_shaders
parentRevert "HACK: Avoid swizzling and reuploading ASTC image every frame" (diff)
downloadyuzu-70f8ffb787a8bf394dfdc33420da3e6632472e5f.tar
yuzu-70f8ffb787a8bf394dfdc33420da3e6632472e5f.tar.gz
yuzu-70f8ffb787a8bf394dfdc33420da3e6632472e5f.tar.bz2
yuzu-70f8ffb787a8bf394dfdc33420da3e6632472e5f.tar.lz
yuzu-70f8ffb787a8bf394dfdc33420da3e6632472e5f.tar.xz
yuzu-70f8ffb787a8bf394dfdc33420da3e6632472e5f.tar.zst
yuzu-70f8ffb787a8bf394dfdc33420da3e6632472e5f.zip
Diffstat (limited to 'src/video_core/host_shaders')
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp132
1 files changed, 63 insertions, 69 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 5e922d1fe..4014d4bfe 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -804,11 +804,7 @@ uint UnquantizeTexelWeight(EncodingData val) {
return result;
}
-uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
-
void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
- const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
- const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
const uint num_planes = is_dual_plane ? 2 : 1;
const uint area = size.x * size.y;
const uint loop_count = min(result_index, area * num_planes);
@@ -818,58 +814,71 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
result_vector[array_index][vector_index] =
UnquantizeTexelWeight(GetEncodingFromVector(itr));
}
- for (uint plane = 0; plane < num_planes; ++plane) {
- for (uint t = 0; t < block_dims.y; t++) {
- for (uint s = 0; s < block_dims.x; s++) {
- const uint cs = Ds * s;
- const uint ct = Dt * t;
- const uint gs = (cs * (size.x - 1) + 32) >> 6;
- const uint gt = (ct * (size.y - 1) + 32) >> 6;
- const uint js = gs >> 4;
- const uint fs = gs & 0xF;
- const uint jt = gt >> 4;
- const uint ft = gt & 0x0F;
- const uint w11 = (fs * ft + 8) >> 4;
- const uint w10 = ft - w11;
- const uint w01 = fs - w11;
- const uint w00 = 16 - fs - ft + w11;
- const uvec4 w = uvec4(w00, w01, w10, w11);
- const uint v0 = jt * size.x + js;
-
- uvec4 p = uvec4(0);
-
-#define VectorIndicesFromBase(offset_base) \
- const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; \
- const uint array_index = offset / 4; \
+}
+
+uint GetUnquantizedTexelWieght(uint offset_base, uint plane, bool is_dual_plane) { // Returns the result_vector entry for weight-grid index offset_base; plane is only consulted when is_dual_plane is true.
+ const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; // Dual-plane: the two planes' entries are interleaved (2 * base + plane); otherwise the base index is used directly.
+ const uint array_index = offset / 4; // result_vector is addressed as [offset / 4][offset % 4] - presumably an array of 4-component vectors; confirm at its declaration.
const uint vector_index = offset % 4;
+ return result_vector[array_index][vector_index];
+}
- if (v0 < area) {
- const uint offset_base = v0;
- VectorIndicesFromBase(offset_base);
- p.x = result_vector[array_index][vector_index];
- }
- if ((v0 + 1) < (area)) {
- const uint offset_base = v0 + 1;
- VectorIndicesFromBase(offset_base);
- p.y = result_vector[array_index][vector_index];
- }
- if ((v0 + size.x) < (area)) {
- const uint offset_base = v0 + size.x;
- VectorIndicesFromBase(offset_base);
- p.z = result_vector[array_index][vector_index];
- }
- if ((v0 + size.x + 1) < (area)) {
- const uint offset_base = v0 + size.x + 1;
- VectorIndicesFromBase(offset_base);
- p.w = result_vector[array_index][vector_index];
- }
- const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane;
- const uint array_index = offset / 4;
- const uint vector_index = offset % 4;
- unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4;
- }
+uvec4 GetUnquantizedWeightVector(uint t, uint s, uvec2 size, uint plane_index, bool is_dual_plane) { // Builds the 4-component weight vector for block texel (s, t) by interpolating entries of a size.x-by-size.y weight grid.
+ const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); // Per-axis scale factor; the divisor is block_dims.x - 1, so this assumes the block width is greater than 1.
+ const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); // Same for the vertical axis; assumes the block height is greater than 1.
+ const uint area = size.x * size.y; // Number of weight-grid entries; used below as the bounds limit for lookups.
+
+ const uint cs = Ds * s;
+ const uint ct = Dt * t;
+ const uint gs = (cs * (size.x - 1) + 32) >> 6; // Scale onto the weight grid with rounding (+32, then >> 6); the low 4 bits of the result are split off below.
+ const uint gt = (ct * (size.y - 1) + 32) >> 6;
+ const uint js = gs >> 4; // Upper bits: grid column of the base entry.
+ const uint fs = gs & 0xF; // Low 4 bits: fractional position along s (0..15).
+ const uint jt = gt >> 4; // Upper bits: grid row of the base entry.
+ const uint ft = gt & 0x0F; // Low 4 bits: fractional position along t (0..15).
+ const uint w11 = (fs * ft + 8) >> 4; // The four factors w00 + w01 + w10 + w11 sum to 16.
+ const uint w10 = ft - w11;
+ const uint w01 = fs - w11;
+ const uint w00 = 16 - fs - ft + w11;
+ const uvec4 w = uvec4(w00, w01, w10, w11); // Component order matches p0/p1 below: base, base + 1, base + size.x, base + size.x + 1.
+ const uint v0 = jt * size.x + js; // Row-major index of the base grid entry.
+
+ uvec4 p0 = uvec4(0); // Plane-0 samples; entries whose index is not below area keep the value 0.
+ uvec4 p1 = uvec4(0); // Plane-1 samples; only consumed when is_dual_plane is true.
+
+ if (v0 < area) {
+ const uint offset_base = v0;
+ p0.x = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
+ p1.x = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane); // When is_dual_plane is false the helper ignores the plane argument, so p1 mirrors p0 and goes unused.
+ }
+ if ((v0 + 1) < (area)) {
+ const uint offset_base = v0 + 1;
+ p0.y = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
+ p1.y = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
+ }
+ if ((v0 + size.x) < (area)) {
+ const uint offset_base = v0 + size.x;
+ p0.z = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
+ p1.z = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
+ }
+ if ((v0 + size.x + 1) < (area)) {
+ const uint offset_base = v0 + size.x + 1;
+ p0.w = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
+ p1.w = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
+ }
+
+ const uint primary_weight = (uint(dot(p0, w)) + 8) >> 4; // Weighted sum of the four samples, divided by 16 with rounding.
+
+ uvec4 weight_vec = uvec4(primary_weight); // Start with the primary weight in every component.
+
+ if (is_dual_plane) {
+ const uint secondary_weight = (uint(dot(p1, w)) + 8) >> 4; // Same interpolation applied to the plane-1 samples.
+ for (uint c = 0; c < 4; c++) {
+ const bool is_secondary = ((plane_index + 1u) & 3u) == c; // Only component (plane_index + 1) & 3 takes the second plane's weight.
+ weight_vec[c] = is_secondary ? secondary_weight : primary_weight;
}
}
+ return weight_vec;
}
int FindLayout(uint mode) {
@@ -1155,25 +1164,10 @@ void DecompressBlock(ivec3 coord) {
}
const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
- const uint weight_offset = (j * block_dims.x + i);
- const uint array_index = weight_offset / 4;
- const uint vector_index = weight_offset % 4;
- const uint primary_weight = unquantized_texel_weights[array_index][vector_index];
- uvec4 weight_vec = uvec4(primary_weight);
- if (dual_plane) {
- const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS;
- const uint secondary_array_index = secondary_weight_offset / 4;
- const uint secondary_vector_index = secondary_weight_offset % 4;
- const uint secondary_weight =
- unquantized_texel_weights[secondary_array_index][secondary_vector_index];
- for (uint c = 0; c < 4; c++) {
- const bool is_secondary = ((plane_index + 1u) & 3u) == c;
- weight_vec[c] = is_secondary ? secondary_weight : primary_weight;
- }
- }
+ const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane);
const vec4 Cf =
vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64);
- const vec4 p = (Cf / 65535.0);
+ const vec4 p = (Cf / 65535.0f);
imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar);
}
}