From 5ab80535118e593ef3add3ce2b5935437e1dc1d3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 31 Jul 2021 22:24:15 -0400 Subject: astc_decoder: Compute offset swizzles in-shader Alleviates the dependency on the swizzle table and a uniform which is constant for all ASTC texture sizes. --- src/video_core/host_shaders/astc_decoder.comp | 46 ++++++++------------------- 1 file changed, 13 insertions(+), 33 deletions(-) (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 392f09c68..74ce058a9 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -10,8 +10,7 @@ #define END_PUSH_CONSTANTS }; #define UNIFORM(n) #define BINDING_INPUT_BUFFER 0 -#define BINDING_SWIZZLE_BUFFER 1 -#define BINDING_OUTPUT_IMAGE 2 +#define BINDING_OUTPUT_IMAGE 1 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv @@ -19,7 +18,6 @@ #define END_PUSH_CONSTANTS #define UNIFORM(n) layout(location = n) uniform #define BINDING_INPUT_BUFFER 0 -#define BINDING_SWIZZLE_BUFFER 1 #define BINDING_OUTPUT_IMAGE 0 #endif @@ -28,13 +26,11 @@ layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; BEGIN_PUSH_CONSTANTS UNIFORM(1) uvec2 block_dims; - -UNIFORM(2) uint bytes_per_block_log2; -UNIFORM(3) uint layer_stride; -UNIFORM(4) uint block_size; -UNIFORM(5) uint x_shift; -UNIFORM(6) uint block_height; -UNIFORM(7) uint block_height_mask; +UNIFORM(2) uint layer_stride; +UNIFORM(3) uint block_size; +UNIFORM(4) uint x_shift; +UNIFORM(5) uint block_height; +UNIFORM(6) uint block_height_mask; END_PUSH_CONSTANTS struct EncodingData { @@ -53,35 +49,17 @@ struct TexelWeightParams { bool void_extent_hdr; }; -// Swizzle data -layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { - uint swizzle_table[]; -}; - layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uvec4 astc_data[]; }; layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; -const uint GOB_SIZE_X = 64; -const uint GOB_SIZE_Y = 8; -const uint GOB_SIZE_Z = 1; -const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; - const uint GOB_SIZE_X_SHIFT = 6; const uint GOB_SIZE_Y_SHIFT = 3; -const uint GOB_SIZE_Z_SHIFT = 0; -const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; - -const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); - -const int BLOCK_SIZE_IN_BYTES = 16; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT; -const int BLOCK_INFO_ERROR = 0; -const int BLOCK_INFO_VOID_EXTENT_HDR = 1; -const int BLOCK_INFO_VOID_EXTENT_LDR = 2; -const int BLOCK_INFO_NORMAL = 3; +const uint BYTES_PER_BLOCK_LOG2 = 4; const int JUST_BITS = 0; const int QUINT = 1; @@ -168,8 +146,10 @@ int texel_vector_index = 0; uint unquantized_texel_weights[2][144]; uint SwizzleOffset(uvec2 pos) { - pos = pos & SWIZZLE_MASK; - return swizzle_table[pos.y * 64 + pos.x]; + uint x = pos.x; + uint y = pos.y; + return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + + (y % 2) * 16 + (x % 16); } // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] @@ -1253,7 +1233,7 @@ void DecompressBlock(ivec3 coord) { void main() { uvec3 pos = gl_GlobalInvocationID; - pos.x <<= bytes_per_block_log2; + pos.x <<= BYTES_PER_BLOCK_LOG2; // Read as soon as possible due to its latency const uint swizzle = SwizzleOffset(pos.xy); -- cgit v1.2.3