From 15c0c213b1efb63f1d6f4900409fca8c8984e973 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 4 Jul 2021 22:07:53 -0400
Subject: astc.h: Move data to cpp implementation

Moves leftover values that are no longer used by the gpu decoder back to the cpp implementation.
---
 src/video_core/textures/astc.cpp | 86 +++++++++++++++++++++++++++++-----------
 src/video_core/textures/astc.h   | 41 -------------------
 2 files changed, 63 insertions(+), 64 deletions(-)

(limited to 'src/video_core/textures')
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 3ab500760..26c19d75b 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -521,35 +521,41 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
     return params;
 }
 
-static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
-                              u32 blockHeight) {
-    // Don't actually care about the void extent, just read the bits...
-    for (s32 i = 0; i < 4; ++i) {
-        strm.ReadBits<13>();
+// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
+// is the same as [(num_bits - 1):0] and repeats all the way down.
+template <typename IntType>
+static constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
+    if (num_bits == 0 || to_bit == 0) {
+        return 0;
     }
-
-    // Decode the RGBA components and renormalize them to the range [0, 255]
-    u16 r = static_cast<u16>(strm.ReadBits<16>());
-    u16 g = static_cast<u16>(strm.ReadBits<16>());
-    u16 b = static_cast<u16>(strm.ReadBits<16>());
-    u16 a = static_cast<u16>(strm.ReadBits<16>());
-
-    u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
-               (static_cast<u32>(a) & 0xFF00) << 16;
-
-    for (u32 j = 0; j < blockHeight; j++) {
-        for (u32 i = 0; i < blockWidth; i++) {
-            outBuf[j * blockWidth + i] = rgba;
+    const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
+    IntType res = v;
+    u32 reslen = num_bits;
+    while (reslen < to_bit) {
+        u32 comp = 0;
+        if (num_bits > to_bit - reslen) {
+            u32 newshift = to_bit - reslen;
+            comp = num_bits - newshift;
+            num_bits = newshift;
         }
+        res = static_cast<IntType>(res << num_bits);
+        res = static_cast<IntType>(res | (v >> comp));
+        reslen += num_bits;
     }
+    return res;
 }
 
-static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
-    for (u32 j = 0; j < blockHeight; j++) {
-        for (u32 i = 0; i < blockWidth; i++) {
-            outBuf[j * blockWidth + i] = 0xFFFF00FF;
-        }
+static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
+    return std::size_t(1) << num_bits;
+}
+
+template <typename IntType, u32 num_bits, u32 to_bit>
+static constexpr auto MakeReplicateTable() {
+    std::array<IntType, NumReplicateEntries(num_bits)> table{};
+    for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
+        table[value] = Replicate(value, num_bits, to_bit);
     }
+    return table;
 }
 
 static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
@@ -572,6 +578,9 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>
 static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
 static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
 static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
+static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
+static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
+static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
 /// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
 /// to the runtime implementation
 static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
@@ -1316,6 +1325,37 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
 #undef READ_INT_VALUES
 }
 
+static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
+                              u32 blockHeight) {
+    // Don't actually care about the void extent, just read the bits...
+    for (s32 i = 0; i < 4; ++i) {
+        strm.ReadBits<13>();
+    }
+
+    // Decode the RGBA components and renormalize them to the range [0, 255]
+    u16 r = static_cast<u16>(strm.ReadBits<16>());
+    u16 g = static_cast<u16>(strm.ReadBits<16>());
+    u16 b = static_cast<u16>(strm.ReadBits<16>());
+    u16 a = static_cast<u16>(strm.ReadBits<16>());
+
+    u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
+               (static_cast<u32>(a) & 0xFF00) << 16;
+
+    for (u32 j = 0; j < blockHeight; j++) {
+        for (u32 i = 0; i < blockWidth; i++) {
+            outBuf[j * blockWidth + i] = rgba;
+        }
+    }
+}
+
+static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
+    for (u32 j = 0; j < blockHeight; j++) {
+        for (u32 i = 0; i < blockWidth; i++) {
+            outBuf[j * blockWidth + i] = 0xFFFF00FF;
+        }
+    }
+}
+
 static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
                             const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
     InputBitStream strm(inBuf);
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 0229ae122..9e148afc4 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -79,47 +79,6 @@ constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
 
 constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
 
-// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
-// is the same as [(num_bits - 1):0] and repeats all the way down.
-template <typename IntType>
-constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
-    if (num_bits == 0 || to_bit == 0) {
-        return 0;
-    }
-    const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
-    IntType res = v;
-    u32 reslen = num_bits;
-    while (reslen < to_bit) {
-        u32 comp = 0;
-        if (num_bits > to_bit - reslen) {
-            u32 newshift = to_bit - reslen;
-            comp = num_bits - newshift;
-            num_bits = newshift;
-        }
-        res = static_cast<IntType>(res << num_bits);
-        res = static_cast<IntType>(res | (v >> comp));
-        reslen += num_bits;
-    }
-    return res;
-}
-
-constexpr std::size_t NumReplicateEntries(u32 num_bits) {
-    return std::size_t(1) << num_bits;
-}
-
-template <typename IntType, u32 num_bits, u32 to_bit>
-constexpr auto MakeReplicateTable() {
-    std::array<IntType, NumReplicateEntries(num_bits)> table{};
-    for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
-        table[value] = Replicate(value, num_bits, to_bit);
-    }
-    return table;
-}
-
-constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
-constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
-constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
-
 void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
                 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
 
-- 
cgit v1.2.3


From 5665d055476fa793192523c3cb6fe06369d58674 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 4 Jul 2021 22:48:41 -0400
Subject: astc_decoder: Optimize the use EncodingData

This buffer was a list of EncodingData structures sorted by their bit length, with some duplication from the cpu decoder implementation.
We can take advantage of its sorted property to optimize its usage in the shader.

Thanks to wwylele for the optimization idea.
---
 src/video_core/textures/astc.cpp | 70 ++++++++++++++++++++++++++++++++++++++++
 src/video_core/textures/astc.h   | 70 ----------------------------------------
 2 files changed, 70 insertions(+), 70 deletions(-)

(limited to 'src/video_core/textures')

diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 26c19d75b..25161df1f 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -151,6 +151,76 @@ private:
     const IntType& m_Bits;
 };
 
+enum class IntegerEncoding { JustBits, Quint, Trit };
+
+struct IntegerEncodedValue {
+    constexpr IntegerEncodedValue() = default;
+
+    constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
+        : encoding{encoding_}, num_bits{num_bits_} {}
+
+    constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
+        return encoding == other.encoding && num_bits == other.num_bits;
+    }
+
+    // Returns the number of bits required to encode num_vals values.
+    u32 GetBitLength(u32 num_vals) const {
+        u32 total_bits = num_bits * num_vals;
+        if (encoding == IntegerEncoding::Trit) {
+            total_bits += (num_vals * 8 + 4) / 5;
+        } else if (encoding == IntegerEncoding::Quint) {
+            total_bits += (num_vals * 7 + 2) / 3;
+        }
+        return total_bits;
+    }
+
+    IntegerEncoding encoding{};
+    u32 num_bits = 0;
+    u32 bit_value = 0;
+    union {
+        u32 quint_value = 0;
+        u32 trit_value;
+    };
+};
+
+// Returns a new instance of this struct that corresponds to the
+// can take no more than mav_value values
+static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
+    while (mav_value > 0) {
+        u32 check = mav_value + 1;
+
+        // Is mav_value a power of two?
+        if (!(check & (check - 1))) {
+            return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
+        }
+
+        // Is mav_value of the type 3*2^n - 1?
+        if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
+            return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
+        }
+
+        // Is mav_value of the type 5*2^n - 1?
+        if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
+            return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
+        }
+
+        // Apparently it can't be represented with a bounded integer sequence...
+        // just iterate.
+        mav_value--;
+    }
+    return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
+}
+
+static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
+    std::array<IntegerEncodedValue, 256> encodings{};
+    for (std::size_t i = 0; i < encodings.size(); ++i) {
+        encodings[i] = CreateEncoding(static_cast<u32>(i));
+    }
+    return encodings;
+}
+
+static constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
+
 namespace Tegra::Texture::ASTC {
 using IntegerEncodedVector = boost::container::static_vector<
     IntegerEncodedValue, 256,
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 9e148afc4..14d2beec0 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,76 +9,6 @@
 
 namespace Tegra::Texture::ASTC {
 
-enum class IntegerEncoding { JustBits, Quint, Trit };
-
-struct IntegerEncodedValue {
-    constexpr IntegerEncodedValue() = default;
-
-    constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
-        : encoding{encoding_}, num_bits{num_bits_} {}
-
-    constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
-        return encoding == other.encoding && num_bits == other.num_bits;
-    }
-
-    // Returns the number of bits required to encode num_vals values.
-    u32 GetBitLength(u32 num_vals) const {
-        u32 total_bits = num_bits * num_vals;
-        if (encoding == IntegerEncoding::Trit) {
-            total_bits += (num_vals * 8 + 4) / 5;
-        } else if (encoding == IntegerEncoding::Quint) {
-            total_bits += (num_vals * 7 + 2) / 3;
-        }
-        return total_bits;
-    }
-
-    IntegerEncoding encoding{};
-    u32 num_bits = 0;
-    u32 bit_value = 0;
-    union {
-        u32 quint_value = 0;
-        u32 trit_value;
-    };
-};
-
-// Returns a new instance of this struct that corresponds to the
-// can take no more than mav_value values
-constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
-    while (mav_value > 0) {
-        u32 check = mav_value + 1;
-
-        // Is mav_value a power of two?
-        if (!(check & (check - 1))) {
-            return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
-        }
-
-        // Is mav_value of the type 3*2^n - 1?
-        if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
-            return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
-        }
-
-        // Is mav_value of the type 5*2^n - 1?
-        if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
-            return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
-        }
-
-        // Apparently it can't be represented with a bounded integer sequence...
-        // just iterate.
-        mav_value--;
-    }
-    return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
-}
-
-constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
-    std::array<IntegerEncodedValue, 256> encodings{};
-    for (std::size_t i = 0; i < encodings.size(); ++i) {
-        encodings[i] = CreateEncoding(static_cast<u32>(i));
-    }
-    return encodings;
-}
-
-constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
-
 void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
                 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
 
-- 
cgit v1.2.3