From 15c0c213b1efb63f1d6f4900409fca8c8984e973 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 22:07:53 -0400 Subject: astc.h: Move data to cpp implementation Moves leftover values that are no longer used by the gpu decoder back to the cpp implementation. --- src/video_core/textures/astc.cpp | 86 +++++++++++++++++++++++++++++----------- src/video_core/textures/astc.h | 41 ------------------- 2 files changed, 63 insertions(+), 64 deletions(-) (limited to 'src/video_core/textures') diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 3ab500760..26c19d75b 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -521,35 +521,41 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { return params; } -static void FillVoidExtentLDR(InputBitStream& strm, std::span outBuf, u32 blockWidth, - u32 blockHeight) { - // Don't actually care about the void extent, just read the bits... - for (s32 i = 0; i < 4; ++i) { - strm.ReadBits<13>(); +// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] +// is the same as [(num_bits - 1):0] and repeats all the way down. +template +static constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) { + if (num_bits == 0 || to_bit == 0) { + return 0; } - - // Decode the RGBA components and renormalize them to the range [0, 255] - u16 r = static_cast(strm.ReadBits<16>()); - u16 g = static_cast(strm.ReadBits<16>()); - u16 b = static_cast(strm.ReadBits<16>()); - u16 a = static_cast(strm.ReadBits<16>()); - - u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast(b) & 0xFF00) << 8 | - (static_cast(a) & 0xFF00) << 16; - - for (u32 j = 0; j < blockHeight; j++) { - for (u32 i = 0; i < blockWidth; i++) { - outBuf[j * blockWidth + i] = rgba; + const IntType v = val & static_cast((1 << num_bits) - 1); + IntType res = v; + u32 reslen = num_bits; + while (reslen < to_bit) { + u32 comp = 0; + if (num_bits > to_bit - reslen) { + u32 newshift = to_bit - reslen; + comp = num_bits - newshift; + num_bits = newshift; } + res = static_cast(res << num_bits); + res = static_cast(res | (v >> comp)); + reslen += num_bits; } + return res; } -static void FillError(std::span outBuf, u32 blockWidth, u32 blockHeight) { - for (u32 j = 0; j < blockHeight; j++) { - for (u32 i = 0; i < blockWidth; i++) { - outBuf[j * blockWidth + i] = 0xFFFF00FF; - } +static constexpr std::size_t NumReplicateEntries(u32 num_bits) { + return std::size_t(1) << num_bits; +} + +template +static constexpr auto MakeReplicateTable() { + std::array table{}; + for (IntType value = 0; value < static_cast(std::size(table)); ++value) { + table[value] = Replicate(value, num_bits, to_bit); } + return table; } static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable(); @@ -572,6 +578,9 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable(); static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable(); static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable(); +static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable(); +static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable(); +static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable(); /// Use a precompiled table with the most common usages, if it's not in the expected range, fallback /// to the runtime implementation static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { @@ -1316,6 +1325,37 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues, #undef READ_INT_VALUES } +static void FillVoidExtentLDR(InputBitStream& strm, std::span outBuf, u32 blockWidth, + u32 blockHeight) { + // Don't actually care about the void extent, just read the bits... + for (s32 i = 0; i < 4; ++i) { + strm.ReadBits<13>(); + } + + // Decode the RGBA components and renormalize them to the range [0, 255] + u16 r = static_cast(strm.ReadBits<16>()); + u16 g = static_cast(strm.ReadBits<16>()); + u16 b = static_cast(strm.ReadBits<16>()); + u16 a = static_cast(strm.ReadBits<16>()); + + u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast(b) & 0xFF00) << 8 | + (static_cast(a) & 0xFF00) << 16; + + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = rgba; + } + } +} + +static void FillError(std::span outBuf, u32 blockWidth, u32 blockHeight) { + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = 0xFFFF00FF; + } + } +} + static void DecompressBlock(std::span inBuf, const u32 blockWidth, const u32 blockHeight, std::span outBuf) { InputBitStream strm(inBuf); diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 0229ae122..9e148afc4 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -79,47 +79,6 @@ constexpr std::array MakeEncodedValues() { constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); -// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] -// is the same as [(num_bits - 1):0] and repeats all the way down. -template -constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) { - if (num_bits == 0 || to_bit == 0) { - return 0; - } - const IntType v = val & static_cast((1 << num_bits) - 1); - IntType res = v; - u32 reslen = num_bits; - while (reslen < to_bit) { - u32 comp = 0; - if (num_bits > to_bit - reslen) { - u32 newshift = to_bit - reslen; - comp = num_bits - newshift; - num_bits = newshift; - } - res = static_cast(res << num_bits); - res = static_cast(res | (v >> comp)); - reslen += num_bits; - } - return res; -} - -constexpr std::size_t NumReplicateEntries(u32 num_bits) { - return std::size_t(1) << num_bits; -} - -template -constexpr auto MakeReplicateTable() { - std::array table{}; - for (IntType value = 0; value < static_cast(std::size(table)); ++value) { - table[value] = Replicate(value, num_bits, to_bit); - } - return table; -} - -constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable(); -constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable(); -constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable(); - void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span output); -- cgit v1.2.3 From 5665d055476fa793192523c3cb6fe06369d58674 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 22:48:41 -0400 Subject: astc_decoder: Optimize the use EncodingData This buffer was a list of EncodingData structures sorted by their bit length, with some duplication from the cpu decoder implementation. We can take advantage of its sorted property to optimize its usage in the shader. Thanks to wwylele for the optimization idea. --- src/video_core/textures/astc.cpp | 70 ++++++++++++++++++++++++++++++++++++++++ src/video_core/textures/astc.h | 70 ---------------------------------------- 2 files changed, 70 insertions(+), 70 deletions(-) (limited to 'src/video_core/textures') diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 26c19d75b..25161df1f 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -151,6 +151,76 @@ private: const IntType& m_Bits; }; +enum class IntegerEncoding { JustBits, Quint, Trit }; + +struct IntegerEncodedValue { + constexpr IntegerEncodedValue() = default; + + constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) + : encoding{encoding_}, num_bits{num_bits_} {} + + constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { + return encoding == other.encoding && num_bits == other.num_bits; + } + + // Returns the number of bits required to encode num_vals values. + u32 GetBitLength(u32 num_vals) const { + u32 total_bits = num_bits * num_vals; + if (encoding == IntegerEncoding::Trit) { + total_bits += (num_vals * 8 + 4) / 5; + } else if (encoding == IntegerEncoding::Quint) { + total_bits += (num_vals * 7 + 2) / 3; + } + return total_bits; + } + + IntegerEncoding encoding{}; + u32 num_bits = 0; + u32 bit_value = 0; + union { + u32 quint_value = 0; + u32 trit_value; + }; +}; + +// Returns a new instance of this struct that corresponds to the +// can take no more than mav_value values +static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) { + while (mav_value > 0) { + u32 check = mav_value + 1; + + // Is mav_value a power of two? + if (!(check & (check - 1))) { + return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value)); + } + + // Is mav_value of the type 3*2^n - 1? + if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1)); + } + + // Is mav_value of the type 5*2^n - 1? + if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1)); + } + + // Apparently it can't be represented with a bounded integer sequence... + // just iterate. + mav_value--; + } + return IntegerEncodedValue(IntegerEncoding::JustBits, 0); +} + +static constexpr std::array MakeEncodedValues() { + std::array encodings{}; + for (std::size_t i = 0; i < encodings.size(); ++i) { + encodings[i] = CreateEncoding(static_cast(i)); + } + return encodings; +} + +static constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); + namespace Tegra::Texture::ASTC { using IntegerEncodedVector = boost::container::static_vector< IntegerEncodedValue, 256, diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 9e148afc4..14d2beec0 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -9,76 +9,6 @@ namespace Tegra::Texture::ASTC { -enum class IntegerEncoding { JustBits, Quint, Trit }; - -struct IntegerEncodedValue { - constexpr IntegerEncodedValue() = default; - - constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) - : encoding{encoding_}, num_bits{num_bits_} {} - - constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { - return encoding == other.encoding && num_bits == other.num_bits; - } - - // Returns the number of bits required to encode num_vals values. - u32 GetBitLength(u32 num_vals) const { - u32 total_bits = num_bits * num_vals; - if (encoding == IntegerEncoding::Trit) { - total_bits += (num_vals * 8 + 4) / 5; - } else if (encoding == IntegerEncoding::Quint) { - total_bits += (num_vals * 7 + 2) / 3; - } - return total_bits; - } - - IntegerEncoding encoding{}; - u32 num_bits = 0; - u32 bit_value = 0; - union { - u32 quint_value = 0; - u32 trit_value; - }; -}; - -// Returns a new instance of this struct that corresponds to the -// can take no more than mav_value values -constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) { - while (mav_value > 0) { - u32 check = mav_value + 1; - - // Is mav_value a power of two? - if (!(check & (check - 1))) { - return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value)); - } - - // Is mav_value of the type 3*2^n - 1? - if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { - return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1)); - } - - // Is mav_value of the type 5*2^n - 1? - if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { - return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1)); - } - - // Apparently it can't be represented with a bounded integer sequence... - // just iterate. - mav_value--; - } - return IntegerEncodedValue(IntegerEncoding::JustBits, 0); -} - -constexpr std::array MakeEncodedValues() { - std::array encodings{}; - for (std::size_t i = 0; i < encodings.size(); ++i) { - encodings[i] = CreateEncoding(static_cast(i)); - } - return encodings; -} - -constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); - void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span output); -- cgit v1.2.3