From b3b723b4531a3781ab847a2652cf192303e78941 Mon Sep 17 00:00:00 2001 From: Mattes D Date: Sun, 25 Dec 2016 18:29:21 +0100 Subject: Refactored to put URL Encoding / Decoding in a single place. (#3491) --- src/StringUtils.cpp | 220 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 163 insertions(+), 57 deletions(-) (limited to 'src/StringUtils.cpp') diff --git a/src/StringUtils.cpp b/src/StringUtils.cpp index 81999a35d..2fb4f6a9a 100644 --- a/src/StringUtils.cpp +++ b/src/StringUtils.cpp @@ -14,6 +14,42 @@ +/** Returns the value of the single hex digit. +Returns 0xff on failure. */ +static unsigned char HexToDec(char a_HexChar) +{ + switch (a_HexChar) + { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'a': return 10; + case 'b': return 11; + case 'c': return 12; + case 'd': return 13; + case 'e': return 14; + case 'f': return 15; + case 'A': return 10; + case 'B': return 11; + case 'C': return 12; + case 'D': return 13; + case 'E': return 14; + case 'F': return 15; + } + return 0xff; +} + + + + + AString & AppendVPrintf(AString & str, const char * format, va_list args) { ASSERT(format != nullptr); @@ -353,44 +389,63 @@ AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UT a_UTF8.reserve(3 * a_NumShorts / 2); // a quick guess of the resulting size for (size_t i = 0; i < a_NumShorts; i++) { - int c = GetBEShort(&a_RawData[i * 2]); - if (c < 0x80) - { - a_UTF8.push_back(static_cast(c)); - } - else if (c < 0x800) - { - a_UTF8.push_back(static_cast(192 + c / 64)); - a_UTF8.push_back(static_cast(128 + c % 64)); - } - else if (c - 0xd800 < 0x800) - { - // Error, silently drop - } - else if (c < 0x10000) + a_UTF8.append(UnicodeCharToUtf8(GetBEUShort(&a_RawData[i * 2]))); + } + return a_UTF8; +} + + + + +AString UnicodeCharToUtf8(unsigned a_UnicodeChar) +{ + if (a_UnicodeChar < 0x80) + { + return AString{static_cast(a_UnicodeChar)}; + } + else if (a_UnicodeChar < 0x800) + { + return AString { - a_UTF8.push_back(static_cast(224 + c / 4096)); - a_UTF8.push_back(static_cast(128 + (c / 64) % 64)); - a_UTF8.push_back(static_cast(128 + c % 64)); - } - else if (c < 0x110000) + static_cast(192 + a_UnicodeChar / 64), + static_cast(128 + a_UnicodeChar % 64), + }; + } + else if (a_UnicodeChar - 0xd800 < 0x800) + { + // Error + return AString(); + } + else if (a_UnicodeChar < 0x10000) + { + return AString { - a_UTF8.push_back(static_cast(240 + c / 262144)); - a_UTF8.push_back(static_cast(128 + (c / 4096) % 64)); - a_UTF8.push_back(static_cast(128 + (c / 64) % 64)); - a_UTF8.push_back(static_cast(128 + c % 64)); - } - else + static_cast(224 + a_UnicodeChar / 4096), + static_cast(128 + (a_UnicodeChar / 64) % 64), + static_cast(128 + a_UnicodeChar % 64) + }; + } + else if (a_UnicodeChar < 0x110000) + { + return AString { - // Error, silently drop - } + static_cast(240 + a_UnicodeChar / 262144), + static_cast(128 + (a_UnicodeChar / 4096) % 64), + static_cast(128 + (a_UnicodeChar / 64) % 64), + static_cast(128 + a_UnicodeChar % 64), + }; + } + else + { + // Error + return AString(); } - return a_UTF8; } + // UTF-8 conversion code adapted from: // https://stackoverflow.com/questions/2867123/convert-utf-16-to-utf-8-under-windows-and-linux-in-c @@ -708,58 +763,99 @@ AString StripColorCodes(const AString & a_Message) -AString URLDecode(const AString & a_String) +std::pair URLDecode(const AString & a_Text) { AString res; - size_t len = a_String.length(); + auto len = a_Text.size(); res.reserve(len); for (size_t i = 0; i < len; i++) { - char ch = a_String[i]; - if ((ch != '%') || (i > len - 3)) + if (a_Text[i] == '+') { - res.push_back(ch); + res.push_back(' '); continue; } - // Decode the hex value: - char hi = a_String[i + 1], lo = a_String[i + 2]; - if ((hi >= '0') && (hi <= '9')) + if (a_Text[i] != '%') { - hi = hi - '0'; + res.push_back(a_Text[i]); + continue; } - else if ((hi >= 'a') && (hi <= 'f')) + if (i + 1 >= len) { - hi = hi - 'a' + 10; + // String too short for an encoded value + return std::make_pair(false, AString()); } - else if ((hi >= 'A') && (hi <= 'F')) + if ((a_Text[i + 1] == 'u') || (a_Text[i + 1] == 'U')) { - hi = hi - 'F' + 10; + // Unicode char "%u0xxxx" + if (i + 6 >= len) + { + return std::make_pair(false, AString()); + } + if (a_Text[i + 2] != '0') + { + return std::make_pair(false, AString()); + } + unsigned v1 = HexToDec(a_Text[i + 3]); + unsigned v2 = HexToDec(a_Text[i + 4]); + unsigned v3 = HexToDec(a_Text[i + 5]); + unsigned v4 = HexToDec(a_Text[i + 6]); + if ((v1 == 0xff) || (v2 == 0xff) || (v4 == 0xff) || (v3 == 0xff)) + { + // Invalid hex numbers + return std::make_pair(false, AString()); + } + res.append(UnicodeCharToUtf8((v1 << 12) | (v2 << 8) | (v3 << 4) | v4)); + i = i + 6; } else { - res.push_back(ch); - continue; - } - if ((lo >= '0') && (lo <= '9')) - { - lo = lo - '0'; + // Regular char "%xx": + if (i + 2 >= len) + { + return std::make_pair(false, AString()); + } + auto v1 = HexToDec(a_Text[i + 1]); + auto v2 = HexToDec(a_Text[i + 2]); + if ((v1 == 0xff) || (v2 == 0xff)) + { + // Invalid hex numbers + return std::make_pair(false, AString()); + } + res.push_back(static_cast((v1 << 4) | v2)); + i = i + 2; } - else if ((lo >= 'a') && (lo <= 'f')) + } // for i - a_Text[i] + return std::make_pair(true, res); +} + + + + + +AString URLEncode(const AString & a_Text) +{ + AString res; + auto len = a_Text.size(); + res.reserve(len); + static const char HEX[] = "0123456789abcdef"; + for (size_t i = 0; i < len; ++i) + { + if (isalnum(a_Text[i])) { - lo = lo - 'a' + 10; + res.push_back(a_Text[i]); } - else if ((lo >= 'A') && (lo <= 'F')) + else if (a_Text[i] == ' ') { - lo = lo - 'A' + 10; + res.push_back('+'); } else { - res.push_back(ch); - continue; + res.push_back('%'); + res.push_back(HEX[static_cast(a_Text[i]) >> 4]); + res.push_back(HEX[static_cast(a_Text[i]) & 0x0f]); } - res.push_back(static_cast((hi << 4) | lo)); - i += 2; - } // for i - a_String[] + } return res; } @@ -907,6 +1003,16 @@ short GetBEShort(const char * a_Mem) +unsigned short GetBEUShort(const char * a_Mem) +{ + const Byte * Bytes = reinterpret_cast(a_Mem); + return static_cast((Bytes[0] << 8) | Bytes[1]); +} + + + + + int GetBEInt(const char * a_Mem) { const Byte * Bytes = reinterpret_cast(a_Mem); -- cgit v1.2.3