diff options
Diffstat (limited to 'src/HTTPServer')
-rw-r--r-- | src/HTTPServer/CMakeLists.txt | 10 | ||||
-rw-r--r-- | src/HTTPServer/UrlParser.cpp | 200 | ||||
-rw-r--r-- | src/HTTPServer/UrlParser.h | 58 |
3 files changed, 265 insertions, 3 deletions
diff --git a/src/HTTPServer/CMakeLists.txt b/src/HTTPServer/CMakeLists.txt index 6788d50bf..a08a1fcf8 100644 --- a/src/HTTPServer/CMakeLists.txt +++ b/src/HTTPServer/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required (VERSION 2.6) -project (MCServer) +project (Cuberite) include_directories ("${PROJECT_SOURCE_DIR}/../") @@ -12,7 +12,9 @@ SET (SRCS HTTPServer.cpp MultipartParser.cpp NameValueParser.cpp - SslHTTPConnection.cpp) + SslHTTPConnection.cpp + UrlParser.cpp +) SET (HDRS EnvelopeParser.h @@ -22,7 +24,9 @@ SET (HDRS HTTPServer.h MultipartParser.h NameValueParser.h - SslHTTPConnection.h) + SslHTTPConnection.h + UrlParser.h +) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set_source_files_properties(HTTPServer.cpp PROPERTIES COMPILE_FLAGS "-Wno-error=global-constructors ") diff --git a/src/HTTPServer/UrlParser.cpp b/src/HTTPServer/UrlParser.cpp new file mode 100644 index 000000000..05db3e413 --- /dev/null +++ b/src/HTTPServer/UrlParser.cpp @@ -0,0 +1,200 @@ + +// UrlParser.cpp + +// Implements the cUrlParser class that parses string URL into individual parts + +#include "Globals.h" +#include "UrlParser.h" + + + + + +UInt16 cUrlParser::GetDefaultPort(const AString & a_Scheme) +{ + if (a_Scheme == "http") + { + return 80; + } + else if (a_Scheme == "https") + { + return 443; + } + else if (a_Scheme == "ftp") + { + return 21; + } + else if (a_Scheme == "mailto") + { + return 25; + } + return 0; +} + + + + + +std::pair<bool, AString> cUrlParser::ParseAuthorityPart( + const AString & a_AuthorityPart, + AString & a_Username, + AString & a_Password, + AString & a_Host, + UInt16 & a_Port +) +{ + /* + a_AuthorityPart format: + [user:password@]host[:port] + host can be an IPv4, hostname, or an IPv6 enclosed in brackets + Assume only the password can contain an additional at-sign + */ + + // Split the authority on the last at-sign, if present: + auto idxLastAtSign = a_AuthorityPart.find_last_of('@'); + auto credPart = (idxLastAtSign == AString::npos) ? AString() : a_AuthorityPart.substr(0, idxLastAtSign); + auto srvrPart = (idxLastAtSign == AString::npos) ? a_AuthorityPart : a_AuthorityPart.substr(idxLastAtSign + 1); + + // User credentials are completely optional: + auto idxCredColon = credPart.find(':'); + a_Username = credPart.substr(0, idxCredColon); + a_Password = (idxCredColon == AString::npos) ? AString() : credPart.substr(idxCredColon + 1); + + // Host can be a hostname, IPv4 or [IPv6]. If in brackets, search for the closing bracket first + if (srvrPart.empty()) + { + // No host information at all. Bail out with success + a_Host.clear(); + return std::make_pair(true, AString()); + } + if (srvrPart[0] == '[') + { + // [IPv6] host, search for the closing bracket + auto idxClosingBracket = srvrPart.find(']'); + if (idxClosingBracket == AString::npos) + { + return std::make_pair(false, "Invalid IPv6-like address, missing closing bracket"); + } + a_Host = srvrPart.substr(0, idxClosingBracket); + auto portPart = srvrPart.substr(idxClosingBracket + 1); + if (portPart.empty()) + { + // No port was specified, return success + return std::make_pair(true, AString()); + } + if (portPart[0] != ':') + { + return std::make_pair(false, "Invalid port format after IPv6 address, mising colon"); + } + if (!StringToInteger(portPart.substr(2), a_Port)) + { + return std::make_pair(false, "Failed to parse port number after IPv6 address"); + } + return std::make_pair(true, AString()); + } + + // Not an [IPv6] address, split on the last colon: + auto idxLastColon = srvrPart.find_last_of(':'); + a_Host = srvrPart.substr(0, idxLastColon); + if (idxLastColon == AString::npos) + { + // No port was specified, return success + return std::make_pair(true, AString()); + } + auto portPart = srvrPart.substr(idxLastColon + 1); + if (!StringToInteger(portPart, a_Port)) + { + return std::make_pair(false, "Failed to parse port number after hostname"); + } + return std::make_pair(true, AString()); +} + + + + + +std::pair<bool, AString> cUrlParser::Parse( + const AString & a_Url, + AString & a_Scheme, + AString & a_Username, + AString & a_Password, + AString & a_Host, + UInt16 & a_Port, + AString & a_Path, + AString & a_Query, + AString & a_Fragment +) +{ + // Find the scheme - the text before the first colon: + auto idxColon = a_Url.find(':'); + if (idxColon == AString::npos) + { + return std::make_pair(false, "Cannot parse the Scheme part of the URL"); + } + a_Scheme = StrToLower(a_Url.substr(0, idxColon)); + a_Port = GetDefaultPort(a_Scheme); + if (a_Port == 0) + { + return std::make_pair(false, Printf("Unknown URL scheme: \"%s\"", a_Scheme.c_str())); + } + + // If the next two chars are a double-slash, skip them: + auto authStart = idxColon + 1; + if (a_Url.substr(authStart, 2) == "//") + { + authStart += 2; + } + + // The Authority part follows the Scheme, until the first slash: + auto idxFirstSlash = a_Url.find('/', authStart + 1); + if (idxFirstSlash == AString::npos) + { + // No slash, the whole end of the Url is the authority part + idxFirstSlash = a_Url.size(); + } + + // Parse the Authority part into individual components: + auto res = ParseAuthorityPart( + a_Url.substr(authStart, idxFirstSlash - authStart), + a_Username, a_Password, + a_Host, a_Port + ); + if (!res.first) + { + return res; + } + + // Parse the rest into a path, query and fragment: + a_Path.clear(); + a_Query.clear(); + a_Fragment.clear(); + if (idxFirstSlash == a_Url.size()) + { + // No additional data, bail out with success + return std::make_pair(true, AString()); + } + auto idxPathEnd = a_Url.find_first_of("?#", idxFirstSlash + 1); + if (idxPathEnd == AString::npos) + { + a_Path = a_Url.substr(idxFirstSlash); + return std::make_pair(true, AString()); + } + a_Path = a_Url.substr(idxFirstSlash, idxPathEnd - idxFirstSlash); + auto idxHash = a_Url.find('#', idxPathEnd); + if (idxHash == AString::npos) + { + a_Query = a_Url.substr(idxPathEnd + 1); + return std::make_pair(true, AString()); + } + if (idxHash > idxPathEnd) + { + a_Query = a_Url.substr(idxPathEnd + 1, idxHash - idxPathEnd - 1); + } + a_Fragment = a_Url.substr(idxHash + 1); + return std::make_pair(true, AString()); +} + + + + + diff --git a/src/HTTPServer/UrlParser.h b/src/HTTPServer/UrlParser.h new file mode 100644 index 000000000..15a63e05d --- /dev/null +++ b/src/HTTPServer/UrlParser.h @@ -0,0 +1,58 @@ + +// UrlParser.h + +// Declares the cUrlParser class that parses string URL into individual parts + + + + + +#pragma once + + + + + +class cUrlParser +{ +public: + /** Returns true if the specified scheme (http, ftp, mailto, ...) is recognized by the URL parser. + Is case sensitive, known schemes are always lowercase. */ + static bool IsKnownScheme(const AString & a_Scheme) { return (GetDefaultPort(a_Scheme) > 0); } + + /** Returns the default port used by the specified scheme / protocol. + If the scheme is not known, 0 is returned. */ + static UInt16 GetDefaultPort(const AString & a_Scheme); + + /** Parses the given Authority part of an URL into individual components. + Returns true on success, + returns false and error message on failure. */ + static std::pair<bool, AString> ParseAuthorityPart( + const AString & a_AuthorityPart, + AString & a_Username, + AString & a_Password, + AString & a_Host, + UInt16 & a_Port + ); + + /** Parses the given URL into individual components. + Returns true on success, + returns false and error message on failure. + Fails if the scheme (protocol) is not known. + If port is missing, the default port for the specific scheme is applied. */ + static std::pair<bool, AString> Parse( + const AString & a_Url, + AString & a_Scheme, + AString & a_Username, + AString & a_Password, + AString & a_Host, + UInt16 & a_Port, + AString & a_Path, + AString & a_Query, + AString & a_Fragment + ); +}; + + + + |