diff options
62 files changed, 3583 insertions, 941 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 44ed4196d..467d769a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,8 +157,14 @@ if (ENABLE_SDL2) target_include_directories(SDL2 INTERFACE "${SDL2_INCLUDE_DIR}") else() find_package(SDL2 REQUIRED) - include_directories(${SDL2_INCLUDE_DIRS}) + # Some installations don't set SDL2_LIBRARIES + if("${SDL2_LIBRARIES}" STREQUAL "") + message(WARNING "SDL2_LIBRARIES wasn't set, manually setting to SDL2::SDL2") + set(SDL2_LIBRARIES "SDL2::SDL2") + endif() + + include_directories(${SDL2_INCLUDE_DIRS}) add_library(SDL2 INTERFACE) target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}") endif() diff --git a/externals/httplib/README.md b/externals/httplib/README.md index 0e26522b5..73037d297 100644 --- a/externals/httplib/README.md +++ b/externals/httplib/README.md @@ -1,4 +1,4 @@ -From https://github.com/yhirose/cpp-httplib/commit/d9479bc0b12e8a1e8bce2d34da4feeef488581f3 +From https://github.com/yhirose/cpp-httplib/tree/fce8e6fefdab4ad48bc5b25c98e5ebfda4f3cf53 MIT License diff --git a/externals/httplib/httplib.h b/externals/httplib/httplib.h index fa2edcc94..e03842e6d 100644 --- a/externals/httplib/httplib.h +++ b/externals/httplib/httplib.h @@ -1,7 +1,7 @@ // // httplib.h // -// Copyright (c) 2019 Yuji Hirose. All rights reserved. +// Copyright (c) 2020 Yuji Hirose. All rights reserved. // MIT License // @@ -11,6 +11,7 @@ /* * Configuration */ + #ifndef CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND #define CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND 5 #endif @@ -40,7 +41,7 @@ #endif #ifndef CPPHTTPLIB_PAYLOAD_MAX_LENGTH -#define CPPHTTPLIB_PAYLOAD_MAX_LENGTH (std::numeric_limits<size_t>::max)() +#define CPPHTTPLIB_PAYLOAD_MAX_LENGTH (std::numeric_limits<size_t>::max()) #endif #ifndef CPPHTTPLIB_RECV_BUFSIZ @@ -48,9 +49,14 @@ #endif #ifndef CPPHTTPLIB_THREAD_POOL_COUNT -#define CPPHTTPLIB_THREAD_POOL_COUNT 8 +#define CPPHTTPLIB_THREAD_POOL_COUNT \ + (std::max(1u, std::thread::hardware_concurrency() - 1)) #endif +/* + * Headers + */ + #ifdef _WIN32 #ifndef _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS @@ -62,9 +68,9 @@ #if defined(_MSC_VER) #ifdef _WIN64 -typedef __int64 ssize_t; +using ssize_t = __int64; #else -typedef int ssize_t; +using ssize_t = int; #endif #if _MSC_VER < 1900 @@ -100,7 +106,7 @@ typedef int ssize_t; #define strcasecmp _stricmp #endif // strcasecmp -typedef SOCKET socket_t; +using socket_t = SOCKET; #ifdef CPPHTTPLIB_USE_POLL #define poll(fds, nfds, timeout) WSAPoll(fds, nfds, timeout) #endif @@ -109,23 +115,25 @@ typedef SOCKET socket_t; #include <arpa/inet.h> #include <cstring> +#include <ifaddrs.h> #include <netdb.h> #include <netinet/in.h> #ifdef CPPHTTPLIB_USE_POLL #include <poll.h> #endif +#include <csignal> #include <pthread.h> -#include <signal.h> #include <sys/select.h> #include <sys/socket.h> #include <unistd.h> -typedef int socket_t; +using socket_t = int; #define INVALID_SOCKET (-1) #endif //_WIN32 -#include <assert.h> +#include <array> #include <atomic> +#include <cassert> #include <condition_variable> #include <errno.h> #include <fcntl.h> @@ -143,9 +151,13 @@ typedef int socket_t; #ifdef CPPHTTPLIB_OPENSSL_SUPPORT #include <openssl/err.h> +#include <openssl/md5.h> #include <openssl/ssl.h> #include <openssl/x509v3.h> +#include <iomanip> +#include <sstream> + // #if OPENSSL_VERSION_NUMBER < 0x1010100fL // #error Sorry, OpenSSL versions prior to 1.1.1 are not supported // #endif @@ -162,6 +174,9 @@ inline const unsigned char *ASN1_STRING_get0_data(const ASN1_STRING *asn1) { #include <zlib.h> #endif +/* + * Declaration + */ namespace httplib { namespace detail { @@ -176,37 +191,15 @@ struct ci { } // namespace detail -enum class HttpVersion { v1_0 = 0, v1_1 }; - -typedef std::multimap<std::string, std::string, detail::ci> Headers; - -typedef std::multimap<std::string, std::string> Params; -typedef std::smatch Match; - -typedef std::function<void(const char *data, size_t data_len)> DataSink; - -typedef std::function<void()> Done; - -typedef std::function<void(size_t offset, size_t length, DataSink sink, - Done done)> - ContentProvider; +using Headers = std::multimap<std::string, std::string, detail::ci>; -typedef std::function<bool(const char *data, size_t data_length, size_t offset, - uint64_t content_length)> - ContentReceiver; +using Params = std::multimap<std::string, std::string>; +using Match = std::smatch; -typedef std::function<bool(uint64_t current, uint64_t total)> Progress; +using Progress = std::function<bool(uint64_t current, uint64_t total)>; struct Response; -typedef std::function<bool(const Response &response)> ResponseHandler; - -struct MultipartFile { - std::string filename; - std::string content_type; - size_t offset = 0; - size_t length = 0; -}; -typedef std::multimap<std::string, MultipartFile> MultipartFiles; +using ResponseHandler = std::function<bool(const Response &response)>; struct MultipartFormData { std::string name; @@ -214,10 +207,53 @@ struct MultipartFormData { std::string filename; std::string content_type; }; -typedef std::vector<MultipartFormData> MultipartFormDataItems; +using MultipartFormDataItems = std::vector<MultipartFormData>; +using MultipartFormDataMap = std::multimap<std::string, MultipartFormData>; + +class DataSink { +public: + DataSink() = default; + DataSink(const DataSink &) = delete; + DataSink &operator=(const DataSink &) = delete; + DataSink(DataSink &&) = delete; + DataSink &operator=(DataSink &&) = delete; + + std::function<void(const char *data, size_t data_len)> write; + std::function<void()> done; + std::function<bool()> is_writable; +}; + +using ContentProvider = + std::function<void(size_t offset, size_t length, DataSink &sink)>; + +using ContentReceiver = + std::function<bool(const char *data, size_t data_length)>; + +using MultipartContentHeader = + std::function<bool(const MultipartFormData &file)>; + +class ContentReader { +public: + using Reader = std::function<bool(ContentReceiver receiver)>; + using MultipartReader = std::function<bool(MultipartContentHeader header, + ContentReceiver receiver)>; + + ContentReader(Reader reader, MultipartReader muitlpart_reader) + : reader_(reader), muitlpart_reader_(muitlpart_reader) {} + + bool operator()(MultipartContentHeader header, + ContentReceiver receiver) const { + return muitlpart_reader_(header, receiver); + } + + bool operator()(ContentReceiver receiver) const { return reader_(receiver); } + + Reader reader_; + MultipartReader muitlpart_reader_; +}; -typedef std::pair<ssize_t, ssize_t> Range; -typedef std::vector<Range> Ranges; +using Range = std::pair<ssize_t, ssize_t>; +using Ranges = std::vector<Range>; struct Request { std::string method; @@ -229,7 +265,7 @@ struct Request { std::string version; std::string target; Params params; - MultipartFiles files; + MultipartFormDataMap files; Ranges ranges; Match matches; @@ -253,13 +289,19 @@ struct Request { std::string get_param_value(const char *key, size_t id = 0) const; size_t get_param_value_count(const char *key) const; + bool is_multipart_form_data() const; + bool has_file(const char *key) const; - MultipartFile get_file_value(const char *key) const; + MultipartFormData get_file_value(const char *key) const; + + // private members... + size_t content_length; + ContentProvider content_provider; }; struct Response { std::string version; - int status; + int status = -1; Headers headers; std::string body; @@ -269,106 +311,81 @@ struct Response { void set_header(const char *key, const char *val); void set_header(const char *key, const std::string &val); - void set_redirect(const char *uri); + void set_redirect(const char *url); void set_content(const char *s, size_t n, const char *content_type); void set_content(const std::string &s, const char *content_type); void set_content_provider( size_t length, - std::function<void(size_t offset, size_t length, DataSink sink)> provider, + std::function<void(size_t offset, size_t length, DataSink &sink)> + provider, std::function<void()> resource_releaser = [] {}); void set_chunked_content_provider( - std::function<void(size_t offset, DataSink sink, Done done)> provider, + std::function<void(size_t offset, DataSink &sink)> provider, std::function<void()> resource_releaser = [] {}); - Response() : status(-1), content_provider_resource_length(0) {} - + Response() = default; + Response(const Response &) = default; + Response &operator=(const Response &) = default; + Response(Response &&) = default; + Response &operator=(Response &&) = default; ~Response() { if (content_provider_resource_releaser) { content_provider_resource_releaser(); } } - size_t content_provider_resource_length; + // private members... + size_t content_length = 0; ContentProvider content_provider; std::function<void()> content_provider_resource_releaser; }; class Stream { public: - virtual ~Stream() {} + virtual ~Stream() = default; + + virtual bool is_readable() const = 0; + virtual bool is_writable() const = 0; + virtual int read(char *ptr, size_t size) = 0; - virtual int write(const char *ptr, size_t size1) = 0; - virtual int write(const char *ptr) = 0; - virtual int write(const std::string &s) = 0; + virtual int write(const char *ptr, size_t size) = 0; virtual std::string get_remote_addr() const = 0; template <typename... Args> int write_format(const char *fmt, const Args &... args); -}; - -class SocketStream : public Stream { -public: - SocketStream(socket_t sock); - virtual ~SocketStream(); - - virtual int read(char *ptr, size_t size); - virtual int write(const char *ptr, size_t size); - virtual int write(const char *ptr); - virtual int write(const std::string &s); - virtual std::string get_remote_addr() const; - -private: - socket_t sock_; -}; - -class BufferStream : public Stream { -public: - BufferStream() {} - virtual ~BufferStream() {} - - virtual int read(char *ptr, size_t size); - virtual int write(const char *ptr, size_t size); - virtual int write(const char *ptr); - virtual int write(const std::string &s); - virtual std::string get_remote_addr() const; - - const std::string &get_buffer() const; - -private: - std::string buffer; + int write(const char *ptr); + int write(const std::string &s); }; class TaskQueue { public: - TaskQueue() {} - virtual ~TaskQueue() {} + TaskQueue() = default; + virtual ~TaskQueue() = default; virtual void enqueue(std::function<void()> fn) = 0; virtual void shutdown() = 0; }; -#if CPPHTTPLIB_THREAD_POOL_COUNT > 0 class ThreadPool : public TaskQueue { public: - ThreadPool(size_t n) : shutdown_(false) { + explicit ThreadPool(size_t n) : shutdown_(false) { while (n) { - auto t = std::make_shared<std::thread>(worker(*this)); - threads_.push_back(t); + threads_.emplace_back(worker(*this)); n--; } } ThreadPool(const ThreadPool &) = delete; - virtual ~ThreadPool() {} + ~ThreadPool() override = default; - virtual void enqueue(std::function<void()> fn) override { + void enqueue(std::function<void()> fn) override { std::unique_lock<std::mutex> lock(mutex_); jobs_.push_back(fn); cond_.notify_one(); } - virtual void shutdown() override { + void shutdown() override { // Stop all worker threads... { std::unique_lock<std::mutex> lock(mutex_); @@ -378,14 +395,14 @@ public: cond_.notify_all(); // Join... - for (auto t : threads_) { - t->join(); + for (auto &t : threads_) { + t.join(); } } private: struct worker { - worker(ThreadPool &pool) : pool_(pool) {} + explicit worker(ThreadPool &pool) : pool_(pool) {} void operator()() { for (;;) { @@ -411,7 +428,7 @@ private: }; friend struct worker; - std::vector<std::shared_ptr<std::thread>> threads_; + std::vector<std::thread> threads_; std::list<std::function<void()>> jobs_; bool shutdown_; @@ -419,46 +436,16 @@ private: std::condition_variable cond_; std::mutex mutex_; }; -#else -class Threads : public TaskQueue { -public: - Threads() : running_threads_(0) {} - virtual ~Threads() {} - - virtual void enqueue(std::function<void()> fn) override { - std::thread([=]() { - { - std::lock_guard<std::mutex> guard(running_threads_mutex_); - running_threads_++; - } - - fn(); - - { - std::lock_guard<std::mutex> guard(running_threads_mutex_); - running_threads_--; - } - }).detach(); - } - - virtual void shutdown() override { - for (;;) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - std::lock_guard<std::mutex> guard(running_threads_mutex_); - if (!running_threads_) { break; } - } - } -private: - std::mutex running_threads_mutex_; - int running_threads_; -}; -#endif +using Logger = std::function<void(const Request &, const Response &)>; class Server { public: - typedef std::function<void(const Request &, Response &)> Handler; - typedef std::function<void(const Request &, const Response &)> Logger; + using Handler = std::function<void(const Request &, Response &)>; + using HandlerWithContentReader = std::function<void( + const Request &, Response &, const ContentReader &content_reader)>; + using Expect100ContinueHandler = + std::function<int(const Request &, Response &)>; Server(); @@ -468,21 +455,32 @@ public: Server &Get(const char *pattern, Handler handler); Server &Post(const char *pattern, Handler handler); - + Server &Post(const char *pattern, HandlerWithContentReader handler); Server &Put(const char *pattern, Handler handler); + Server &Put(const char *pattern, HandlerWithContentReader handler); Server &Patch(const char *pattern, Handler handler); + Server &Patch(const char *pattern, HandlerWithContentReader handler); Server &Delete(const char *pattern, Handler handler); Server &Options(const char *pattern, Handler handler); - bool set_base_dir(const char *path); + [[deprecated]] bool set_base_dir(const char *dir, + const char *mount_point = nullptr); + bool set_mount_point(const char *mount_point, const char *dir); + bool remove_mount_point(const char *mount_point); + void set_file_extension_and_mimetype_mapping(const char *ext, + const char *mime); void set_file_request_handler(Handler handler); void set_error_handler(Handler handler); void set_logger(Logger logger); + void set_expect_100_continue_handler(Expect100ContinueHandler handler); + void set_keep_alive_max_count(size_t count); + void set_read_timeout(time_t sec, time_t usec); void set_payload_max_length(size_t length); + bool bind_to_port(const char *host, int port, int socket_flags = 0); int bind_to_any_port(const char *host, int socket_flags = 0); bool listen_after_bind(); @@ -496,22 +494,29 @@ public: protected: bool process_request(Stream &strm, bool last_connection, bool &connection_close, - std::function<void(Request &)> setup_request); + const std::function<void(Request &)> &setup_request); size_t keep_alive_max_count_; + time_t read_timeout_sec_; + time_t read_timeout_usec_; size_t payload_max_length_; private: - typedef std::vector<std::pair<std::regex, Handler>> Handlers; + using Handlers = std::vector<std::pair<std::regex, Handler>>; + using HandlersForContentReader = + std::vector<std::pair<std::regex, HandlerWithContentReader>>; socket_t create_server_socket(const char *host, int port, int socket_flags) const; int bind_internal(const char *host, int port, int socket_flags); bool listen_internal(); - bool routing(Request &req, Response &res); - bool handle_file_request(Request &req, Response &res); + bool routing(Request &req, Response &res, Stream &strm, bool last_connection); + bool handle_file_request(Request &req, Response &res, bool head = false); bool dispatch_request(Request &req, Response &res, Handlers &handlers); + bool dispatch_request_for_content_reader(Request &req, Response &res, + ContentReader content_reader, + HandlersForContentReader &handlers); bool parse_request_line(const char *s, Request &req); bool write_response(Stream &strm, bool last_connection, const Request &req, @@ -519,26 +524,43 @@ private: bool write_content_with_provider(Stream &strm, const Request &req, Response &res, const std::string &boundary, const std::string &content_type); + bool read_content(Stream &strm, bool last_connection, Request &req, + Response &res); + bool read_content_with_content_receiver( + Stream &strm, bool last_connection, Request &req, Response &res, + ContentReceiver receiver, MultipartContentHeader multipart_header, + ContentReceiver multipart_receiver); + bool read_content_core(Stream &strm, bool last_connection, Request &req, + Response &res, ContentReceiver receiver, + MultipartContentHeader mulitpart_header, + ContentReceiver multipart_receiver); virtual bool process_and_close_socket(socket_t sock); std::atomic<bool> is_running_; std::atomic<socket_t> svr_sock_; - std::string base_dir_; + std::vector<std::pair<std::string, std::string>> base_dirs_; + std::map<std::string, std::string> file_extension_and_mimetype_map_; Handler file_request_handler_; Handlers get_handlers_; Handlers post_handlers_; + HandlersForContentReader post_handlers_for_content_reader_; Handlers put_handlers_; + HandlersForContentReader put_handlers_for_content_reader_; Handlers patch_handlers_; + HandlersForContentReader patch_handlers_for_content_reader_; Handlers delete_handlers_; Handlers options_handlers_; Handler error_handler_; Logger logger_; + Expect100ContinueHandler expect_100_continue_handler_; }; class Client { public: - Client(const char *host, int port = 80, time_t timeout_sec = 300); + explicit Client(const std::string &host, int port = 80, + const std::string &client_cert_path = std::string(), + const std::string &client_key_path = std::string()); virtual ~Client(); @@ -586,6 +608,15 @@ public: const std::string &body, const char *content_type); + std::shared_ptr<Response> Post(const char *path, size_t content_length, + ContentProvider content_provider, + const char *content_type); + + std::shared_ptr<Response> Post(const char *path, const Headers &headers, + size_t content_length, + ContentProvider content_provider, + const char *content_type); + std::shared_ptr<Response> Post(const char *path, const Params ¶ms); std::shared_ptr<Response> Post(const char *path, const Headers &headers, @@ -604,6 +635,20 @@ public: const std::string &body, const char *content_type); + std::shared_ptr<Response> Put(const char *path, size_t content_length, + ContentProvider content_provider, + const char *content_type); + + std::shared_ptr<Response> Put(const char *path, const Headers &headers, + size_t content_length, + ContentProvider content_provider, + const char *content_type); + + std::shared_ptr<Response> Put(const char *path, const Params ¶ms); + + std::shared_ptr<Response> Put(const char *path, const Headers &headers, + const Params ¶ms); + std::shared_ptr<Response> Patch(const char *path, const std::string &body, const char *content_type); @@ -611,6 +656,15 @@ public: const std::string &body, const char *content_type); + std::shared_ptr<Response> Patch(const char *path, size_t content_length, + ContentProvider content_provider, + const char *content_type); + + std::shared_ptr<Response> Patch(const char *path, const Headers &headers, + size_t content_length, + ContentProvider content_provider, + const char *content_type); + std::shared_ptr<Response> Delete(const char *path); std::shared_ptr<Response> Delete(const char *path, const std::string &body, @@ -631,9 +685,33 @@ public: bool send(const std::vector<Request> &requests, std::vector<Response> &responses); + void set_timeout_sec(time_t timeout_sec); + + void set_read_timeout(time_t sec, time_t usec); + void set_keep_alive_max_count(size_t count); - void follow_location(bool on); + void set_basic_auth(const char *username, const char *password); + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + void set_digest_auth(const char *username, const char *password); +#endif + + void set_follow_location(bool on); + + void set_compress(bool on); + + void set_interface(const char *intf); + + void set_proxy(const char *host, int port); + + void set_proxy_basic_auth(const char *username, const char *password); + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + void set_proxy_digest_auth(const char *username, const char *password); +#endif + + void set_logger(Logger logger); protected: bool process_request(Stream &strm, const Request &req, Response &res, @@ -641,16 +719,85 @@ protected: const std::string host_; const int port_; - time_t timeout_sec_; const std::string host_and_port_; - size_t keep_alive_max_count_; - size_t follow_location_; + + // Settings + std::string client_cert_path_; + std::string client_key_path_; + + time_t timeout_sec_ = 300; + time_t read_timeout_sec_ = CPPHTTPLIB_READ_TIMEOUT_SECOND; + time_t read_timeout_usec_ = CPPHTTPLIB_READ_TIMEOUT_USECOND; + + size_t keep_alive_max_count_ = CPPHTTPLIB_KEEPALIVE_MAX_COUNT; + + std::string basic_auth_username_; + std::string basic_auth_password_; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + std::string digest_auth_username_; + std::string digest_auth_password_; +#endif + + bool follow_location_ = false; + + bool compress_ = false; + + std::string interface_; + + std::string proxy_host_; + int proxy_port_; + + std::string proxy_basic_auth_username_; + std::string proxy_basic_auth_password_; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + std::string proxy_digest_auth_username_; + std::string proxy_digest_auth_password_; +#endif + + Logger logger_; + + void copy_settings(const Client &rhs) { + client_cert_path_ = rhs.client_cert_path_; + client_key_path_ = rhs.client_key_path_; + timeout_sec_ = rhs.timeout_sec_; + read_timeout_sec_ = rhs.read_timeout_sec_; + read_timeout_usec_ = rhs.read_timeout_usec_; + keep_alive_max_count_ = rhs.keep_alive_max_count_; + basic_auth_username_ = rhs.basic_auth_username_; + basic_auth_password_ = rhs.basic_auth_password_; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + digest_auth_username_ = rhs.digest_auth_username_; + digest_auth_password_ = rhs.digest_auth_password_; +#endif + follow_location_ = rhs.follow_location_; + compress_ = rhs.compress_; + interface_ = rhs.interface_; + proxy_host_ = rhs.proxy_host_; + proxy_port_ = rhs.proxy_port_; + proxy_basic_auth_username_ = rhs.proxy_basic_auth_username_; + proxy_basic_auth_password_ = rhs.proxy_basic_auth_password_; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + proxy_digest_auth_username_ = rhs.proxy_digest_auth_username_; + proxy_digest_auth_password_ = rhs.proxy_digest_auth_password_; +#endif + logger_ = rhs.logger_; + } private: socket_t create_client_socket() const; bool read_response_line(Stream &strm, Response &res); - void write_request(Stream &strm, const Request &req, bool last_connection); + bool write_request(Stream &strm, const Request &req, bool last_connection); bool redirect(const Request &req, Response &res); + bool handle_request(Stream &strm, const Request &req, Response &res, + bool last_connection, bool &connection_close); +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + bool connect(socket_t sock, Response &res, bool &error); +#endif + + std::shared_ptr<Response> send_with_content_provider( + const char *method, const char *path, const Headers &headers, + const std::string &body, size_t content_length, + ContentProvider content_provider, const char *content_type); virtual bool process_and_close_socket( socket_t sock, size_t request_count, @@ -692,22 +839,6 @@ inline void Post(std::vector<Request> &requests, const char *path, } #ifdef CPPHTTPLIB_OPENSSL_SUPPORT -class SSLSocketStream : public Stream { -public: - SSLSocketStream(socket_t sock, SSL *ssl); - virtual ~SSLSocketStream(); - - virtual int read(char *ptr, size_t size); - virtual int write(const char *ptr, size_t size); - virtual int write(const char *ptr); - virtual int write(const std::string &s); - virtual std::string get_remote_addr() const; - -private: - socket_t sock_; - SSL *ssl_; -}; - class SSLServer : public Server { public: SSLServer(const char *cert_path, const char *private_key_path, @@ -727,9 +858,9 @@ private: class SSLClient : public Client { public: - SSLClient(const char *host, int port = 443, time_t timeout_sec = 300, - const char *client_cert_path = nullptr, - const char *client_key_path = nullptr); + SSLClient(const std::string &host, int port = 443, + const std::string &client_cert_path = std::string(), + const std::string &client_key_path = std::string()); virtual ~SSLClient(); @@ -737,11 +868,12 @@ public: void set_ca_cert_path(const char *ca_ceert_file_path, const char *ca_cert_dir_path = nullptr); + void enable_server_certificate_verification(bool enabled); long get_openssl_verify_result() const; - SSL_CTX* ssl_context() const noexcept; + SSL_CTX *ssl_context() const noexcept; private: virtual bool process_and_close_socket( @@ -759,6 +891,7 @@ private: SSL_CTX *ctx_; std::mutex ctx_mutex_; std::vector<std::string> host_components_; + std::string ca_cert_file_path_; std::string ca_cert_dir_path_; bool server_certificate_verification_ = false; @@ -766,9 +899,12 @@ private: }; #endif +// ---------------------------------------------------------------------------- + /* * Implementation */ + namespace detail { inline bool is_hex(char c, int &v) { @@ -932,8 +1068,8 @@ inline void read_file(const std::string &path, std::string &out) { inline std::string file_extension(const std::string &path) { std::smatch m; - auto pat = std::regex("\\.([a-zA-Z0-9]+)$"); - if (std::regex_search(path, m, pat)) { return m[1].str(); } + static auto re = std::regex("\\.([a-zA-Z0-9]+)$"); + if (std::regex_search(path, m, re)) { return m[1].str(); } return std::string(); } @@ -976,6 +1112,11 @@ public: } } + bool end_with_crlf() const { + auto end = ptr() + size(); + return size() >= 2 && end[-2] == '\r' && end[-1] == '\n'; + } + bool getline() { fixed_buffer_used_size_ = 0; glowable_buffer_.clear(); @@ -1019,7 +1160,7 @@ private: Stream &strm_; char *fixed_buffer_; const size_t fixed_buffer_size_; - size_t fixed_buffer_used_size_; + size_t fixed_buffer_used_size_ = 0; std::string glowable_buffer_; }; @@ -1053,6 +1194,28 @@ inline int select_read(socket_t sock, time_t sec, time_t usec) { #endif } +inline int select_write(socket_t sock, time_t sec, time_t usec) { +#ifdef CPPHTTPLIB_USE_POLL + struct pollfd pfd_read; + pfd_read.fd = sock; + pfd_read.events = POLLOUT; + + auto timeout = static_cast<int>(sec * 1000 + usec / 1000); + + return poll(&pfd_read, 1, timeout); +#else + fd_set fds; + FD_ZERO(&fds); + FD_SET(sock, &fds); + + timeval tv; + tv.tv_sec = static_cast<long>(sec); + tv.tv_usec = static_cast<long>(usec); + + return select(static_cast<int>(sock + 1), nullptr, &fds, nullptr, &tv); +#endif +} + inline bool wait_until_socket_is_ready(socket_t sock, time_t sec, time_t usec) { #ifdef CPPHTTPLIB_USE_POLL struct pollfd pfd_read; @@ -1065,7 +1228,8 @@ inline bool wait_until_socket_is_ready(socket_t sock, time_t sec, time_t usec) { pfd_read.revents & (POLLIN | POLLOUT)) { int error = 0; socklen_t len = sizeof(error); - return getsockopt(sock, SOL_SOCKET, SO_ERROR, reinterpret_cast<char*>(&error), &len) >= 0 && + return getsockopt(sock, SOL_SOCKET, SO_ERROR, + reinterpret_cast<char *>(&error), &len) >= 0 && !error; } return false; @@ -1085,27 +1249,86 @@ inline bool wait_until_socket_is_ready(socket_t sock, time_t sec, time_t usec) { (FD_ISSET(sock, &fdsr) || FD_ISSET(sock, &fdsw))) { int error = 0; socklen_t len = sizeof(error); - return getsockopt(sock, SOL_SOCKET, SO_ERROR, (char *)&error, &len) >= 0 && + return getsockopt(sock, SOL_SOCKET, SO_ERROR, + reinterpret_cast<char *>(&error), &len) >= 0 && !error; } return false; #endif } +class SocketStream : public Stream { +public: + SocketStream(socket_t sock, time_t read_timeout_sec, + time_t read_timeout_usec); + ~SocketStream() override; + + bool is_readable() const override; + bool is_writable() const override; + int read(char *ptr, size_t size) override; + int write(const char *ptr, size_t size) override; + std::string get_remote_addr() const override; + +private: + socket_t sock_; + time_t read_timeout_sec_; + time_t read_timeout_usec_; +}; + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +class SSLSocketStream : public Stream { +public: + SSLSocketStream(socket_t sock, SSL *ssl, time_t read_timeout_sec, + time_t read_timeout_usec); + virtual ~SSLSocketStream(); + + bool is_readable() const override; + bool is_writable() const override; + int read(char *ptr, size_t size) override; + int write(const char *ptr, size_t size) override; + std::string get_remote_addr() const override; + +private: + socket_t sock_; + SSL *ssl_; + time_t read_timeout_sec_; + time_t read_timeout_usec_; +}; +#endif + +class BufferStream : public Stream { +public: + BufferStream() = default; + ~BufferStream() override = default; + + bool is_readable() const override; + bool is_writable() const override; + int read(char *ptr, size_t size) override; + int write(const char *ptr, size_t size) override; + std::string get_remote_addr() const override; + + const std::string &get_buffer() const; + +private: + std::string buffer; + int position = 0; +}; + template <typename T> -inline bool process_and_close_socket(bool is_client_request, socket_t sock, - size_t keep_alive_max_count, T callback) { +inline bool process_socket(bool is_client_request, socket_t sock, + size_t keep_alive_max_count, time_t read_timeout_sec, + time_t read_timeout_usec, T callback) { assert(keep_alive_max_count > 0); - bool ret = false; + auto ret = false; if (keep_alive_max_count > 1) { auto count = keep_alive_max_count; while (count > 0 && (is_client_request || - detail::select_read(sock, CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND, - CPPHTTPLIB_KEEPALIVE_TIMEOUT_USECOND) > 0)) { - SocketStream strm(sock); + select_read(sock, CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND, + CPPHTTPLIB_KEEPALIVE_TIMEOUT_USECOND) > 0)) { + SocketStream strm(sock, read_timeout_sec, read_timeout_usec); auto last_connection = count == 1; auto connection_close = false; @@ -1114,12 +1337,22 @@ inline bool process_and_close_socket(bool is_client_request, socket_t sock, count--; } - } else { - SocketStream strm(sock); + } else { // keep_alive_max_count is 0 or 1 + SocketStream strm(sock, read_timeout_sec, read_timeout_usec); auto dummy_connection_close = false; ret = callback(strm, true, dummy_connection_close); } + return ret; +} + +template <typename T> +inline bool process_and_close_socket(bool is_client_request, socket_t sock, + size_t keep_alive_max_count, + time_t read_timeout_sec, + time_t read_timeout_usec, T callback) { + auto ret = process_socket(is_client_request, sock, keep_alive_max_count, + read_timeout_sec, read_timeout_usec, callback); close_socket(sock); return ret; } @@ -1165,6 +1398,23 @@ socket_t create_socket(const char *host, int port, Fn fn, #ifdef _WIN32 auto sock = WSASocketW(rp->ai_family, rp->ai_socktype, rp->ai_protocol, nullptr, 0, WSA_FLAG_NO_HANDLE_INHERIT); + /** + * Since the WSA_FLAG_NO_HANDLE_INHERIT is only supported on Windows 7 SP1 + * and above the socket creation fails on older Windows Systems. + * + * Let's try to create a socket the old way in this case. + * + * Reference: + * https://docs.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-wsasocketa + * + * WSA_FLAG_NO_HANDLE_INHERIT: + * This flag is supported on Windows 7 with SP1, Windows Server 2008 R2 with + * SP1, and later + * + */ + if (sock == INVALID_SOCKET) { + sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); + } #else auto sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); #endif @@ -1176,9 +1426,11 @@ socket_t create_socket(const char *host, int port, Fn fn, // Make 'reuse address' option available int yes = 1; - setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, reinterpret_cast<char*>(&yes), sizeof(yes)); + setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, reinterpret_cast<char *>(&yes), + sizeof(yes)); #ifdef SO_REUSEPORT - setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, reinterpret_cast<char*>(&yes), sizeof(yes)); + setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, reinterpret_cast<char *>(&yes), + sizeof(yes)); #endif // bind or connect @@ -1213,27 +1465,105 @@ inline bool is_connection_error() { #endif } +inline bool bind_ip_address(socket_t sock, const char *host) { + struct addrinfo hints; + struct addrinfo *result; + + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = 0; + + if (getaddrinfo(host, "0", &hints, &result)) { return false; } + + auto ret = false; + for (auto rp = result; rp; rp = rp->ai_next) { + const auto &ai = *rp; + if (!::bind(sock, ai.ai_addr, static_cast<int>(ai.ai_addrlen))) { + ret = true; + break; + } + } + + freeaddrinfo(result); + return ret; +} + +inline std::string if2ip(const std::string &ifn) { +#ifndef _WIN32 + struct ifaddrs *ifap; + getifaddrs(&ifap); + for (auto ifa = ifap; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr && ifn == ifa->ifa_name) { + if (ifa->ifa_addr->sa_family == AF_INET) { + auto sa = reinterpret_cast<struct sockaddr_in *>(ifa->ifa_addr); + char buf[INET_ADDRSTRLEN]; + if (inet_ntop(AF_INET, &sa->sin_addr, buf, INET_ADDRSTRLEN)) { + freeifaddrs(ifap); + return std::string(buf, INET_ADDRSTRLEN); + } + } + } + } + freeifaddrs(ifap); +#endif + return std::string(); +} + +inline socket_t create_client_socket(const char *host, int port, + time_t timeout_sec, + const std::string &intf) { + return create_socket( + host, port, [&](socket_t sock, struct addrinfo &ai) -> bool { + if (!intf.empty()) { + auto ip = if2ip(intf); + if (ip.empty()) { ip = intf; } + if (!bind_ip_address(sock, ip.c_str())) { return false; } + } + + set_nonblocking(sock, true); + + auto ret = ::connect(sock, ai.ai_addr, static_cast<int>(ai.ai_addrlen)); + if (ret < 0) { + if (is_connection_error() || + !wait_until_socket_is_ready(sock, timeout_sec, 0)) { + close_socket(sock); + return false; + } + } + + set_nonblocking(sock, false); + return true; + }); +} + inline std::string get_remote_addr(socket_t sock) { struct sockaddr_storage addr; socklen_t len = sizeof(addr); if (!getpeername(sock, reinterpret_cast<struct sockaddr *>(&addr), &len)) { - char ipstr[NI_MAXHOST]; + std::array<char, NI_MAXHOST> ipstr{}; - if (!getnameinfo(reinterpret_cast<struct sockaddr *>(&addr), len, ipstr, sizeof(ipstr), - nullptr, 0, NI_NUMERICHOST)) { - return ipstr; + if (!getnameinfo(reinterpret_cast<struct sockaddr *>(&addr), len, + ipstr.data(), static_cast<unsigned int>(ipstr.size()), nullptr, 0, NI_NUMERICHOST)) { + return ipstr.data(); } } return std::string(); } -inline const char *find_content_type(const std::string &path) { +inline const char * +find_content_type(const std::string &path, + const std::map<std::string, std::string> &user_data) { auto ext = file_extension(path); + + auto it = user_data.find(ext); + if (it != user_data.end()) { return it->second.c_str(); } + if (ext == "txt") { return "text/plain"; - } else if (ext == "html") { + } else if (ext == "html" || ext == "htm") { return "text/html"; } else if (ext == "css") { return "text/css"; @@ -1253,6 +1583,8 @@ inline const char *find_content_type(const std::string &path) { return "application/pdf"; } else if (ext == "js") { return "application/javascript"; + } else if (ext == "wasm") { + return "application/wasm"; } else if (ext == "xml") { return "application/xml"; } else if (ext == "xhtml") { @@ -1263,19 +1595,25 @@ inline const char *find_content_type(const std::string &path) { inline const char *status_message(int status) { switch (status) { + case 100: return "Continue"; case 200: return "OK"; + case 202: return "Accepted"; + case 204: return "No Content"; case 206: return "Partial Content"; case 301: return "Moved Permanently"; case 302: return "Found"; case 303: return "See Other"; case 304: return "Not Modified"; case 400: return "Bad Request"; + case 401: return "Unauthorized"; case 403: return "Forbidden"; case 404: return "Not Found"; case 413: return "Payload Too Large"; case 414: return "Request-URI Too Long"; case 415: return "Unsupported Media Type"; case 416: return "Range Not Satisfiable"; + case 417: return "Expectation Failed"; + case 503: return "Service Unavailable"; default: case 500: return "Internal Server Error"; @@ -1302,18 +1640,18 @@ inline bool compress(std::string &content) { if (ret != Z_OK) { return false; } strm.avail_in = content.size(); - strm.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(content.data())); + strm.next_in = + const_cast<Bytef *>(reinterpret_cast<const Bytef *>(content.data())); std::string compressed; - const auto bufsiz = 16384; - char buff[bufsiz]; + std::array<char, 16384> buff{}; do { - strm.avail_out = bufsiz; - strm.next_out = reinterpret_cast<Bytef*>(buff); + strm.avail_out = buff.size(); + strm.next_out = reinterpret_cast<Bytef *>(buff.data()); ret = deflate(&strm, Z_FINISH); assert(ret != Z_STREAM_ERROR); - compressed.append(buff, bufsiz - strm.avail_out); + compressed.append(buff.data(), buff.size() - strm.avail_out); } while (strm.avail_out == 0); assert(ret == Z_STREAM_END); @@ -1347,13 +1685,12 @@ public: int ret = Z_OK; strm.avail_in = data_length; - strm.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef *>(data)); + strm.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data)); - const auto bufsiz = 16384; - char buff[bufsiz]; + std::array<char, 16384> buff{}; do { - strm.avail_out = bufsiz; - strm.next_out = reinterpret_cast<Bytef*>(buff); + strm.avail_out = buff.size(); + strm.next_out = reinterpret_cast<Bytef *>(buff.data()); ret = inflate(&strm, Z_NO_FLUSH); assert(ret != Z_STREAM_ERROR); @@ -1363,10 +1700,12 @@ public: case Z_MEM_ERROR: inflateEnd(&strm); return false; } - if (!callback(buff, bufsiz - strm.avail_out)) { return false; } + if (!callback(buff.data(), buff.size() - strm.avail_out)) { + return false; + } } while (strm.avail_out == 0); - return ret == Z_STREAM_END; + return ret == Z_OK || ret == Z_STREAM_END; } private: @@ -1397,18 +1736,35 @@ inline uint64_t get_header_value_uint64(const Headers &headers, const char *key, } inline bool read_headers(Stream &strm, Headers &headers) { - static std::regex re(R"((.+?):\s*(.+?)\s*\r\n)"); - const auto bufsiz = 2048; char buf[bufsiz]; - - stream_line_reader reader(strm, buf, bufsiz); + stream_line_reader line_reader(strm, buf, bufsiz); for (;;) { - if (!reader.getline()) { return false; } - if (!strcmp(reader.ptr(), "\r\n")) { break; } + if (!line_reader.getline()) { return false; } + + // Check if the line ends with CRLF. + if (line_reader.end_with_crlf()) { + // Blank line indicates end of headers. + if (line_reader.size() == 2) { break; } + } else { + continue; // Skip invalid line. + } + + // Skip trailing spaces and tabs. + auto end = line_reader.ptr() + line_reader.size() - 2; + while (line_reader.ptr() < end && (end[-1] == ' ' || end[-1] == '\t')) { + end--; + } + + // Horizontal tab and ' ' are considered whitespace and are ignored when on + // the left or right side of the header value: + // - https://stackoverflow.com/questions/50179659/ + // - https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html + static const std::regex re(R"((.+?):[\t ]*(.+))"); + std::cmatch m; - if (std::regex_match(reader.ptr(), m, re)) { + if (std::regex_match(line_reader.ptr(), end, m, re)) { auto key = std::string(m[1]); auto val = std::string(m[2]); headers.emplace(key, val); @@ -1418,12 +1774,8 @@ inline bool read_headers(Stream &strm, Headers &headers) { return true; } -typedef std::function<bool(const char *data, size_t data_length)> - ContentReceiverCore; - inline bool read_content_with_length(Stream &strm, uint64_t len, - Progress progress, - ContentReceiverCore out) { + Progress progress, ContentReceiver out) { char buf[CPPHTTPLIB_RECV_BUFSIZ]; uint64_t r = 0; @@ -1455,7 +1807,7 @@ inline void skip_content_with_length(Stream &strm, uint64_t len) { } } -inline bool read_content_without_length(Stream &strm, ContentReceiverCore out) { +inline bool read_content_without_length(Stream &strm, ContentReceiver out) { char buf[CPPHTTPLIB_RECV_BUFSIZ]; for (;;) { auto n = strm.read(buf, CPPHTTPLIB_RECV_BUFSIZ); @@ -1470,33 +1822,34 @@ inline bool read_content_without_length(Stream &strm, ContentReceiverCore out) { return true; } -inline bool read_content_chunked(Stream &strm, ContentReceiverCore out) { +inline bool read_content_chunked(Stream &strm, ContentReceiver out) { const auto bufsiz = 16; char buf[bufsiz]; - stream_line_reader reader(strm, buf, bufsiz); + stream_line_reader line_reader(strm, buf, bufsiz); - if (!reader.getline()) { return false; } + if (!line_reader.getline()) { return false; } - auto chunk_len = std::stoi(reader.ptr(), 0, 16); + auto chunk_len = std::stoi(line_reader.ptr(), 0, 16); while (chunk_len > 0) { if (!read_content_with_length(strm, chunk_len, nullptr, out)) { return false; } - if (!reader.getline()) { return false; } + if (!line_reader.getline()) { return false; } - if (strcmp(reader.ptr(), "\r\n")) { break; } + if (strcmp(line_reader.ptr(), "\r\n")) { break; } - if (!reader.getline()) { return false; } + if (!line_reader.getline()) { return false; } - chunk_len = std::stoi(reader.ptr(), 0, 16); + chunk_len = std::stoi(line_reader.ptr(), 0, 16); } if (chunk_len == 0) { // Reader terminator after chunks - if (!reader.getline() || strcmp(reader.ptr(), "\r\n")) return false; + if (!line_reader.getline() || strcmp(line_reader.ptr(), "\r\n")) + return false; } return true; @@ -1509,14 +1862,14 @@ inline bool is_chunked_transfer_encoding(const Headers &headers) { template <typename T> bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status, - Progress progress, ContentReceiverCore receiver) { + Progress progress, ContentReceiver receiver) { - ContentReceiverCore out = [&](const char *buf, size_t n) { + ContentReceiver out = [&](const char *buf, size_t n) { return receiver(buf, n); }; #ifdef CPPHTTPLIB_ZLIB_SUPPORT - detail::decompressor decompressor; + decompressor decompressor; if (!decompressor.is_valid()) { status = 500; @@ -1586,39 +1939,47 @@ inline ssize_t write_content(Stream &strm, ContentProvider content_provider, size_t end_offset = offset + length; while (offset < end_offset) { ssize_t written_length = 0; - content_provider( - offset, end_offset - offset, - [&](const char *d, size_t l) { - offset += l; - written_length = strm.write(d, l); - }, - [&](void) { written_length = -1; }); + + DataSink data_sink; + data_sink.write = [&](const char *d, size_t l) { + offset += l; + written_length = strm.write(d, l); + }; + data_sink.done = [&](void) { written_length = -1; }; + data_sink.is_writable = [&](void) { return strm.is_writable(); }; + + content_provider(offset, end_offset - offset, data_sink); if (written_length < 0) { return written_length; } } return static_cast<ssize_t>(offset - begin_offset); } +template <typename T> inline ssize_t write_content_chunked(Stream &strm, - ContentProvider content_provider) { + ContentProvider content_provider, + T is_shutting_down) { size_t offset = 0; auto data_available = true; ssize_t total_written_length = 0; - while (data_available) { + while (data_available && !is_shutting_down()) { ssize_t written_length = 0; - content_provider( - offset, 0, - [&](const char *d, size_t l) { - data_available = l > 0; - offset += l; - - // Emit chunked response header and footer for each chunk - auto chunk = from_i_to_hex(l) + "\r\n" + std::string(d, l) + "\r\n"; - written_length = strm.write(chunk); - }, - [&](void) { - data_available = false; - written_length = strm.write("0\r\n\r\n"); - }); + + DataSink data_sink; + data_sink.write = [&](const char *d, size_t l) { + data_available = l > 0; + offset += l; + + // Emit chunked response header and footer for each chunk + auto chunk = from_i_to_hex(l) + "\r\n" + std::string(d, l) + "\r\n"; + written_length = strm.write(chunk); + }; + data_sink.done = [&](void) { + data_available = false; + written_length = strm.write("0\r\n\r\n"); + }; + data_sink.is_writable = [&](void) { return strm.is_writable(); }; + + content_provider(offset, 0, data_sink); if (written_length < 0) { return written_length; } total_written_length += written_length; @@ -1629,17 +1990,12 @@ inline ssize_t write_content_chunked(Stream &strm, template <typename T> inline bool redirect(T &cli, const Request &req, Response &res, const std::string &path) { - Request new_req; - new_req.method = req.method; + Request new_req = req; new_req.path = path; - new_req.headers = req.headers; - new_req.body = req.body; - new_req.redirect_count = req.redirect_count - 1; - new_req.response_handler = req.response_handler; - new_req.content_receiver = req.content_receiver; - new_req.progress = req.progress; + new_req.redirect_count -= 1; Response new_res; + auto ret = cli.send(new_req, new_res); if (ret) { res = new_res; } return ret; @@ -1656,7 +2012,7 @@ inline std::string encode_url(const std::string &s) { case '\n': result += "%0A"; break; case '\'': result += "%27"; break; case ',': result += "%2C"; break; - case ':': result += "%3A"; break; + // case ':': result += "%3A"; break; // ok? probably... case ';': result += "%3B"; break; default: auto c = static_cast<uint8_t>(s[i]); @@ -1716,11 +2072,11 @@ inline void parse_query_text(const std::string &s, Params ¶ms) { split(&s[0], &s[s.size()], '&', [&](const char *b, const char *e) { std::string key; std::string val; - split(b, e, '=', [&](const char *b, const char *e) { + split(b, e, '=', [&](const char *b2, const char *e2) { if (key.empty()) { - key.assign(b, e); + key.assign(b2, e2); } else { - val.assign(b, e); + val.assign(b2, e2); } }); params.emplace(key, decode_url(val)); @@ -1736,112 +2092,207 @@ inline bool parse_multipart_boundary(const std::string &content_type, return true; } -inline bool parse_multipart_formdata(const std::string &boundary, - const std::string &body, - MultipartFiles &files) { - static std::string dash = "--"; - static std::string crlf = "\r\n"; - - static std::regex re_content_type("Content-Type: (.*?)", - std::regex_constants::icase); - - static std::regex re_content_disposition( - "Content-Disposition: form-data; name=\"(.*?)\"(?:; filename=\"(.*?)\")?", - std::regex_constants::icase); - - auto dash_boundary = dash + boundary; - - auto pos = body.find(dash_boundary); - if (pos != 0) { return false; } - - pos += dash_boundary.size(); +inline bool parse_range_header(const std::string &s, Ranges &ranges) { + static auto re_first_range = std::regex(R"(bytes=(\d*-\d*(?:,\s*\d*-\d*)*))"); + std::smatch m; + if (std::regex_match(s, m, re_first_range)) { + auto pos = m.position(1); + auto len = m.length(1); + bool all_valid_ranges = true; + split(&s[pos], &s[pos + len], ',', [&](const char *b, const char *e) { + if (!all_valid_ranges) return; + static auto re_another_range = std::regex(R"(\s*(\d*)-(\d*))"); + std::cmatch cm; + if (std::regex_match(b, e, cm, re_another_range)) { + ssize_t first = -1; + if (!cm.str(1).empty()) { + first = static_cast<ssize_t>(std::stoll(cm.str(1))); + } - auto next_pos = body.find(crlf, pos); - if (next_pos == std::string::npos) { return false; } + ssize_t last = -1; + if (!cm.str(2).empty()) { + last = static_cast<ssize_t>(std::stoll(cm.str(2))); + } - pos = next_pos + crlf.size(); + if (first != -1 && last != -1 && first > last) { + all_valid_ranges = false; + return; + } + ranges.emplace_back(std::make_pair(first, last)); + } + }); + return all_valid_ranges; + } + return false; +} - while (pos < body.size()) { - next_pos = body.find(crlf, pos); - if (next_pos == std::string::npos) { return false; } +class MultipartFormDataParser { +public: + MultipartFormDataParser() {} - std::string name; - MultipartFile file; + void set_boundary(const std::string &boundary) { boundary_ = boundary; } - auto header = body.substr(pos, (next_pos - pos)); + bool is_valid() const { return is_valid_; } - while (pos != next_pos) { - std::smatch m; - if (std::regex_match(header, m, re_content_type)) { - file.content_type = m[1]; - } else if (std::regex_match(header, m, re_content_disposition)) { - name = m[1]; - file.filename = m[2]; + template <typename T, typename U> + bool parse(const char *buf, size_t n, T content_callback, U header_callback) { + static const std::regex re_content_type(R"(^Content-Type:\s*(.*?)\s*$)", + std::regex_constants::icase); + + static const std::regex re_content_disposition( + "^Content-Disposition:\\s*form-data;\\s*name=\"(.*?)\"(?:;\\s*filename=" + "\"(.*?)\")?\\s*$", + std::regex_constants::icase); + + buf_.append(buf, n); // TODO: performance improvement + + while (!buf_.empty()) { + switch (state_) { + case 0: { // Initial boundary + auto pattern = dash_ + boundary_ + crlf_; + if (pattern.size() > buf_.size()) { return true; } + auto pos = buf_.find(pattern); + if (pos != 0) { + is_done_ = true; + return false; + } + buf_.erase(0, pattern.size()); + off_ += pattern.size(); + state_ = 1; + break; } + case 1: { // New entry + clear_file_info(); + state_ = 2; + break; + } + case 2: { // Headers + auto pos = buf_.find(crlf_); + while (pos != std::string::npos) { + // Empty line + if (pos == 0) { + if (!header_callback(file_)) { + is_valid_ = false; + is_done_ = false; + return false; + } + buf_.erase(0, crlf_.size()); + off_ += crlf_.size(); + state_ = 3; + break; + } - pos = next_pos + crlf.size(); - - next_pos = body.find(crlf, pos); - if (next_pos == std::string::npos) { return false; } - - header = body.substr(pos, (next_pos - pos)); - } - - pos = next_pos + crlf.size(); + auto header = buf_.substr(0, pos); + { + std::smatch m; + if (std::regex_match(header, m, re_content_type)) { + file_.content_type = m[1]; + } else if (std::regex_match(header, m, re_content_disposition)) { + file_.name = m[1]; + file_.filename = m[2]; + } + } - next_pos = body.find(crlf + dash_boundary, pos); + buf_.erase(0, pos + crlf_.size()); + off_ += pos + crlf_.size(); + pos = buf_.find(crlf_); + } + break; + } + case 3: { // Body + { + auto pattern = crlf_ + dash_; + if (pattern.size() > buf_.size()) { return true; } + + auto pos = buf_.find(pattern); + if (pos == std::string::npos) { pos = buf_.size(); } + if (!content_callback(buf_.data(), pos)) { + is_valid_ = false; + is_done_ = false; + return false; + } - if (next_pos == std::string::npos) { return false; } + off_ += pos; + buf_.erase(0, pos); + } - file.offset = pos; - file.length = next_pos - pos; + { + auto pattern = crlf_ + dash_ + boundary_; + if (pattern.size() > buf_.size()) { return true; } + + auto pos = buf_.find(pattern); + if (pos != std::string::npos) { + if (!content_callback(buf_.data(), pos)) { + is_valid_ = false; + is_done_ = false; + return false; + } - pos = next_pos + crlf.size() + dash_boundary.size(); + off_ += pos + pattern.size(); + buf_.erase(0, pos + pattern.size()); + state_ = 4; + } else { + if (!content_callback(buf_.data(), pattern.size())) { + is_valid_ = false; + is_done_ = false; + return false; + } - next_pos = body.find(crlf, pos); - if (next_pos == std::string::npos) { return false; } + off_ += pattern.size(); + buf_.erase(0, pattern.size()); + } + } + break; + } + case 4: { // Boundary + if (crlf_.size() > buf_.size()) { return true; } + if (buf_.find(crlf_) == 0) { + buf_.erase(0, crlf_.size()); + off_ += crlf_.size(); + state_ = 1; + } else { + auto pattern = dash_ + crlf_; + if (pattern.size() > buf_.size()) { return true; } + if (buf_.find(pattern) == 0) { + buf_.erase(0, pattern.size()); + off_ += pattern.size(); + is_valid_ = true; + state_ = 5; + } else { + is_done_ = true; + return true; + } + } + break; + } + case 5: { // Done + is_valid_ = false; + return false; + } + } + } - files.emplace(name, file); + return true; + } - pos = next_pos + crlf.size(); +private: + void clear_file_info() { + file_.name.clear(); + file_.filename.clear(); + file_.content_type.clear(); } - return true; -} + const std::string dash_ = "--"; + const std::string crlf_ = "\r\n"; + std::string boundary_; -inline bool parse_range_header(const std::string &s, Ranges &ranges) { - try { - static auto re = std::regex(R"(bytes=(\d*-\d*(?:,\s*\d*-\d*)*))"); - std::smatch m; - if (std::regex_match(s, m, re)) { - auto pos = m.position(1); - auto len = m.length(1); - detail::split(&s[pos], &s[pos + len], ',', - [&](const char *b, const char *e) { - static auto re = std::regex(R"(\s*(\d*)-(\d*))"); - std::cmatch m; - if (std::regex_match(b, e, m, re)) { - ssize_t first = -1; - if (!m.str(1).empty()) { - first = static_cast<ssize_t>(std::stoll(m.str(1))); - } - - ssize_t last = -1; - if (!m.str(2).empty()) { - last = static_cast<ssize_t>(std::stoll(m.str(2))); - } - - if (first != -1 && last != -1 && first > last) { - throw std::runtime_error("invalid range error"); - } - ranges.emplace_back(std::make_pair(first, last)); - } - }); - return true; - } - return false; - } catch (...) { return false; } -} + std::string buf_; + size_t state_ = 0; + size_t is_valid_ = false; + size_t is_done_ = false; + size_t off_ = 0; + MultipartFormData file_; +}; inline std::string to_lower(const char *beg, const char *end) { std::string out; @@ -1915,7 +2366,7 @@ bool process_multipart_ranges_data(const Request &req, Response &res, ctoken("\r\n"); } - auto offsets = detail::get_range_offset_and_length(req, res.body.size(), i); + auto offsets = get_range_offset_and_length(req, res.body.size(), i); auto offset = offsets.first; auto length = offsets.second; @@ -1978,8 +2429,7 @@ inline bool write_multipart_ranges_data(Stream &strm, const Request &req, [&](const std::string &token) { strm.write(token); }, [&](const char *token) { strm.write(token); }, [&](size_t offset, size_t length) { - return detail::write_content(strm, res.content_provider, offset, - length) >= 0; + return write_content(strm, res.content_provider, offset, length) >= 0; }); } @@ -1988,11 +2438,56 @@ get_range_offset_and_length(const Request &req, const Response &res, size_t index) { auto r = req.ranges[index]; - if (r.second == -1) { r.second = res.content_provider_resource_length - 1; } + if (r.second == -1) { r.second = res.content_length - 1; } return std::make_pair(r.first, r.second - r.first + 1); } +inline bool expect_content(const Request &req) { + if (req.method == "POST" || req.method == "PUT" || req.method == "PATCH" || + req.method == "PRI") { + return true; + } + // TODO: check if Content-Length is set + return false; +} + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +template <typename CTX, typename Init, typename Update, typename Final> +inline std::string message_digest(const std::string &s, Init init, + Update update, Final final, + size_t digest_length) { + using namespace std; + + std::vector<unsigned char> md(digest_length, 0); + CTX ctx; + init(&ctx); + update(&ctx, s.data(), s.size()); + final(md.data(), &ctx); + + stringstream ss; + for (auto c : md) { + ss << setfill('0') << setw(2) << hex << (unsigned int)c; + } + return ss.str(); +} + +inline std::string MD5(const std::string &s) { + return message_digest<MD5_CTX>(s, MD5_Init, MD5_Update, MD5_Final, + MD5_DIGEST_LENGTH); +} + +inline std::string SHA_256(const std::string &s) { + return message_digest<SHA256_CTX>(s, SHA256_Init, SHA256_Update, SHA256_Final, + SHA256_DIGEST_LENGTH); +} + +inline std::string SHA_512(const std::string &s) { + return message_digest<SHA512_CTX>(s, SHA512_Init, SHA512_Update, SHA512_Final, + SHA512_DIGEST_LENGTH); +} +#endif + #ifdef _WIN32 class WSInit { public: @@ -2025,9 +2520,103 @@ inline std::pair<std::string, std::string> make_range_header(Ranges ranges) { inline std::pair<std::string, std::string> make_basic_authentication_header(const std::string &username, - const std::string &password) { + const std::string &password, + bool is_proxy = false) { auto field = "Basic " + detail::base64_encode(username + ":" + password); - return std::make_pair("Authorization", field); + auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; + return std::make_pair(key, field); +} + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline std::pair<std::string, std::string> make_digest_authentication_header( + const Request &req, const std::map<std::string, std::string> &auth, + size_t cnonce_count, const std::string &cnonce, const std::string &username, + const std::string &password, bool is_proxy = false) { + using namespace std; + + string nc; + { + stringstream ss; + ss << setfill('0') << setw(8) << hex << cnonce_count; + nc = ss.str(); + } + + auto qop = auth.at("qop"); + if (qop.find("auth-int") != std::string::npos) { + qop = "auth-int"; + } else { + qop = "auth"; + } + + std::string algo = "MD5"; + if (auth.find("algorithm") != auth.end()) { algo = auth.at("algorithm"); } + + string response; + { + auto H = algo == "SHA-256" + ? detail::SHA_256 + : algo == "SHA-512" ? detail::SHA_512 : detail::MD5; + + auto A1 = username + ":" + auth.at("realm") + ":" + password; + + auto A2 = req.method + ":" + req.path; + if (qop == "auth-int") { A2 += ":" + H(req.body); } + + response = H(H(A1) + ":" + auth.at("nonce") + ":" + nc + ":" + cnonce + + ":" + qop + ":" + H(A2)); + } + + auto field = "Digest username=\"hello\", realm=\"" + auth.at("realm") + + "\", nonce=\"" + auth.at("nonce") + "\", uri=\"" + req.path + + "\", algorithm=" + algo + ", qop=" + qop + ", nc=\"" + nc + + "\", cnonce=\"" + cnonce + "\", response=\"" + response + "\""; + + auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; + return std::make_pair(key, field); +} +#endif + +inline bool parse_www_authenticate(const httplib::Response &res, + std::map<std::string, std::string> &auth, + bool is_proxy) { + auto auth_key = is_proxy ? "Proxy-Authenticate" : "WWW-Authenticate"; + if (res.has_header(auth_key)) { + static auto re = std::regex(R"~((?:(?:,\s*)?(.+?)=(?:"(.*?)"|([^,]*))))~"); + auto s = res.get_header_value(auth_key); + auto pos = s.find(' '); + if (pos != std::string::npos) { + auto type = s.substr(0, pos); + if (type == "Basic") { + return false; + } else if (type == "Digest") { + s = s.substr(pos + 1); + auto beg = std::sregex_iterator(s.begin(), s.end(), re); + for (auto i = beg; i != std::sregex_iterator(); ++i) { + auto m = *i; + auto key = s.substr(m.position(1), m.length(1)); + auto val = m.length(2) > 0 ? s.substr(m.position(2), m.length(2)) + : s.substr(m.position(3), m.length(3)); + auth[key] = val; + } + return true; + } + } + } + return false; +} + +// https://stackoverflow.com/questions/440133/how-do-i-create-a-random-alpha-numeric-string-in-c/440240#answer-440240 +inline std::string random_string(size_t length) { + auto randchar = []() -> char { + const char charset[] = "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz"; + const size_t max_index = (sizeof(charset) - 1); + return charset[rand() % max_index]; + }; + std::string str(length, 0); + std::generate_n(str.begin(), length, randchar); + return str; } // Request implementation @@ -2068,14 +2657,19 @@ inline size_t Request::get_param_value_count(const char *key) const { return std::distance(r.first, r.second); } +inline bool Request::is_multipart_form_data() const { + const auto &content_type = get_header_value("Content-Type"); + return !content_type.find("multipart/form-data"); +} + inline bool Request::has_file(const char *key) const { return files.find(key) != files.end(); } -inline MultipartFile Request::get_file_value(const char *key) const { +inline MultipartFormData Request::get_file_value(const char *key) const { auto it = files.find(key); if (it != files.end()) { return it->second; } - return MultipartFile(); + return MultipartFormData(); } // Response implementation @@ -2119,40 +2713,47 @@ inline void Response::set_content(const std::string &s, } inline void Response::set_content_provider( - size_t length, - std::function<void(size_t offset, size_t length, DataSink sink)> provider, + size_t in_length, + std::function<void(size_t offset, size_t length, DataSink &sink)> provider, std::function<void()> resource_releaser) { - assert(length > 0); - content_provider_resource_length = length; - content_provider = [provider](size_t offset, size_t length, DataSink sink, - Done) { provider(offset, length, sink); }; + assert(in_length > 0); + content_length = in_length; + content_provider = [provider](size_t offset, size_t length, DataSink &sink) { + provider(offset, length, sink); + }; content_provider_resource_releaser = resource_releaser; } inline void Response::set_chunked_content_provider( - std::function<void(size_t offset, DataSink sink, Done done)> provider, + std::function<void(size_t offset, DataSink &sink)> provider, std::function<void()> resource_releaser) { - content_provider_resource_length = 0; - content_provider = [provider](size_t offset, size_t, DataSink sink, - Done done) { provider(offset, sink, done); }; + content_length = 0; + content_provider = [provider](size_t offset, size_t, DataSink &sink) { + provider(offset, sink); + }; content_provider_resource_releaser = resource_releaser; } // Rstream implementation +inline int Stream::write(const char *ptr) { return write(ptr, strlen(ptr)); } + +inline int Stream::write(const std::string &s) { + return write(s.data(), s.size()); +} + template <typename... Args> inline int Stream::write_format(const char *fmt, const Args &... args) { - const auto bufsiz = 2048; - char buf[bufsiz]; + std::array<char, 2048> buf; #if defined(_MSC_VER) && _MSC_VER < 1900 - auto n = _snprintf_s(buf, bufsiz, bufsiz - 1, fmt, args...); + auto n = _snprintf_s(buf, bufsiz, buf.size() - 1, fmt, args...); #else - auto n = snprintf(buf, bufsiz - 1, fmt, args...); + auto n = snprintf(buf.data(), buf.size() - 1, fmt, args...); #endif if (n <= 0) { return n; } - if (n >= bufsiz - 1) { - std::vector<char> glowable_buf(bufsiz); + if (n >= static_cast<int>(buf.size()) - 1) { + std::vector<char> glowable_buf(buf.size()); while (n >= static_cast<int>(glowable_buf.size() - 1)) { glowable_buf.resize(glowable_buf.size() * 2); @@ -2165,33 +2766,36 @@ inline int Stream::write_format(const char *fmt, const Args &... args) { } return write(&glowable_buf[0], n); } else { - return write(buf, n); + return write(buf.data(), n); } } +namespace detail { + // Socket stream implementation -inline SocketStream::SocketStream(socket_t sock) : sock_(sock) {} +inline SocketStream::SocketStream(socket_t sock, time_t read_timeout_sec, + time_t read_timeout_usec) + : sock_(sock), read_timeout_sec_(read_timeout_sec), + read_timeout_usec_(read_timeout_usec) {} inline SocketStream::~SocketStream() {} -inline int SocketStream::read(char *ptr, size_t size) { - if (detail::select_read(sock_, CPPHTTPLIB_READ_TIMEOUT_SECOND, - CPPHTTPLIB_READ_TIMEOUT_USECOND) > 0) { - return recv(sock_, ptr, static_cast<int>(size), 0); - } - return -1; +inline bool SocketStream::is_readable() const { + return detail::select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; } -inline int SocketStream::write(const char *ptr, size_t size) { - return send(sock_, ptr, static_cast<int>(size), 0); +inline bool SocketStream::is_writable() const { + return detail::select_write(sock_, 0, 0) > 0; } -inline int SocketStream::write(const char *ptr) { - return write(ptr, strlen(ptr)); +inline int SocketStream::read(char *ptr, size_t size) { + if (is_readable()) { return recv(sock_, ptr, static_cast<int>(size), 0); } + return -1; } -inline int SocketStream::write(const std::string &s) { - return write(s.data(), s.size()); +inline int SocketStream::write(const char *ptr, size_t size) { + if (is_writable()) { return send(sock_, ptr, static_cast<int>(size), 0); } + return -1; } inline std::string SocketStream::get_remote_addr() const { @@ -2199,12 +2803,18 @@ inline std::string SocketStream::get_remote_addr() const { } // Buffer stream implementation +inline bool BufferStream::is_readable() const { return true; } + +inline bool BufferStream::is_writable() const { return true; } + inline int BufferStream::read(char *ptr, size_t size) { #if defined(_MSC_VER) && _MSC_VER < 1900 - return static_cast<int>(buffer._Copy_s(ptr, size, size)); + int len_read = static_cast<int>(buffer._Copy_s(ptr, size, size, position)); #else - return static_cast<int>(buffer.copy(ptr, size)); + int len_read = static_cast<int>(buffer.copy(ptr, size, position)); #endif + position += len_read; + return len_read; } inline int BufferStream::write(const char *ptr, size_t size) { @@ -2212,33 +2822,23 @@ inline int BufferStream::write(const char *ptr, size_t size) { return static_cast<int>(size); } -inline int BufferStream::write(const char *ptr) { - return write(ptr, strlen(ptr)); -} - -inline int BufferStream::write(const std::string &s) { - return write(s.data(), s.size()); -} - inline std::string BufferStream::get_remote_addr() const { return ""; } inline const std::string &BufferStream::get_buffer() const { return buffer; } +} // namespace detail + // HTTP server implementation inline Server::Server() : keep_alive_max_count_(CPPHTTPLIB_KEEPALIVE_MAX_COUNT), + read_timeout_sec_(CPPHTTPLIB_READ_TIMEOUT_SECOND), + read_timeout_usec_(CPPHTTPLIB_READ_TIMEOUT_USECOND), payload_max_length_(CPPHTTPLIB_PAYLOAD_MAX_LENGTH), is_running_(false), svr_sock_(INVALID_SOCKET) { #ifndef _WIN32 signal(SIGPIPE, SIG_IGN); #endif - new_task_queue = [] { -#if CPPHTTPLIB_THREAD_POOL_COUNT > 0 - return new ThreadPool(CPPHTTPLIB_THREAD_POOL_COUNT); -#else - return new Threads(); -#endif - }; + new_task_queue = [] { return new ThreadPool(CPPHTTPLIB_THREAD_POOL_COUNT); }; } inline Server::~Server() {} @@ -2253,16 +2853,37 @@ inline Server &Server::Post(const char *pattern, Handler handler) { return *this; } +inline Server &Server::Post(const char *pattern, + HandlerWithContentReader handler) { + post_handlers_for_content_reader_.push_back( + std::make_pair(std::regex(pattern), handler)); + return *this; +} + inline Server &Server::Put(const char *pattern, Handler handler) { put_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); return *this; } +inline Server &Server::Put(const char *pattern, + HandlerWithContentReader handler) { + put_handlers_for_content_reader_.push_back( + std::make_pair(std::regex(pattern), handler)); + return *this; +} + inline Server &Server::Patch(const char *pattern, Handler handler) { patch_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); return *this; } +inline Server &Server::Patch(const char *pattern, + HandlerWithContentReader handler) { + patch_handlers_for_content_reader_.push_back( + std::make_pair(std::regex(pattern), handler)); + return *this; +} + inline Server &Server::Delete(const char *pattern, Handler handler) { delete_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); return *this; @@ -2273,32 +2894,68 @@ inline Server &Server::Options(const char *pattern, Handler handler) { return *this; } -inline bool Server::set_base_dir(const char *path) { - if (detail::is_dir(path)) { - base_dir_ = path; - return true; +inline bool Server::set_base_dir(const char *dir, const char *mount_point) { + return set_mount_point(mount_point, dir); +} + +inline bool Server::set_mount_point(const char *mount_point, const char *dir) { + if (detail::is_dir(dir)) { + std::string mnt = mount_point ? mount_point : "/"; + if (!mnt.empty() && mnt[0] == '/') { + base_dirs_.emplace_back(mnt, dir); + return true; + } + } + return false; +} + +inline bool Server::remove_mount_point(const char *mount_point) { + for (auto it = base_dirs_.begin(); it != base_dirs_.end(); ++it) { + if (it->first == mount_point) { + base_dirs_.erase(it); + return true; + } } return false; } +inline void Server::set_file_extension_and_mimetype_mapping(const char *ext, + const char *mime) { + file_extension_and_mimetype_map_[ext] = mime; +} + inline void Server::set_file_request_handler(Handler handler) { - file_request_handler_ = handler; + file_request_handler_ = std::move(handler); } inline void Server::set_error_handler(Handler handler) { - error_handler_ = handler; + error_handler_ = std::move(handler); } -inline void Server::set_logger(Logger logger) { logger_ = logger; } +inline void Server::set_logger(Logger logger) { logger_ = std::move(logger); } + +inline void +Server::set_expect_100_continue_handler(Expect100ContinueHandler handler) { + expect_100_continue_handler_ = std::move(handler); +} inline void Server::set_keep_alive_max_count(size_t count) { keep_alive_max_count_ = count; } +inline void Server::set_read_timeout(time_t sec, time_t usec) { + read_timeout_sec_ = sec; + read_timeout_usec_ = usec; +} + inline void Server::set_payload_max_length(size_t length) { payload_max_length_ = length; } +inline bool Server::bind_to_port(const char *host, int port, int socket_flags) { + if (bind_internal(host, port, socket_flags) < 0) return false; + return true; +} inline int Server::bind_to_any_port(const char *host, int socket_flags) { return bind_internal(host, 0, socket_flags); } @@ -2306,8 +2963,7 @@ inline int Server::bind_to_any_port(const char *host, int socket_flags) { inline bool Server::listen_after_bind() { return listen_internal(); } inline bool Server::listen(const char *host, int port, int socket_flags) { - if (bind_internal(host, port, socket_flags) < 0) return false; - return listen_internal(); + return bind_to_port(host, port, socket_flags) && listen_internal(); } inline bool Server::is_running() const { return is_running_; } @@ -2322,8 +2978,9 @@ inline void Server::stop() { } inline bool Server::parse_request_line(const char *s, Request &req) { - static std::regex re("(GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH|PRI) " - "(([^?]+)(?:\\?(.+?))?) (HTTP/1\\.[01])\r\n"); + const static std::regex re( + "(GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH|PRI) " + "(([^?]+)(?:\\?(.*?))?) (HTTP/1\\.[01])\r\n"); std::cmatch m; if (std::regex_match(s, m, re)) { @@ -2348,9 +3005,11 @@ inline bool Server::write_response(Stream &strm, bool last_connection, if (400 <= res.status && error_handler_) { error_handler_(req, res); } + detail::BufferStream bstrm; + // Response line - if (!strm.write_format("HTTP/1.1 %d %s\r\n", res.status, - detail::status_message(res.status))) { + if (!bstrm.write_format("HTTP/1.1 %d %s\r\n", res.status, + detail::status_message(res.status))) { return false; } @@ -2363,11 +3022,12 @@ inline bool Server::write_response(Stream &strm, bool last_connection, res.set_header("Connection", "Keep-Alive"); } - if (!res.has_header("Content-Type")) { + if (!res.has_header("Content-Type") && + (!res.body.empty() || res.content_length > 0)) { res.set_header("Content-Type", "text/plain"); } - if (!res.has_header("Accept-Ranges")) { + if (!res.has_header("Accept-Ranges") && req.method == "HEAD") { res.set_header("Accept-Ranges", "bytes"); } @@ -2388,17 +3048,17 @@ inline bool Server::write_response(Stream &strm, bool last_connection, } if (res.body.empty()) { - if (res.content_provider_resource_length > 0) { + if (res.content_length > 0) { size_t length = 0; if (req.ranges.empty()) { - length = res.content_provider_resource_length; + length = res.content_length; } else if (req.ranges.size() == 1) { - auto offsets = detail::get_range_offset_and_length( - req, res.content_provider_resource_length, 0); + auto offsets = + detail::get_range_offset_and_length(req, res.content_length, 0); auto offset = offsets.first; length = offsets.second; auto content_range = detail::make_content_range_header_field( - offset, length, res.content_provider_resource_length); + offset, length, res.content_length); res.set_header("Content-Range", content_range); } else { length = detail::get_multipart_ranges_data_length(req, res, boundary, @@ -2430,7 +3090,7 @@ inline bool Server::write_response(Stream &strm, bool last_connection, } #ifdef CPPHTTPLIB_ZLIB_SUPPORT - // TODO: 'Accpet-Encoding' has gzip, not gzip;q=0 + // TODO: 'Accept-Encoding' has gzip, not gzip;q=0 const auto &encodings = req.get_header_value("Accept-Encoding"); if (encodings.find("gzip") != std::string::npos && detail::can_compress(res.get_header_value("Content-Type"))) { @@ -2444,7 +3104,11 @@ inline bool Server::write_response(Stream &strm, bool last_connection, res.set_header("Content-Length", length); } - if (!detail::write_headers(strm, res, Headers())) { return false; } + if (!detail::write_headers(bstrm, res, Headers())) { return false; } + + // Flush buffer + auto &data = bstrm.get_buffer(); + strm.write(data.data(), data.size()); // Body if (req.method != "HEAD") { @@ -2468,15 +3132,15 @@ inline bool Server::write_content_with_provider(Stream &strm, const Request &req, Response &res, const std::string &boundary, const std::string &content_type) { - if (res.content_provider_resource_length) { + if (res.content_length) { if (req.ranges.empty()) { if (detail::write_content(strm, res.content_provider, 0, - res.content_provider_resource_length) < 0) { + res.content_length) < 0) { return false; } } else if (req.ranges.size() == 1) { - auto offsets = detail::get_range_offset_and_length( - req, res.content_provider_resource_length, 0); + auto offsets = + detail::get_range_offset_and_length(req, res.content_length, 0); auto offset = offsets.first; auto length = offsets.second; if (detail::write_content(strm, res.content_provider, offset, length) < @@ -2490,29 +3154,123 @@ Server::write_content_with_provider(Stream &strm, const Request &req, } } } else { - if (detail::write_content_chunked(strm, res.content_provider) < 0) { + auto is_shutting_down = [this]() { + return this->svr_sock_ == INVALID_SOCKET; + }; + if (detail::write_content_chunked(strm, res.content_provider, + is_shutting_down) < 0) { return false; } } return true; } -inline bool Server::handle_file_request(Request &req, Response &res) { - if (!base_dir_.empty() && detail::is_valid_path(req.path)) { - std::string path = base_dir_ + req.path; +inline bool Server::read_content(Stream &strm, bool last_connection, + Request &req, Response &res) { + MultipartFormDataMap::iterator cur; + auto ret = read_content_core( + strm, last_connection, req, res, + // Regular + [&](const char *buf, size_t n) { + if (req.body.size() + n > req.body.max_size()) { return false; } + req.body.append(buf, n); + return true; + }, + // Multipart + [&](const MultipartFormData &file) { + cur = req.files.emplace(file.name, file); + return true; + }, + [&](const char *buf, size_t n) { + auto &content = cur->second.content; + if (content.size() + n > content.max_size()) { return false; } + content.append(buf, n); + return true; + }); - if (!path.empty() && path.back() == '/') { path += "index.html"; } + const auto &content_type = req.get_header_value("Content-Type"); + if (!content_type.find("application/x-www-form-urlencoded")) { + detail::parse_query_text(req.body, req.params); + } - if (detail::is_file(path)) { - detail::read_file(path, res.body); - auto type = detail::find_content_type(path); - if (type) { res.set_header("Content-Type", type); } - res.status = 200; - if (file_request_handler_) { file_request_handler_(req, res); } - return true; + return ret; +} + +inline bool Server::read_content_with_content_receiver( + Stream &strm, bool last_connection, Request &req, Response &res, + ContentReceiver receiver, MultipartContentHeader multipart_header, + ContentReceiver multipart_receiver) { + return read_content_core(strm, last_connection, req, res, receiver, + multipart_header, multipart_receiver); +} + +inline bool Server::read_content_core(Stream &strm, bool last_connection, + Request &req, Response &res, + ContentReceiver receiver, + MultipartContentHeader mulitpart_header, + ContentReceiver multipart_receiver) { + detail::MultipartFormDataParser multipart_form_data_parser; + ContentReceiver out; + + if (req.is_multipart_form_data()) { + const auto &content_type = req.get_header_value("Content-Type"); + std::string boundary; + if (!detail::parse_multipart_boundary(content_type, boundary)) { + res.status = 400; + return write_response(strm, last_connection, req, res); } + + multipart_form_data_parser.set_boundary(boundary); + out = [&](const char *buf, size_t n) { + return multipart_form_data_parser.parse(buf, n, multipart_receiver, + mulitpart_header); + }; + } else { + out = receiver; + } + + if (!detail::read_content(strm, req, payload_max_length_, res.status, + Progress(), out)) { + return write_response(strm, last_connection, req, res); } + if (req.is_multipart_form_data()) { + if (!multipart_form_data_parser.is_valid()) { + res.status = 400; + return write_response(strm, last_connection, req, res); + } + } + + return true; +} + +inline bool Server::handle_file_request(Request &req, Response &res, + bool head) { + for (const auto &kv : base_dirs_) { + const auto &mount_point = kv.first; + const auto &base_dir = kv.second; + + // Prefix match + if (!req.path.find(mount_point)) { + std::string sub_path = "/" + req.path.substr(mount_point.size()); + if (detail::is_valid_path(sub_path)) { + auto path = base_dir + sub_path; + if (path.back() == '/') { path += "index.html"; } + + if (detail::is_file(path)) { + detail::read_file(path, res.body); + auto type = + detail::find_content_type(path, file_extension_and_mimetype_map_); + if (type) { res.set_header("Content-Type", type); } + res.status = 200; + if (!head && file_request_handler_) { + file_request_handler_(req, res); + } + return true; + } + } + } + } return false; } @@ -2605,9 +3363,51 @@ inline bool Server::listen_internal() { return ret; } -inline bool Server::routing(Request &req, Response &res) { - if (req.method == "GET" && handle_file_request(req, res)) { return true; } +inline bool Server::routing(Request &req, Response &res, Stream &strm, + bool last_connection) { + // File handler + bool is_head_request = req.method == "HEAD"; + if ((req.method == "GET" || is_head_request) && + handle_file_request(req, res, is_head_request)) { + return true; + } + + if (detail::expect_content(req)) { + // Content reader handler + { + ContentReader reader( + [&](ContentReceiver receiver) { + return read_content_with_content_receiver( + strm, last_connection, req, res, receiver, nullptr, nullptr); + }, + [&](MultipartContentHeader header, ContentReceiver receiver) { + return read_content_with_content_receiver( + strm, last_connection, req, res, nullptr, header, receiver); + }); + + if (req.method == "POST") { + if (dispatch_request_for_content_reader( + req, res, reader, post_handlers_for_content_reader_)) { + return true; + } + } else if (req.method == "PUT") { + if (dispatch_request_for_content_reader( + req, res, reader, put_handlers_for_content_reader_)) { + return true; + } + } else if (req.method == "PATCH") { + if (dispatch_request_for_content_reader( + req, res, reader, patch_handlers_for_content_reader_)) { + return true; + } + } + } + // Read content into `req.body` + if (!read_content(strm, last_connection, req, res)) { return false; } + } + + // Regular handler if (req.method == "GET" || req.method == "HEAD") { return dispatch_request(req, res, get_handlers_); } else if (req.method == "POST") { @@ -2640,17 +3440,31 @@ inline bool Server::dispatch_request(Request &req, Response &res, return false; } +inline bool Server::dispatch_request_for_content_reader( + Request &req, Response &res, ContentReader content_reader, + HandlersForContentReader &handlers) { + for (const auto &x : handlers) { + const auto &pattern = x.first; + const auto &handler = x.second; + + if (std::regex_match(req.path, req.matches, pattern)) { + handler(req, res, content_reader); + return true; + } + } + return false; +} + inline bool Server::process_request(Stream &strm, bool last_connection, bool &connection_close, - std::function<void(Request &)> setup_request) { - const auto bufsiz = 2048; - char buf[bufsiz]; + const std::function<void(Request &)> &setup_request) { + std::array<char, 2048> buf{}; - detail::stream_line_reader reader(strm, buf, bufsiz); + detail::stream_line_reader line_reader(strm, buf.data(), buf.size()); // Connection has been closed on client - if (!reader.getline()) { return false; } + if (!line_reader.getline()) { return false; } Request req; Response res; @@ -2658,7 +3472,7 @@ Server::process_request(Stream &strm, bool last_connection, res.version = "HTTP/1.1"; // Check if the request URI doesn't exceed the limit - if (reader.size() > CPPHTTPLIB_REQUEST_URI_MAX_LENGTH) { + if (line_reader.size() > CPPHTTPLIB_REQUEST_URI_MAX_LENGTH) { Headers dummy; detail::read_headers(strm, dummy); res.status = 414; @@ -2666,7 +3480,7 @@ Server::process_request(Stream &strm, bool last_connection, } // Request line and headers - if (!parse_request_line(reader.ptr(), req) || + if (!parse_request_line(line_reader.ptr(), req) || !detail::read_headers(strm, req.headers)) { res.status = 400; return write_response(strm, last_connection, req, res); @@ -2683,33 +3497,6 @@ Server::process_request(Stream &strm, bool last_connection, req.set_header("REMOTE_ADDR", strm.get_remote_addr()); - // Body - if (req.method == "POST" || req.method == "PUT" || req.method == "PATCH" || req.method == "PRI") { - if (!detail::read_content(strm, req, payload_max_length_, res.status, - Progress(), [&](const char *buf, size_t n) { - if (req.body.size() + n > req.body.max_size()) { - return false; - } - req.body.append(buf, n); - return true; - })) { - return write_response(strm, last_connection, req, res); - } - - const auto &content_type = req.get_header_value("Content-Type"); - - if (!content_type.find("application/x-www-form-urlencoded")) { - detail::parse_query_text(req.body, req.params); - } else if (!content_type.find("multipart/form-data")) { - std::string boundary; - if (!detail::parse_multipart_boundary(content_type, boundary) || - !detail::parse_multipart_formdata(boundary, req.body, req.files)) { - res.status = 400; - return write_response(strm, last_connection, req, res); - } - } - } - if (req.has_header("Range")) { const auto &range_header_value = req.get_header_value("Range"); if (!detail::parse_range_header(range_header_value, req.ranges)) { @@ -2719,7 +3506,23 @@ Server::process_request(Stream &strm, bool last_connection, if (setup_request) { setup_request(req); } - if (routing(req, res)) { + if (req.get_header_value("Expect") == "100-continue") { + auto status = 100; + if (expect_100_continue_handler_) { + status = expect_100_continue_handler_(req, res); + } + switch (status) { + case 100: + case 417: + strm.write_format("HTTP/1.1 %d %s\r\n\r\n", status, + detail::status_message(status)); + break; + default: return write_response(strm, last_connection, req, res); + } + } + + // Rounting + if (routing(req, res, strm, last_connection)) { if (res.status == -1) { res.status = req.ranges.empty() ? 200 : 206; } } else { if (res.status == -1) { res.status = 404; } @@ -2732,7 +3535,7 @@ inline bool Server::is_valid() const { return true; } inline bool Server::process_and_close_socket(socket_t sock) { return detail::process_and_close_socket( - false, sock, keep_alive_max_count_, + false, sock, keep_alive_max_count_, read_timeout_sec_, read_timeout_usec_, [this](Stream &strm, bool last_connection, bool &connection_close) { return process_request(strm, last_connection, connection_close, nullptr); @@ -2740,47 +3543,37 @@ inline bool Server::process_and_close_socket(socket_t sock) { } // HTTP client implementation -inline Client::Client(const char *host, int port, time_t timeout_sec) - : host_(host), port_(port), timeout_sec_(timeout_sec), +inline Client::Client(const std::string &host, int port, + const std::string &client_cert_path, + const std::string &client_key_path) + : host_(host), port_(port), host_and_port_(host_ + ":" + std::to_string(port_)), - keep_alive_max_count_(CPPHTTPLIB_KEEPALIVE_MAX_COUNT), - follow_location_(false) {} + client_cert_path_(client_cert_path), client_key_path_(client_key_path) {} inline Client::~Client() {} inline bool Client::is_valid() const { return true; } inline socket_t Client::create_client_socket() const { - return detail::create_socket( - host_.c_str(), port_, [=](socket_t sock, struct addrinfo &ai) -> bool { - detail::set_nonblocking(sock, true); - - auto ret = connect(sock, ai.ai_addr, static_cast<int>(ai.ai_addrlen)); - if (ret < 0) { - if (detail::is_connection_error() || - !detail::wait_until_socket_is_ready(sock, timeout_sec_, 0)) { - detail::close_socket(sock); - return false; - } - } - - detail::set_nonblocking(sock, false); - return true; - }); + if (!proxy_host_.empty()) { + return detail::create_client_socket(proxy_host_.c_str(), proxy_port_, + timeout_sec_, interface_); + } + return detail::create_client_socket(host_.c_str(), port_, timeout_sec_, + interface_); } inline bool Client::read_response_line(Stream &strm, Response &res) { - const auto bufsiz = 2048; - char buf[bufsiz]; + std::array<char, 2048> buf; - detail::stream_line_reader reader(strm, buf, bufsiz); + detail::stream_line_reader line_reader(strm, buf.data(), buf.size()); - if (!reader.getline()) { return false; } + if (!line_reader.getline()) { return false; } const static std::regex re("(HTTP/1\\.[01]) (\\d+?) .*\r\n"); std::cmatch m; - if (std::regex_match(reader.ptr(), m, re)) { + if (std::regex_match(line_reader.ptr(), m, re)) { res.version = std::string(m[1]); res.status = std::stoi(std::string(m[2])); } @@ -2789,22 +3582,21 @@ inline bool Client::read_response_line(Stream &strm, Response &res) { } inline bool Client::send(const Request &req, Response &res) { - if (req.path.empty()) { return false; } - auto sock = create_client_socket(); if (sock == INVALID_SOCKET) { return false; } - auto ret = process_and_close_socket( - sock, 1, [&](Stream &strm, bool last_connection, bool &connection_close) { - return process_request(strm, req, res, last_connection, - connection_close); - }); - - if (ret && follow_location_ && (300 < res.status && res.status < 400)) { - ret = redirect(req, res); +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + if (is_ssl() && !proxy_host_.empty()) { + bool error; + if (!connect(sock, res, error)) { return error; } } +#endif - return ret; + return process_and_close_socket( + sock, 1, [&](Stream &strm, bool last_connection, bool &connection_close) { + return handle_request(strm, req, res, last_connection, + connection_close); + }); } inline bool Client::send(const std::vector<Request> &requests, @@ -2814,32 +3606,136 @@ inline bool Client::send(const std::vector<Request> &requests, auto sock = create_client_socket(); if (sock == INVALID_SOCKET) { return false; } - if (!process_and_close_socket( - sock, requests.size() - i, - [&](Stream &strm, bool last_connection, bool &connection_close) -> bool { - auto &req = requests[i]; - auto res = Response(); - i++; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + if (is_ssl() && !proxy_host_.empty()) { + Response res; + bool error; + if (!connect(sock, res, error)) { return false; } + } +#endif + + if (!process_and_close_socket(sock, requests.size() - i, + [&](Stream &strm, bool last_connection, + bool &connection_close) -> bool { + auto &req = requests[i++]; + auto res = Response(); + auto ret = handle_request(strm, req, res, + last_connection, + connection_close); + if (ret) { + responses.emplace_back(std::move(res)); + } + return ret; + })) { + return false; + } + } - if (req.path.empty()) { return false; } - auto ret = process_request(strm, req, res, last_connection, - connection_close); + return true; +} + +inline bool Client::handle_request(Stream &strm, const Request &req, + Response &res, bool last_connection, + bool &connection_close) { + if (req.path.empty()) { return false; } + + bool ret; + + if (!is_ssl() && !proxy_host_.empty()) { + auto req2 = req; + req2.path = "http://" + host_and_port_ + req.path; + ret = process_request(strm, req2, res, last_connection, connection_close); + } else { + ret = process_request(strm, req, res, last_connection, connection_close); + } + + if (!ret) { return false; } + + if (300 < res.status && res.status < 400 && follow_location_) { + ret = redirect(req, res); + } - if (ret && follow_location_ && - (300 < res.status && res.status < 400)) { - ret = redirect(req, res); - } +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + if (res.status == 401 || res.status == 407) { + auto is_proxy = res.status == 407; + const auto &username = + is_proxy ? proxy_digest_auth_username_ : digest_auth_username_; + const auto &password = + is_proxy ? proxy_digest_auth_password_ : digest_auth_password_; + + if (!username.empty() && !password.empty()) { + std::map<std::string, std::string> auth; + if (parse_www_authenticate(res, auth, is_proxy)) { + Request new_req = req; + auto key = is_proxy ? "Proxy-Authorization" : "WWW-Authorization"; + new_req.headers.erase(key); + new_req.headers.insert(make_digest_authentication_header( + req, auth, 1, random_string(10), username, password, is_proxy)); + + Response new_res; + + ret = send(new_req, new_res); + if (ret) { res = new_res; } + } + } + } +#endif + + return ret; +} - if (ret) { responses.emplace_back(std::move(res)); } +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline bool Client::connect(socket_t sock, Response &res, bool &error) { + error = true; + Response res2; + + if (!detail::process_socket( + true, sock, 1, read_timeout_sec_, read_timeout_usec_, + [&](Stream &strm, bool /*last_connection*/, bool &connection_close) { + Request req2; + req2.method = "CONNECT"; + req2.path = host_and_port_; + return process_request(strm, req2, res2, false, connection_close); + })) { + detail::close_socket(sock); + error = false; + return false; + } - return ret; - })) { + if (res2.status == 407) { + if (!proxy_digest_auth_username_.empty() && + !proxy_digest_auth_password_.empty()) { + std::map<std::string, std::string> auth; + if (parse_www_authenticate(res2, auth, true)) { + Response res3; + if (!detail::process_socket( + true, sock, 1, read_timeout_sec_, read_timeout_usec_, + [&](Stream &strm, bool /*last_connection*/, + bool &connection_close) { + Request req3; + req3.method = "CONNECT"; + req3.path = host_and_port_; + req3.headers.insert(make_digest_authentication_header( + req3, auth, 1, random_string(10), + proxy_digest_auth_username_, proxy_digest_auth_password_, + true)); + return process_request(strm, req3, res3, false, + connection_close); + })) { + detail::close_socket(sock); + error = false; + return false; + } + } + } else { + res = res2; return false; } } return true; } +#endif inline bool Client::redirect(const Request &req, Response &res) { if (req.redirect_count == 0) { return false; } @@ -2847,46 +3743,47 @@ inline bool Client::redirect(const Request &req, Response &res) { auto location = res.get_header_value("location"); if (location.empty()) { return false; } - std::regex re( + const static std::regex re( R"(^(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*(?:\?[^#]*)?)(?:#.*)?)"); + std::smatch m; + if (!regex_match(location, m, re)) { return false; } + auto scheme = is_ssl() ? "https" : "http"; - std::smatch m; - if (regex_match(location, m, re)) { - auto next_scheme = m[1].str(); - auto next_host = m[2].str(); - auto next_path = m[3].str(); - if (next_host.empty()) { next_host = host_; } - if (next_path.empty()) { next_path = "/"; } - - if (next_scheme == scheme && next_host == host_) { - return detail::redirect(*this, req, res, next_path); - } else { - if (next_scheme == "https") { + auto next_scheme = m[1].str(); + auto next_host = m[2].str(); + auto next_path = m[3].str(); + if (next_scheme.empty()) { next_scheme = scheme; } + if (next_scheme.empty()) { next_scheme = scheme; } + if (next_host.empty()) { next_host = host_; } + if (next_path.empty()) { next_path = "/"; } + + if (next_scheme == scheme && next_host == host_) { + return detail::redirect(*this, req, res, next_path); + } else { + if (next_scheme == "https") { #ifdef CPPHTTPLIB_OPENSSL_SUPPORT - SSLClient cli(next_host.c_str()); - cli.follow_location(true); - return detail::redirect(cli, req, res, next_path); + SSLClient cli(next_host.c_str()); + cli.copy_settings(*this); + return detail::redirect(cli, req, res, next_path); #else - return false; + return false; #endif - } else { - Client cli(next_host.c_str()); - cli.follow_location(true); - return detail::redirect(cli, req, res, next_path); - } + } else { + Client cli(next_host.c_str()); + cli.copy_settings(*this); + return detail::redirect(cli, req, res, next_path); } } - return false; } -inline void Client::write_request(Stream &strm, const Request &req, +inline bool Client::write_request(Stream &strm, const Request &req, bool last_connection) { - BufferStream bstrm; + detail::BufferStream bstrm; // Request line - auto path = detail::encode_url(req.path); + const auto &path = detail::encode_url(req.path); bstrm.write_format("%s %s HTTP/1.1\r\n", req.method.c_str(), path.c_str()); @@ -2913,11 +3810,14 @@ inline void Client::write_request(Stream &strm, const Request &req, if (!req.has_header("Accept")) { headers.emplace("Accept", "*/*"); } if (!req.has_header("User-Agent")) { - headers.emplace("User-Agent", "cpp-httplib/0.2"); + headers.emplace("User-Agent", "cpp-httplib/0.5"); } if (req.body.empty()) { - if (req.method == "POST" || req.method == "PUT" || req.method == "PATCH") { + if (req.content_provider) { + auto length = std::to_string(req.content_length); + headers.emplace("Content-Length", length); + } else { headers.emplace("Content-Length", "0"); } } else { @@ -2931,21 +3831,100 @@ inline void Client::write_request(Stream &strm, const Request &req, } } - detail::write_headers(bstrm, req, headers); + if (!basic_auth_username_.empty() && !basic_auth_password_.empty()) { + headers.insert(make_basic_authentication_header( + basic_auth_username_, basic_auth_password_, false)); + } - // Body - if (!req.body.empty()) { bstrm.write(req.body); } + if (!proxy_basic_auth_username_.empty() && + !proxy_basic_auth_password_.empty()) { + headers.insert(make_basic_authentication_header( + proxy_basic_auth_username_, proxy_basic_auth_password_, true)); + } + + detail::write_headers(bstrm, req, headers); // Flush buffer auto &data = bstrm.get_buffer(); strm.write(data.data(), data.size()); + + // Body + if (req.body.empty()) { + if (req.content_provider) { + size_t offset = 0; + size_t end_offset = req.content_length; + + DataSink data_sink; + data_sink.write = [&](const char *d, size_t l) { + auto written_length = strm.write(d, l); + offset += written_length; + }; + data_sink.is_writable = [&](void) { return strm.is_writable(); }; + + while (offset < end_offset) { + req.content_provider(offset, end_offset - offset, data_sink); + } + } + } else { + strm.write(req.body); + } + + return true; +} + +inline std::shared_ptr<Response> Client::send_with_content_provider( + const char *method, const char *path, const Headers &headers, + const std::string &body, size_t content_length, + ContentProvider content_provider, const char *content_type) { + Request req; + req.method = method; + req.headers = headers; + req.path = path; + + req.headers.emplace("Content-Type", content_type); + +#ifdef CPPHTTPLIB_ZLIB_SUPPORT + if (compress_) { + if (content_provider) { + size_t offset = 0; + + DataSink data_sink; + data_sink.write = [&](const char *data, size_t data_len) { + req.body.append(data, data_len); + offset += data_len; + }; + data_sink.is_writable = [&](void) { return true; }; + + while (offset < content_length) { + content_provider(offset, content_length - offset, data_sink); + } + } else { + req.body = body; + } + + if (!detail::compress(req.body)) { return nullptr; } + req.headers.emplace("Content-Encoding", "gzip"); + } else +#endif + { + if (content_provider) { + req.content_length = content_length; + req.content_provider = content_provider; + } else { + req.body = body; + } + } + + auto res = std::make_shared<Response>(); + + return send(req, *res) ? res : nullptr; } inline bool Client::process_request(Stream &strm, const Request &req, Response &res, bool last_connection, bool &connection_close) { // Send request - write_request(strm, req, last_connection); + if (!write_request(strm, req, last_connection)) { return false; } // Receive response and headers if (!read_response_line(strm, res) || @@ -2963,21 +3942,16 @@ inline bool Client::process_request(Stream &strm, const Request &req, } // Body - if (req.method != "HEAD") { - detail::ContentReceiverCore out = [&](const char *buf, size_t n) { + if (req.method != "HEAD" && req.method != "CONNECT") { + ContentReceiver out = [&](const char *buf, size_t n) { if (res.body.size() + n > res.body.max_size()) { return false; } res.body.append(buf, n); return true; }; if (req.content_receiver) { - auto offset = std::make_shared<size_t>(); - auto length = get_header_value_uint64(res.headers, "Content-Length", 0); - auto receiver = req.content_receiver; - out = [offset, length, receiver](const char *buf, size_t n) { - auto ret = receiver(buf, n, *offset, length); - (*offset) += n; - return ret; + out = [&](const char *buf, size_t n) { + return req.content_receiver(buf, n); }; } @@ -2988,6 +3962,9 @@ inline bool Client::process_request(Stream &strm, const Request &req, } } + // Log + if (logger_) { logger_(req, res); } + return true; } @@ -2997,25 +3974,25 @@ inline bool Client::process_and_close_socket( bool &connection_close)> callback) { request_count = std::min(request_count, keep_alive_max_count_); - return detail::process_and_close_socket(true, sock, request_count, callback); + return detail::process_and_close_socket(true, sock, request_count, + read_timeout_sec_, read_timeout_usec_, + callback); } inline bool Client::is_ssl() const { return false; } inline std::shared_ptr<Response> Client::Get(const char *path) { - Progress dummy; - return Get(path, Headers(), dummy); + return Get(path, Headers(), Progress()); } inline std::shared_ptr<Response> Client::Get(const char *path, Progress progress) { - return Get(path, Headers(), progress); + return Get(path, Headers(), std::move(progress)); } inline std::shared_ptr<Response> Client::Get(const char *path, const Headers &headers) { - Progress dummy; - return Get(path, headers, dummy); + return Get(path, headers, Progress()); } inline std::shared_ptr<Response> @@ -3024,7 +4001,7 @@ Client::Get(const char *path, const Headers &headers, Progress progress) { req.method = "GET"; req.path = path; req.headers = headers; - req.progress = progress; + req.progress = std::move(progress); auto res = std::make_shared<Response>(); return send(req, *res) ? res : nullptr; @@ -3032,36 +4009,36 @@ Client::Get(const char *path, const Headers &headers, Progress progress) { inline std::shared_ptr<Response> Client::Get(const char *path, ContentReceiver content_receiver) { - Progress dummy; - return Get(path, Headers(), nullptr, content_receiver, dummy); + return Get(path, Headers(), nullptr, std::move(content_receiver), Progress()); } inline std::shared_ptr<Response> Client::Get(const char *path, ContentReceiver content_receiver, Progress progress) { - return Get(path, Headers(), nullptr, content_receiver, progress); + return Get(path, Headers(), nullptr, std::move(content_receiver), + std::move(progress)); } inline std::shared_ptr<Response> Client::Get(const char *path, const Headers &headers, ContentReceiver content_receiver) { - Progress dummy; - return Get(path, headers, nullptr, content_receiver, dummy); + return Get(path, headers, nullptr, std::move(content_receiver), Progress()); } inline std::shared_ptr<Response> Client::Get(const char *path, const Headers &headers, ContentReceiver content_receiver, Progress progress) { - return Get(path, headers, nullptr, content_receiver, progress); + return Get(path, headers, nullptr, std::move(content_receiver), + std::move(progress)); } inline std::shared_ptr<Response> Client::Get(const char *path, const Headers &headers, ResponseHandler response_handler, ContentReceiver content_receiver) { - Progress dummy; - return Get(path, headers, response_handler, content_receiver, dummy); + return Get(path, headers, std::move(response_handler), content_receiver, + Progress()); } inline std::shared_ptr<Response> Client::Get(const char *path, @@ -3073,9 +4050,9 @@ inline std::shared_ptr<Response> Client::Get(const char *path, req.method = "GET"; req.path = path; req.headers = headers; - req.response_handler = response_handler; - req.content_receiver = content_receiver; - req.progress = progress; + req.response_handler = std::move(response_handler); + req.content_receiver = std::move(content_receiver); + req.progress = std::move(progress); auto res = std::make_shared<Response>(); return send(req, *res) ? res : nullptr; @@ -3107,17 +4084,8 @@ inline std::shared_ptr<Response> Client::Post(const char *path, const Headers &headers, const std::string &body, const char *content_type) { - Request req; - req.method = "POST"; - req.headers = headers; - req.path = path; - - req.headers.emplace("Content-Type", content_type); - req.body = body; - - auto res = std::make_shared<Response>(); - - return send(req, *res) ? res : nullptr; + return send_with_content_provider("POST", path, headers, body, 0, nullptr, + content_type); } inline std::shared_ptr<Response> Client::Post(const char *path, @@ -3125,6 +4093,21 @@ inline std::shared_ptr<Response> Client::Post(const char *path, return Post(path, Headers(), params); } +inline std::shared_ptr<Response> Client::Post(const char *path, + size_t content_length, + ContentProvider content_provider, + const char *content_type) { + return Post(path, Headers(), content_length, content_provider, content_type); +} + +inline std::shared_ptr<Response> +Client::Post(const char *path, const Headers &headers, size_t content_length, + ContentProvider content_provider, const char *content_type) { + return send_with_content_provider("POST", path, headers, std::string(), + content_length, content_provider, + content_type); +} + inline std::shared_ptr<Response> Client::Post(const char *path, const Headers &headers, const Params ¶ms) { std::string query; @@ -3146,35 +4129,28 @@ Client::Post(const char *path, const MultipartFormDataItems &items) { inline std::shared_ptr<Response> Client::Post(const char *path, const Headers &headers, const MultipartFormDataItems &items) { - Request req; - req.method = "POST"; - req.headers = headers; - req.path = path; - auto boundary = detail::make_multipart_data_boundary(); - req.headers.emplace("Content-Type", - "multipart/form-data; boundary=" + boundary); + std::string body; for (const auto &item : items) { - req.body += "--" + boundary + "\r\n"; - req.body += "Content-Disposition: form-data; name=\"" + item.name + "\""; + body += "--" + boundary + "\r\n"; + body += "Content-Disposition: form-data; name=\"" + item.name + "\""; if (!item.filename.empty()) { - req.body += "; filename=\"" + item.filename + "\""; + body += "; filename=\"" + item.filename + "\""; } - req.body += "\r\n"; + body += "\r\n"; if (!item.content_type.empty()) { - req.body += "Content-Type: " + item.content_type + "\r\n"; + body += "Content-Type: " + item.content_type + "\r\n"; } - req.body += "\r\n"; - req.body += item.content + "\r\n"; + body += "\r\n"; + body += item.content + "\r\n"; } - req.body += "--" + boundary + "--\r\n"; + body += "--" + boundary + "--\r\n"; - auto res = std::make_shared<Response>(); - - return send(req, *res) ? res : nullptr; + std::string content_type = "multipart/form-data; boundary=" + boundary; + return Post(path, headers, body, content_type.c_str()); } inline std::shared_ptr<Response> Client::Put(const char *path, @@ -3187,17 +4163,41 @@ inline std::shared_ptr<Response> Client::Put(const char *path, const Headers &headers, const std::string &body, const char *content_type) { - Request req; - req.method = "PUT"; - req.headers = headers; - req.path = path; + return send_with_content_provider("PUT", path, headers, body, 0, nullptr, + content_type); +} - req.headers.emplace("Content-Type", content_type); - req.body = body; +inline std::shared_ptr<Response> Client::Put(const char *path, + size_t content_length, + ContentProvider content_provider, + const char *content_type) { + return Put(path, Headers(), content_length, content_provider, content_type); +} - auto res = std::make_shared<Response>(); +inline std::shared_ptr<Response> +Client::Put(const char *path, const Headers &headers, size_t content_length, + ContentProvider content_provider, const char *content_type) { + return send_with_content_provider("PUT", path, headers, std::string(), + content_length, content_provider, + content_type); +} - return send(req, *res) ? res : nullptr; +inline std::shared_ptr<Response> Client::Put(const char *path, + const Params ¶ms) { + return Put(path, Headers(), params); +} + +inline std::shared_ptr<Response> +Client::Put(const char *path, const Headers &headers, const Params ¶ms) { + std::string query; + for (auto it = params.begin(); it != params.end(); ++it) { + if (it != params.begin()) { query += "&"; } + query += it->first; + query += "="; + query += detail::encode_url(it->second); + } + + return Put(path, headers, query, "application/x-www-form-urlencoded"); } inline std::shared_ptr<Response> Client::Patch(const char *path, @@ -3210,17 +4210,23 @@ inline std::shared_ptr<Response> Client::Patch(const char *path, const Headers &headers, const std::string &body, const char *content_type) { - Request req; - req.method = "PATCH"; - req.headers = headers; - req.path = path; - - req.headers.emplace("Content-Type", content_type); - req.body = body; + return send_with_content_provider("PATCH", path, headers, body, 0, nullptr, + content_type); +} - auto res = std::make_shared<Response>(); +inline std::shared_ptr<Response> Client::Patch(const char *path, + size_t content_length, + ContentProvider content_provider, + const char *content_type) { + return Patch(path, Headers(), content_length, content_provider, content_type); +} - return send(req, *res) ? res : nullptr; +inline std::shared_ptr<Response> +Client::Patch(const char *path, const Headers &headers, size_t content_length, + ContentProvider content_provider, const char *content_type) { + return send_with_content_provider("PATCH", path, headers, std::string(), + content_length, content_provider, + content_type); } inline std::shared_ptr<Response> Client::Delete(const char *path) { @@ -3271,11 +4277,58 @@ inline std::shared_ptr<Response> Client::Options(const char *path, return send(req, *res) ? res : nullptr; } +inline void Client::set_timeout_sec(time_t timeout_sec) { + timeout_sec_ = timeout_sec; +} + +inline void Client::set_read_timeout(time_t sec, time_t usec) { + read_timeout_sec_ = sec; + read_timeout_usec_ = usec; +} + inline void Client::set_keep_alive_max_count(size_t count) { keep_alive_max_count_ = count; } -inline void Client::follow_location(bool on) { follow_location_ = on; } +inline void Client::set_basic_auth(const char *username, const char *password) { + basic_auth_username_ = username; + basic_auth_password_ = password; +} + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline void Client::set_digest_auth(const char *username, + const char *password) { + digest_auth_username_ = username; + digest_auth_password_ = password; +} +#endif + +inline void Client::set_follow_location(bool on) { follow_location_ = on; } + +inline void Client::set_compress(bool on) { compress_ = on; } + +inline void Client::set_interface(const char *intf) { interface_ = intf; } + +inline void Client::set_proxy(const char *host, int port) { + proxy_host_ = host; + proxy_port_ = port; +} + +inline void Client::set_proxy_basic_auth(const char *username, + const char *password) { + proxy_basic_auth_username_ = username; + proxy_basic_auth_password_ = password; +} + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline void Client::set_proxy_digest_auth(const char *username, + const char *password) { + proxy_digest_auth_username_ = username; + proxy_digest_auth_password_ = password; +} +#endif + +inline void Client::set_logger(Logger logger) { logger_ = std::move(logger); } /* * SSL Implementation @@ -3284,11 +4337,10 @@ inline void Client::follow_location(bool on) { follow_location_ = on; } namespace detail { template <typename U, typename V, typename T> -inline bool process_and_close_socket_ssl(bool is_client_request, socket_t sock, - size_t keep_alive_max_count, - SSL_CTX *ctx, std::mutex &ctx_mutex, - U SSL_connect_or_accept, V setup, - T callback) { +inline bool process_and_close_socket_ssl( + bool is_client_request, socket_t sock, size_t keep_alive_max_count, + time_t read_timeout_sec, time_t read_timeout_usec, SSL_CTX *ctx, + std::mutex &ctx_mutex, U SSL_connect_or_accept, V setup, T callback) { assert(keep_alive_max_count > 0); SSL *ssl = nullptr; @@ -3316,7 +4368,7 @@ inline bool process_and_close_socket_ssl(bool is_client_request, socket_t sock, return false; } - bool ret = false; + auto ret = false; if (SSL_connect_or_accept(ssl) == 1) { if (keep_alive_max_count > 1) { @@ -3325,7 +4377,7 @@ inline bool process_and_close_socket_ssl(bool is_client_request, socket_t sock, (is_client_request || detail::select_read(sock, CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND, CPPHTTPLIB_KEEPALIVE_TIMEOUT_USECOND) > 0)) { - SSLSocketStream strm(sock, ssl); + SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec); auto last_connection = count == 1; auto connection_close = false; @@ -3335,7 +4387,7 @@ inline bool process_and_close_socket_ssl(bool is_client_request, socket_t sock, count--; } } else { - SSLSocketStream strm(sock, ssl); + SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec); auto dummy_connection_close = false; ret = callback(ssl, strm, true, dummy_connection_close); } @@ -3382,11 +4434,20 @@ private: class SSLInit { public: SSLInit() { +#if OPENSSL_VERSION_NUMBER < 0x1010001fL SSL_load_error_strings(); SSL_library_init(); +#else + OPENSSL_init_ssl( + OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL); +#endif } - ~SSLInit() { ERR_free_strings(); } + ~SSLInit() { +#if OPENSSL_VERSION_NUMBER < 0x1010001fL + ERR_free_strings(); +#endif + } private: #if OPENSSL_VERSION_NUMBER < 0x10100000L @@ -3394,41 +4455,44 @@ private: #endif }; -static SSLInit sslinit_; - -} // namespace detail - // SSL socket stream implementation -inline SSLSocketStream::SSLSocketStream(socket_t sock, SSL *ssl) - : sock_(sock), ssl_(ssl) {} +inline SSLSocketStream::SSLSocketStream(socket_t sock, SSL *ssl, + time_t read_timeout_sec, + time_t read_timeout_usec) + : sock_(sock), ssl_(ssl), read_timeout_sec_(read_timeout_sec), + read_timeout_usec_(read_timeout_usec) {} inline SSLSocketStream::~SSLSocketStream() {} +inline bool SSLSocketStream::is_readable() const { + return detail::select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; +} + +inline bool SSLSocketStream::is_writable() const { + return detail::select_write(sock_, 0, 0) > 0; +} + inline int SSLSocketStream::read(char *ptr, size_t size) { if (SSL_pending(ssl_) > 0 || - detail::select_read(sock_, CPPHTTPLIB_READ_TIMEOUT_SECOND, - CPPHTTPLIB_READ_TIMEOUT_USECOND) > 0) { + select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0) { return SSL_read(ssl_, ptr, static_cast<int>(size)); } return -1; } inline int SSLSocketStream::write(const char *ptr, size_t size) { - return SSL_write(ssl_, ptr, static_cast<int>(size)); -} - -inline int SSLSocketStream::write(const char *ptr) { - return write(ptr, strlen(ptr)); -} - -inline int SSLSocketStream::write(const std::string &s) { - return write(s.data(), s.size()); + if (is_writable()) { return SSL_write(ssl_, ptr, static_cast<int>(size)); } + return -1; } inline std::string SSLSocketStream::get_remote_addr() const { return detail::get_remote_addr(sock_); } +static SSLInit sslinit_; + +} // namespace detail + // SSL HTTP server implementation inline SSLServer::SSLServer(const char *cert_path, const char *private_key_path, const char *client_ca_cert_file_path, @@ -3476,8 +4540,8 @@ inline bool SSLServer::is_valid() const { return ctx_; } inline bool SSLServer::process_and_close_socket(socket_t sock) { return detail::process_and_close_socket_ssl( - false, sock, keep_alive_max_count_, ctx_, ctx_mutex_, SSL_accept, - [](SSL * /*ssl*/) { return true; }, + false, sock, keep_alive_max_count_, read_timeout_sec_, read_timeout_usec_, + ctx_, ctx_mutex_, SSL_accept, [](SSL * /*ssl*/) { return true; }, [this](SSL *ssl, Stream &strm, bool last_connection, bool &connection_close) { return process_request(strm, last_connection, connection_close, @@ -3486,21 +4550,21 @@ inline bool SSLServer::process_and_close_socket(socket_t sock) { } // SSL HTTP client implementation -inline SSLClient::SSLClient(const char *host, int port, time_t timeout_sec, - const char *client_cert_path, - const char *client_key_path) - : Client(host, port, timeout_sec) { +inline SSLClient::SSLClient(const std::string &host, int port, + const std::string &client_cert_path, + const std::string &client_key_path) + : Client(host, port, client_cert_path, client_key_path) { ctx_ = SSL_CTX_new(SSLv23_client_method()); detail::split(&host_[0], &host_[host_.size()], '.', [&](const char *b, const char *e) { host_components_.emplace_back(std::string(b, e)); }); - if (client_cert_path && client_key_path) { - if (SSL_CTX_use_certificate_file(ctx_, client_cert_path, + if (!client_cert_path.empty() && !client_key_path.empty()) { + if (SSL_CTX_use_certificate_file(ctx_, client_cert_path.c_str(), SSL_FILETYPE_PEM) != 1 || - SSL_CTX_use_PrivateKey_file(ctx_, client_key_path, SSL_FILETYPE_PEM) != - 1) { + SSL_CTX_use_PrivateKey_file(ctx_, client_key_path.c_str(), + SSL_FILETYPE_PEM) != 1) { SSL_CTX_free(ctx_); ctx_ = nullptr; } @@ -3527,9 +4591,7 @@ inline long SSLClient::get_openssl_verify_result() const { return verify_result_; } -inline SSL_CTX* SSLClient::ssl_context() const noexcept { - return ctx_; -} +inline SSL_CTX *SSLClient::ssl_context() const noexcept { return ctx_; } inline bool SSLClient::process_and_close_socket( socket_t sock, size_t request_count, @@ -3541,7 +4603,8 @@ inline bool SSLClient::process_and_close_socket( return is_valid() && detail::process_and_close_socket_ssl( - true, sock, request_count, ctx_, ctx_mutex_, + true, sock, request_count, read_timeout_sec_, read_timeout_usec_, + ctx_, ctx_mutex_, [&](SSL *ssl) { if (ca_cert_file_path_.empty()) { SSL_CTX_set_verify(ctx_, SSL_VERIFY_NONE, nullptr); @@ -3712,6 +4775,8 @@ inline bool SSLClient::check_host_name(const char *pattern, } #endif +// ---------------------------------------------------------------------------- + } // namespace httplib #endif // CPPHTTPLIB_HTTPLIB_H diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp index 5005ba519..a58f24169 100644 --- a/src/audio_core/algorithm/interpolate.cpp +++ b/src/audio_core/algorithm/interpolate.cpp @@ -5,6 +5,7 @@ #define _USE_MATH_DEFINES #include <algorithm> +#include <climits> #include <cmath> #include <vector> #include "audio_core/algorithm/interpolate.h" @@ -13,13 +14,131 @@ namespace AudioCore { -/// The Lanczos kernel -static double Lanczos(std::size_t a, double x) { - if (x == 0.0) - return 1.0; - const double px = M_PI * x; - return a * std::sin(px) * std::sin(px / a) / (px * px); -} +constexpr std::array<s16, 512> curve_lut0 = { + 6600, 19426, 6722, 3, 6479, 19424, 6845, 9, 6359, 19419, 6968, 15, 6239, + 19412, 7093, 22, 6121, 19403, 7219, 28, 6004, 19391, 7345, 34, 5888, 19377, + 7472, 41, 5773, 19361, 7600, 48, 5659, 19342, 7728, 55, 5546, 19321, 7857, + 62, 5434, 19298, 7987, 69, 5323, 19273, 8118, 77, 5213, 19245, 8249, 84, + 5104, 19215, 8381, 92, 4997, 19183, 8513, 101, 4890, 19148, 8646, 109, 4785, + 19112, 8780, 118, 4681, 19073, 8914, 127, 4579, 19031, 9048, 137, 4477, 18988, + 9183, 147, 4377, 18942, 9318, 157, 4277, 18895, 9454, 168, 4179, 18845, 9590, + 179, 4083, 18793, 9726, 190, 3987, 18738, 9863, 202, 3893, 18682, 10000, 215, + 3800, 18624, 10137, 228, 3709, 18563, 10274, 241, 3618, 18500, 10411, 255, 3529, + 18436, 10549, 270, 3441, 18369, 10687, 285, 3355, 18300, 10824, 300, 3269, 18230, + 10962, 317, 3186, 18157, 11100, 334, 3103, 18082, 11238, 351, 3022, 18006, 11375, + 369, 2942, 17927, 11513, 388, 2863, 17847, 11650, 408, 2785, 17765, 11788, 428, + 2709, 17681, 11925, 449, 2635, 17595, 12062, 471, 2561, 17507, 12198, 494, 2489, + 17418, 12334, 517, 2418, 17327, 12470, 541, 2348, 17234, 12606, 566, 2280, 17140, + 12741, 592, 2213, 17044, 12876, 619, 2147, 16946, 13010, 647, 2083, 16846, 13144, + 675, 2020, 16745, 13277, 704, 1958, 16643, 13409, 735, 1897, 16539, 13541, 766, + 1838, 16434, 13673, 798, 1780, 16327, 13803, 832, 1723, 16218, 13933, 866, 1667, + 16109, 14062, 901, 1613, 15998, 14191, 937, 1560, 15885, 14318, 975, 1508, 15772, + 14445, 1013, 1457, 15657, 14571, 1052, 1407, 15540, 14695, 1093, 1359, 15423, 14819, + 1134, 1312, 15304, 14942, 1177, 1266, 15185, 15064, 1221, 1221, 15064, 15185, 1266, + 1177, 14942, 15304, 1312, 1134, 14819, 15423, 1359, 1093, 14695, 15540, 1407, 1052, + 14571, 15657, 1457, 1013, 14445, 15772, 1508, 975, 14318, 15885, 1560, 937, 14191, + 15998, 1613, 901, 14062, 16109, 1667, 866, 13933, 16218, 1723, 832, 13803, 16327, + 1780, 798, 13673, 16434, 1838, 766, 13541, 16539, 1897, 735, 13409, 16643, 1958, + 704, 13277, 16745, 2020, 675, 13144, 16846, 2083, 647, 13010, 16946, 2147, 619, + 12876, 17044, 2213, 592, 12741, 17140, 2280, 566, 12606, 17234, 2348, 541, 12470, + 17327, 2418, 517, 12334, 17418, 2489, 494, 12198, 17507, 2561, 471, 12062, 17595, + 2635, 449, 11925, 17681, 2709, 428, 11788, 17765, 2785, 408, 11650, 17847, 2863, + 388, 11513, 17927, 2942, 369, 11375, 18006, 3022, 351, 11238, 18082, 3103, 334, + 11100, 18157, 3186, 317, 10962, 18230, 3269, 300, 10824, 18300, 3355, 285, 10687, + 18369, 3441, 270, 10549, 18436, 3529, 255, 10411, 18500, 3618, 241, 10274, 18563, + 3709, 228, 10137, 18624, 3800, 215, 10000, 18682, 3893, 202, 9863, 18738, 3987, + 190, 9726, 18793, 4083, 179, 9590, 18845, 4179, 168, 9454, 18895, 4277, 157, + 9318, 18942, 4377, 147, 9183, 18988, 4477, 137, 9048, 19031, 4579, 127, 8914, + 19073, 4681, 118, 8780, 19112, 4785, 109, 8646, 19148, 4890, 101, 8513, 19183, + 4997, 92, 8381, 19215, 5104, 84, 8249, 19245, 5213, 77, 8118, 19273, 5323, + 69, 7987, 19298, 5434, 62, 7857, 19321, 5546, 55, 7728, 19342, 5659, 48, + 7600, 19361, 5773, 41, 7472, 19377, 5888, 34, 7345, 19391, 6004, 28, 7219, + 19403, 6121, 22, 7093, 19412, 6239, 15, 6968, 19419, 6359, 9, 6845, 19424, + 6479, 3, 6722, 19426, 6600}; + +constexpr std::array<s16, 512> curve_lut1 = { + -68, 32639, 69, -5, -200, 32630, 212, -15, -328, 32613, 359, -26, -450, + 32586, 512, -36, -568, 32551, 669, -47, -680, 32507, 832, -58, -788, 32454, + 1000, -69, -891, 32393, 1174, -80, -990, 32323, 1352, -92, -1084, 32244, 1536, + -103, -1173, 32157, 1724, -115, -1258, 32061, 1919, -128, -1338, 31956, 2118, -140, + -1414, 31844, 2322, -153, -1486, 31723, 2532, -167, -1554, 31593, 2747, -180, -1617, + 31456, 2967, -194, -1676, 31310, 3192, -209, -1732, 31157, 3422, -224, -1783, 30995, + 3657, -240, -1830, 30826, 3897, -256, -1874, 30649, 4143, -272, -1914, 30464, 4393, + -289, -1951, 30272, 4648, -307, -1984, 30072, 4908, -325, -2014, 29866, 5172, -343, + -2040, 29652, 5442, -362, -2063, 29431, 5716, -382, -2083, 29203, 5994, -403, -2100, + 28968, 6277, -424, -2114, 28727, 6565, -445, -2125, 28480, 6857, -468, -2133, 28226, + 7153, -490, -2139, 27966, 7453, -514, -2142, 27700, 7758, -538, -2142, 27428, 8066, + -563, -2141, 27151, 8378, -588, -2136, 26867, 8694, -614, -2130, 26579, 9013, -641, + -2121, 26285, 9336, -668, -2111, 25987, 9663, -696, -2098, 25683, 9993, -724, -2084, + 25375, 10326, -753, -2067, 25063, 10662, -783, -2049, 24746, 11000, -813, -2030, 24425, + 11342, -844, -2009, 24100, 11686, -875, -1986, 23771, 12033, -907, -1962, 23438, 12382, + -939, -1937, 23103, 12733, -972, -1911, 22764, 13086, -1005, -1883, 22422, 13441, -1039, + -1855, 22077, 13798, -1072, -1825, 21729, 14156, -1107, -1795, 21380, 14516, -1141, -1764, + 21027, 14877, -1176, -1732, 20673, 15239, -1211, -1700, 20317, 15602, -1246, -1667, 19959, + 15965, -1282, -1633, 19600, 16329, -1317, -1599, 19239, 16694, -1353, -1564, 18878, 17058, + -1388, -1530, 18515, 17423, -1424, -1495, 18151, 17787, -1459, -1459, 17787, 18151, -1495, + -1424, 17423, 18515, -1530, -1388, 17058, 18878, -1564, -1353, 16694, 19239, -1599, -1317, + 16329, 19600, -1633, -1282, 15965, 19959, -1667, -1246, 15602, 20317, -1700, -1211, 15239, + 20673, -1732, -1176, 14877, 21027, -1764, -1141, 14516, 21380, -1795, -1107, 14156, 21729, + -1825, -1072, 13798, 22077, -1855, -1039, 13441, 22422, -1883, -1005, 13086, 22764, -1911, + -972, 12733, 23103, -1937, -939, 12382, 23438, -1962, -907, 12033, 23771, -1986, -875, + 11686, 24100, -2009, -844, 11342, 24425, -2030, -813, 11000, 24746, -2049, -783, 10662, + 25063, -2067, -753, 10326, 25375, -2084, -724, 9993, 25683, -2098, -696, 9663, 25987, + -2111, -668, 9336, 26285, -2121, -641, 9013, 26579, -2130, -614, 8694, 26867, -2136, + -588, 8378, 27151, -2141, -563, 8066, 27428, -2142, -538, 7758, 27700, -2142, -514, + 7453, 27966, -2139, -490, 7153, 28226, -2133, -468, 6857, 28480, -2125, -445, 6565, + 28727, -2114, -424, 6277, 28968, -2100, -403, 5994, 29203, -2083, -382, 5716, 29431, + -2063, -362, 5442, 29652, -2040, -343, 5172, 29866, -2014, -325, 4908, 30072, -1984, + -307, 4648, 30272, -1951, -289, 4393, 30464, -1914, -272, 4143, 30649, -1874, -256, + 3897, 30826, -1830, -240, 3657, 30995, -1783, -224, 3422, 31157, -1732, -209, 3192, + 31310, -1676, -194, 2967, 31456, -1617, -180, 2747, 31593, -1554, -167, 2532, 31723, + -1486, -153, 2322, 31844, -1414, -140, 2118, 31956, -1338, -128, 1919, 32061, -1258, + -115, 1724, 32157, -1173, -103, 1536, 32244, -1084, -92, 1352, 32323, -990, -80, + 1174, 32393, -891, -69, 1000, 32454, -788, -58, 832, 32507, -680, -47, 669, + 32551, -568, -36, 512, 32586, -450, -26, 359, 32613, -328, -15, 212, 32630, + -200, -5, 69, 32639, -68}; + +constexpr std::array<s16, 512> curve_lut2 = { + 3195, 26287, 3329, -32, 3064, 26281, 3467, -34, 2936, 26270, 3608, -38, 2811, + 26253, 3751, -42, 2688, 26230, 3897, -46, 2568, 26202, 4046, -50, 2451, 26169, + 4199, -54, 2338, 26130, 4354, -58, 2227, 26085, 4512, -63, 2120, 26035, 4673, + -67, 2015, 25980, 4837, -72, 1912, 25919, 5004, -76, 1813, 25852, 5174, -81, + 1716, 25780, 5347, -87, 1622, 25704, 5522, -92, 1531, 25621, 5701, -98, 1442, + 25533, 5882, -103, 1357, 25440, 6066, -109, 1274, 25342, 6253, -115, 1193, 25239, + 6442, -121, 1115, 25131, 6635, -127, 1040, 25018, 6830, -133, 967, 24899, 7027, + -140, 897, 24776, 7227, -146, 829, 24648, 7430, -153, 764, 24516, 7635, -159, + 701, 24379, 7842, -166, 641, 24237, 8052, -174, 583, 24091, 8264, -181, 526, + 23940, 8478, -187, 472, 23785, 8695, -194, 420, 23626, 8914, -202, 371, 23462, + 9135, -209, 324, 23295, 9358, -215, 279, 23123, 9583, -222, 236, 22948, 9809, + -230, 194, 22769, 10038, -237, 154, 22586, 10269, -243, 117, 22399, 10501, -250, + 81, 22208, 10735, -258, 47, 22015, 10970, -265, 15, 21818, 11206, -271, -16, + 21618, 11444, -277, -44, 21415, 11684, -283, -71, 21208, 11924, -290, -97, 20999, + 12166, -296, -121, 20786, 12409, -302, -143, 20571, 12653, -306, -163, 20354, 12898, + -311, -183, 20134, 13143, -316, -201, 19911, 13389, -321, -218, 19686, 13635, -325, + -234, 19459, 13882, -328, -248, 19230, 14130, -332, -261, 18998, 14377, -335, -273, + 18765, 14625, -337, -284, 18531, 14873, -339, -294, 18295, 15121, -341, -302, 18057, + 15369, -341, -310, 17817, 15617, -341, -317, 17577, 15864, -340, -323, 17335, 16111, + -340, -328, 17092, 16357, -338, -332, 16848, 16603, -336, -336, 16603, 16848, -332, + -338, 16357, 17092, -328, -340, 16111, 17335, -323, -340, 15864, 17577, -317, -341, + 15617, 17817, -310, -341, 15369, 18057, -302, -341, 15121, 18295, -294, -339, 14873, + 18531, -284, -337, 14625, 18765, -273, -335, 14377, 18998, -261, -332, 14130, 19230, + -248, -328, 13882, 19459, -234, -325, 13635, 19686, -218, -321, 13389, 19911, -201, + -316, 13143, 20134, -183, -311, 12898, 20354, -163, -306, 12653, 20571, -143, -302, + 12409, 20786, -121, -296, 12166, 20999, -97, -290, 11924, 21208, -71, -283, 11684, + 21415, -44, -277, 11444, 21618, -16, -271, 11206, 21818, 15, -265, 10970, 22015, + 47, -258, 10735, 22208, 81, -250, 10501, 22399, 117, -243, 10269, 22586, 154, + -237, 10038, 22769, 194, -230, 9809, 22948, 236, -222, 9583, 23123, 279, -215, + 9358, 23295, 324, -209, 9135, 23462, 371, -202, 8914, 23626, 420, -194, 8695, + 23785, 472, -187, 8478, 23940, 526, -181, 8264, 24091, 583, -174, 8052, 24237, + 641, -166, 7842, 24379, 701, -159, 7635, 24516, 764, -153, 7430, 24648, 829, + -146, 7227, 24776, 897, -140, 7027, 24899, 967, -133, 6830, 25018, 1040, -127, + 6635, 25131, 1115, -121, 6442, 25239, 1193, -115, 6253, 25342, 1274, -109, 6066, + 25440, 1357, -103, 5882, 25533, 1442, -98, 5701, 25621, 1531, -92, 5522, 25704, + 1622, -87, 5347, 25780, 1716, -81, 5174, 25852, 1813, -76, 5004, 25919, 1912, + -72, 4837, 25980, 2015, -67, 4673, 26035, 2120, -63, 4512, 26085, 2227, -58, + 4354, 26130, 2338, -54, 4199, 26169, 2451, -50, 4046, 26202, 2568, -46, 3897, + 26230, 2688, -42, 3751, 26253, 2811, -38, 3608, 26270, 2936, -34, 3467, 26281, + 3064, -32, 3329, 26287, 3195}; std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, double ratio) { if (input.size() < 2) @@ -30,40 +149,39 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, ratio = 1.0; } - if (ratio != state.current_ratio) { - const double cutoff_frequency = std::min(0.5 / ratio, 0.5 * ratio); - state.nyquist = CascadingFilter::LowPass(std::clamp(cutoff_frequency, 0.0, 0.4), 3); - state.current_ratio = ratio; - } - state.nyquist.Process(input); - - constexpr std::size_t taps = InterpolationState::lanczos_taps; - const std::size_t num_frames = input.size() / 2; - - std::vector<s16> output; - output.reserve(static_cast<std::size_t>(input.size() / ratio + 4)); - - double& pos = state.position; - auto& h = state.history; - for (std::size_t i = 0; i < num_frames; ++i) { - std::rotate(h.begin(), h.end() - 1, h.end()); - h[0][0] = input[i * 2 + 0]; - h[0][1] = input[i * 2 + 1]; - - while (pos <= 1.0) { - double l = 0.0; - double r = 0.0; - for (std::size_t j = 0; j < h.size(); j++) { - const double lanczos_calc = Lanczos(taps, pos + j - taps + 1); - l += lanczos_calc * h[j][0]; - r += lanczos_calc * h[j][1]; - } - output.emplace_back(static_cast<s16>(std::clamp(l, -32768.0, 32767.0))); - output.emplace_back(static_cast<s16>(std::clamp(r, -32768.0, 32767.0))); - - pos += ratio; + const int step = static_cast<int>(ratio * 0x8000); + const std::array<s16, 512>& lut = [step] { + if (step > 0xaaaa) { + return curve_lut0; + } + if (step <= 0x8000) { + return curve_lut1; } - pos -= 1.0; + return curve_lut2; + }(); + + std::vector<s16> output(static_cast<std::size_t>(input.size() / ratio)); + int in_offset = 0; + for (std::size_t out_offset = 0; out_offset < output.size(); out_offset += 2) { + const int lut_index = (state.fraction >> 8) * 4; + + const int l = input[(in_offset + 0) * 2 + 0] * lut[lut_index + 0] + + input[(in_offset + 1) * 2 + 0] * lut[lut_index + 1] + + input[(in_offset + 2) * 2 + 0] * lut[lut_index + 2] + + input[(in_offset + 3) * 2 + 0] * lut[lut_index + 3]; + + const int r = input[(in_offset + 0) * 2 + 1] * lut[lut_index + 0] + + input[(in_offset + 1) * 2 + 1] * lut[lut_index + 1] + + input[(in_offset + 2) * 2 + 1] * lut[lut_index + 2] + + input[(in_offset + 3) * 2 + 1] * lut[lut_index + 3]; + + const int new_offset = state.fraction + step; + + in_offset += new_offset >> 15; + state.fraction = new_offset & 0x7fff; + + output[out_offset + 0] = static_cast<s16>(std::clamp(l >> 15, SHRT_MIN, SHRT_MAX)); + output[out_offset + 1] = static_cast<s16>(std::clamp(r >> 15, SHRT_MIN, SHRT_MAX)); } return output; diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h index edbd6460f..1b9831a75 100644 --- a/src/audio_core/algorithm/interpolate.h +++ b/src/audio_core/algorithm/interpolate.h @@ -6,19 +6,12 @@ #include <array> #include <vector> -#include "audio_core/algorithm/filter.h" #include "common/common_types.h" namespace AudioCore { struct InterpolationState { - static constexpr std::size_t lanczos_taps = 4; - static constexpr std::size_t history_size = lanczos_taps * 2 - 1; - - double current_ratio = 0.0; - CascadingFilter nyquist; - std::array<std::array<s16, 2>, history_size> history = {}; - double position = 0; + int fraction = 0; }; /// Interpolates input signal to produce output signal. diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 26612e692..88c06b2ce 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -187,6 +187,8 @@ add_library(core STATIC hle/kernel/synchronization.h hle/kernel/thread.cpp hle/kernel/thread.h + hle/kernel/time_manager.cpp + hle/kernel/time_manager.h hle/kernel/transfer_memory.cpp hle/kernel/transfer_memory.h hle/kernel/vm_manager.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index 0eb0c0dca..86e314c94 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -707,4 +707,12 @@ const Service::SM::ServiceManager& System::ServiceManager() const { return *impl->service_manager; } +void System::RegisterCoreThread(std::size_t id) { + impl->kernel.RegisterCoreThread(id); +} + +void System::RegisterHostThread() { + impl->kernel.RegisterHostThread(); +} + } // namespace Core diff --git a/src/core/core.h b/src/core/core.h index e69d68fcf..8d862a8e6 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -360,6 +360,12 @@ public: const CurrentBuildProcessID& GetCurrentProcessBuildID() const; + /// Register a host thread as an emulated CPU Core. + void RegisterCoreThread(std::size_t id); + + /// Register a host thread as an auxiliary thread. + void RegisterHostThread(); + private: System(); diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index d6d2cf3f0..2dc795d56 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -27,9 +27,9 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) { // so just calculate them both even if the other isn't showing. FramebufferLayout res{width, height}; - const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / - ScreenUndocked::Width}; - const auto window_aspect_ratio = static_cast<float>(height) / width; + const float window_aspect_ratio = static_cast<float>(height) / width; + const float emulation_aspect_ratio = EmulationAspectRatio( + static_cast<AspectRatio>(Settings::values.aspect_ratio), window_aspect_ratio); const Common::Rectangle<u32> screen_window_area{0, 0, width, height}; Common::Rectangle<u32> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio); @@ -58,4 +58,19 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) { return DefaultFrameLayout(width, height); } +float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio) { + switch (aspect) { + case AspectRatio::Default: + return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width; + case AspectRatio::R4_3: + return 3.0f / 4.0f; + case AspectRatio::R21_9: + return 9.0f / 21.0f; + case AspectRatio::StretchToWindow: + return window_aspect_ratio; + default: + return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width; + } +} + } // namespace Layout diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index d2370adde..1d39c1faf 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -18,6 +18,13 @@ enum ScreenDocked : u32 { HeightDocked = 1080, }; +enum class AspectRatio { + Default, + R4_3, + R21_9, + StretchToWindow, +}; + /// Describes the layout of the window framebuffer struct FramebufferLayout { u32 width{ScreenUndocked::Width}; @@ -48,4 +55,12 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height); */ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale); +/** + * Convenience method to determine emulation aspect ratio + * @param aspect Represents the index of aspect ratio stored in Settings::values.aspect_ratio + * @param window_aspect_ratio Current window aspect ratio + * @return Emulation render window aspect ratio + */ +float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio); + } // namespace Layout diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h index 213461b6a..b04e046ed 100644 --- a/src/core/hardware_properties.h +++ b/src/core/hardware_properties.h @@ -20,6 +20,8 @@ constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores } // namespace Hardware +constexpr u32 INVALID_HOST_THREAD_ID = 0xFFFFFFFF; + struct EmuThreadHandle { u32 host_handle; u32 guest_handle; diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 4eb1d8703..9232f4d7e 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -3,9 +3,12 @@ // Refer to the license.txt file included. #include <atomic> +#include <bitset> #include <functional> #include <memory> #include <mutex> +#include <thread> +#include <unordered_map> #include <utility> #include "common/assert.h" @@ -15,6 +18,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/hardware_properties.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" @@ -25,6 +29,7 @@ #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" #include "core/hle/lock.h" #include "core/hle/result.h" #include "core/memory.h" @@ -44,7 +49,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ std::lock_guard lock{HLE::g_hle_lock}; std::shared_ptr<Thread> thread = - system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle); + system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); if (thread == nullptr) { LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle); return; @@ -97,8 +102,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ } struct KernelCore::Impl { - explicit Impl(Core::System& system) - : system{system}, global_scheduler{system}, synchronization{system} {} + explicit Impl(Core::System& system, KernelCore& kernel) + : system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {} void Initialize(KernelCore& kernel) { Shutdown(); @@ -120,7 +125,7 @@ struct KernelCore::Impl { system_resource_limit = nullptr; - thread_wakeup_callback_handle_table.Clear(); + global_handle_table.Clear(); thread_wakeup_event_type = nullptr; preemption_event = nullptr; @@ -138,8 +143,8 @@ struct KernelCore::Impl { void InitializePhysicalCores() { exclusive_monitor = - Core::MakeExclusiveMonitor(system.Memory(), global_scheduler.CpuCoresCount()); - for (std::size_t i = 0; i < global_scheduler.CpuCoresCount(); i++) { + Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES); + for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { cores.emplace_back(system, i, *exclusive_monitor); } } @@ -184,6 +189,50 @@ struct KernelCore::Impl { system.Memory().SetCurrentPageTable(*process); } + void RegisterCoreThread(std::size_t core_id) { + std::unique_lock lock{register_thread_mutex}; + const std::thread::id this_id = std::this_thread::get_id(); + const auto it = host_thread_ids.find(this_id); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + ASSERT(it == host_thread_ids.end()); + ASSERT(!registered_core_threads[core_id]); + host_thread_ids[this_id] = static_cast<u32>(core_id); + registered_core_threads.set(core_id); + } + + void RegisterHostThread() { + std::unique_lock lock{register_thread_mutex}; + const std::thread::id this_id = std::this_thread::get_id(); + const auto it = host_thread_ids.find(this_id); + ASSERT(it == host_thread_ids.end()); + host_thread_ids[this_id] = registered_thread_ids++; + } + + u32 GetCurrentHostThreadID() const { + const std::thread::id this_id = std::this_thread::get_id(); + const auto it = host_thread_ids.find(this_id); + if (it == host_thread_ids.end()) { + return Core::INVALID_HOST_THREAD_ID; + } + return it->second; + } + + Core::EmuThreadHandle GetCurrentEmuThreadID() const { + Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle(); + result.host_handle = GetCurrentHostThreadID(); + if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) { + return result; + } + const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler(); + const Kernel::Thread* current = sched.GetCurrentThread(); + if (current != nullptr) { + result.guest_handle = current->GetGlobalHandle(); + } else { + result.guest_handle = InvalidHandle; + } + return result; + } + std::atomic<u32> next_object_id{0}; std::atomic<u64> next_kernel_process_id{Process::InitialKIPIDMin}; std::atomic<u64> next_user_process_id{Process::ProcessIDMin}; @@ -194,15 +243,16 @@ struct KernelCore::Impl { Process* current_process = nullptr; Kernel::GlobalScheduler global_scheduler; Kernel::Synchronization synchronization; + Kernel::TimeManager time_manager; std::shared_ptr<ResourceLimit> system_resource_limit; std::shared_ptr<Core::Timing::EventType> thread_wakeup_event_type; std::shared_ptr<Core::Timing::EventType> preemption_event; - // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, - // allowing us to simply use a pool index or similar. - Kernel::HandleTable thread_wakeup_callback_handle_table; + // This is the kernel's handle table or supervisor handle table which + // stores all the objects in place. + Kernel::HandleTable global_handle_table; /// Map of named ports managed by the kernel, which can be retrieved using /// the ConnectToPort SVC. @@ -211,11 +261,17 @@ struct KernelCore::Impl { std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; std::vector<Kernel::PhysicalCore> cores; + // 0-3 IDs represent core threads, >3 represent others + std::unordered_map<std::thread::id, u32> host_thread_ids; + u32 registered_thread_ids{Core::Hardware::NUM_CPU_CORES}; + std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads; + std::mutex register_thread_mutex; + // System context Core::System& system; }; -KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {} +KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system, *this)} {} KernelCore::~KernelCore() { Shutdown(); } @@ -232,9 +288,8 @@ std::shared_ptr<ResourceLimit> KernelCore::GetSystemResourceLimit() const { return impl->system_resource_limit; } -std::shared_ptr<Thread> KernelCore::RetrieveThreadFromWakeupCallbackHandleTable( - Handle handle) const { - return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle); +std::shared_ptr<Thread> KernelCore::RetrieveThreadFromGlobalHandleTable(Handle handle) const { + return impl->global_handle_table.Get<Thread>(handle); } void KernelCore::AppendNewProcess(std::shared_ptr<Process> process) { @@ -265,6 +320,14 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { return impl->global_scheduler; } +Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) { + return impl->cores[id].Scheduler(); +} + +const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const { + return impl->cores[id].Scheduler(); +} + Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) { return impl->cores[id]; } @@ -281,6 +344,14 @@ const Kernel::Synchronization& KernelCore::Synchronization() const { return impl->synchronization; } +Kernel::TimeManager& KernelCore::TimeManager() { + return impl->time_manager; +} + +const Kernel::TimeManager& KernelCore::TimeManager() const { + return impl->time_manager; +} + Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() { return *impl->exclusive_monitor; } @@ -338,12 +409,28 @@ const std::shared_ptr<Core::Timing::EventType>& KernelCore::ThreadWakeupCallback return impl->thread_wakeup_event_type; } -Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() { - return impl->thread_wakeup_callback_handle_table; +Kernel::HandleTable& KernelCore::GlobalHandleTable() { + return impl->global_handle_table; +} + +const Kernel::HandleTable& KernelCore::GlobalHandleTable() const { + return impl->global_handle_table; +} + +void KernelCore::RegisterCoreThread(std::size_t core_id) { + impl->RegisterCoreThread(core_id); +} + +void KernelCore::RegisterHostThread() { + impl->RegisterHostThread(); +} + +u32 KernelCore::GetCurrentHostThreadID() const { + return impl->GetCurrentHostThreadID(); } -const Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() const { - return impl->thread_wakeup_callback_handle_table; +Core::EmuThreadHandle KernelCore::GetCurrentEmuThreadID() const { + return impl->GetCurrentEmuThreadID(); } } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 1eede3063..c4f78ab71 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -11,6 +11,7 @@ #include "core/hle/kernel/object.h" namespace Core { +struct EmuThreadHandle; class ExclusiveMonitor; class System; } // namespace Core @@ -29,8 +30,10 @@ class HandleTable; class PhysicalCore; class Process; class ResourceLimit; +class Scheduler; class Synchronization; class Thread; +class TimeManager; /// Represents a single instance of the kernel. class KernelCore { @@ -64,7 +67,7 @@ public: std::shared_ptr<ResourceLimit> GetSystemResourceLimit() const; /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table. - std::shared_ptr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const; + std::shared_ptr<Thread> RetrieveThreadFromGlobalHandleTable(Handle handle) const; /// Adds the given shared pointer to an internal list of active processes. void AppendNewProcess(std::shared_ptr<Process> process); @@ -87,6 +90,12 @@ public: /// Gets the sole instance of the global scheduler const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Gets the sole instance of the Scheduler assoviated with cpu core 'id' + Kernel::Scheduler& Scheduler(std::size_t id); + + /// Gets the sole instance of the Scheduler assoviated with cpu core 'id' + const Kernel::Scheduler& Scheduler(std::size_t id) const; + /// Gets the an instance of the respective physical CPU core. Kernel::PhysicalCore& PhysicalCore(std::size_t id); @@ -99,6 +108,12 @@ public: /// Gets the an instance of the Synchronization Interface. const Kernel::Synchronization& Synchronization() const; + /// Gets the an instance of the TimeManager Interface. + Kernel::TimeManager& TimeManager(); + + /// Gets the an instance of the TimeManager Interface. + const Kernel::TimeManager& TimeManager() const; + /// Stops execution of 'id' core, in order to reschedule a new thread. void PrepareReschedule(std::size_t id); @@ -120,6 +135,18 @@ public: /// Determines whether or not the given port is a valid named port. bool IsValidNamedPort(NamedPortTable::const_iterator port) const; + /// Gets the current host_thread/guest_thread handle. + Core::EmuThreadHandle GetCurrentEmuThreadID() const; + + /// Gets the current host_thread handle. + u32 GetCurrentHostThreadID() const; + + /// Register the current thread as a CPU Core Thread. + void RegisterCoreThread(std::size_t core_id); + + /// Register the current thread as a non CPU core thread. + void RegisterHostThread(); + private: friend class Object; friend class Process; @@ -140,11 +167,11 @@ private: /// Retrieves the event type used for thread wakeup callbacks. const std::shared_ptr<Core::Timing::EventType>& ThreadWakeupCallbackEventType() const; - /// Provides a reference to the thread wakeup callback handle table. - Kernel::HandleTable& ThreadWakeupCallbackHandleTable(); + /// Provides a reference to the global handle table. + Kernel::HandleTable& GlobalHandleTable(); - /// Provides a const reference to the thread wakeup callback handle table. - const Kernel::HandleTable& ThreadWakeupCallbackHandleTable() const; + /// Provides a const reference to the global handle table. + const Kernel::HandleTable& GlobalHandleTable() const; struct Impl; std::unique_ptr<Impl> impl; diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 86f1421bf..c65f82fb7 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -18,10 +18,11 @@ #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/time_manager.h" namespace Kernel { -GlobalScheduler::GlobalScheduler(Core::System& system) : system{system} {} +GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {} GlobalScheduler::~GlobalScheduler() = default; @@ -35,7 +36,7 @@ void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) { } void GlobalScheduler::UnloadThread(std::size_t core) { - Scheduler& sched = system.Scheduler(core); + Scheduler& sched = kernel.Scheduler(core); sched.UnloadThread(); } @@ -50,7 +51,7 @@ void GlobalScheduler::SelectThread(std::size_t core) { sched.is_context_switch_pending = sched.selected_thread != sched.current_thread; std::atomic_thread_fence(std::memory_order_seq_cst); }; - Scheduler& sched = system.Scheduler(core); + Scheduler& sched = kernel.Scheduler(core); Thread* current_thread = nullptr; // Step 1: Get top thread in schedule queue. current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); @@ -356,6 +357,32 @@ void GlobalScheduler::Shutdown() { thread_list.clear(); } +void GlobalScheduler::Lock() { + Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID(); + if (current_thread == current_owner) { + ++scope_lock; + } else { + inner_lock.lock(); + current_owner = current_thread; + ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle()); + scope_lock = 1; + } +} + +void GlobalScheduler::Unlock() { + if (--scope_lock != 0) { + ASSERT(scope_lock > 0); + return; + } + for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { + SelectThread(i); + } + current_owner = Core::EmuThreadHandle::InvalidHandle(); + scope_lock = 1; + inner_lock.unlock(); + // TODO(Blinkhawk): Setup the interrupts and change context on current core. +} + Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id) : system(system), cpu_core(cpu_core), core_id(core_id) {} @@ -485,4 +512,27 @@ void Scheduler::Shutdown() { selected_thread = nullptr; } +SchedulerLock::SchedulerLock(KernelCore& kernel) : kernel{kernel} { + kernel.GlobalScheduler().Lock(); +} + +SchedulerLock::~SchedulerLock() { + kernel.GlobalScheduler().Unlock(); +} + +SchedulerLockAndSleep::SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle, + Thread* time_task, s64 nanoseconds) + : SchedulerLock{kernel}, event_handle{event_handle}, time_task{time_task}, nanoseconds{ + nanoseconds} { + event_handle = InvalidHandle; +} + +SchedulerLockAndSleep::~SchedulerLockAndSleep() { + if (sleep_cancelled) { + return; + } + auto& time_manager = kernel.TimeManager(); + time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds); +} + } // namespace Kernel diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 96db049cb..1c93a838c 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -6,6 +6,7 @@ #include <atomic> #include <memory> +#include <mutex> #include <vector> #include "common/common_types.h" @@ -20,11 +21,13 @@ class System; namespace Kernel { +class KernelCore; class Process; +class SchedulerLock; class GlobalScheduler final { public: - explicit GlobalScheduler(Core::System& system); + explicit GlobalScheduler(KernelCore& kernel); ~GlobalScheduler(); /// Adds a new thread to the scheduler @@ -138,6 +141,14 @@ public: void Shutdown(); private: + friend class SchedulerLock; + + /// Lock the scheduler to the current thread. + void Lock(); + + /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling + /// and reschedules current core if needed. + void Unlock(); /** * Transfers a thread into an specific core. If the destination_core is -1 * it will be unscheduled from its source code and added into its suggested @@ -158,9 +169,14 @@ private: // ordered from Core 0 to Core 3. std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62}; + /// Scheduler lock mechanisms. + std::mutex inner_lock{}; // TODO(Blinkhawk): Replace for a SpinLock + std::atomic<s64> scope_lock{}; + Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()}; + /// Lists all thread ids that aren't deleted/etc. std::vector<std::shared_ptr<Thread>> thread_list; - Core::System& system; + KernelCore& kernel; }; class Scheduler final { @@ -227,4 +243,30 @@ private: bool is_context_switch_pending = false; }; +class SchedulerLock { +public: + explicit SchedulerLock(KernelCore& kernel); + ~SchedulerLock(); + +protected: + KernelCore& kernel; +}; + +class SchedulerLockAndSleep : public SchedulerLock { +public: + explicit SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle, Thread* time_task, + s64 nanoseconds); + ~SchedulerLockAndSleep(); + + void CancelSleep() { + sleep_cancelled = true; + } + +private: + Handle& event_handle; + Thread* time_task; + s64 nanoseconds; + bool sleep_cancelled{}; +}; + } // namespace Kernel diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index ae5f2c8bd..bf850e0b2 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -46,9 +46,9 @@ Thread::~Thread() = default; void Thread::Stop() { // Cancel any outstanding wakeup events for this thread Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - callback_handle); - kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); - callback_handle = 0; + global_handle); + kernel.GlobalHandleTable().Close(global_handle); + global_handle = 0; SetStatus(ThreadStatus::Dead); Signal(); @@ -73,12 +73,12 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { // thread-safe version of ScheduleEvent. const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); Core::System::GetInstance().CoreTiming().ScheduleEvent( - cycles, kernel.ThreadWakeupCallbackEventType(), callback_handle); + cycles, kernel.ThreadWakeupCallbackEventType(), global_handle); } void Thread::CancelWakeupTimer() { Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - callback_handle); + global_handle); } void Thread::ResumeFromWait() { @@ -190,7 +190,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin thread->condvar_wait_address = 0; thread->wait_handle = 0; thread->name = std::move(name); - thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); + thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap(); thread->owner_process = &owner_process; auto& scheduler = kernel.GlobalScheduler(); scheduler.AddThread(thread); diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 7a4916318..129e7858a 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -453,6 +453,10 @@ public: is_sync_cancelled = value; } + Handle GetGlobalHandle() const { + return global_handle; + } + private: void SetSchedulingStatus(ThreadSchedStatus new_status); void SetCurrentPriority(u32 new_priority); @@ -514,7 +518,7 @@ private: VAddr arb_wait_address{0}; /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. - Handle callback_handle = 0; + Handle global_handle = 0; /// Callback that will be invoked when the thread is resumed from a waiting state. If the thread /// was waiting via WaitSynchronization then the object will be the last object that became diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp new file mode 100644 index 000000000..21b290468 --- /dev/null +++ b/src/core/hle/kernel/time_manager.cpp @@ -0,0 +1,44 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/core_timing_util.h" +#include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" + +namespace Kernel { + +TimeManager::TimeManager(Core::System& system) : system{system} { + time_manager_event_type = Core::Timing::CreateEvent( + "Kernel::TimeManagerCallback", [this](u64 thread_handle, [[maybe_unused]] s64 cycles_late) { + Handle proper_handle = static_cast<Handle>(thread_handle); + std::shared_ptr<Thread> thread = + this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); + thread->ResumeFromWait(); + }); +} + +void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds) { + if (nanoseconds > 0) { + ASSERT(timetask); + event_handle = timetask->GetGlobalHandle(); + const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); + system.CoreTiming().ScheduleEvent(cycles, time_manager_event_type, event_handle); + } else { + event_handle = InvalidHandle; + } +} + +void TimeManager::UnscheduleTimeEvent(Handle event_handle) { + if (event_handle == InvalidHandle) { + return; + } + system.CoreTiming().UnscheduleEvent(time_manager_event_type, event_handle); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/time_manager.h b/src/core/hle/kernel/time_manager.h new file mode 100644 index 000000000..eaec486d1 --- /dev/null +++ b/src/core/hle/kernel/time_manager.h @@ -0,0 +1,43 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> + +#include "core/hle/kernel/object.h" + +namespace Core { +class System; +} // namespace Core + +namespace Core::Timing { +struct EventType; +} // namespace Core::Timing + +namespace Kernel { + +class Thread; + +/** + * The `TimeManager` takes care of scheduling time events on threads and executes their TimeUp + * method when the event is triggered. + */ +class TimeManager { +public: + explicit TimeManager(Core::System& system); + + /// Schedule a time event on `timetask` thread that will expire in 'nanoseconds' + /// returns a non-invalid handle in `event_handle` if correctly scheduled + void ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds); + + /// Unschedule an existing time event + void UnscheduleTimeEvent(Handle event_handle); + +private: + Core::System& system; + std::shared_ptr<Core::Timing::EventType> time_manager_event_type; +}; + +} // namespace Kernel diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp index 67e39a5c4..f589864ee 100644 --- a/src/core/hle/service/bcat/backend/boxcat.cpp +++ b/src/core/hle/service/bcat/backend/boxcat.cpp @@ -200,7 +200,8 @@ private: DownloadResult DownloadInternal(const std::string& resolved_path, u32 timeout_seconds, const std::string& content_type_name) { if (client == nullptr) { - client = std::make_unique<httplib::SSLClient>(BOXCAT_HOSTNAME, PORT, timeout_seconds); + client = std::make_unique<httplib::SSLClient>(BOXCAT_HOSTNAME, PORT); + client->set_timeout_sec(timeout_seconds); } httplib::Headers headers{ @@ -448,8 +449,8 @@ std::optional<std::vector<u8>> Boxcat::GetLaunchParameter(TitleIDVersion title) Boxcat::StatusResult Boxcat::GetStatus(std::optional<std::string>& global, std::map<std::string, EventStatus>& games) { - httplib::SSLClient client{BOXCAT_HOSTNAME, static_cast<int>(PORT), - static_cast<int>(TIMEOUT_SECONDS)}; + httplib::SSLClient client{BOXCAT_HOSTNAME, static_cast<int>(PORT)}; + client.set_timeout_sec(static_cast<int>(TIMEOUT_SECONDS)); httplib::Headers headers{ {std::string("Game-Assets-API-Version"), std::string(BOXCAT_API_VERSION)}, diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index ed5059047..92adde6d4 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp @@ -129,12 +129,20 @@ public: {304, nullptr, "Disconnect"}, {400, nullptr, "Initialize"}, {401, nullptr, "Finalize"}, - {402, nullptr, "SetOperationMode"}, + {402, &IUserLocalCommunicationService::Initialize2, "Initialize2"}, // 7.0.0+ }; // clang-format on RegisterHandlers(functions); } + + void Initialize2(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_LDN, "(STUBBED) called"); + // Result success seem make this services start network and continue. + // If we just pass result error then it will stop and maybe try again and again. + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_UNKNOWN); + } }; class LDNS final : public ServiceFramework<LDNS> { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 6d8bca8bb..f1966ac0e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -44,6 +44,8 @@ u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve return GetWaitbase(input, output); case IoctlCommand::IocChannelSetTimeoutCommand: return ChannelSetTimeout(input, output); + case IoctlCommand::IocChannelSetTimeslice: + return ChannelSetTimeslice(input, output); default: break; } @@ -228,4 +230,14 @@ u32 nvhost_gpu::ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& return 0; } +u32 nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlSetTimeslice params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlSetTimeslice)); + LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice); + + channel_timeslice = params.timeslice; + + return 0; +} + } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index d056dd046..2ac74743f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -48,6 +48,7 @@ private: IocAllocObjCtxCommand = 0xC0104809, IocChannelGetWaitbaseCommand = 0xC0080003, IocChannelSetTimeoutCommand = 0x40044803, + IocChannelSetTimeslice = 0xC004481D, }; enum class CtxObjects : u32_le { @@ -101,6 +102,11 @@ private: static_assert(sizeof(IoctlChannelSetPriority) == 4, "IoctlChannelSetPriority is incorrect size"); + struct IoctlSetTimeslice { + u32_le timeslice; + }; + static_assert(sizeof(IoctlSetTimeslice) == 4, "IoctlSetTimeslice is incorrect size"); + struct IoctlEventIdControl { u32_le cmd; // 0=disable, 1=enable, 2=clear u32_le id; @@ -174,6 +180,7 @@ private: u64_le user_data{}; IoctlZCullBind zcull_params{}; u32_le channel_priority{}; + u32_le channel_timeslice{}; u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output); @@ -188,6 +195,7 @@ private: const std::vector<u8>& input2, IoctlVersion version); u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); + u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); std::shared_ptr<nvmap> nvmap_dev; u32 assigned_syncpoints{}; diff --git a/src/core/settings.h b/src/core/settings.h index e1a9a0ffa..f837d3fbc 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -429,6 +429,7 @@ struct Values { int vulkan_device; float resolution_factor; + int aspect_ratio; bool use_frame_limit; u16 frame_limit; bool use_disk_shader_cache; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index db9332d00..4b0c6346f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -37,6 +37,7 @@ add_library(video_core STATIC memory_manager.h morton.cpp morton.h + query_cache.h rasterizer_accelerated.cpp rasterizer_accelerated.h rasterizer_cache.cpp @@ -74,6 +75,8 @@ add_library(video_core STATIC renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h + renderer_opengl/gl_query_cache.cpp + renderer_opengl/gl_query_cache.h renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h @@ -177,6 +180,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_memory_manager.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_query_cache.cpp + renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_renderpass_cache.cpp diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 0b3e8749b..b28de1092 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,6 +4,7 @@ #include <cinttypes> #include <cstring> +#include <optional> #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" @@ -16,6 +17,8 @@ namespace Tegra::Engines { +using VideoCore::QueryType; + /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; @@ -400,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessQueryCondition(); break; } + case MAXWELL3D_REG_INDEX(counter_reset): { + ProcessCounterReset(); + break; + } case MAXWELL3D_REG_INDEX(sync_info): { ProcessSyncPoint(); break; @@ -482,7 +489,7 @@ void Maxwell3D::FlushMMEInlineDraw() { const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; if (ShouldExecute()) { - rasterizer.DrawMultiBatch(is_indexed); + rasterizer.Draw(is_indexed, true); } // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if @@ -544,40 +551,28 @@ void Maxwell3D::ProcessQueryGet() { "Units other than CROP are unimplemented"); switch (regs.query.query_get.operation) { - case Regs::QueryOperation::Release: { - const u64 result = regs.query.query_sequence; - StampQueryResult(result, regs.query.query_get.short_query == 0); + case Regs::QueryOperation::Release: + StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); break; - } - case Regs::QueryOperation::Acquire: { - // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU - // to write a value that matches the current payload. + case Regs::QueryOperation::Acquire: + // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that + // matches the current payload. UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); break; - } - case Regs::QueryOperation::Counter: { - u64 result{}; - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - result = 0; - break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast<u32>(regs.query.query_get.select.Value())); + case Regs::QueryOperation::Counter: + if (const std::optional<u64> result = GetQueryResult()) { + // If the query returns an empty optional it means it's cached and deferred. + // In this case we have a non-empty result, so we stamp it immediately. + StampQueryResult(*result, regs.query.query_get.short_query == 0); } - StampQueryResult(result, regs.query.query_get.short_query == 0); break; - } - case Regs::QueryOperation::Trap: { + case Regs::QueryOperation::Trap: UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); break; - } - default: { + default: UNIMPLEMENTED_MSG("Unknown query operation"); break; } - } } void Maxwell3D::ProcessQueryCondition() { @@ -593,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() { } case Regs::ConditionMode::ResNonZero: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; break; } case Regs::ConditionMode::Equal: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; break; } case Regs::ConditionMode::NotEqual: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; break; @@ -619,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() { } } +void Maxwell3D::ProcessCounterReset() { + switch (regs.counter_reset) { + case Regs::CounterReset::SampleCnt: + rasterizer.ResetCounter(QueryType::SamplesPassed); + break; + default: + LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", + static_cast<int>(regs.counter_reset)); + break; + } +} + void Maxwell3D::ProcessSyncPoint() { const u32 sync_point = regs.sync_info.sync_point.Value(); const u32 increment = regs.sync_info.increment.Value(); @@ -647,7 +654,7 @@ void Maxwell3D::DrawArrays() { const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count}; if (ShouldExecute()) { - rasterizer.DrawBatch(is_indexed); + rasterizer.Draw(is_indexed, false); } // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if @@ -661,6 +668,22 @@ void Maxwell3D::DrawArrays() { } } +std::optional<u64> Maxwell3D::GetQueryResult() { + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + return 0; + case Regs::QuerySelect::SamplesPassed: + // Deferred. + rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + system.GPU().GetTicks()); + return {}; + default: + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast<u32>(regs.query.query_get.select.Value())); + return 1; + } +} + void Maxwell3D::ProcessCBBind(std::size_t stage_index) { // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. auto& shader = state.shader_stages[stage_index]; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0a2af54e5..26939be3f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -6,6 +6,7 @@ #include <array> #include <bitset> +#include <optional> #include <type_traits> #include <unordered_map> #include <vector> @@ -409,6 +410,27 @@ public: Linear = 1, }; + enum class CounterReset : u32 { + SampleCnt = 0x01, + Unk02 = 0x02, + Unk03 = 0x03, + Unk04 = 0x04, + EmittedPrimitives = 0x10, // Not tested + Unk11 = 0x11, + Unk12 = 0x12, + Unk13 = 0x13, + Unk15 = 0x15, + Unk16 = 0x16, + Unk17 = 0x17, + Unk18 = 0x18, + Unk1A = 0x1A, + Unk1B = 0x1B, + Unk1C = 0x1C, + Unk1D = 0x1D, + Unk1E = 0x1E, + GeneratedPrimitives = 0x1F, + }; + struct Cull { enum class FrontFace : u32 { ClockWise = 0x0900, @@ -857,7 +879,7 @@ public: BitField<7, 1, u32> c7; } clip_distance_enabled; - INSERT_UNION_PADDING_WORDS(0x1); + u32 samplecnt_enable; float point_size; @@ -865,7 +887,11 @@ public: u32 point_sprite_enable; - INSERT_UNION_PADDING_WORDS(0x5); + INSERT_UNION_PADDING_WORDS(0x3); + + CounterReset counter_reset; + + INSERT_UNION_PADDING_WORDS(0x1); u32 zeta_enable; @@ -1412,12 +1438,15 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); - // Writes the query result accordingly + /// Writes the query result accordingly. void StampQueryResult(u64 payload, bool long_query); - // Handles Conditional Rendering + /// Handles conditional rendering. void ProcessQueryCondition(); + /// Handles counter resets. + void ProcessCounterReset(); + /// Handles writes to syncing register. void ProcessSyncPoint(); @@ -1434,6 +1463,9 @@ private: // Handles a instance drawcall from MME void StepInstance(MMEDrawMode expected_mode, u32 count); + + /// Returns a query's value or an empty object if the value will be deferred through a cache. + std::optional<u64> GetQueryResult(); }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -1499,8 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); +ASSERT_REG_POSITION(samplecnt_enable, 0x545); ASSERT_REG_POSITION(point_size, 0x546); ASSERT_REG_POSITION(point_sprite_enable, 0x548); +ASSERT_REG_POSITION(counter_reset, 0x54C); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 4419ab735..7d7137109 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -24,7 +24,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; - memory_manager = std::make_unique<Tegra::MemoryManager>(system); + memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index f1d50be3e..f5d33f27a 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -11,10 +11,12 @@ #include "core/memory.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" namespace Tegra { -MemoryManager::MemoryManager(Core::System& system) : system{system} { +MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : rasterizer{rasterizer}, system{system} { std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); std::fill(page_table.attributes.begin(), page_table.attributes.end(), Common::PageType::Unmapped); @@ -83,6 +85,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { const auto cpu_addr = GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); + // Flush and invalidate through the GPU interface, to be asynchronous if possible. system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); @@ -242,7 +245,9 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { const u8* src_ptr{page_table.pointers[page_index] + page_offset}; - system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount); + // Flush must happen on the rasterizer interface, such that memory is always synchronous + // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. + rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); std::memcpy(dest_buffer, src_ptr, copy_amount); break; } @@ -292,7 +297,9 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { u8* dest_ptr{page_table.pointers[page_index] + page_offset}; - system.GPU().InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); + // Invalidate must happen on the rasterizer interface, such that memory is always + // synchronous when it is written (even when in asynchronous GPU mode). + rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); std::memcpy(dest_ptr, src_buffer, copy_amount); break; } @@ -339,8 +346,10 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std:: switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { + // Flush must happen on the rasterizer interface, such that memory is always synchronous + // when it is copied (even when in asynchronous GPU mode). const u8* src_ptr{page_table.pointers[page_index] + page_offset}; - system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount); + rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); WriteBlock(dest_addr, src_ptr, copy_amount); break; } diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 393447eb4..aea010087 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -10,6 +10,10 @@ #include "common/common_types.h" #include "common/page_table.h" +namespace VideoCore { +class RasterizerInterface; +} + namespace Core { class System; } @@ -47,7 +51,7 @@ struct VirtualMemoryArea { class MemoryManager final { public: - explicit MemoryManager(Core::System& system); + explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); ~MemoryManager(); GPUVAddr AllocateSpace(u64 size, u64 align); @@ -172,6 +176,7 @@ private: Common::PageTable page_table{page_bits}; VMAMap vma_map; + VideoCore::RasterizerInterface& rasterizer; Core::System& system; }; diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h new file mode 100644 index 000000000..e66054ed0 --- /dev/null +++ b/src/video_core/query_cache.h @@ -0,0 +1,359 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <array> +#include <cstring> +#include <iterator> +#include <memory> +#include <mutex> +#include <optional> +#include <unordered_map> +#include <vector> + +#include "common/assert.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCommon { + +template <class QueryCache, class HostCounter> +class CounterStreamBase { +public: + explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) + : cache{cache}, type{type} {} + + /// Updates the state of the stream, enabling or disabling as needed. + void Update(bool enabled) { + if (enabled) { + Enable(); + } else { + Disable(); + } + } + + /// Resets the stream to zero. It doesn't disable the query after resetting. + void Reset() { + if (current) { + current->EndQuery(); + + // Immediately start a new query to avoid disabling its state. + current = cache.Counter(nullptr, type); + } + last = nullptr; + } + + /// Returns the current counter slicing as needed. + std::shared_ptr<HostCounter> Current() { + if (!current) { + return nullptr; + } + current->EndQuery(); + last = std::move(current); + current = cache.Counter(last, type); + return last; + } + + /// Returns true when the counter stream is enabled. + bool IsEnabled() const { + return current != nullptr; + } + +private: + /// Enables the stream. + void Enable() { + if (current) { + return; + } + current = cache.Counter(last, type); + } + + // Disables the stream. + void Disable() { + if (current) { + current->EndQuery(); + } + last = std::exchange(current, nullptr); + } + + QueryCache& cache; + const VideoCore::QueryType type; + + std::shared_ptr<HostCounter> current; + std::shared_ptr<HostCounter> last; +}; + +template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter, + class QueryPool> +class QueryCacheBase { +public: + explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ + static_cast<QueryCache&>(*this), + VideoCore::QueryType::SamplesPassed}}} {} + + void InvalidateRegion(CacheAddr addr, std::size_t size) { + std::unique_lock lock{mutex}; + FlushAndRemoveRegion(addr, size); + } + + void FlushRegion(CacheAddr addr, std::size_t size) { + std::unique_lock lock{mutex}; + FlushAndRemoveRegion(addr, size); + } + + /** + * Records a query in GPU mapped memory, potentially marked with a timestamp. + * @param gpu_addr GPU address to flush to when the mapped memory is read. + * @param type Query type, e.g. SamplesPassed. + * @param timestamp Timestamp, when empty the flushed query is assumed to be short. + */ + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { + std::unique_lock lock{mutex}; + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + + CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); + if (!query) { + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT_OR_EXECUTE(cpu_addr, return;); + + query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); + } + + query->BindCounter(Stream(type).Current(), timestamp); + } + + /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. + void UpdateCounters() { + std::unique_lock lock{mutex}; + const auto& regs = system.GPU().Maxwell3D().regs; + Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); + } + + /// Resets a counter to zero. It doesn't disable the query after resetting. + void ResetCounter(VideoCore::QueryType type) { + std::unique_lock lock{mutex}; + Stream(type).Reset(); + } + + /// Disable all active streams. Expected to be called at the end of a command buffer. + void DisableStreams() { + std::unique_lock lock{mutex}; + for (auto& stream : streams) { + stream.Update(false); + } + } + + /// Returns a new host counter. + std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type) { + return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency), + type); + } + + /// Returns the counter stream of the specified type. + CounterStream& Stream(VideoCore::QueryType type) { + return streams[static_cast<std::size_t>(type)]; + } + + /// Returns the counter stream of the specified type. + const CounterStream& Stream(VideoCore::QueryType type) const { + return streams[static_cast<std::size_t>(type)]; + } + +protected: + std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; + +private: + /// Flushes a memory range to guest memory and removes it from the cache. + void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { + const u64 addr_begin = static_cast<u64>(addr); + const u64 addr_end = addr_begin + static_cast<u64>(size); + const auto in_range = [addr_begin, addr_end](CachedQuery& query) { + const u64 cache_begin = query.GetCacheAddr(); + const u64 cache_end = cache_begin + query.SizeInBytes(); + return cache_begin < addr_end && addr_begin < cache_end; + }; + + const u64 page_end = addr_end >> PAGE_SHIFT; + for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { + const auto& it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + continue; + } + auto& contents = it->second; + for (auto& query : contents) { + if (!in_range(query)) { + continue; + } + rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); + query.Flush(); + } + contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), + std::end(contents)); + } + } + + /// Registers the passed parameters as cached and returns a pointer to the stored cached query. + CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { + rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); + const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; + return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, + host_ptr); + } + + /// Tries to a get a cached query. Returns nullptr on failure. + CachedQuery* TryGet(CacheAddr addr) { + const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; + const auto it = cached_queries.find(page); + if (it == std::end(cached_queries)) { + return nullptr; + } + auto& contents = it->second; + const auto found = + std::find_if(std::begin(contents), std::end(contents), + [addr](auto& query) { return query.GetCacheAddr() == addr; }); + return found != std::end(contents) ? &*found : nullptr; + } + + static constexpr std::uintptr_t PAGE_SIZE = 4096; + static constexpr unsigned PAGE_SHIFT = 12; + + Core::System& system; + VideoCore::RasterizerInterface& rasterizer; + + std::recursive_mutex mutex; + + std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; + + std::array<CounterStream, VideoCore::NumQueryTypes> streams; +}; + +template <class QueryCache, class HostCounter> +class HostCounterBase { +public: + explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_) + : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} { + // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted. + constexpr u64 depth_threshold = 96; + if (depth > depth_threshold) { + depth = 0; + base_result = dependency->Query(); + dependency = nullptr; + } + } + virtual ~HostCounterBase() = default; + + /// Returns the current value of the query. + u64 Query() { + if (result) { + return *result; + } + + u64 value = BlockingQuery() + base_result; + if (dependency) { + value += dependency->Query(); + dependency = nullptr; + } + + result = value; + return *result; + } + + /// Returns true when flushing this query will potentially wait. + bool WaitPending() const noexcept { + return result.has_value(); + } + + u64 Depth() const noexcept { + return depth; + } + +protected: + /// Returns the value of query from the backend API blocking as needed. + virtual u64 BlockingQuery() const = 0; + +private: + std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. + std::optional<u64> result; ///< Filled with the already returned value. + u64 depth; ///< Number of nested dependencies. + u64 base_result = 0; ///< Equivalent to nested dependencies value. +}; + +template <class HostCounter> +class CachedQueryBase { +public: + explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) + : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + virtual ~CachedQueryBase() = default; + + CachedQueryBase(CachedQueryBase&&) noexcept = default; + CachedQueryBase(const CachedQueryBase&) = delete; + + CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default; + CachedQueryBase& operator=(const CachedQueryBase&) = delete; + + /// Flushes the query to guest memory. + virtual void Flush() { + // When counter is nullptr it means that it's just been reseted. We are supposed to write a + // zero in these cases. + const u64 value = counter ? counter->Query() : 0; + std::memcpy(host_ptr, &value, sizeof(u64)); + + if (timestamp) { + std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); + } + } + + /// Binds a counter to this query. + void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { + if (counter) { + // If there's an old counter set it means the query is being rewritten by the game. + // To avoid losing the data forever, flush here. + Flush(); + } + counter = std::move(counter_); + timestamp = timestamp_; + } + + VAddr CpuAddr() const noexcept { + return cpu_addr; + } + + CacheAddr GetCacheAddr() const noexcept { + return ToCacheAddr(host_ptr); + } + + u64 SizeInBytes() const noexcept { + return SizeInBytes(timestamp.has_value()); + } + + static constexpr u64 SizeInBytes(bool with_timestamp) noexcept { + return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; + } + +protected: + /// Returns true when querying the counter may potentially block. + bool WaitPending() const noexcept { + return counter && counter->WaitPending(); + } + +private: + static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp. + static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp. + static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query. + + VAddr cpu_addr; ///< Guest CPU address. + u8* host_ptr; ///< Writable host pointer. + std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. + std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. +}; + +} // namespace VideoCommon diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index c586cd6fe..f18eaf4bc 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -6,6 +6,7 @@ #include <atomic> #include <functional> +#include <optional> #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" @@ -17,6 +18,11 @@ class MemoryManager; namespace VideoCore { +enum class QueryType { + SamplesPassed, +}; +constexpr std::size_t NumQueryTypes = 1; + enum class LoadCallbackStage { Prepare, Decompile, @@ -29,11 +35,8 @@ class RasterizerInterface { public: virtual ~RasterizerInterface() {} - /// Draw the current batch of vertex arrays - virtual bool DrawBatch(bool is_indexed) = 0; - - /// Draw the current batch of multiple instances of vertex arrays - virtual bool DrawMultiBatch(bool is_indexed) = 0; + /// Dispatches a draw invocation + virtual void Draw(bool is_indexed, bool is_instanced) = 0; /// Clear the current framebuffer virtual void Clear() = 0; @@ -41,6 +44,12 @@ public: /// Dispatches a compute shader invocation virtual void DispatchCompute(GPUVAddr code_addr) = 0; + /// Resets the counter of a query + virtual void ResetCounter(QueryType type) = 0; + + /// Records a GPU query and caches it + virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; + /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp new file mode 100644 index 000000000..f12e9f55f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -0,0 +1,120 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <cstring> +#include <memory> +#include <unordered_map> +#include <utility> +#include <vector> + +#include <glad/glad.h> + +#include "common/assert.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_query_cache.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" + +namespace OpenGL { + +namespace { + +constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; + +constexpr GLenum GetTarget(VideoCore::QueryType type) { + return QueryTargets[static_cast<std::size_t>(type)]; +} + +} // Anonymous namespace + +QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) + : VideoCommon::QueryCacheBase< + QueryCache, CachedQuery, CounterStream, HostCounter, + std::vector<OGLQuery>>{system, + static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)}, + gl_rasterizer{gl_rasterizer} {} + +QueryCache::~QueryCache() = default; + +OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { + auto& reserve = query_pools[static_cast<std::size_t>(type)]; + OGLQuery query; + if (reserve.empty()) { + query.Create(GetTarget(type)); + return query; + } + + query = std::move(reserve.back()); + reserve.pop_back(); + return query; +} + +void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { + query_pools[static_cast<std::size_t>(type)].push_back(std::move(query)); +} + +bool QueryCache::AnyCommandQueued() const noexcept { + return gl_rasterizer.AnyCommandQueued(); +} + +HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type) + : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache}, + type{type}, query{cache.AllocateQuery(type)} { + glBeginQuery(GetTarget(type), query.handle); +} + +HostCounter::~HostCounter() { + cache.Reserve(type, std::move(query)); +} + +void HostCounter::EndQuery() { + if (!cache.AnyCommandQueued()) { + // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not + // having any of these causes a lock. glFlush is considered a command, so we can safely wait + // for this. Insert to the OpenGL command stream a flush. + glFlush(); + } + glEndQuery(GetTarget(type)); +} + +u64 HostCounter::BlockingQuery() const { + GLint64 value; + glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value); + return static_cast<u64>(value); +} + +CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr) + : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {} + +CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept + : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} + +CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { + VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs)); + cache = rhs.cache; + type = rhs.type; + return *this; +} + +void CachedQuery::Flush() { + // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. + // To avoid this disable and re-enable keeping the dependency stream. + // But we only have to do this if we have pending waits to be done. + auto& stream = cache->Stream(type); + const bool slice_counter = WaitPending() && stream.IsEnabled(); + if (slice_counter) { + stream.Update(false); + } + + VideoCommon::CachedQueryBase<HostCounter>::Flush(); + + if (slice_counter) { + stream.Update(true); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h new file mode 100644 index 000000000..d8e7052a1 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -0,0 +1,78 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <memory> +#include <vector> + +#include "common/common_types.h" +#include "video_core/query_cache.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace Core { +class System; +} + +namespace OpenGL { + +class CachedQuery; +class HostCounter; +class QueryCache; +class RasterizerOpenGL; + +using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; + +class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, + HostCounter, std::vector<OGLQuery>> { +public: + explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); + ~QueryCache(); + + OGLQuery AllocateQuery(VideoCore::QueryType type); + + void Reserve(VideoCore::QueryType type, OGLQuery&& query); + + bool AnyCommandQueued() const noexcept; + +private: + RasterizerOpenGL& gl_rasterizer; +}; + +class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { +public: + explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type); + ~HostCounter(); + + void EndQuery(); + +private: + u64 BlockingQuery() const override; + + QueryCache& cache; + const VideoCore::QueryType type; + OGLQuery query; +}; + +class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> { +public: + explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, + u8* host_ptr); + CachedQuery(CachedQuery&& rhs) noexcept; + CachedQuery(const CachedQuery&) = delete; + + CachedQuery& operator=(CachedQuery&& rhs) noexcept; + CachedQuery& operator=(const CachedQuery&) = delete; + + void Flush() override; + +private: + QueryCache* cache; + VideoCore::QueryType type; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b0eb14c8b..e1965fb21 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,6 +25,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, - shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, - buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { + shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, + screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.Apply(); @@ -541,11 +542,16 @@ void RasterizerOpenGL::Clear() { } else if (use_stencil) { glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); } + + ++num_queued_commands; } void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); + const auto& regs = gpu.regs; + + query_cache.UpdateCounters(); SyncRasterizeEnable(state); SyncColorMask(); @@ -611,7 +617,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup shaders and their used resources. texture_cache.GuardSamplers(true); - const auto primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); SetupShaders(primitive_mode); texture_cache.GuardSamplers(false); @@ -638,35 +644,47 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { glTextureBarrier(); } + ++num_queued_commands; + const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); const GLsizei num_instances = static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); if (is_indexed) { - const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format); const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base); const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count); - glDrawElementsInstancedBaseVertexBaseInstance( - primitive_mode, num_vertices, index_format, - reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex, - base_instance); + const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); + const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format); + if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { + glDrawElements(primitive_mode, num_vertices, format, offset); + } else if (num_instances == 1 && base_instance == 0) { + glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex); + } else if (base_vertex == 0 && base_instance == 0) { + glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances); + } else if (base_vertex == 0) { + glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset, + num_instances, base_instance); + } else if (base_instance == 0) { + glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset, + num_instances, base_vertex); + } else { + glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format, + offset, num_instances, base_vertex, + base_instance); + } } else { const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first); const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count); - glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances, - base_instance); + if (num_instances == 1 && base_instance == 0) { + glDrawArrays(primitive_mode, base_vertex, num_vertices); + } else if (base_instance == 0) { + glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances); + } else { + glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, + num_instances, base_instance); + } } } -bool RasterizerOpenGL::DrawBatch(bool is_indexed) { - Draw(is_indexed, false); - return true; -} - -bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { - Draw(is_indexed, true); - return true; -} - void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { if (device.HasBrokenCompute()) { return; @@ -707,6 +725,16 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { state.ApplyProgramPipeline(); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); + ++num_queued_commands; +} + +void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { + query_cache.ResetCounter(type); +} + +void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, + std::optional<u64> timestamp) { + query_cache.Query(gpu_addr, type, timestamp); } void RasterizerOpenGL::FlushAll() {} @@ -718,6 +746,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { } texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -728,6 +757,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { @@ -738,10 +768,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { } void RasterizerOpenGL::FlushCommands() { + // Only flush when we have commands queued to OpenGL. + if (num_queued_commands == 0) { + return; + } + num_queued_commands = 0; glFlush(); } void RasterizerOpenGL::TickFrame() { + // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. + num_queued_commands = 0; + buffer_cache.TickFrame(); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 0501f3828..68abe9a21 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -24,6 +24,7 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -57,10 +58,11 @@ public: ScreenInfo& info); ~RasterizerOpenGL() override; - bool DrawBatch(bool is_indexed) override; - bool DrawMultiBatch(bool is_indexed) override; + void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; + void ResetCounter(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -75,6 +77,11 @@ public: void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; + /// Returns true when there are commands queued to the OpenGL server. + bool AnyCommandQueued() const { + return num_queued_commands > 0; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -102,9 +109,6 @@ private: void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size); - /// Syncs all the state, shaders, render targets and textures setting before a draw call. - void Draw(bool is_indexed, bool is_instanced); - /// Configures the current textures to use for the draw command. void SetupDrawTextures(std::size_t stage_index, const Shader& shader); @@ -180,10 +184,23 @@ private: /// Syncs the alpha test state to match the guest state void SyncAlphaTest(); - /// Check for extension that are not strictly required - /// but are needed for correct emulation + /// Check for extension that are not strictly required but are needed for correct emulation void CheckExtensions(); + std::size_t CalculateVertexArraysSize() const; + + std::size_t CalculateIndexBufferSize() const; + + /// Updates and returns a vertex array object representing current vertex format + GLuint SetupVertexFormat(); + + void SetupVertexBuffer(GLuint vao); + void SetupVertexInstances(GLuint vao); + + GLintptr SetupIndexBuffer(); + + void SetupShaders(GLenum primitive_mode); + const Device device; OpenGLState state; @@ -191,6 +208,7 @@ private: ShaderCacheOpenGL shader_cache; SamplerCacheOpenGL sampler_cache; FramebufferCacheOpenGL framebuffer_cache; + QueryCache query_cache; Core::System& system; ScreenInfo& screen_info; @@ -208,19 +226,8 @@ private: BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; - std::size_t CalculateVertexArraysSize() const; - - std::size_t CalculateIndexBufferSize() const; - - /// Updates and returns a vertex array object representing current vertex format - GLuint SetupVertexFormat(); - - void SetupVertexBuffer(GLuint vao); - void SetupVertexInstances(GLuint vao); - - GLintptr SetupIndexBuffer(); - - void SetupShaders(GLenum primitive_mode); + /// Number of commands queued to the OpenGL driver. Reseted on flush. + std::size_t num_queued_commands = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 5c96c1d46..f0ddfb276 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -207,4 +207,21 @@ void OGLFramebuffer::Release() { handle = 0; } +void OGLQuery::Create(GLenum target) { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glCreateQueries(target, 1, &handle); +} + +void OGLQuery::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteQueries(1, &handle); + handle = 0; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 3a85a1d4c..514d1d165 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -266,4 +266,29 @@ public: GLuint handle = 0; }; +class OGLQuery : private NonCopyable { +public: + OGLQuery() = default; + + OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLQuery() { + Release(); + } + + OGLQuery& operator=(OGLQuery&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create(GLenum target); + + /// Deletes the internal OpenGL resource + void Release(); + + GLuint handle = 0; +}; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index d4b81cd87..5c1ae1418 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -260,6 +260,13 @@ CachedSurface::~CachedSurface() = default; void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Download); + if (params.IsBuffer()) { + glGetNamedBufferSubData(texture_buffer.handle, 0, + static_cast<GLsizeiptr>(params.GetHostSizeInBytes()), + staging_buffer.data()); + return; + } + SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.emulated_levels; ++level) { diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 331808113..5403c3ab7 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -164,7 +164,7 @@ struct FormatTuple { {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 {vk::Format::eUndefined, {}}, // BGRA8_SRGB {vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB - {vk::Format::eUndefined, {}}, // DXT23_SRGB + {vk::Format::eBc2SrgbBlock, {}}, // DXT23_SRGB {vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB {vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB {vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U @@ -363,6 +363,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr return vk::Format::eR8G8B8A8Uint; case Maxwell::VertexAttribute::Size::Size_32: return vk::Format::eR32Uint; + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return vk::Format::eR32G32B32A32Uint; default: break; } diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9840f26e5..d1da4f9d3 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -104,8 +104,11 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan features.depthBiasClamp = true; features.geometryShader = true; features.tessellationShader = true; + features.occlusionQueryPrecise = true; features.fragmentStoresAndAtomics = true; features.shaderImageGatherExtended = true; + features.shaderStorageImageReadWithoutFormat = + is_shader_storage_img_read_without_format_supported; features.shaderStorageImageWriteWithoutFormat = true; features.textureCompressionASTC_LDR = is_optimal_astc_supported; @@ -117,6 +120,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan bit8_storage.uniformAndStorageBuffer8BitAccess = true; SetNext(next, bit8_storage); + vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; + host_query_reset.hostQueryReset = true; + SetNext(next, host_query_reset); + vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { float16_int8.shaderFloat16 = true; @@ -273,6 +280,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, }; std::bitset<required_extensions.size()> available_extensions{}; @@ -340,6 +348,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev std::make_pair(features.depthBiasClamp, "depthBiasClamp"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, @@ -376,7 +385,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } }; - extensions.reserve(13); + extensions.reserve(14); extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); @@ -384,6 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); + extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); [[maybe_unused]] const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); @@ -457,6 +467,8 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { const auto supported_features{physical.getFeatures(dldi)}; + is_shader_storage_img_read_without_format_supported = + supported_features.shaderStorageImageReadWithoutFormat; is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); } @@ -530,6 +542,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti vk::Format::eBc6HUfloatBlock, vk::Format::eBc6HSfloatBlock, vk::Format::eBc1RgbaSrgbBlock, + vk::Format::eBc2SrgbBlock, vk::Format::eBc3SrgbBlock, vk::Format::eBc7SrgbBlock, vk::Format::eAstc4x4SrgbBlock, diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 72603f9f6..2c27ad730 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -122,6 +122,11 @@ public: return properties.limits.maxPushConstantsSize; } + /// Returns true if Shader storage Image Read Without Format supported. + bool IsShaderStorageImageReadWithoutFormatSupported() const { + return is_shader_storage_img_read_without_format_supported; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -227,6 +232,8 @@ private: bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. + bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage + ///< image read without format // Telemetry parameters std::string vendor_name; ///< Device's driver name. diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp new file mode 100644 index 000000000..ffbf60dda --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <utility> +#include <vector> + +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Vulkan { + +namespace { + +constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; + +constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { + return QUERY_TARGETS[static_cast<std::size_t>(type)]; +} + +} // Anonymous namespace + +QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {} + +QueryPool::~QueryPool() = default; + +void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) { + device = &device_; + type = type_; +} + +std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) { + std::size_t index; + do { + index = CommitResource(fence); + } while (usage[index]); + usage[index] = true; + + return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)}; +} + +void QueryPool::Allocate(std::size_t begin, std::size_t end) { + usage.resize(end); + + const auto dev = device->GetLogical(); + const u32 size = static_cast<u32>(end - begin); + const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); + pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); +} + +void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) { + const auto it = + std::find_if(std::begin(pools), std::end(pools), + [query_pool = query.first](auto& pool) { return query_pool == *pool; }); + ASSERT(it != std::end(pools)); + + const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); + usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; +} + +VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKScheduler& scheduler) + : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, + QueryPool>{system, rasterizer}, + device{device}, scheduler{scheduler} { + for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) { + query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i)); + } +} + +VKQueryCache::~VKQueryCache() = default; + +std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { + return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); +} + +void VKQueryCache::Reserve(VideoCore::QueryType type, + std::pair<vk::QueryPool, std::uint32_t> query) { + query_pools[static_cast<std::size_t>(type)].Reserve(query); +} + +HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type) + : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, + type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { + const auto dev = cache.Device().GetLogical(); + cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { + dev.resetQueryPoolEXT(query.first, query.second, 1, dld); + cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); + }); +} + +HostCounter::~HostCounter() { + cache.Reserve(type, query); +} + +void HostCounter::EndQuery() { + cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { + cmdbuf.endQuery(query.first, query.second, dld); + }); +} + +u64 HostCounter::BlockingQuery() const { + if (ticks >= cache.Scheduler().Ticks()) { + cache.Scheduler().Flush(); + } + + const auto dev = cache.Device().GetLogical(); + const auto& dld = cache.Device().GetDispatchLoader(); + u64 value; + dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), + vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); + return value; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h new file mode 100644 index 000000000..c3092ee96 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -0,0 +1,104 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "video_core/query_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Vulkan { + +class CachedQuery; +class HostCounter; +class VKDevice; +class VKQueryCache; +class VKScheduler; + +using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>; + +class QueryPool final : public VKFencedPool { +public: + explicit QueryPool(); + ~QueryPool() override; + + void Initialize(const VKDevice& device, VideoCore::QueryType type); + + std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence); + + void Reserve(std::pair<vk::QueryPool, std::uint32_t> query); + +protected: + void Allocate(std::size_t begin, std::size_t end) override; + +private: + static constexpr std::size_t GROW_STEP = 512; + + const VKDevice* device = nullptr; + VideoCore::QueryType type = {}; + + std::vector<UniqueQueryPool> pools; + std::vector<bool> usage; +}; + +class VKQueryCache final + : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, + QueryPool> { +public: + explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKScheduler& scheduler); + ~VKQueryCache(); + + std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type); + + void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query); + + const VKDevice& Device() const noexcept { + return device; + } + + VKScheduler& Scheduler() const noexcept { + return scheduler; + } + +private: + const VKDevice& device; + VKScheduler& scheduler; +}; + +class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> { +public: + explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, + VideoCore::QueryType type); + ~HostCounter(); + + void EndQuery(); + +private: + u64 BlockingQuery() const override; + + VKQueryCache& cache; + const VideoCore::QueryType type; + const std::pair<vk::QueryPool, std::uint32_t> query; + const u64 ticks; +}; + +class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { +public: + explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) + : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {} +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index aada38702..31c078f6a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -289,25 +289,19 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind staging_pool), pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), - sampler_cache(device) {} - -RasterizerVulkan::~RasterizerVulkan() = default; - -bool RasterizerVulkan::DrawBatch(bool is_indexed) { - Draw(is_indexed, false); - return true; + sampler_cache(device), query_cache(system, *this, device, scheduler) { + scheduler.SetQueryCache(query_cache); } -bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) { - Draw(is_indexed, true); - return true; -} +RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(Vulkan_Drawing); FlushWork(); + query_cache.UpdateCounters(); + const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; @@ -362,6 +356,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); + query_cache.UpdateCounters(); + const auto& gpu = system.GPU().Maxwell3D(); if (!system.GPU().Maxwell3D().ShouldExecute()) { return; @@ -429,6 +425,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { sampled_views.clear(); image_views.clear(); + query_cache.UpdateCounters(); + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ComputePipelineCacheKey key{ code_addr, @@ -471,17 +469,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { }); } +void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { + query_cache.ResetCounter(type); +} + +void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, + std::optional<u64> timestamp) { + query_cache.Query(gpu_addr, type, timestamp); +} + void RasterizerVulkan::FlushAll() {} void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7be71e734..138903d60 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -24,6 +24,7 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" @@ -96,7 +97,7 @@ struct ImageView { vk::ImageLayout* layout = nullptr; }; -class RasterizerVulkan : public VideoCore::RasterizerAccelerated { +class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, VKScreenInfo& screen_info, const VKDevice& device, @@ -104,10 +105,11 @@ public: VKScheduler& scheduler); ~RasterizerVulkan() override; - bool DrawBatch(bool is_indexed) override; - bool DrawMultiBatch(bool is_indexed) override; + void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; + void ResetCounter(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -140,8 +142,6 @@ private: static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; - void Draw(bool is_indexed, bool is_instanced); - void FlushWork(); Texceptions UpdateAttachments(); @@ -247,6 +247,7 @@ private: VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; VKSamplerCache sampler_cache; + VKQueryCache query_cache; std::array<View, Maxwell::NumRenderTargets> color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 0a8ec8398..204b7c39c 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -23,7 +23,14 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> } else if (color == std::array<float, 4>{1, 1, 1, 1}) { return vk::BorderColor::eFloatOpaqueWhite; } else { - return {}; + if (color[0] + color[1] + color[2] > 1.35f) { + // If color elements are brighter than roughly 0.5 average, use white border + return vk::BorderColor::eFloatOpaqueWhite; + } + if (color[3] > 0.5f) { + return vk::BorderColor::eFloatOpaqueBlack; + } + return vk::BorderColor::eFloatTransparentBlack; } } @@ -37,8 +44,6 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const auto border_color{tsc.GetBorderColor()}; const auto vk_border_color{TryConvertBorderColor(border_color)}; - UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}", - border_color[0], border_color[1], border_color[2], border_color[3]); constexpr bool unnormalized_coords{false}; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index d66133ad1..92bd6c344 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { } void VKScheduler::AllocateNewContext() { + ++ticks; + std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); @@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() { current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, device.GetDispatchLoader()); + // Enable counters once again. These are disabled when a command buffer is finished. + if (query_cache) { + query_cache->UpdateCounters(); + } } void VKScheduler::InvalidateState() { @@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() { } void VKScheduler::EndPendingOperations() { + query_cache->DisableStreams(); EndRenderPass(); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index bcdffbba0..62fd7858b 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -4,6 +4,7 @@ #pragma once +#include <atomic> #include <condition_variable> #include <memory> #include <optional> @@ -18,6 +19,7 @@ namespace Vulkan { class VKDevice; class VKFence; +class VKQueryCache; class VKResourceManager; class VKFenceView { @@ -67,6 +69,11 @@ public: /// Binds a pipeline to the current execution context. void BindGraphicsPipeline(vk::Pipeline pipeline); + /// Assigns the query cache. + void SetQueryCache(VKQueryCache& query_cache_) { + query_cache = &query_cache_; + } + /// Returns true when viewports have been set in the current command buffer. bool TouchViewports() { return std::exchange(state.viewports, true); @@ -112,6 +119,11 @@ public: return current_fence; } + /// Returns the current command buffer tick. + u64 Ticks() const { + return ticks; + } + private: class Command { public: @@ -205,6 +217,8 @@ private: const VKDevice& device; VKResourceManager& resource_manager; + VKQueryCache* query_cache = nullptr; + vk::CommandBuffer current_cmdbuf; VKFence* current_fence = nullptr; VKFence* next_fence = nullptr; @@ -227,6 +241,7 @@ private: Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; std::mutex mutex; std::condition_variable cv; + std::atomic<u64> ticks = 0; bool quit = false; }; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 99dbc473d..2da622d15 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -276,12 +276,14 @@ public: AddCapability(spv::Capability::ImageGatherExtended); AddCapability(spv::Capability::SampledBuffer); AddCapability(spv::Capability::StorageImageWriteWithoutFormat); + AddCapability(spv::Capability::DrawParameters); AddCapability(spv::Capability::SubgroupBallotKHR); AddCapability(spv::Capability::SubgroupVoteKHR); AddExtension("SPV_KHR_shader_ballot"); AddExtension("SPV_KHR_subgroup_vote"); AddExtension("SPV_KHR_storage_buffer_storage_class"); AddExtension("SPV_KHR_variable_pointers"); + AddExtension("SPV_KHR_shader_draw_parameters"); if (ir.UsesLayer() || ir.UsesViewportIndex()) { if (ir.UsesViewportIndex()) { @@ -293,6 +295,10 @@ public: } } + if (device.IsShaderStorageImageReadWithoutFormatSupported()) { + AddCapability(spv::Capability::StorageImageReadWithoutFormat); + } + if (device.IsFloat16Supported()) { AddCapability(spv::Capability::Float16); } @@ -495,9 +501,11 @@ private: interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); // Declare input attributes - vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_uint, "vertex_index"); + vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); instance_index = - DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_uint, "instance_index"); + DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); + base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); + base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); } void DeclareTessControl() { @@ -1080,9 +1088,12 @@ private: return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), Type::Float}; case 2: - return {OpLoad(t_uint, instance_index), Type::Uint}; + return { + OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), + Type::Int}; case 3: - return {OpLoad(t_uint, vertex_index), Type::Uint}; + return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), + Type::Int}; } UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); return {Constant(t_uint, 0U), Type::Uint}; @@ -1772,8 +1783,16 @@ private: } Expression ImageLoad(Operation operation) { - UNIMPLEMENTED(); - return {}; + if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { + return {v_float_zero, Type::Float}; + } + + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; + + const Id coords = GetCoordinates(operation, Type::Int); + const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); + + return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; } Expression ImageStore(Operation operation) { @@ -2566,6 +2585,8 @@ private: Id instance_index{}; Id vertex_index{}; + Id base_instance{}; + Id base_vertex{}; std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; Id frag_depth{}; Id frag_coord{}; diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 0eeb75559..6ead42070 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -83,14 +83,14 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { const bool input_signed = instr.conversion.is_input_signed; - if (instr.conversion.src_size == Register::Size::Byte) { - const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8; - if (offset > 0) { - value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, - std::move(value), Immediate(offset)); + if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { + ASSERT(instr.conversion.src_size == Register::Size::Byte || + instr.conversion.src_size == Register::Size::Short); + if (instr.conversion.src_size == Register::Size::Short) { + ASSERT(offset == 0 || offset == 2); } - } else { - UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); + value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, + std::move(value), Immediate(offset * 8)); } value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 351c8c2f1..bee7d8cad 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -522,68 +522,53 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { - const auto is_array = static_cast<bool>(array); - const auto is_shadow = static_cast<bool>(depth_compare); + const bool is_array = array != nullptr; + const bool is_shadow = depth_compare != nullptr; const bool is_bindless = bindless_reg.has_value(); - UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || - (texture_type == TextureType::TextureCube && is_array && is_shadow), - "This method is not supported."); + UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow); + ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, + "Illegal texture type"); const SamplerInfo info{texture_type, is_array, is_shadow, false}; - Node index_var{}; + Node index_var; const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info) : GetSampler(instr.sampler, info); - Node4 values; - if (sampler == nullptr) { - for (u32 element = 0; element < values.size(); ++element) { - values[element] = Immediate(0); - } - return values; + if (!sampler) { + return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; } const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || process_mode == TextureProcessMode::LLA; - - // LOD selection (either via bias or explicit textureLod) not supported in GL for - // sampler2DArrayShadow and samplerCubeArrayShadow. - const bool gl_lod_supported = - !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || - (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); - - const OperationCode read_method = - (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture; - - UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); + const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; Node bias; Node lod; - if (process_mode != TextureProcessMode::None && gl_lod_supported) { - switch (process_mode) { - case TextureProcessMode::LZ: - lod = Immediate(0.0f); - break; - case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register - // indexed by the gpr20 field with an offset depending on the - // usage of the other registers - bias = GetRegister(instr.gpr20.Value() + bias_offset); - break; - case TextureProcessMode::LL: - lod = GetRegister(instr.gpr20.Value() + bias_offset); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode)); - break; - } + switch (process_mode) { + case TextureProcessMode::None: + break; + case TextureProcessMode::LZ: + lod = Immediate(0.0f); + break; + case TextureProcessMode::LB: + // If present, lod or bias are always stored in the register indexed by the gpr20 field with + // an offset depending on the usage of the other registers. + bias = GetRegister(instr.gpr20.Value() + bias_offset); + break; + case TextureProcessMode::LL: + lod = GetRegister(instr.gpr20.Value() + bias_offset); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode)); + break; } + Node4 values; for (u32 element = 0; element < values.size(); ++element) { - auto copy_coords = coords; MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element, index_var}; - values[element] = Operation(read_method, meta, std::move(copy_coords)); + values[element] = Operation(opcode, meta, coords); } return values; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 84469b7ba..002df414f 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -277,6 +277,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, staging_buffer.data() + host_offset, level); } + } else if (params.IsBuffer()) { + // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest + // memory. + std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); } else { ASSERT(params.target == SurfaceTarget::Texture2D); ASSERT(params.num_levels == 1); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f4c015635..0d105d386 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -721,7 +721,6 @@ private: std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. @@ -733,14 +732,18 @@ private: return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } + const auto struct_result = current_surface->MatchesStructure(params); - if (struct_result != MatchStructureResult::None && - (params.target != SurfaceTarget::Texture3D || - current_surface->MatchTarget(params.target))) { - if (struct_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params, is_render); - } else { - return RebuildSurface(current_surface, params, is_render); + if (struct_result != MatchStructureResult::None) { + const auto& old_params = current_surface->GetSurfaceParams(); + const bool not_3d = params.target != SurfaceTarget::Texture3D && + old_params.target != SurfaceTarget::Texture3D; + if (not_3d || current_surface->MatchTarget(params.target)) { + if (struct_result == MatchStructureResult::FullMatch) { + return ManageStructuralMatch(current_surface, params, is_render); + } else { + return RebuildSurface(current_surface, params, is_render); + } } } } diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index 6683f459f..737ffe409 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp @@ -73,14 +73,12 @@ struct Client::Impl { if (!parsedUrl.GetPort(&port)) { port = HTTP_PORT; } - cli = std::make_unique<httplib::Client>(parsedUrl.m_Host.c_str(), port, - TIMEOUT_SECONDS); + cli = std::make_unique<httplib::Client>(parsedUrl.m_Host.c_str(), port); } else if (parsedUrl.m_Scheme == "https") { if (!parsedUrl.GetPort(&port)) { port = HTTPS_PORT; } - cli = std::make_unique<httplib::SSLClient>(parsedUrl.m_Host.c_str(), port, - TIMEOUT_SECONDS); + cli = std::make_unique<httplib::SSLClient>(parsedUrl.m_Host.c_str(), port); } else { LOG_ERROR(WebService, "Bad URL scheme {}", parsedUrl.m_Scheme); return Common::WebResult{Common::WebResult::Code::InvalidURL, "Bad URL scheme"}; @@ -90,6 +88,7 @@ struct Client::Impl { LOG_ERROR(WebService, "Invalid URL {}", host + path); return Common::WebResult{Common::WebResult::Code::InvalidURL, "Invalid URL"}; } + cli->set_timeout_sec(TIMEOUT_SECONDS); httplib::Headers params; if (!jwt.empty()) { diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index cd94693c1..6209fff75 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -630,6 +630,7 @@ void Config::ReadRendererValues() { Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); Settings::values.resolution_factor = ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); + Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); Settings::values.use_frame_limit = ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); @@ -1064,6 +1065,7 @@ void Config::SaveRendererValues() { WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); WriteSetting(QStringLiteral("resolution_factor"), static_cast<double>(Settings::values.resolution_factor), 1.0); + WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index f57a24e36..ea899c080 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -97,6 +97,7 @@ void ConfigureGraphics::SetConfiguration() { ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); ui->resolution_factor_combobox->setCurrentIndex( static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); + ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); @@ -114,6 +115,7 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.vulkan_device = vulkan_device; Settings::values.resolution_factor = ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); + Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); Settings::values.use_asynchronous_gpu_emulation = diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index e24372204..db60426ab 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -139,6 +139,41 @@ </layout> </item> <item> + <layout class="QHBoxLayout" name="horizontalLayout_6"> + <item> + <widget class="QLabel" name="ar_label"> + <property name="text"> + <string>Aspect Ratio:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="aspect_ratio_combobox"> + <item> + <property name="text"> + <string>Default (16:9)</string> + </property> + </item> + <item> + <property name="text"> + <string>Force 4:3</string> + </property> + </item> + <item> + <property name="text"> + <string>Force 21:9</string> + </property> + </item> + <item> + <property name="text"> + <string>Stretch to Window</string> + </property> + </item> + </widget> + </item> + </layout> + </item> + <item> <layout class="QHBoxLayout" name="horizontalLayout_3"> <item> <widget class="QLabel" name="bg_label"> diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index b01a36023..96f1ce3af 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -379,6 +379,8 @@ void Config::ReadValues() { Settings::values.resolution_factor = static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); + Settings::values.aspect_ratio = + static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); Settings::values.frame_limit = static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 00fd88279..8a2b658cd 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -122,6 +122,10 @@ use_shader_jit = # factor for the Switch resolution resolution_factor = +# Aspect ratio +# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window +aspect_ratio = + # Whether to enable V-Sync (caps the framerate at 60FPS) or not. # 0 (default): Off, 1: On use_vsync = diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 84ab4d687..0ac93b62a 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp @@ -118,6 +118,8 @@ void Config::ReadValues() { // Renderer Settings::values.resolution_factor = static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); + Settings::values.aspect_ratio = + static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); Settings::values.use_frame_limit = false; Settings::values.frame_limit = 100; Settings::values.use_disk_shader_cache = diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index 9a3e86d68..8d93f7b88 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h @@ -26,6 +26,10 @@ use_shader_jit = # factor for the Switch resolution resolution_factor = +# Aspect ratio +# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window +aspect_ratio = + # Whether to enable V-Sync (caps the framerate at 60FPS) or not. # 0 (default): Off, 1: On use_vsync = |