diff options
Diffstat (limited to '')
274 files changed, 19614 insertions, 2501 deletions
diff --git a/.ci/scripts/common/pre-upload.sh b/.ci/scripts/common/pre-upload.sh index 705362a3c..3583f9840 100644 --- a/.ci/scripts/common/pre-upload.sh +++ b/.ci/scripts/common/pre-upload.sh @@ -5,6 +5,6 @@ GITDATE="`git show -s --date=short --format='%ad' | sed 's/-//g'`" GITREV="`git show -s --format='%h'`" -ARTIFACTS_DIR="artifacts" +ARTIFACTS_DIR="$PWD/artifacts" mkdir -p "${ARTIFACTS_DIR}/" diff --git a/.ci/scripts/linux/docker.sh b/.ci/scripts/linux/docker.sh index 4b0193565..7bba01d62 100755 --- a/.ci/scripts/linux/docker.sh +++ b/.ci/scripts/linux/docker.sh @@ -11,7 +11,7 @@ ccache -s mkdir build || true && cd build cmake .. \ -DBoost_USE_STATIC_LIBS=ON \ - -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_FLAGS="-march=x86-64-v2" \ -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ \ -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc \ @@ -32,6 +32,19 @@ ccache -s ctest -VV -C Release +# Separate debug symbols from specified executables +for EXE in yuzu; do + EXE_PATH="bin/$EXE" + # Copy debug symbols out + objcopy --only-keep-debug $EXE_PATH $EXE_PATH.debug + # Add debug link and strip debug symbols + objcopy -g --add-gnu-debuglink=$EXE_PATH.debug $EXE_PATH $EXE_PATH.out + # Overwrite original with stripped copy + mv $EXE_PATH.out $EXE_PATH +done +# Strip debug symbols from all executables +find bin/ -type f -not -regex '.*.debug' -exec strip -g {} ';' + DESTDIR="$PWD/AppDir" ninja install rm -vf AppDir/usr/bin/yuzu-cmd AppDir/usr/bin/yuzu-tester diff --git a/.ci/scripts/linux/upload.sh b/.ci/scripts/linux/upload.sh index e0f336427..fbb2d9c1b 100755 --- a/.ci/scripts/linux/upload.sh +++ b/.ci/scripts/linux/upload.sh @@ -59,4 +59,9 @@ if [ "${RELEASE_NAME}" = "mainline" ] || [ "${RELEASE_NAME}" = "early-access" ]; cp "build/${APPIMAGE_NAME}" "${DIR_NAME}/yuzu-${RELEASE_NAME}.AppImage" fi +# Copy debug symbols to artifacts +cd build/bin +tar $COMPRESSION_FLAGS "${ARTIFACTS_DIR}/${REV_NAME}-debug.tar.xz" *.debug +cd - + . .ci/scripts/common/post-upload.sh diff --git a/.codespellrc b/.codespellrc index 944194a25..3336d31fe 100644 --- a/.codespellrc +++ b/.codespellrc @@ -3,4 +3,4 @@ [codespell] skip = ./.git,./build,./dist,./Doxyfile,./externals,./LICENSES,./src/android/app/src/main/res -ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,zink +ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink diff --git a/.reuse/dep5 b/.reuse/dep5 index 31178fc4c..d682fbdba 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -147,3 +147,7 @@ License: GPL-3.0-or-later Files: src/android/gradle/wrapper/* Copyright: 2023 yuzu Emulator Project License: GPL-3.0-or-later + +Files: externals/stb/* +Copyright: Sean Barrett +License: MIT diff --git a/CMakeLists.txt b/CMakeLists.txt index ed757eb0a..9c35e0946 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,6 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modul include(DownloadExternals) include(CMakeDependentOption) include(CTest) -include(FetchContent) # Set bundled sdl2/qt as dependent options. # OFF by default, but if ENABLE_SDL2 and MSVC are true then ON @@ -99,47 +98,8 @@ if (ANDROID AND YUZU_DOWNLOAD_ANDROID_VVL) DESTINATION "${vvl_lib_path}") endif() -# On Android, fetch and compile libcxx before doing anything else if (ANDROID) set(CMAKE_SKIP_INSTALL_RULES ON) - set(LLVM_VERSION "15.0.6") - - # Note: even though libcxx and libcxxabi have separate releases on the project page, - # the separated releases cannot be compiled. Only in-tree builds work. Therefore we - # must fetch the source release for the entire llvm tree. - FetchContent_Declare(llvm - URL "https://github.com/llvm/llvm-project/releases/download/llvmorg-${LLVM_VERSION}/llvm-project-${LLVM_VERSION}.src.tar.xz" - URL_HASH SHA256=9d53ad04dc60cb7b30e810faf64c5ab8157dadef46c8766f67f286238256ff92 - TLS_VERIFY TRUE - ) - FetchContent_MakeAvailable(llvm) - - # libcxx has support for most of the range library, but it's gated behind a flag: - add_compile_definitions(_LIBCPP_ENABLE_EXPERIMENTAL) - - # Disable standard header inclusion - set(ANDROID_STL "none") - - # libcxxabi - set(LIBCXXABI_INCLUDE_TESTS OFF) - set(LIBCXXABI_ENABLE_SHARED FALSE) - set(LIBCXXABI_ENABLE_STATIC TRUE) - set(LIBCXXABI_LIBCXX_INCLUDES "${LIBCXX_TARGET_INCLUDE_DIRECTORY}" CACHE STRING "" FORCE) - add_subdirectory("${llvm_SOURCE_DIR}/libcxxabi" "${llvm_BINARY_DIR}/libcxxabi") - link_libraries(cxxabi_static) - - # libcxx - set(LIBCXX_ABI_NAMESPACE "__ndk1" CACHE STRING "" FORCE) - set(LIBCXX_CXX_ABI "libcxxabi") - set(LIBCXX_INCLUDE_TESTS OFF) - set(LIBCXX_INCLUDE_BENCHMARKS OFF) - set(LIBCXX_INCLUDE_DOCS OFF) - set(LIBCXX_ENABLE_SHARED FALSE) - set(LIBCXX_ENABLE_STATIC TRUE) - set(LIBCXX_ENABLE_ASSERTIONS FALSE) - add_subdirectory("${llvm_SOURCE_DIR}/libcxx" "${llvm_BINARY_DIR}/libcxx") - set_target_properties(cxx-headers PROPERTIES INTERFACE_COMPILE_OPTIONS "-isystem${CMAKE_BINARY_DIR}/${LIBCXX_INSTALL_INCLUDE_DIR}") - link_libraries(cxx_static cxx-headers) endif() if (YUZU_USE_BUNDLED_VCPKG) @@ -326,11 +286,12 @@ find_package(Boost 1.79.0 REQUIRED context) find_package(enet 1.3 MODULE) find_package(fmt 9 REQUIRED) find_package(inih 52 MODULE COMPONENTS INIReader) -find_package(LLVM 17 MODULE COMPONENTS Demangle) +find_package(LLVM 17.0.2 MODULE COMPONENTS Demangle) find_package(lz4 REQUIRED) find_package(nlohmann_json 3.8 REQUIRED) find_package(Opus 1.3 MODULE) find_package(RenderDoc MODULE) +find_package(stb MODULE) find_package(VulkanMemoryAllocator CONFIG) find_package(ZLIB 1.2 REQUIRED) find_package(zstd 1.5 REQUIRED) @@ -397,6 +358,9 @@ function(set_yuzu_qt_components) if (ENABLE_QT_TRANSLATION) list(APPEND YUZU_QT_COMPONENTS2 LinguistTools) endif() + if (USE_DISCORD_PRESENCE) + list(APPEND YUZU_QT_COMPONENTS2 Network) + endif() set(YUZU_QT_COMPONENTS ${YUZU_QT_COMPONENTS2} PARENT_SCOPE) endfunction(set_yuzu_qt_components) diff --git a/CMakeModules/Findstb.cmake b/CMakeModules/Findstb.cmake new file mode 100644 index 000000000..bff998580 --- /dev/null +++ b/CMakeModules/Findstb.cmake @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: 2023 Alexandre Bouvier <contact@amb.tf> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +find_path(stb_image_INCLUDE_DIR stb_image.h PATH_SUFFIXES stb) +find_path(stb_image_resize_INCLUDE_DIR stb_image_resize.h PATH_SUFFIXES stb) +find_path(stb_image_write_INCLUDE_DIR stb_image_write.h PATH_SUFFIXES stb) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(stb + REQUIRED_VARS + stb_image_INCLUDE_DIR + stb_image_resize_INCLUDE_DIR + stb_image_write_INCLUDE_DIR +) + +if (stb_FOUND AND NOT TARGET stb::headers) + add_library(stb::headers INTERFACE IMPORTED) + set_property(TARGET stb::headers PROPERTY + INTERFACE_INCLUDE_DIRECTORIES + "${stb_image_INCLUDE_DIR}" + "${stb_image_resize_INCLUDE_DIR}" + "${stb_image_write_INCLUDE_DIR}" + ) +endif() + +mark_as_advanced( + stb_image_INCLUDE_DIR + stb_image_resize_INCLUDE_DIR + stb_image_write_INCLUDE_DIR +) diff --git a/dist/org.yuzu_emu.yuzu.desktop b/dist/org.yuzu_emu.yuzu.desktop index 008422863..51e191a8e 100644 --- a/dist/org.yuzu_emu.yuzu.desktop +++ b/dist/org.yuzu_emu.yuzu.desktop @@ -13,3 +13,4 @@ Exec=yuzu %f Categories=Game;Emulator;Qt; MimeType=application/x-nx-nro;application/x-nx-nso;application/x-nx-nsp;application/x-nx-xci; Keywords=Nintendo;Switch; +StartupWMClass=yuzu diff --git a/dist/qt_themes/default/style.qss b/dist/qt_themes/default/style.qss index 79960ee0c..921950c6c 100644 --- a/dist/qt_themes/default/style.qss +++ b/dist/qt_themes/default/style.qss @@ -120,6 +120,10 @@ QWidget#connectedControllers { background: transparent; } +QWidget#closeButtons { + background: transparent; +} + QWidget#playersSupported, QWidget#controllersSupported, QWidget#controllerSupported1, diff --git a/dist/qt_themes/qdarkstyle/style.qss b/dist/qt_themes/qdarkstyle/style.qss index 63a636ae6..328ac942f 100644 --- a/dist/qt_themes/qdarkstyle/style.qss +++ b/dist/qt_themes/qdarkstyle/style.qss @@ -1380,6 +1380,10 @@ QWidget#connectedControllers { background: transparent; } +QWidget#closeButtons { + background: transparent; +} + QWidget#playersSupported, QWidget#controllersSupported, QWidget#controllerSupported1, diff --git a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss index 0c53115f6..eb0889b13 100644 --- a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss +++ b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss @@ -2305,6 +2305,10 @@ QWidget#connectedControllers { background: transparent; } +QWidget#closeButtons { + background: transparent; +} + QWidget#playersSupported, QWidget#controllersSupported, QWidget#controllerSupported1, diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index c2009f34b..61baabb03 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -171,6 +171,10 @@ endif() add_library(stb stb/stb_dxt.cpp) target_include_directories(stb PUBLIC ./stb) +if (NOT TARGET stb::headers) + add_library(stb::headers ALIAS stb) +endif() + add_library(bc_decoder bc_decoder/bc_decoder.cpp) target_include_directories(bc_decoder PUBLIC ./bc_decoder) diff --git a/externals/nx_tzdb/CMakeLists.txt b/externals/nx_tzdb/CMakeLists.txt index 593786250..0fad24642 100644 --- a/externals/nx_tzdb/CMakeLists.txt +++ b/externals/nx_tzdb/CMakeLists.txt @@ -27,7 +27,7 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Windows" OR ANDROID) set(CAN_BUILD_NX_TZDB false) endif() -set(NX_TZDB_VERSION "220816") +set(NX_TZDB_VERSION "221202") set(NX_TZDB_ARCHIVE "${CMAKE_CURRENT_BINARY_DIR}/${NX_TZDB_VERSION}.zip") set(NX_TZDB_ROMFS_DIR "${CMAKE_CURRENT_BINARY_DIR}/nx_tzdb") diff --git a/externals/nx_tzdb/tzdb_to_nx b/externals/nx_tzdb/tzdb_to_nx -Subproject 212afa2394a74226dcf1b7996a570aae17debb6 +Subproject 0d17dd066d91f954a4c89d46dcb067eead6b1e4 diff --git a/externals/stb/stb_image.h b/externals/stb/stb_image.h new file mode 100644 index 000000000..5e807a0a6 --- /dev/null +++ b/externals/stb/stb_image.h @@ -0,0 +1,7987 @@ +/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb + no warranty implied; use at your own risk + + Do this: + #define STB_IMAGE_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + // i.e. it should look like this: + #include ... + #include ... + #include ... + #define STB_IMAGE_IMPLEMENTATION + #include "stb_image.h" + + You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. + And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. + +RECENT REVISION HISTORY: + + 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff + 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes + 2.26 (2020-07-13) many minor fixes + 2.25 (2020-02-02) fix warnings + 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically + 2.23 (2019-08-11) fix clang static analysis warning + 2.22 (2019-03-04) gif fixes, fix warnings + 2.21 (2019-02-25) fix typo in comment + 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. + + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine Simon Breuss (16-bit PNM) + John-Mark Allen + Carmelo J Fdez-Aguera + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski + Phil Jordan Dave Moore Roy Eltham + Hayaki Saito Nathan Reed Won Chun + Luke Graham Johan Duparc Nick Verigakis the Horde3D community + Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Eugene Golushkov Laurent Gomila Cort Stratton github:snagar + Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex + Cass Everitt Ryamond Barbiero github:grim210 + Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw + Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus + Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo + Julian Raschke Gregory Mullen Christian Floisand github:darealshinji + Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007 + Brad Weinberger Matvey Cherevko github:mosra + Luca Sas Alexander Veselov Zack Middleton [reserved] + Ryan C. Gordon [reserved] [reserved] + DO NOT ADD YOUR NAME HERE + + Jacko Dirks + + To add your name to the credits, pick a random blank space in the middle and fill it. + 80% of merge conflicts on stb PRs are due to people adding their name at the end + of the credits. +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data); +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// To query the width, height and component count of an image without having to +// decode the full file, you can use the stbi_info family of functions: +// +// int x,y,n,ok; +// ok = stbi_info(filename, &x, &y, &n); +// // returns ok=1 and sets x, y, n if image is a supported format, +// // 0 otherwise. +// +// Note that stb_image pervasively uses ints in its public API for sizes, +// including sizes of memory buffers. This is now part of the API and thus +// hard to change without causing breakage. As a result, the various image +// loaders all have certain limits on image size; these differ somewhat +// by format but generally boil down to either just under 2GB or just under +// 1GB. When the decoded image would be larger than this, stb_image decoding +// will fail. +// +// Additionally, stb_image will reject image files that have any of their +// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS, +// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit, +// the only way to have an image with such dimensions load correctly +// is for it to have a rather extreme aspect ratio. Either way, the +// assumption here is that such larger images are likely to be malformed +// or malicious. If you do need to load an image with individual dimensions +// larger than that, and it still fits in the overall size limit, you can +// #define STBI_MAX_DIMENSIONS on your own to be something larger. +// +// =========================================================================== +// +// UNICODE: +// +// If compiling for Windows and you wish to use Unicode filenames, compile +// with +// #define STBI_WINDOWS_UTF8 +// and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert +// Windows wchar_t filenames to utf8. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy-to-use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. +// +// Some secondary priorities arise directly from the first two, some of which +// provide more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image supports loading HDR images in general, and currently the Radiance +// .HDR file format specifically. You can still load any file through the existing +// interface; if you attempt to load an HDR file, it will be automatically remapped +// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). +// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// We optionally support converting iPhone-formatted PNGs (which store +// premultiplied BGRA) back to RGB, even though they're internally encoded +// differently. To enable this conversion, call +// stbi_convert_iphone_png_to_rgb(1). +// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. +// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// +// - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater +// than that size (in either width or height) without further processing. +// This is to let programs in the wild set an upper bound to prevent +// denial-of-service attacks on untrusted data, as one could generate a +// valid image of gigantic dimensions and force stb_image to allocate a +// huge block of memory and spend disproportionate time decoding it. By +// default this is set to (1 << 24), which is 16777216, but that's still +// very big. + +#ifndef STBI_NO_STDIO +#include <stdio.h> +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for desired_channels + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +#include <stdlib.h> +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef STBIDEF +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ + int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + +#ifdef STBI_WINDOWS_UTF8 +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + + #ifndef STBI_NO_STDIO + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + #endif +#endif + +#ifndef STBI_NO_HDR + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// on most compilers (and ALL modern mainstream compilers) this is threadsafe +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const *filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// as above, but only applies to images loaded on the thread that calls the function +// this function is only available if your compiler supports thread-local variables; +// calling it will fail to link if your compiler doesn't +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply); +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert); +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ + || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ + || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ + || defined(STBI_ONLY_ZLIB) + #ifndef STBI_ONLY_JPEG + #define STBI_NO_JPEG + #endif + #ifndef STBI_ONLY_PNG + #define STBI_NO_PNG + #endif + #ifndef STBI_ONLY_BMP + #define STBI_NO_BMP + #endif + #ifndef STBI_ONLY_PSD + #define STBI_NO_PSD + #endif + #ifndef STBI_ONLY_TGA + #define STBI_NO_TGA + #endif + #ifndef STBI_ONLY_GIF + #define STBI_NO_GIF + #endif + #ifndef STBI_ONLY_HDR + #define STBI_NO_HDR + #endif + #ifndef STBI_ONLY_PIC + #define STBI_NO_PIC + #endif + #ifndef STBI_ONLY_PNM + #define STBI_NO_PNM + #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include <stdarg.h> +#include <stddef.h> // ptrdiff_t on osx +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include <math.h> // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include <stdio.h> +#endif + +#ifndef STBI_ASSERT +#include <assert.h> +#define STBI_ASSERT(x) assert(x) +#endif + +#ifdef __cplusplus +#define STBI_EXTERN extern "C" +#else +#define STBI_EXTERN extern +#endif + + +#ifndef _MSC_VER + #ifdef __cplusplus + #define stbi_inline inline + #else + #define stbi_inline + #endif +#else + #define stbi_inline __forceinline +#endif + +#ifndef STBI_NO_THREAD_LOCALS + #if defined(__cplusplus) && __cplusplus >= 201103L + #define STBI_THREAD_LOCAL thread_local + #elif defined(__GNUC__) && __GNUC__ < 5 + #define STBI_THREAD_LOCAL __thread + #elif defined(_MSC_VER) + #define STBI_THREAD_LOCAL __declspec(thread) + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) + #define STBI_THREAD_LOCAL _Thread_local + #endif + + #ifndef STBI_THREAD_LOCAL + #if defined(__GNUC__) + #define STBI_THREAD_LOCAL __thread + #endif + #endif +#endif + +#if defined(_MSC_VER) || defined(__SYMBIAN32__) +typedef unsigned short stbi__uint16; +typedef signed short stbi__int16; +typedef unsigned int stbi__uint32; +typedef signed int stbi__int32; +#else +#include <stdint.h> +typedef uint16_t stbi__uint16; +typedef int16_t stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v) (void)(v) +#else +#define STBI_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz) malloc(sz) +#define STBI_REALLOC(p,newsz) realloc(p,newsz) +#define STBI_FREE(p) free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. +#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include <emmintrin.h> + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400 // not VC6 +#include <intrin.h> // __cpuid +static int stbi__cpuid3(void) +{ + int info[4]; + __cpuid(info,1); + return info[3]; +} +#else +static int stbi__cpuid3(void) +{ + int res; + __asm { + mov eax,1 + cpuid + mov res,edx + } + return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) +static int stbi__sse2_available(void) +{ + int info3 = stbi__cpuid3(); + return ((info3 >> 26) & 1) != 0; +} +#endif + +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) +static int stbi__sse2_available(void) +{ + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. + return 1; +} +#endif + +#endif +#endif + +// ARM NEON +#if defined(STBI_NO_SIMD) && defined(STBI_NEON) +#undef STBI_NEON +#endif + +#ifdef STBI_NEON +#include <arm_neon.h> +#ifdef _MSC_VER +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name +#else +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) +#endif +#endif + +#ifndef STBI_SIMD_ALIGN +#define STBI_SIMD_ALIGN(type, name) type name +#endif + +#ifndef STBI_MAX_DIMENSIONS +#define STBI_MAX_DIMENSIONS (1 << 24) +#endif + +/////////////////////////////////////////////// +// +// stbi__context struct and start_xxx functions + +// stbi__context structure is our basic context used by all images, so it +// contains all the IO context, plus some basic image information +typedef struct +{ + stbi__uint32 img_x, img_y; + int img_n, img_out_n; + + stbi_io_callbacks io; + void *io_user_data; + + int read_from_callbacks; + int buflen; + stbi_uc buffer_start[128]; + int callback_already_read; + + stbi_uc *img_buffer, *img_buffer_end; + stbi_uc *img_buffer_original, *img_buffer_original_end; +} stbi__context; + + +static void stbi__refill_buffer(stbi__context *s); + +// initialize a memory-decode context +static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) +{ + s->io.read = NULL; + s->read_from_callbacks = 0; + s->callback_already_read = 0; + s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; + s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; +} + +// initialize a callback-based context +static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) +{ + s->io = *c; + s->io_user_data = user; + s->buflen = sizeof(s->buffer_start); + s->read_from_callbacks = 1; + s->callback_already_read = 0; + s->img_buffer = s->img_buffer_original = s->buffer_start; + stbi__refill_buffer(s); + s->img_buffer_original_end = s->img_buffer_end; +} + +#ifndef STBI_NO_STDIO + +static int stbi__stdio_read(void *user, char *data, int size) +{ + return (int) fread(data,1,size,(FILE*) user); +} + +static void stbi__stdio_skip(void *user, int n) +{ + int ch; + fseek((FILE*) user, n, SEEK_CUR); + ch = fgetc((FILE*) user); /* have to read a byte to reset feof()'s flag */ + if (ch != EOF) { + ungetc(ch, (FILE *) user); /* push byte back onto stream if valid. */ + } +} + +static int stbi__stdio_eof(void *user) +{ + return feof((FILE*) user) || ferror((FILE *) user); +} + +static stbi_io_callbacks stbi__stdio_callbacks = +{ + stbi__stdio_read, + stbi__stdio_skip, + stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ + stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ + // conceptually rewind SHOULD rewind to the beginning of the stream, + // but we just rewind to the beginning of the initial buffer, because + // we only use it after doing 'test', which only ever looks at at most 92 bytes + s->img_buffer = s->img_buffer_original; + s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s); +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int stbi__hdr_test(stbi__context *s); +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_test(stbi__context *s); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int stbi__gif_test(stbi__context *s); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int stbi__pnm_test(stbi__context *s); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__pnm_is16(stbi__context *s); +#endif + +static +#ifdef STBI_THREAD_LOCAL +STBI_THREAD_LOCAL +#endif +const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ + return stbi__g_failure_reason; +} + +#ifndef STBI_NO_FAILURE_STRINGS +static int stbi__err(const char *str) +{ + stbi__g_failure_reason = str; + return 0; +} +#endif + +static void *stbi__malloc(size_t size) +{ + return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. +// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ + if (b < 0) return 0; + // now 0 <= b <= INT_MAX, hence also + // 0 <= INT_MAX - b <= INTMAX. + // And "a + b <= INT_MAX" (which might overflow) is the + // same as a <= INT_MAX - b (no overflow) + return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ + if (a < 0 || b < 0) return 0; + if (b == 0) return 1; // mul-by-0 is always safe + // portable way to check for no overflows in a*b + return a <= INT_MAX/b; +} + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} +#endif + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ + if (!stbi__mad2sizes_valid(a, b, add)) return NULL; + return stbi__malloc(a*b + add); +} +#endif + +static void *stbi__malloc_mad3(int a, int b, int c, int add) +{ + if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; + return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return stbi__malloc(a*b*c*d + add); +} +#endif + +// returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1 inclusive), 0 on overflow. +static int stbi__addints_valid(int a, int b) +{ + if ((a >= 0) != (b >= 0)) return 1; // a and b have different signs, so no overflow + if (a < 0 && b < 0) return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0. + return a <= INT_MAX - b; +} + +// returns 1 if the product of two signed shorts is valid, 0 on overflow. +static int stbi__mul2shorts_valid(short a, short b) +{ + if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow + if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid + if (b < 0) return a <= SHRT_MIN / b; // same as a * b >= SHRT_MIN + return a >= SHRT_MIN / b; +} + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS + #define stbi__err(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define stbi__err(x,y) stbi__err(y) +#else + #define stbi__err(x,y) stbi__err(x) +#endif + +#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ + STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load_global = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load_global = flag_true_if_should_flip; +} + +#ifndef STBI_THREAD_LOCAL +#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global +#else +static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set; + +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load_local = flag_true_if_should_flip; + stbi__vertically_flip_on_load_set = 1; +} + +#define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set \ + ? stbi__vertically_flip_on_load_local \ + : stbi__vertically_flip_on_load_global) +#endif // STBI_THREAD_LOCAL + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields + ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed + ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order + ri->num_channels = 0; + + // test the formats with a very explicit header first (at least a FOURCC + // or distinctive magic number first) + #ifndef STBI_NO_PNG + if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_BMP + if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_GIF + if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PSD + if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); + #else + STBI_NOTUSED(bpc); + #endif + #ifndef STBI_NO_PIC + if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); + #endif + + // then the formats that can end up attempting to load with just 1 or 2 + // bytes matching expectations; these are prone to false positives, so + // try them later + #ifndef STBI_NO_JPEG + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNM + if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); + return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + #ifndef STBI_NO_TGA + // test tga last because it's a crappy test! + if (stbi__tga_test(s)) + return stbi__tga_load(s,x,y,comp,req_comp, ri); + #endif + + return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi_uc *reduced; + + reduced = (stbi_uc *) stbi__malloc(img_len); + if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + + STBI_FREE(orig); + return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi__uint16 *enlarged; + + enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); + if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + + STBI_FREE(orig); + return enlarged; +} + +static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ + int row; + size_t bytes_per_row = (size_t)w * bytes_per_pixel; + stbi_uc temp[2048]; + stbi_uc *bytes = (stbi_uc *)image; + + for (row = 0; row < (h>>1); row++) { + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; + } + } +} + +#ifndef STBI_NO_GIF +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ + int slice; + int slice_size = w * h * bytes_per_pixel; + + stbi_uc *bytes = (stbi_uc *)image; + for (slice = 0; slice < z; ++slice) { + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; + } +} +#endif + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + + if (result == NULL) + return NULL; + + // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. + STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + + if (ri.bits_per_channel != 8) { + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; + } + + // @TODO: move stbi__convert_format to here + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + } + + return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + + if (result == NULL) + return NULL; + + // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. + STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + + if (ri.bits_per_channel != 16) { + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 16; + } + + // @TODO: move stbi__convert_format16 to here + // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + } + + return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ + if (stbi__vertically_flip_on_load && result != NULL) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); + } +} +#endif + +#ifndef STBI_NO_STDIO + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); +STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); +#endif + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) +{ + return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); +} +#endif + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) + wchar_t wMode[64]; + wchar_t wFilename[1024]; + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) + return 0; + + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) + return 0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != _wfopen_s(&f, wFilename, wMode)) + f = 0; +#else + f = _wfopen(wFilename, wMode); +#endif + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + unsigned char *result; + if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__uint16 *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + stbi__uint16 *result; + if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file_16(f,x,y,comp,req_comp); + fclose(f); + return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_mem(&s,buffer,len); + + result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); + if (stbi__vertically_flip_on_load) { + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + } + + return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); + if (hdr_data) + stbi__float_postprocess(hdr_data,x,y,comp,req_comp); + return hdr_data; + } + #endif + data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); + if (data) + return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + float *result; + FILE *f = stbi__fopen(filename, "rb"); + if (!f) return stbi__errpf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_file(&s,f); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! + +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + long pos = ftell(f); + int res; + stbi__context s; + stbi__start_file(&s,f); + res = stbi__hdr_test(&s); + fseek(f, pos, SEEK_SET); + return res; + #else + STBI_NOTUSED(f); + return 0; + #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(clbk); + STBI_NOTUSED(user); + return 0; + #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + STBI__SCAN_load=0, + STBI__SCAN_type, + STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ + int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); + s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original); + if (n == 0) { + // at end of file, treat same as if from memory, but need to handle case + // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file + s->read_from_callbacks = 0; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start+1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + if (s->read_from_callbacks) { + stbi__refill_buffer(s); + return *s->img_buffer++; + } + return 0; +} + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +stbi_inline static int stbi__at_eof(stbi__context *s) +{ + if (s->io.read) { + if (!(s->io.eof)(s->io_user_data)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->read_from_callbacks == 0) return 1; + } + + return s->img_buffer >= s->img_buffer_end; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) +// nothing +#else +static void stbi__skip(stbi__context *s, int n) +{ + if (n == 0) return; // already there! + if (n < 0) { + s->img_buffer = s->img_buffer_end; + return; + } + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + (s->io.skip)(s->io_user_data, n - blen); + return; + } + } + s->img_buffer += n; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM) +// nothing +#else +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res, count; + + memcpy(buffer, s->img_buffer, blen); + + count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); + res = (count == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } + + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; + } else + return 0; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static int stbi__get16be(stbi__context *s) +{ + int z = stbi__get8(s); + return (z << 8) + stbi__get8(s); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static stbi__uint32 stbi__get32be(stbi__context *s) +{ + stbi__uint32 z = stbi__get16be(s); + return (z << 16) + stbi__get16be(s); +} +#endif + +#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ + int z = stbi__get8(s); + return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ + stbi__uint32 z = stbi__get16le(s); + z += (stbi__uint32)stbi__get16le(s) << 16; + return z; +} +#endif + +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ + return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); + if (good == NULL) { + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; + default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion"); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; + default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion"); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output; + if (!data) return NULL; + output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); + } + } + if (n < comp) { + for (i=0; i < x*y; ++i) { + output[i*comp + n] = data[i*comp + n]/255.0f; + } + } + STBI_FREE(data); + return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x) ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output; + if (!data) return NULL; + output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + } + STBI_FREE(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder +// +// simple implementation +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - some SIMD kernels for common paths on targets with SSE2/NEON +// - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + stbi_uc fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + stbi__uint16 code[256]; + stbi_uc values[256]; + stbi_uc size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ + stbi__context *s; + stbi__huffman huff_dc[4]; + stbi__huffman huff_ac[4]; + stbi__uint16 dequant[4][64]; + stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + stbi_uc *data; + void *raw_data, *raw_coeff; + stbi_uc *linebuf; + short *coeff; // progressive only + int coeff_w, coeff_h; // number of 8x8 coefficient blocks + } img_comp[4]; + + stbi__uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int progressive; + int spec_start; + int spec_end; + int succ_high; + int succ_low; + int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag + int rgb; + + int scan_n, order[4]; + int restart_interval, todo; + +// kernels + void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); + void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); + stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ + int i,j,k=0; + unsigned int code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) { + for (j=0; j < count[i]; ++j) { + h->size[k++] = (stbi_uc) (i+1); + if(k >= 257) return stbi__err("bad size list","Corrupt JPEG"); + } + } + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (stbi__uint16) (code++); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (stbi_uc) i; + } + } + } + return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. +static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ + int i; + for (i=0; i < (1 << FAST_BITS); ++i) { + stbi_uc fast = h->fast[i]; + fast_ac[i] = 0; + if (fast < 255) { + int rs = h->values[fast]; + int run = (rs >> 4) & 15; + int magbits = rs & 15; + int len = h->size[fast]; + + if (magbits && len + magbits <= FAST_BITS) { + // magnitude code followed by receive_extend code + int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); + int m = 1 << (magbits - 1); + if (k < m) k += (~0U << magbits) + 1; + // if the result is small enough, we can fit it in fast_ac table + if (k >= -128 && k <= 127) + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); + } + } + } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ + do { + unsigned int b = j->nomore ? 0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + if(c < 0 || c >= 256) // symbol id out of bounds! + return -1; + STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// bias[n] = (-1<<n) + 1 +static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767}; + +// combined JPEG 'receive' and JPEG 'extend', since baseline +// always extends everything it receives. +stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n) +{ + unsigned int k; + int sgn; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing + + sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative) + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k + (stbi__jbias[n] & (sgn - 1)); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ + unsigned int k; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ + unsigned int k; + if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing + k = j->code_buffer; + j->code_buffer <<= 1; + --j->code_bits; + return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? +static const stbi_uc stbi__jpeg_dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) +{ + int diff,dc,k; + int t; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? stbi__extend_receive(j, t) : 0; + if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG"); + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + data[0] = (short) (dc * dequant[0]); + + // decode AC components, see JPEG spec + k = 1; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } + } while (k < 64); + return 1; +} + +static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ + int diff,dc; + int t; + if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + if (j->succ_high == 0) { + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + diff = t ? stbi__extend_receive(j, t) : 0; + + if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG"); + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + data[0] = (short) (dc * (1 << j->succ_low)); + } else { + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); + } + return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ + int k; + if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->succ_high == 0) { + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * (1 << shift)); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift)); + } + } + } while (k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ + int i,val[64],*v=val; + stbi_uc *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + // This is constructed to match our regular (generic) integer IDCT exactly. + __m128i row0, row1, row2, row3, row4, row5, row6, row7; + __m128i tmp; + + // dot product constant: even elems=x, odd elems=y + #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + + // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) + // out(1) = c1[even]*x + c1[odd]*y + #define dct_rot(out0,out1, x,y,c0,c1) \ + __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + + // out = in << 12 (in 16-bit, out 32-bit) + #define dct_widen(out, in) \ + __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + + // wide add + #define dct_wadd(out, a, b) \ + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + + // wide sub + #define dct_wsub(out, a, b) \ + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + + // butterfly a/b, add bias, then shift by "s" and pack + #define dct_bfly32o(out0, out1, a,b,bias,s) \ + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } + + // 8-bit interleave step (for transposes) + #define dct_interleave8(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) + + // 16-bit interleave step (for transposes) + #define dct_interleave16(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) + + #define dct_pass(bias,shift) \ + { \ + /* even part */ \ + dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ + dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0,row7, x0,x7,bias,shift); \ + dct_bfly32o(row1,row6, x1,x6,bias,shift); \ + dct_bfly32o(row2,row5, x2,x5,bias,shift); \ + dct_bfly32o(row3,row4, x3,x4,bias,shift); \ + } + + __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); + __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); + __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); + __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); + __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); + __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); + __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); + __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + + // rounding biases in column/row passes, see stbi__idct_block for explanation. + __m128i bias_0 = _mm_set1_epi32(512); + __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + + // load + row0 = _mm_load_si128((const __m128i *) (data + 0*8)); + row1 = _mm_load_si128((const __m128i *) (data + 1*8)); + row2 = _mm_load_si128((const __m128i *) (data + 2*8)); + row3 = _mm_load_si128((const __m128i *) (data + 3*8)); + row4 = _mm_load_si128((const __m128i *) (data + 4*8)); + row5 = _mm_load_si128((const __m128i *) (data + 5*8)); + row6 = _mm_load_si128((const __m128i *) (data + 6*8)); + row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + + // column pass + dct_pass(bias_0, 10); + + { + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); + + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); + + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); + } + + // row pass + dct_pass(bias_1, 17); + + { + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... + + // store + _mm_storel_epi64((__m128i *) out, p0); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p2); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p1); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p3); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); + } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + + int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); + int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); + int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); + int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); + int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); + int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); + int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); + int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); + int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); + int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); + int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); + int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ + int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ + int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ + int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ + int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ + int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ + { \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + } + +#define dct_pass(shiftop, shift) \ + { \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ + dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ + dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ + dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ + } + + // load + row0 = vld1q_s16(data + 0*8); + row1 = vld1q_s16(data + 1*8); + row2 = vld1q_s16(data + 2*8); + row3 = vld1q_s16(data + 3*8); + row4 = vld1q_s16(data + 4*8); + row5 = vld1q_s16(data + 5*8); + row6 = vld1q_s16(data + 6*8); + row7 = vld1q_s16(data + 7*8); + + // add DC bias + row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + + // column pass + dct_pass(vrshrn_n_s32, 10); + + // 16bit 8x8 transpose + { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); + + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); + + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 + } + + // row pass + // vrshrn_n_s32 only supports shifts up to 16, we need + // 17. so do a non-rounding shift of 16 first then follow + // up with a rounding shift by 1. + dct_pass(vshrn_n_s32, 16); + + { + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! + + // 8x8 8-bit transpose pass 1 + dct_trn8_8(p0, p1); + dct_trn8_8(p2, p3); + dct_trn8_8(p4, p5); + dct_trn8_8(p6, p7); + + // pass 2 + dct_trn8_16(p0, p2); + dct_trn8_16(p1, p3); + dct_trn8_16(p4, p6); + dct_trn8_16(p5, p7); + + // pass 3 + dct_trn8_32(p0, p4); + dct_trn8_32(p1, p5); + dct_trn8_32(p2, p6); + dct_trn8_32(p3, p7); + + // store + vst1_u8(out, p0); out += out_stride; + vst1_u8(out, p1); out += out_stride; + vst1_u8(out, p2); out += out_stride; + vst1_u8(out, p3); out += out_stride; + vst1_u8(out, p4); out += out_stride; + vst1_u8(out, p5); out += out_stride; + vst1_u8(out, p6); out += out_stride; + vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 + } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + +#define STBI__MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static stbi_uc stbi__get_marker(stbi__jpeg *j) +{ + stbi_uc x; + if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } + x = stbi__get8(j->s); + if (x != 0xff) return STBI__MARKER_none; + while (x == 0xff) + x = stbi__get8(j->s); // consume repeated 0xff fill bytes + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, stbi__jpeg_reset the entropy decoder and +// the dc prediction +static void stbi__jpeg_reset(stbi__jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; + j->marker = STBI__MARKER_none; + j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; + j->eob_run = 0; + // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int stbi__parse_entropy_coded_data(stbi__jpeg *z) +{ + stbi__jpeg_reset(z); + if (!z->progressive) { + if (z->scan_n == 1) { + int i,j; + STBI_SIMD_ALIGN(short, data[64]); + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + STBI_SIMD_ALIGN(short, data[64]); + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } else { + if (z->scan_n == 1) { + int i,j; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + if (z->spec_start == 0) { + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } else { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) + return 0; + } + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ + int i; + for (i=0; i < 64; ++i) + data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ + if (z->progressive) { + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } + } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ + int L; + switch (m) { + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = stbi__get16be(z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = stbi__get16be(z->s)-2; + while (L > 0) { + int q = stbi__get8(z->s); + int p = q >> 4, sixteen = (p != 0); + int t = q & 15,i; + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); + if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + + for (i=0; i < 64; ++i) + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 129 : 65); + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = stbi__get16be(z->s)-2; + while (L > 0) { + stbi_uc *v; + int sizes[16],i,n=0; + int q = stbi__get8(z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = stbi__get8(z->s); + n += sizes[i]; + } + if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values! + L -= 17; + if (tc == 0) { + if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < n; ++i) + v[i] = stbi__get8(z->s); + if (tc != 0) + stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); + L -= n; + } + return L==0; + } + + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; + if (ok) { + stbi__get8(z->s); // version + stbi__get16be(z->s); // flags0 + stbi__get16be(z->s); // flags1 + z->app14_color_transform = stbi__get8(z->s); // color transform + L -= 6; + } + } + + stbi__skip(z->s, L); + return 1; + } + + return stbi__err("unknown marker","Corrupt JPEG"); +} + +// after we see SOS +static int stbi__process_scan_header(stbi__jpeg *z) +{ + int i; + int Ls = stbi__get16be(z->s); + z->scan_n = stbi__get8(z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = stbi__get8(z->s), which; + int q = stbi__get8(z->s); + for (which = 0; which < z->s->img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s->img_n) return 0; // no match + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + + { + int aa; + z->spec_start = stbi__get8(z->s); + z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 + aa = stbi__get8(z->s); + z->succ_high = (aa >> 4); + z->succ_low = (aa & 15); + if (z->progressive) { + if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) + return stbi__err("bad SOS", "Corrupt JPEG"); + } else { + if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); + if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); + z->spec_end = 63; + } + } + + return 1; +} + +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ + int i; + for (i=0; i < ncomp; ++i) { + if (z->img_comp[i].raw_data) { + STBI_FREE(z->img_comp[i].raw_data); + z->img_comp[i].raw_data = NULL; + z->img_comp[i].data = NULL; + } + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ + stbi__context *s = z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + c = stbi__get8(s); + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + + z->rgb = 0; + for (i=0; i < s->img_n; ++i) { + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + } + + if (scan != STBI__SCAN_load) return 1; + + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios + // and I've never seen a non-corrupted JPEG file actually use them + for (i=0; i < s->img_n; ++i) { + if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG"); + if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG"); + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + // these sizes can't be more than 17 bits + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + // align blocks for idct using mmx/sse + z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + if (z->progressive) { + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. SOF) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) + +#define stbi__SOF_progressive(x) ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ + int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 + z->marker = STBI__MARKER_none; // initialize cached marker to empty + m = stbi__get_marker(z); + if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); + if (scan == STBI__SCAN_type) return 1; + m = stbi__get_marker(z); + while (!stbi__SOF(m)) { + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } + } + z->progressive = stbi__SOF_progressive(m); + if (!stbi__process_frame_header(z, scan)) return 0; + return 1; +} + +static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +{ + // some JPEGs have junk at end, skip over it but if we find what looks + // like a valid marker, resume there + while (!stbi__at_eof(j->s)) { + int x = stbi__get8(j->s); + while (x == 255) { // might be a marker + if (stbi__at_eof(j->s)) return STBI__MARKER_none; + x = stbi__get8(j->s); + if (x != 0x00 && x != 0xff) { + // not a stuffed zero or lead-in to another marker, looks + // like an actual marker, return it + return x; + } + // stuffed zero has x=0 now which ends the loop, meaning we go + // back to regular scan loop. + // repeated 0xff keeps trying to read the next byte of the marker. + } + } + return STBI__MARKER_none; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ + int m; + for (m = 0; m < 4; m++) { + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; + } + j->restart_interval = 0; + if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; + m = stbi__get_marker(j); + while (!stbi__EOI(m)) { + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + j->marker = stbi__skip_jpeg_junk_at_end(j); + // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + m = stbi__get_marker(j); + if (STBI__RESTART(m)) + m = stbi__get_marker(j); + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + m = stbi__get_marker(j); + } else { + if (!stbi__process_marker(j, m)) return 1; + m = stbi__get_marker(j); + } + } + if (j->progressive) + stbi__jpeg_finish(j); + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, + int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + stbi_uc *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = stbi__div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); + } + out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = stbi__div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i=0,t0,t1; + + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + // process groups of 8 pixels for as long as we can. + // note we can't handle the last pixel in a row in this loop + // because we need to handle the filter boundary conditions. + for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + __m128i zero = _mm_setzero_si128(); + __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); + __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); + __m128i farw = _mm_unpacklo_epi8(farb, zero); + __m128i nearw = _mm_unpacklo_epi8(nearb, zero); + __m128i diff = _mm_sub_epi16(farw, nearw); + __m128i nears = _mm_slli_epi16(nearw, 2); + __m128i curr = _mm_add_epi16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. + __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. + int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); +#endif + + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; + } + + t0 = t1; + t1 = 3*in_near[i] + in_far[i]; + out[i*2] = stbi__div16(3*t1 + t0 + 8); + + for (++i; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + STBI_NOTUSED(in_far); + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. + __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); + __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); + __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); + __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); + __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i+7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *) (out + 0), o0); + _mm_storeu_si128((__m128i *) (out + 16), o1); + out += 32; + } + } +#endif + +#ifdef STBI_NEON + // in this version, step=3 support would be easy to add. but is there demand? + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. + uint8x8_t signflip = vdup_n_u8(0x80); + int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); + int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); + int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); + int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); + + for (; i+7 < count; i += 8) { + // load + uint8x8_t y_bytes = vld1_u8(y + i); + uint8x8_t cr_bytes = vld1_u8(pcr + i); + uint8x8_t cb_bytes = vld1_u8(pcb + i); + int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); + int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + + // expand to s16 + int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); + int16x8_t crw = vshll_n_s8(cr_biased, 7); + int16x8_t cbw = vshll_n_s8(cb_biased, 7); + + // color transform + int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); + int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); + int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); + int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); + int16x8_t rws = vaddq_s16(yws, cr0); + int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); + int16x8_t bws = vaddq_s16(yws, cb1); + + // undo scaling, round, convert to byte + uint8x8x4_t o; + o.val[0] = vqrshrun_n_s16(rws, 4); + o.val[1] = vqrshrun_n_s16(gws, 4); + o.val[2] = vqrshrun_n_s16(bws, 4); + o.val[3] = vdup_n_u8(255); + + // store, interleaving r/g/b/a + vst4_u8(out, o); + out += 8*4; + } + } +#endif + + for (; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ + j->idct_block_kernel = stbi__idct_block; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 + if (stbi__sse2_available()) { + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; + } +#endif + +#ifdef STBI_NEON + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ + stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ + resample_row_func resample; + stbi_uc *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ + unsigned int t = x*y + 128; + return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n, is_rgb; + z->s->img_n = 0; // make stbi__cleanup_jpeg safe + + // validate req_comp + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + + // load a jpeg image from whichever source, but leave in YCbCr format + if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + + is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); + + if (z->s->img_n == 3 && n < 3 && !is_rgb) + decode_n = 1; + else + decode_n = z->s->img_n; + + // nothing to do if no components requested; check this now to avoid + // accessing uninitialized coutput[0] later + if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; } + + // resample and color-convert + { + int k; + unsigned int i,j; + stbi_uc *output; + stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL }; + + stbi__resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); + if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s->img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; + else r->resample = stbi__resample_row_generic; + } + + // can't error after this so, this is safe + output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); + if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s->img_y; ++j) { + stbi_uc *out = output + n * z->s->img_x * j; + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? r->line1 : r->line0, + y_bot ? r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + stbi_uc *y = coutput[0]; + if (z->s->img_n == 3) { + if (is_rgb) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = y[i]; + out[1] = coutput[1][i]; + out[2] = coutput[2][i]; + out[3] = 255; + out += n; + } + } else { + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] = 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else + for (i=0; i < z->s->img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; } + } + } + } + stbi__cleanup_jpeg(z); + *out_x = z->s->img_x; + *out_y = z->s->img_y; + if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output + return output; + } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + unsigned char* result; + stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + if (!j) return stbi__errpuc("outofmem", "Out of memory"); + memset(j, 0, sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); + j->s = s; + stbi__setup_jpeg(j); + result = load_jpeg_image(j, x,y,comp,req_comp); + STBI_FREE(j); + return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ + int r; + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + if (!j) return stbi__err("outofmem", "Out of memory"); + memset(j, 0, sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); + stbi__rewind(s); + STBI_FREE(j); + return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ + if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { + stbi__rewind( j->s ); + return 0; + } + if (x) *x = j->s->img_x; + if (y) *y = j->s->img_y; + if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; + return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ + int result; + stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + if (!j) return stbi__err("outofmem", "Out of memory"); + memset(j, 0, sizeof(stbi__jpeg)); + j->s = s; + result = stbi__jpeg_info_raw(j, x, y, comp); + STBI_FREE(j); + return result; +} +#endif + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) +#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + stbi__uint16 fast[1 << STBI__ZFAST_BITS]; + stbi__uint16 firstcode[16]; + int maxcode[17]; + stbi__uint16 firstsymbol[16]; + stbi_uc size[STBI__ZNSYMS]; + stbi__uint16 value[STBI__ZNSYMS]; +} stbi__zhuffman; + +stbi_inline static int stbi__bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +stbi_inline static int stbi__bit_reverse(int v, int bits) +{ + STBI_ASSERT(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 11 bits, bit reverse and shift away 5 + return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 0, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + if (sizes[i] > (1 << i)) + return stbi__err("bad sizes", "Corrupt PNG"); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (stbi__uint16) code; + z->firstsymbol[i] = (stbi__uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); + z->size [c] = (stbi_uc ) s; + z->value[c] = (stbi__uint16) i; + if (s <= STBI__ZFAST_BITS) { + int j = stbi__bit_reverse(next_code[s],s); + while (j < (1 << STBI__ZFAST_BITS)) { + z->fast[j] = fastv; + j += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + stbi_uc *zbuffer, *zbuffer_end; + int num_bits; + stbi__uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static int stbi__zeof(stbi__zbuf *z) +{ + return (z->zbuffer >= z->zbuffer_end); +} + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ + return stbi__zeof(z) ? 0 : *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ + do { + if (z->code_buffer >= (1U << z->num_bits)) { + z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */ + return; + } + z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) stbi__fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s,k; + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = stbi__bit_reverse(a->code_buffer, 16); + for (s=STBI__ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s >= 16) return -1; // invalid code! + // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere! + if (z->size[b] != s) return -1; // was originally an assert, but report failure instead. + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s; + if (a->num_bits < 16) { + if (stbi__zeof(a)) { + return -1; /* report error for unexpected end of data. */ + } + stbi__fill_bits(a); + } + b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; + if (b) { + s = b >> 9; + a->code_buffer >>= s; + a->num_bits -= s; + return b & 511; + } + return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes +{ + char *q; + unsigned int cur, limit, old_limit; + z->zout = zout; + if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); + cur = (unsigned int) (z->zout - z->zout_start); + limit = old_limit = (unsigned) (z->zout_end - z->zout_start); + if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory"); + while (cur + n > limit) { + if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); + limit *= 2; + } + q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); + STBI_NOTUSED(old_limit); + if (q == NULL) return stbi__err("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static const int stbi__zlength_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ + char *zout = a->zout; + for(;;) { + int z = stbi__zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 0; + zout = a->zout; + } + *zout++ = (char) z; + } else { + stbi_uc *p; + int len,dist; + if (z == 256) { + a->zout = zout; + return 1; + } + if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data + z -= 257; + len = stbi__zlength_base[z]; + if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); + z = stbi__zhuffman_decode(a, &a->z_distance); + if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data + dist = stbi__zdist_base[z]; + if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (zout + len > a->zout_end) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); + if (dist == 1) { // run of one byte; common in images. + stbi_uc v = *p; + if (len) { do *zout++ = v; while (--len); } + } else { + if (len) { do *zout++ = *p++; while (--len); } + } + } + } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ + static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + stbi__zhuffman z_codelength; + stbi_uc lencodes[286+32+137];//padding for maximum single op + stbi_uc codelength_sizes[19]; + int i,n; + + int hlit = stbi__zreceive(a,5) + 257; + int hdist = stbi__zreceive(a,5) + 1; + int hclen = stbi__zreceive(a,4) + 4; + int ntot = hlit + hdist; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = stbi__zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; + } + if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < ntot) { + int c = stbi__zhuffman_decode(a, &z_codelength); + if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); + if (c < 16) + lencodes[n++] = (stbi_uc) c; + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) { + c = stbi__zreceive(a,3)+3; + } else if (c == 18) { + c = stbi__zreceive(a,7)+11; + } else { + return stbi__err("bad codelengths", "Corrupt PNG"); + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); + n += c; + } + } + if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); + if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ + stbi_uc header[4]; + int len,nlen,k; + if (a->num_bits & 7) + stbi__zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check + a->code_buffer >>= 8; + a->num_bits -= 8; + } + if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG"); + // now fill header the normal way + while (k < 4) + header[k++] = stbi__zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!stbi__zexpand(a, a->zout, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ + int cmf = stbi__zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = stbi__zget8(a); + if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... but who cares, we fully buffer output + return 1; +} + +static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = +{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; + for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; + for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; + for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; + + for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!stbi__parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = stbi__zreceive(a,1); + type = stbi__zreceive(a,2); + if (type == 0) { + if (!stbi__parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; + } else { + if (!stbi__compute_huffman_codes(a)) return 0; + } + if (!stbi__parse_huffman_block(a)) return 0; + } + } while (!final); + return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer+len; + if (stbi__do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} +#endif + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ + stbi__uint32 length; + stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ + stbi__pngchunk c; + c.length = stbi__get32be(s); + c.type = stbi__get32be(s); + return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ + static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi__context *s; + stbi_uc *idata, *expanded, *out; + int depth; +} stbi__png; + + +enum { + STBI__F_none=0, + STBI__F_sub=1, + STBI__F_up=2, + STBI__F_avg=3, + STBI__F_paeth=4, + // synthetic filters used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static stbi_uc first_row_filter[5] = +{ + STBI__F_none, + STBI__F_sub, + STBI__F_none, + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static int stbi__paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + +static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ + int bytes = (depth == 16? 2 : 1); + stbi__context *s = a->s; + stbi__uint32 i,j,stride = x*out_n*bytes; + stbi__uint32 img_len, img_width_bytes; + int k; + int img_n = s->img_n; // copy it into a local for later + + int output_bytes = out_n*bytes; + int filter_bytes = img_n*bytes; + int width = x; + + STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + if (!a->out) return stbi__err("outofmem", "Out of memory"); + + if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); + img_width_bytes = (((img_n * x * depth) + 7) >> 3); + img_len = (img_width_bytes + 1) * y; + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. + if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; + int filter = *raw++; + + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG"); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; + } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } + } + + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } + + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. + case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. + if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } + } + + // we make a separate pass to expand bits to pixels; for performance, + // this could run two scanlines behind the above code, so it won't + // intefere with filtering but will still be in the cache. + if (depth < 8) { + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. + // so we need to explicitly clamp the final ones + + if (depth == 4) { + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); + } + if (k > 0) *cur++ = scale * ((*in >> 4) ); + } else if (depth == 2) { + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); + } + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); + } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); + } + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; + if (img_n == 1) { + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; + } + } else { + STBI_ASSERT(img_n == 3); + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; + } + } + } + } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. + stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } + } + + return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ + int bytes = (depth == 16 ? 2 : 1); + int out_bytes = out_n * bytes; + stbi_uc *final; + int p; + if (!interlaced) + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + + // de-interlacing + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + if (!final) return stbi__err("outofmem", "Out of memory"); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } + } + a->out = final; + + return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi__uint16 *p = (stbi__uint16*) z->out; + + // compute color-based transparency, assuming we've + // already got 65535 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 65535); + p += 2; + } + } else { + for (i = 0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ + stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; + stbi_uc *p, *temp_out, *orig = a->out; + + p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); + if (p == NULL) return stbi__err("outofmem", "Out of memory"); + + // between here and free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + STBI_FREE(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi__unpremultiply_on_load_global = 0; +static int stbi__de_iphone_flag_global = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag_global = flag_true_if_should_convert; +} + +#ifndef STBI_THREAD_LOCAL +#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global +#define stbi__de_iphone_flag stbi__de_iphone_flag_global +#else +static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set; +static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set; + +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply; + stbi__unpremultiply_on_load_set = 1; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag_local = flag_true_if_should_convert; + stbi__de_iphone_flag_set = 1; +} + +#define stbi__unpremultiply_on_load (stbi__unpremultiply_on_load_set \ + ? stbi__unpremultiply_on_load_local \ + : stbi__unpremultiply_on_load_global) +#define stbi__de_iphone_flag (stbi__de_iphone_flag_set \ + ? stbi__de_iphone_flag_local \ + : stbi__de_iphone_flag_global) +#endif // STBI_THREAD_LOCAL + +static void stbi__de_iphone(stbi__png *z) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + STBI_ASSERT(s->img_out_n == 4); + if (stbi__unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + stbi_uc a = p[3]; + stbi_uc t = p[0]; + if (a) { + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ + stbi_uc palette[1024], pal_img_n=0; + stbi_uc has_trans=0, tc[3]={0}; + stbi__uint16 tc16[3]; + stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, color=0, is_iphone=0; + stbi__context *s = z->s; + + z->expanded = NULL; + z->idata = NULL; + z->out = NULL; + + if (!stbi__check_png_header(s)) return 0; + + if (scan == STBI__SCAN_type) return 1; + + for (;;) { + stbi__pngchunk c = stbi__get_chunk_header(s); + switch (c.type) { + case STBI__PNG_TYPE('C','g','B','I'): + is_iphone = 1; + stbi__skip(s, c.length); + break; + case STBI__PNG_TYPE('I','H','D','R'): { + int comp,filter; + if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); + s->img_x = stbi__get32be(s); + s->img_y = stbi__get32be(s); + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. + s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + } + // even with SCAN_header, have to scan to see if we have a tRNS + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now. + if (scan == STBI__SCAN_header) { ++s->img_n; return 1; } + if (z->depth == 16) { + for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { + // header scan definitely stops at first IDAT + if (pal_img_n) + s->img_n = pal_img_n; + return 1; + } + if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes"); + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if (has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; + } + STBI_FREE(z->expanded); z->expanded = NULL; + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + return 1; + } + + default: + // if critical, fail + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX PNG chunk not known"; + invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); + invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); + invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); + invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); + #endif + return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); + } + stbi__skip(s, c.length); + break; + } + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ + void *result=NULL; + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { + if (p->depth <= 8) + ri->bits_per_channel = 8; + else if (p->depth == 16) + ri->bits_per_channel = 16; + else + return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth"); + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s->img_out_n) { + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + p->s->img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s->img_x; + *y = p->s->img_y; + if (n) *n = p->s->img_n; + } + STBI_FREE(p->out); p->out = NULL; + STBI_FREE(p->expanded); p->expanded = NULL; + STBI_FREE(p->idata); p->idata = NULL; + + return result; +} + +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi__png p; + p.s = s; + return stbi__do_png(&p, x,y,comp,req_comp, ri); +} + +static int stbi__png_test(stbi__context *s) +{ + int r; + r = stbi__check_png_header(s); + stbi__rewind(s); + return r; +} + +static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) +{ + if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { + stbi__rewind( p->s ); + return 0; + } + if (x) *x = p->s->img_x; + if (y) *y = p->s->img_y; + if (comp) *comp = p->s->img_n; + return 1; +} + +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__png p; + p.s = s; + return stbi__png_info_raw(&p, x, y, comp); +} + +static int stbi__png_is16(stbi__context *s) +{ + stbi__png p; + p.s = s; + if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) + return 0; + if (p.depth != 16) { + stbi__rewind(p.s); + return 0; + } + return 1; +} +#endif + +// Microsoft/Windows BMP image + +#ifndef STBI_NO_BMP +static int stbi__bmp_test_raw(stbi__context *s) +{ + int r; + int sz; + if (stbi__get8(s) != 'B') return 0; + if (stbi__get8(s) != 'M') return 0; + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + stbi__get32le(s); // discard data offset + sz = stbi__get32le(s); + r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); + return r; +} + +static int stbi__bmp_test(stbi__context *s) +{ + int r = stbi__bmp_test_raw(s); + stbi__rewind(s); + return r; +} + + +// returns 0..31 for the highest set bit +static int stbi__high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) { n += 16; z >>= 16; } + if (z >= 0x00100) { n += 8; z >>= 8; } + if (z >= 0x00010) { n += 4; z >>= 4; } + if (z >= 0x00004) { n += 2; z >>= 2; } + if (z >= 0x00002) { n += 1;/* >>= 1;*/ } + return n; +} + +static int stbi__bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = (a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. +static int stbi__shiftsigned(unsigned int v, int shift, int bits) +{ + static unsigned int mul_table[9] = { + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + }; + static unsigned int shift_table[9] = { + 0, 0,0,1,0,2,4,6,0, + }; + if (shift < 0) + v <<= -shift; + else + v >>= shift; + STBI_ASSERT(v < 256); + v >>= (8-bits); + STBI_ASSERT(bits >= 0 && bits <= 8); + return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ + int bpp, offset, hsz; + unsigned int mr,mg,mb,ma, all_a; + int extra_read; +} stbi__bmp_data; + +static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress) +{ + // BI_BITFIELDS specifies masks explicitly, don't override + if (compress == 3) + return 1; + + if (compress == 0) { + if (info->bpp == 16) { + info->mr = 31u << 10; + info->mg = 31u << 5; + info->mb = 31u << 0; + } else if (info->bpp == 32) { + info->mr = 0xffu << 16; + info->mg = 0xffu << 8; + info->mb = 0xffu << 0; + info->ma = 0xffu << 24; + info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 + } else { + // otherwise, use defaults, which is all-0 + info->mr = info->mg = info->mb = info->ma = 0; + } + return 1; + } + return 0; // error +} + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ + int hsz; + if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + info->offset = stbi__get32le(s); + info->hsz = hsz = stbi__get32le(s); + info->mr = info->mg = info->mb = info->ma = 0; + info->extra_read = 14; + + if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP"); + + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = stbi__get16le(s); + s->img_y = stbi__get16le(s); + } else { + s->img_x = stbi__get32le(s); + s->img_y = stbi__get32le(s); + } + if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); + info->bpp = stbi__get16le(s); + if (hsz != 12) { + int compress = stbi__get32le(s); + if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes + if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel + stbi__get32le(s); // discard sizeof + stbi__get32le(s); // discard hres + stbi__get32le(s); // discard vres + stbi__get32le(s); // discard colorsused + stbi__get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + } + if (info->bpp == 16 || info->bpp == 32) { + if (compress == 0) { + stbi__bmp_set_mask_defaults(info, compress); + } else if (compress == 3) { + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->extra_read += 12; + // not documented, but generated by photoshop and handled by mspaint + if (info->mr == info->mg && info->mg == info->mb) { + // ?!?!? + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else { + // V4/V5 header + int i; + if (hsz != 108 && hsz != 124) + return stbi__errpuc("bad BMP", "bad BMP"); + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->ma = stbi__get32le(s); + if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs + stbi__bmp_set_mask_defaults(info, compress); + stbi__get32le(s); // discard color space + for (i=0; i < 12; ++i) + stbi__get32le(s); // discard color space parameters + if (hsz == 124) { + stbi__get32le(s); // discard rendering intent + stbi__get32le(s); // discard offset of profile data + stbi__get32le(s); // discard size of profile data + stbi__get32le(s); // discard reserved + } + } + } + return (void *) 1; +} + + +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + unsigned int mr=0,mg=0,mb=0,ma=0, all_a; + stbi_uc pal[256][4]; + int psize=0,i,j,width; + int flip_vertically, pad, target; + stbi__bmp_data info; + STBI_NOTUSED(ri); + + info.all_a = 255; + if (stbi__bmp_parse_header(s, &info) == NULL) + return NULL; // error code already set + + flip_vertically = ((int) s->img_y) > 0; + s->img_y = abs((int) s->img_y); + + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + mr = info.mr; + mg = info.mg; + mb = info.mb; + ma = info.ma; + all_a = info.all_a; + + if (info.hsz == 12) { + if (info.bpp < 24) + psize = (info.offset - info.extra_read - 24) / 3; + } else { + if (info.bpp < 16) + psize = (info.offset - info.extra_read - info.hsz) >> 2; + } + if (psize == 0) { + // accept some number of extra bytes after the header, but if the offset points either to before + // the header ends or implies a large amount of extra data, reject the file as malformed + int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original); + int header_limit = 1024; // max we actually read is below 256 bytes currently. + int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size. + if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) { + return stbi__errpuc("bad header", "Corrupt BMP"); + } + // we established that bytes_read_so_far is positive and sensible. + // the first half of this test rejects offsets that are either too small positives, or + // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn + // ensures the number computed in the second half of the test can't overflow. + if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) { + return stbi__errpuc("bad offset", "Corrupt BMP"); + } else { + stbi__skip(s, info.offset - bytes_read_so_far); + } + } + + if (info.bpp == 24 && ma == 0xff000000) + s->img_n = 3; + else + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + + // sanity-check size + if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "Corrupt BMP"); + + out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + if (info.bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + if (info.hsz != 12) stbi__get8(s); + pal[i][3] = 255; + } + stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); + if (info.bpp == 1) width = (s->img_x + 7) >> 3; + else if (info.bpp == 4) width = (s->img_x + 1) >> 1; + else if (info.bpp == 8) width = s->img_x; + else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + if (info.bpp == 1) { + for (j=0; j < (int) s->img_y; ++j) { + int bit_offset = 7, v = stbi__get8(s); + for (i=0; i < (int) s->img_x; ++i) { + int color = (v>>bit_offset)&0x1; + out[z++] = pal[color][0]; + out[z++] = pal[color][1]; + out[z++] = pal[color][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + if((--bit_offset) < 0) { + bit_offset = 7; + v = stbi__get8(s); + } + } + stbi__skip(s, pad); + } + } else { + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=stbi__get8(s),v2=0; + if (info.bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (info.bpp == 8) ? stbi__get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + stbi__skip(s, pad); + } + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + stbi__skip(s, info.offset - info.extra_read - info.hsz); + if (info.bpp == 24) width = 3 * s->img_x; + else if (info.bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (info.bpp == 24) { + easy = 1; + } else if (info.bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + // right shift amt to put high bit in position #7 + rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); + gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); + bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); + ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); + if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + unsigned char a; + out[z+2] = stbi__get8(s); + out[z+1] = stbi__get8(s); + out[z+0] = stbi__get8(s); + z += 3; + a = (easy == 2 ? stbi__get8(s) : 255); + all_a |= a; + if (target == 4) out[z++] = a; + } + } else { + int bpp = info.bpp; + for (i=0; i < (int) s->img_x; ++i) { + stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); + unsigned int a; + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); + a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255); + all_a |= a; + if (target == 4) out[z++] = STBI__BYTECAST(a); + } + } + stbi__skip(s, pad); + } + } + + // if alpha channel is all 0s, replace with all 255s + if (target == 4 && all_a == 0) + for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) + out[i] = 255; + + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i]; p1[i] = p2[i]; p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + return out; +} +#endif + +// Targa Truevision - TGA +// by Jonathan Dummer +#ifndef STBI_NO_TGA +// returns STBI_rgb or whatever, 0 on error +static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) +{ + // only RGB or RGBA (incl. 16bit) or grey allowed + if (is_rgb16) *is_rgb16 = 0; + switch(bits_per_pixel) { + case 8: return STBI_grey; + case 16: if(is_grey) return STBI_grey_alpha; + // fallthrough + case 15: if(is_rgb16) *is_rgb16 = 1; + return STBI_rgb; + case 24: // fallthrough + case 32: return bits_per_pixel/8; + default: return 0; + } +} + +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) +{ + int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; + int sz, tga_colormap_type; + stbi__get8(s); // discard Offset + tga_colormap_type = stbi__get8(s); // colormap type + if( tga_colormap_type > 1 ) { + stbi__rewind(s); + return 0; // only RGB or indexed allowed + } + tga_image_type = stbi__get8(s); // image type + if ( tga_colormap_type == 1 ) { // colormapped (paletted) image + if (tga_image_type != 1 && tga_image_type != 9) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip image x and y origin + tga_colormap_bpp = sz; + } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE + if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { + stbi__rewind(s); + return 0; // only RGB or grey allowed, +/- RLE + } + stbi__skip(s,9); // skip colormap specification and image x/y origin + tga_colormap_bpp = 0; + } + tga_w = stbi__get16le(s); + if( tga_w < 1 ) { + stbi__rewind(s); + return 0; // test width + } + tga_h = stbi__get16le(s); + if( tga_h < 1 ) { + stbi__rewind(s); + return 0; // test height + } + tga_bits_per_pixel = stbi__get8(s); // bits per pixel + stbi__get8(s); // ignore alpha bits + if (tga_colormap_bpp != 0) { + if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { + // when using a colormap, tga_bits_per_pixel is the size of the indexes + // I don't think anything but 8 or 16bit indexes makes sense + stbi__rewind(s); + return 0; + } + tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); + } else { + tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); + } + if(!tga_comp) { + stbi__rewind(s); + return 0; + } + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp; + return 1; // seems to have passed everything +} + +static int stbi__tga_test(stbi__context *s) +{ + int res = 0; + int sz, tga_color_type; + stbi__get8(s); // discard Offset + tga_color_type = stbi__get8(s); // color type + if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed + sz = stbi__get8(s); // image type + if ( tga_color_type == 1 ) { // colormapped (paletted) image + if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + stbi__skip(s,4); // skip image x and y origin + } else { // "normal" image w/o colormap + if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE + stbi__skip(s,9); // skip colormap specification and image x/y origin + } + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height + sz = stbi__get8(s); // bits per pixel + if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + + res = 1; // if we got this far, everything's good and we can return 1 instead of 0 + +errorEnd: + stbi__rewind(s); + return res; +} + +// read 16bit value and convert to 24bit RGB +static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ + stbi__uint16 px = (stbi__uint16)stbi__get16le(s); + stbi__uint16 fiveBitMask = 31; + // we have 3 channels with 5bits each + int r = (px >> 10) & fiveBitMask; + int g = (px >> 5) & fiveBitMask; + int b = px & fiveBitMask; + // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later + out[0] = (stbi_uc)((r * 255)/31); + out[1] = (stbi_uc)((g * 255)/31); + out[2] = (stbi_uc)((b * 255)/31); + + // some people claim that the most significant bit might be used for alpha + // (possibly if an alpha-bit is set in the "image descriptor byte") + // but that only made 16bit test images completely translucent.. + // so let's treat all 15 and 16bit TGAs as RGB with no alpha. +} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + // read in the TGA header stuff + int tga_offset = stbi__get8(s); + int tga_indexed = stbi__get8(s); + int tga_image_type = stbi__get8(s); + int tga_is_RLE = 0; + int tga_palette_start = stbi__get16le(s); + int tga_palette_len = stbi__get16le(s); + int tga_palette_bits = stbi__get8(s); + int tga_x_origin = stbi__get16le(s); + int tga_y_origin = stbi__get16le(s); + int tga_width = stbi__get16le(s); + int tga_height = stbi__get16le(s); + int tga_bits_per_pixel = stbi__get8(s); + int tga_comp, tga_rgb16=0; + int tga_inverted = stbi__get8(s); + // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) + // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4] = {0}; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + STBI_NOTUSED(ri); + STBI_NOTUSED(tga_x_origin); // @TODO + STBI_NOTUSED(tga_y_origin); // @TODO + + if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + // do a tiny bit of precessing + if ( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // If I'm paletted, then I'll use the number of bits from the palette + if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); + else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); + + if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency + return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + + // tga info + *x = tga_width; + *y = tga_height; + if (comp) *comp = tga_comp; + + if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) + return stbi__errpuc("too large", "Corrupt TGA"); + + tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); + if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); + + // skip to the data's starting position (offset usually = 0) + stbi__skip(s, tga_offset ); + + if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { + for (i=0; i < tga_height; ++i) { + int row = tga_inverted ? tga_height -i - 1 : i; + stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; + stbi__getn(s, tga_row, tga_width * tga_comp); + } + } else { + // do I need to load a palette? + if ( tga_indexed) + { + if (tga_palette_len == 0) { /* you have to have at least one entry! */ + STBI_FREE(tga_data); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + + // any data to skip? (offset usually = 0) + stbi__skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); + if (!tga_palette) { + STBI_FREE(tga_data); + return stbi__errpuc("outofmem", "Out of memory"); + } + if (tga_rgb16) { + stbi_uc *pal_entry = tga_palette; + STBI_ASSERT(tga_comp == STBI_rgb); + for (i=0; i < tga_palette_len; ++i) { + stbi__tga_read_rgb16(s, pal_entry); + pal_entry += tga_comp; + } + } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { + STBI_FREE(tga_data); + STBI_FREE(tga_palette); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + } + // load the data + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = stbi__get8(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in index, then perform the lookup + int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s); + if ( pal_idx >= tga_palette_len ) { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_comp; + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else if(tga_rgb16) { + STBI_ASSERT(tga_comp == STBI_rgb); + stbi__tga_read_rgb16(s, raw_data); + } else { + // read in the data raw + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = stbi__get8(s); + } + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + + // copy data + for (j = 0; j < tga_comp; ++j) + tga_data[i*tga_comp+j] = raw_data[j]; + + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * tga_comp; + int index2 = (tga_height - 1 - j) * tga_width * tga_comp; + for (i = tga_width * tga_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + STBI_FREE( tga_palette ); + } + } + + // swap RGB - if the source data was RGB16, it already is in the right order + if (tga_comp >= 3 && !tga_rgb16) + { + unsigned char* tga_pixel = tga_data; + for (i=0; i < tga_width * tga_height; ++i) + { + unsigned char temp = tga_pixel[0]; + tga_pixel[0] = tga_pixel[2]; + tga_pixel[2] = temp; + tga_pixel += tga_comp; + } + } + + // convert to target component count + if (req_comp && req_comp != tga_comp) + tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... [8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + STBI_NOTUSED(tga_palette_start); + // OK, done + return tga_data; +} +#endif + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s) +{ + int r = (stbi__get32be(s) == 0x38425053); + stbi__rewind(s); + return r; +} + +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ + int count, nleft, len; + + count = 0; + while ((nleft = pixelCount - count) > 0) { + len = stbi__get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + if (len > nleft) return 0; // corrupt data + count += len; + while (len) { + *p = stbi__get8(s); + p += 4; + len--; + } + } else if (len > 128) { + stbi_uc val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len = 257 - len; + if (len > nleft) return 0; // corrupt data + val = stbi__get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + + return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + int pixelCount; + int channelCount, compression; + int channel, i; + int bitdepth; + int w,h; + stbi_uc *out; + STBI_NOTUSED(ri); + + // Check identifier + if (stbi__get32be(s) != 0x38425053) // "8BPS" + return stbi__errpuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (stbi__get16be(s) != 1) + return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + stbi__skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) + return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = stbi__get32be(s); + w = stbi__get32be(s); + + if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + // Make sure the depth is 8 bits. + bitdepth = stbi__get16be(s); + if (bitdepth != 8 && bitdepth != 16) + return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (stbi__get16be(s) != 3) + return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + stbi__skip(s,stbi__get32be(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + stbi__skip(s, stbi__get32be(s) ); + + // Skip the reserved data. + stbi__skip(s, stbi__get32be(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = stbi__get16be(s); + if (compression > 1) + return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + + // Check size + if (!stbi__mad3sizes_valid(4, w, h, 0)) + return stbi__errpuc("too large", "Corrupt PSD"); + + // Create the destination image. + + if (!compression && bitdepth == 16 && bpc == 16) { + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; + } else + out = (stbi_uc *) stbi__malloc(4 * w*h); + + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. + if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceded by a 2-byte data count for each row in the data, + // which we're going to just skip. + stbi__skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + stbi_uc *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++, p += 4) + *p = (channel == 3 ? 255 : 0); + } else { + // Read the RLE data. + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + if (channel >= channelCount) { + // Fill this channel with default data. + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; + } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 255 : 0; + for (i = 0; i < pixelCount; i++, p += 4) + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } + } + } + } + } + + // remove weird white matte from PSD + if (channelCount >= 4) { + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } + } + } + } + + // convert to desired output format + if (req_comp && req_comp != 4) { + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + if (comp) *comp = 4; + *y = h; + *x = w; + + return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +#ifndef STBI_NO_PIC +static int stbi__pic_is4(stbi__context *s,const char *str) +{ + int i; + for (i=0; i<4; ++i) + if (stbi__get8(s) != (stbi_uc)str[i]) + return 0; + + return 1; +} + +static int stbi__pic_test_core(stbi__context *s) +{ + int i; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) + return 0; + + for(i=0;i<84;++i) + stbi__get8(s); + + if (!stbi__pic_is4(s,"PICT")) + return 0; + + return 1; +} + +typedef struct +{ + stbi_uc size,type,channel; +} stbi__pic_packet; + +static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) +{ + int mask=0x80, i; + + for (i=0; i<4; ++i, mask>>=1) { + if (channel & mask) { + if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); + dest[i]=stbi__get8(s); + } + } + + return dest; +} + +static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ + int mask=0x80,i; + + for (i=0;i<4; ++i, mask>>=1) + if (channel&mask) + dest[i]=src[i]; +} + +static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) +{ + int act_comp=0,num_packets=0,y,chained; + stbi__pic_packet packets[10]; + + // this will (should...) cater for even some bizarre stuff like having data + // for the same channel in multiple packets. + do { + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return stbi__errpuc("bad format","too many packets"); + + packet = &packets[num_packets++]; + + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + + act_comp |= packet->channel; + + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); + if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? + + for(y=0; y<height; ++y) { + int packet_idx; + + for(packet_idx=0; packet_idx < num_packets; ++packet_idx) { + stbi__pic_packet *packet = &packets[packet_idx]; + stbi_uc *dest = result+y*width*4; + + switch (packet->type) { + default: + return stbi__errpuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;x<width;++x, dest+=4) + if (!stbi__readval(s,packet->channel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=stbi__get8(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (stbi_uc) left; + + if (!stbi__readval(s,packet->channel,value)) return 0; + + for(i=0; i<count; ++i,dest+=4) + stbi__copyval(packet->channel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = stbi__get8(s), i; + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + + if (count==128) + count = stbi__get16be(s); + else + count -= 127; + if (count > left) + return stbi__errpuc("bad file","scanline overrun"); + + if (!stbi__readval(s,packet->channel,value)) + return 0; + + for(i=0;i<count;++i, dest += 4) + stbi__copyval(packet->channel,dest,value); + } else { // Raw + ++count; + if (count>left) return stbi__errpuc("bad file","scanline overrun"); + + for(i=0;i<count;++i, dest+=4) + if (!stbi__readval(s,packet->channel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + } + + return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ + stbi_uc *result; + int i, x,y, internal_comp; + STBI_NOTUSED(ri); + + if (!comp) comp = &internal_comp; + + for (i=0; i<92; ++i) + stbi__get8(s); + + x = stbi__get16be(s); + y = stbi__get16be(s); + + if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); + if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + + stbi__get32be(s); //skip `ratio' + stbi__get16be(s); //skip `fields' + stbi__get16be(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); + if (!result) return stbi__errpuc("outofmem", "Out of memory"); + memset(result, 0xff, x*y*4); + + if (!stbi__pic_load_core(s,x,y,comp, result)) { + STBI_FREE(result); + result=0; + } + *px = x; + *py = y; + if (req_comp == 0) req_comp = *comp; + result=stbi__convert_format(result,4,req_comp,x,y); + + return result; +} + +static int stbi__pic_test(stbi__context *s) +{ + int r = stbi__pic_test_core(s); + stbi__rewind(s); + return r; +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ + stbi__int16 prefix; + stbi_uc first; + stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; + stbi_uc pal[256][4]; + stbi_uc lpal[256][4]; + stbi__gif_lzw codes[8192]; + stbi_uc *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; + int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ + int sz; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; + sz = stbi__get8(s); + if (sz != '9' && sz != '7') return 0; + if (stbi__get8(s) != 'a') return 0; + return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ + int r = stbi__gif_test_raw(s); + stbi__rewind(s); + return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 0 : 255; + } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ + stbi_uc version; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') + return stbi__err("not GIF", "Corrupt GIF"); + + version = stbi__get8(s); + if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); + if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); + + stbi__g_failure_reason = ""; + g->w = stbi__get16le(s); + g->h = stbi__get16le(s); + g->flags = stbi__get8(s); + g->bgindex = stbi__get8(s); + g->ratio = stbi__get8(s); + g->transparent = -1; + + if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); + if (!g) return stbi__err("outofmem", "Out of memory"); + if (!stbi__gif_header(s, g, comp, 1)) { + STBI_FREE(g); + stbi__rewind( s ); + return 0; + } + if (x) *x = g->w; + if (y) *y = g->h; + STBI_FREE(g); + return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ + stbi_uc *p, *c; + int idx; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi__out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + idx = g->cur_x + g->cur_y; + p = &g->out[idx]; + g->history[idx / 4] = 1; + + c = &g->color_table[g->codes[code].suffix * 4]; + if (c[3] > 128) { // don't render transparent pixels; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << g->parse) * g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ + stbi_uc lzw_cs; + stbi__int32 len, init_code; + stbi__uint32 first; + stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi__gif_lzw *p; + + lzw_cs = stbi__get8(s); + if (lzw_cs > 12) return NULL; + clear = 1 << lzw_cs; + first = 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (init_code = 0; init_code < clear; init_code++) { + g->codes[init_code].prefix = -1; + g->codes[init_code].first = (stbi_uc) init_code; + g->codes[init_code].suffix = (stbi_uc) init_code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = stbi__get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (stbi__int32) stbi__get8(s) << valid_bits; + valid_bits += 8; + } else { + stbi__int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + stbi__skip(s, len); + while ((len = stbi__get8(s)) > 0) + stbi__skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + + p->prefix = (stbi__int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? p->first : g->codes[code].first; + } else if (code == avail) + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + + stbi__out_gif_code(g, (stbi__uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ + int dispose; + int first_frame; + int pi; + int pcount; + STBI_NOTUSED(req_comp); + + // on first frame, any non-written pixels get the background colour (non-transparent) + first_frame = 0; + if (g->out == 0) { + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + if (!stbi__mad3sizes_valid(4, g->w, g->h, 0)) + return stbi__errpuc("too large", "GIF image is too large"); + pcount = g->w * g->h; + g->out = (stbi_uc *) stbi__malloc(4 * pcount); + g->background = (stbi_uc *) stbi__malloc(4 * pcount); + g->history = (stbi_uc *) stbi__malloc(pcount); + if (!g->out || !g->background || !g->history) + return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "transparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to the color that was there the previous frame. + memset(g->out, 0x00, 4 * pcount); + memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent) + memset(g->history, 0x00, pcount); // pixels that were affected previous frame + first_frame = 1; + } else { + // second frame - how do we dispose of the previous one? + dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); + } + + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + + for (;;) { + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + // if the width of the specified rectangle is 0, that means + // we may not see *any* pixels or the image is malformed; + // to make sure this is caught, move the current y down to + // max_y (which is what out_gif_code checks). + if (w == 0) + g->cur_y = g->max_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (!o) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. + + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays) +{ + STBI_FREE(g->out); + STBI_FREE(g->history); + STBI_FREE(g->background); + + if (out) STBI_FREE(out); + if (delays && *delays) STBI_FREE(*delays); + return stbi__errpuc("outofmem", "Out of memory"); +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + int out_size = 0; + int delays_size = 0; + + STBI_NOTUSED(out_size); + STBI_NOTUSED(delays_size); + + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride ); + if (!tmp) + return stbi__load_gif_main_outofmem(&g, out, delays); + else { + out = (stbi_uc*) tmp; + out_size = layers * stride; + } + + if (delays) { + int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers ); + if (!new_delays) + return stbi__load_gif_main_outofmem(&g, out, delays); + *delays = new_delays; + delays_size = layers * sizeof(int); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (!out) + return stbi__load_gif_main_outofmem(&g, out, delays); + out_size = layers * stride; + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + if (!*delays) + return stbi__load_gif_main_outofmem(&g, out, delays); + delays_size = layers * sizeof(int); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *u = 0; + stbi__gif g; + memset(&g, 0, sizeof(g)); + STBI_NOTUSED(ri); + + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. + if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } else if (g.out) { + // if there was an error and we allocated an image buffer, free it! + STBI_FREE(g.out); + } + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + + return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ + return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ + int i; + for (i=0; signature[i]; ++i) + if (stbi__get8(s) != signature[i]) + return 0; + stbi__rewind(s); + return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); + stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } + return r; +} + +#define STBI__HDR_BUFLEN 1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) stbi__get8(z); + + while (!stbi__at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = (int) strtol(token, NULL, 10); + + if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); + if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); + + *x = width; + *y = height; + + if (comp) *comp = 3; + if (req_comp == 0) req_comp = 3; + + if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) + return stbi__errpf("too large", "HDR image is too large"); + + // Read data + hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); + if (!hdr_data) + return stbi__errpf("outofmem", "Out of memory"); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + stbi__getn(s, rgbe, 4); + stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = stbi__get8(s); + c2 = stbi__get8(s); + len = stbi__get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4]; + rgbe[0] = (stbi_uc) c1; + rgbe[1] = (stbi_uc) c2; + rgbe[2] = (stbi_uc) len; + rgbe[3] = (stbi_uc) stbi__get8(s); + stbi__hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + STBI_FREE(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= stbi__get8(s); + if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) { + scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } + + for (k = 0; k < 4; ++k) { + int nleft; + i = 0; + while ((nleft = width - i) > 0) { + count = stbi__get8(s); + if (count > 128) { + // Run + value = stbi__get8(s); + count -= 128; + if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = stbi__get8(s); + } + } + } + for (i=0; i < width; ++i) + stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + if (scanline) + STBI_FREE(scanline); + } + + return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int dummy; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (stbi__hdr_test(s) == 0) { + stbi__rewind( s ); + return 0; + } + + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + if (p == NULL) { + stbi__rewind( s ); + return 0; + } + if (x) *x = s->img_x; + if (y) *y = s->img_y; + if (comp) { + if (info.bpp == 24 && info.ma == 0xff000000) + *comp = 3; + else + *comp = info.ma ? 4 : 3; + } + return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ + int channelCount, dummy, depth; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + *y = stbi__get32be(s); + *x = stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 8 && depth != 16) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 3) { + stbi__rewind( s ); + return 0; + } + *comp = 4; + return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ + int channelCount, depth; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + STBI_NOTUSED(stbi__get32be(s)); + STBI_NOTUSED(stbi__get32be(s)); + depth = stbi__get16be(s); + if (depth != 16) { + stbi__rewind( s ); + return 0; + } + return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ + int act_comp=0,num_packets=0,chained,dummy; + stbi__pic_packet packets[10]; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { + stbi__rewind(s); + return 0; + } + + stbi__skip(s, 88); + + *x = stbi__get16be(s); + *y = stbi__get16be(s); + if (stbi__at_eof(s)) { + stbi__rewind( s); + return 0; + } + if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { + stbi__rewind( s ); + return 0; + } + + stbi__skip(s, 8); + + do { + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n); + if (ri->bits_per_channel == 0) + return 0; + + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) { + STBI_FREE(out); + return stbi__errpuc("bad PNM", "PNM file truncated"); + } + + if (req_comp && req_comp != s->img_n) { + if (ri->bits_per_channel == 16) { + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y); + } else { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + } + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + return out; +} + +static int stbi__pnm_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ + for (;;) { + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); + + if (stbi__at_eof(s) || *c != '#') + break; + + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); + } +} + +static int stbi__pnm_isdigit(char c) +{ + return c >= '0' && c <= '9'; +} + +static int stbi__pnm_getinteger(stbi__context *s, char *c) +{ + int value = 0; + + while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); + if((value > 214748364) || (value == 214748364 && *c > '7')) + return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int"); + } + + return value; +} + +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ + int maxv, dummy; + char c, p, t; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); + + // Get identifier + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind(s); + return 0; + } + + *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + + c = (char) stbi__get8(s); + stbi__pnm_skip_whitespace(s, &c); + + *x = stbi__pnm_getinteger(s, &c); // read width + if(*x == 0) + return stbi__err("invalid width", "PPM image header had zero or overflowing width"); + stbi__pnm_skip_whitespace(s, &c); + + *y = stbi__pnm_getinteger(s, &c); // read height + if (*y == 0) + return stbi__err("invalid width", "PPM image header had zero or overflowing width"); + stbi__pnm_skip_whitespace(s, &c); + + maxv = stbi__pnm_getinteger(s, &c); // read max value + if (maxv > 65535) + return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images"); + else if (maxv > 255) + return 16; + else + return 8; +} + +static int stbi__pnm_is16(stbi__context *s) +{ + if (stbi__pnm_info(s, NULL, NULL, NULL) == 16) + return 1; + return 0; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ + #ifndef STBI_NO_JPEG + if (stbi__jpeg_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNG + if (stbi__png_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_GIF + if (stbi__gif_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_BMP + if (stbi__bmp_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PIC + if (stbi__pic_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_info(s, x, y, comp)) return 1; + #endif + + // test tga last because it's a crappy test! + #ifndef STBI_NO_TGA + if (stbi__tga_info(s, x, y, comp)) + return 1; + #endif + return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ + #ifndef STBI_NO_PNG + if (stbi__png_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_is16(s)) return 1; + #endif + return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__info_main(&s,x,y,comp); + fseek(f,pos,SEEK_SET); + return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__is_16_main(&s); + fseek(f,pos,SEEK_SET); + return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__is_16_main(&s); +} + +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/externals/stb/stb_image_resize.h b/externals/stb/stb_image_resize.h new file mode 100644 index 000000000..ef9e6fe87 --- /dev/null +++ b/externals/stb/stb_image_resize.h @@ -0,0 +1,2634 @@ +/* stb_image_resize - v0.97 - public domain image resizing + by Jorge L Rodriguez (@VinoBS) - 2014 + http://github.com/nothings/stb + + Written with emphasis on usability, portability, and efficiency. (No + SIMD or threads, so it be easily outperformed by libs that use those.) + Only scaling and translation is supported, no rotations or shears. + Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation. + + COMPILING & LINKING + In one C/C++ file that #includes this file, do this: + #define STB_IMAGE_RESIZE_IMPLEMENTATION + before the #include. That will create the implementation in that file. + + QUICKSTART + stbir_resize_uint8( input_pixels , in_w , in_h , 0, + output_pixels, out_w, out_h, 0, num_channels) + stbir_resize_float(...) + stbir_resize_uint8_srgb( input_pixels , in_w , in_h , 0, + output_pixels, out_w, out_h, 0, + num_channels , alpha_chan , 0) + stbir_resize_uint8_srgb_edgemode( + input_pixels , in_w , in_h , 0, + output_pixels, out_w, out_h, 0, + num_channels , alpha_chan , 0, STBIR_EDGE_CLAMP) + // WRAP/REFLECT/ZERO + + FULL API + See the "header file" section of the source for API documentation. + + ADDITIONAL DOCUMENTATION + + SRGB & FLOATING POINT REPRESENTATION + The sRGB functions presume IEEE floating point. If you do not have + IEEE floating point, define STBIR_NON_IEEE_FLOAT. This will use + a slower implementation. + + MEMORY ALLOCATION + The resize functions here perform a single memory allocation using + malloc. To control the memory allocation, before the #include that + triggers the implementation, do: + + #define STBIR_MALLOC(size,context) ... + #define STBIR_FREE(ptr,context) ... + + Each resize function makes exactly one call to malloc/free, so to use + temp memory, store the temp memory in the context and return that. + + ASSERT + Define STBIR_ASSERT(boolval) to override assert() and not use assert.h + + OPTIMIZATION + Define STBIR_SATURATE_INT to compute clamp values in-range using + integer operations instead of float operations. This may be faster + on some platforms. + + DEFAULT FILTERS + For functions which don't provide explicit control over what filters + to use, you can change the compile-time defaults with + + #define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_something + #define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_something + + See stbir_filter in the header-file section for the list of filters. + + NEW FILTERS + A number of 1D filter kernels are used. For a list of + supported filters see the stbir_filter enum. To add a new filter, + write a filter function and add it to stbir__filter_info_table. + + PROGRESS + For interactive use with slow resize operations, you can install + a progress-report callback: + + #define STBIR_PROGRESS_REPORT(val) some_func(val) + + The parameter val is a float which goes from 0 to 1 as progress is made. + + For example: + + static void my_progress_report(float progress); + #define STBIR_PROGRESS_REPORT(val) my_progress_report(val) + + #define STB_IMAGE_RESIZE_IMPLEMENTATION + #include "stb_image_resize.h" + + static void my_progress_report(float progress) + { + printf("Progress: %f%%\n", progress*100); + } + + MAX CHANNELS + If your image has more than 64 channels, define STBIR_MAX_CHANNELS + to the max you'll have. + + ALPHA CHANNEL + Most of the resizing functions provide the ability to control how + the alpha channel of an image is processed. The important things + to know about this: + + 1. The best mathematically-behaved version of alpha to use is + called "premultiplied alpha", in which the other color channels + have had the alpha value multiplied in. If you use premultiplied + alpha, linear filtering (such as image resampling done by this + library, or performed in texture units on GPUs) does the "right + thing". While premultiplied alpha is standard in the movie CGI + industry, it is still uncommon in the videogame/real-time world. + + If you linearly filter non-premultiplied alpha, strange effects + occur. (For example, the 50/50 average of 99% transparent bright green + and 1% transparent black produces 50% transparent dark green when + non-premultiplied, whereas premultiplied it produces 50% + transparent near-black. The former introduces green energy + that doesn't exist in the source image.) + + 2. Artists should not edit premultiplied-alpha images; artists + want non-premultiplied alpha images. Thus, art tools generally output + non-premultiplied alpha images. + + 3. You will get best results in most cases by converting images + to premultiplied alpha before processing them mathematically. + + 4. If you pass the flag STBIR_FLAG_ALPHA_PREMULTIPLIED, the + resizer does not do anything special for the alpha channel; + it is resampled identically to other channels. This produces + the correct results for premultiplied-alpha images, but produces + less-than-ideal results for non-premultiplied-alpha images. + + 5. If you do not pass the flag STBIR_FLAG_ALPHA_PREMULTIPLIED, + then the resizer weights the contribution of input pixels + based on their alpha values, or, equivalently, it multiplies + the alpha value into the color channels, resamples, then divides + by the resultant alpha value. Input pixels which have alpha=0 do + not contribute at all to output pixels unless _all_ of the input + pixels affecting that output pixel have alpha=0, in which case + the result for that pixel is the same as it would be without + STBIR_FLAG_ALPHA_PREMULTIPLIED. However, this is only true for + input images in integer formats. For input images in float format, + input pixels with alpha=0 have no effect, and output pixels + which have alpha=0 will be 0 in all channels. (For float images, + you can manually achieve the same result by adding a tiny epsilon + value to the alpha channel of every image, and then subtracting + or clamping it at the end.) + + 6. You can suppress the behavior described in #5 and make + all-0-alpha pixels have 0 in all channels by #defining + STBIR_NO_ALPHA_EPSILON. + + 7. You can separately control whether the alpha channel is + interpreted as linear or affected by the colorspace. By default + it is linear; you almost never want to apply the colorspace. + (For example, graphics hardware does not apply sRGB conversion + to the alpha channel.) + + CONTRIBUTORS + Jorge L Rodriguez: Implementation + Sean Barrett: API design, optimizations + Aras Pranckevicius: bugfix + Nathan Reed: warning fixes + + REVISIONS + 0.97 (2020-02-02) fixed warning + 0.96 (2019-03-04) fixed warnings + 0.95 (2017-07-23) fixed warnings + 0.94 (2017-03-18) fixed warnings + 0.93 (2017-03-03) fixed bug with certain combinations of heights + 0.92 (2017-01-02) fix integer overflow on large (>2GB) images + 0.91 (2016-04-02) fix warnings; fix handling of subpixel regions + 0.90 (2014-09-17) first released version + + LICENSE + See end of file for license information. + + TODO + Don't decode all of the image data when only processing a partial tile + Don't use full-width decode buffers when only processing a partial tile + When processing wide images, break processing into tiles so data fits in L1 cache + Installable filters? + Resize that respects alpha test coverage + (Reference code: FloatImage::alphaTestCoverage and FloatImage::scaleAlphaToCoverage: + https://code.google.com/p/nvidia-texture-tools/source/browse/trunk/src/nvimage/FloatImage.cpp ) +*/ + +#ifndef STBIR_INCLUDE_STB_IMAGE_RESIZE_H +#define STBIR_INCLUDE_STB_IMAGE_RESIZE_H + +#ifdef _MSC_VER +typedef unsigned char stbir_uint8; +typedef unsigned short stbir_uint16; +typedef unsigned int stbir_uint32; +#else +#include <stdint.h> +typedef uint8_t stbir_uint8; +typedef uint16_t stbir_uint16; +typedef uint32_t stbir_uint32; +#endif + +#ifndef STBIRDEF +#ifdef STB_IMAGE_RESIZE_STATIC +#define STBIRDEF static +#else +#ifdef __cplusplus +#define STBIRDEF extern "C" +#else +#define STBIRDEF extern +#endif +#endif +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// Easy-to-use API: +// +// * "input pixels" points to an array of image data with 'num_channels' channels (e.g. RGB=3, RGBA=4) +// * input_w is input image width (x-axis), input_h is input image height (y-axis) +// * stride is the offset between successive rows of image data in memory, in bytes. you can +// specify 0 to mean packed continuously in memory +// * alpha channel is treated identically to other channels. +// * colorspace is linear or sRGB as specified by function name +// * returned result is 1 for success or 0 in case of an error. +// #define STBIR_ASSERT() to trigger an assert on parameter validation errors. +// * Memory required grows approximately linearly with input and output size, but with +// discontinuities at input_w == output_w and input_h == output_h. +// * These functions use a "default" resampling filter defined at compile time. To change the filter, +// you can change the compile-time defaults by #defining STBIR_DEFAULT_FILTER_UPSAMPLE +// and STBIR_DEFAULT_FILTER_DOWNSAMPLE, or you can use the medium-complexity API. + +STBIRDEF int stbir_resize_uint8( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels); + +STBIRDEF int stbir_resize_float( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + float *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels); + + +// The following functions interpret image data as gamma-corrected sRGB. +// Specify STBIR_ALPHA_CHANNEL_NONE if you have no alpha channel, +// or otherwise provide the index of the alpha channel. Flags value +// of 0 will probably do the right thing if you're not sure what +// the flags mean. + +#define STBIR_ALPHA_CHANNEL_NONE -1 + +// Set this flag if your texture has premultiplied alpha. Otherwise, stbir will +// use alpha-weighted resampling (effectively premultiplying, resampling, +// then unpremultiplying). +#define STBIR_FLAG_ALPHA_PREMULTIPLIED (1 << 0) +// The specified alpha channel should be handled as gamma-corrected value even +// when doing sRGB operations. +#define STBIR_FLAG_ALPHA_USES_COLORSPACE (1 << 1) + +STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags); + + +typedef enum +{ + STBIR_EDGE_CLAMP = 1, + STBIR_EDGE_REFLECT = 2, + STBIR_EDGE_WRAP = 3, + STBIR_EDGE_ZERO = 4, +} stbir_edge; + +// This function adds the ability to specify how requests to sample off the edge of the image are handled. +STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode); + +////////////////////////////////////////////////////////////////////////////// +// +// Medium-complexity API +// +// This extends the easy-to-use API as follows: +// +// * Alpha-channel can be processed separately +// * If alpha_channel is not STBIR_ALPHA_CHANNEL_NONE +// * Alpha channel will not be gamma corrected (unless flags&STBIR_FLAG_GAMMA_CORRECT) +// * Filters will be weighted by alpha channel (unless flags&STBIR_FLAG_ALPHA_PREMULTIPLIED) +// * Filter can be selected explicitly +// * uint16 image type +// * sRGB colorspace available for all types +// * context parameter for passing to STBIR_MALLOC + +typedef enum +{ + STBIR_FILTER_DEFAULT = 0, // use same filter type that easy-to-use API chooses + STBIR_FILTER_BOX = 1, // A trapezoid w/1-pixel wide ramps, same result as box for integer scale ratios + STBIR_FILTER_TRIANGLE = 2, // On upsampling, produces same results as bilinear texture filtering + STBIR_FILTER_CUBICBSPLINE = 3, // The cubic b-spline (aka Mitchell-Netrevalli with B=1,C=0), gaussian-esque + STBIR_FILTER_CATMULLROM = 4, // An interpolating cubic spline + STBIR_FILTER_MITCHELL = 5, // Mitchell-Netrevalli filter with B=1/3, C=1/3 +} stbir_filter; + +typedef enum +{ + STBIR_COLORSPACE_LINEAR, + STBIR_COLORSPACE_SRGB, + + STBIR_MAX_COLORSPACES, +} stbir_colorspace; + +// The following functions are all identical except for the type of the image data + +STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, + void *alloc_context); + +STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, + void *alloc_context); + +STBIRDEF int stbir_resize_float_generic( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + float *output_pixels , int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, + void *alloc_context); + + + +////////////////////////////////////////////////////////////////////////////// +// +// Full-complexity API +// +// This extends the medium API as follows: +// +// * uint32 image type +// * not typesafe +// * separate filter types for each axis +// * separate edge modes for each axis +// * can specify scale explicitly for subpixel correctness +// * can specify image source tile using texture coordinates + +typedef enum +{ + STBIR_TYPE_UINT8 , + STBIR_TYPE_UINT16, + STBIR_TYPE_UINT32, + STBIR_TYPE_FLOAT , + + STBIR_MAX_TYPES +} stbir_datatype; + +STBIRDEF int stbir_resize( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_datatype datatype, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, + stbir_filter filter_horizontal, stbir_filter filter_vertical, + stbir_colorspace space, void *alloc_context); + +STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_datatype datatype, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, + stbir_filter filter_horizontal, stbir_filter filter_vertical, + stbir_colorspace space, void *alloc_context, + float x_scale, float y_scale, + float x_offset, float y_offset); + +STBIRDEF int stbir_resize_region( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_datatype datatype, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, + stbir_filter filter_horizontal, stbir_filter filter_vertical, + stbir_colorspace space, void *alloc_context, + float s0, float t0, float s1, float t1); +// (s0, t0) & (s1, t1) are the top-left and bottom right corner (uv addressing style: [0, 1]x[0, 1]) of a region of the input image to use. + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBIR_INCLUDE_STB_IMAGE_RESIZE_H + + + + + +#ifdef STB_IMAGE_RESIZE_IMPLEMENTATION + +#ifndef STBIR_ASSERT +#include <assert.h> +#define STBIR_ASSERT(x) assert(x) +#endif + +// For memset +#include <string.h> + +#include <math.h> + +#ifndef STBIR_MALLOC +#include <stdlib.h> +// use comma operator to evaluate c, to avoid "unused parameter" warnings +#define STBIR_MALLOC(size,c) ((void)(c), malloc(size)) +#define STBIR_FREE(ptr,c) ((void)(c), free(ptr)) +#endif + +#ifndef _MSC_VER +#ifdef __cplusplus +#define stbir__inline inline +#else +#define stbir__inline +#endif +#else +#define stbir__inline __forceinline +#endif + + +// should produce compiler error if size is wrong +typedef unsigned char stbir__validate_uint32[sizeof(stbir_uint32) == 4 ? 1 : -1]; + +#ifdef _MSC_VER +#define STBIR__NOTUSED(v) (void)(v) +#else +#define STBIR__NOTUSED(v) (void)sizeof(v) +#endif + +#define STBIR__ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0])) + +#ifndef STBIR_DEFAULT_FILTER_UPSAMPLE +#define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_CATMULLROM +#endif + +#ifndef STBIR_DEFAULT_FILTER_DOWNSAMPLE +#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_MITCHELL +#endif + +#ifndef STBIR_PROGRESS_REPORT +#define STBIR_PROGRESS_REPORT(float_0_to_1) +#endif + +#ifndef STBIR_MAX_CHANNELS +#define STBIR_MAX_CHANNELS 64 +#endif + +#if STBIR_MAX_CHANNELS > 65536 +#error "Too many channels; STBIR_MAX_CHANNELS must be no more than 65536." +// because we store the indices in 16-bit variables +#endif + +// This value is added to alpha just before premultiplication to avoid +// zeroing out color values. It is equivalent to 2^-80. If you don't want +// that behavior (it may interfere if you have floating point images with +// very small alpha values) then you can define STBIR_NO_ALPHA_EPSILON to +// disable it. +#ifndef STBIR_ALPHA_EPSILON +#define STBIR_ALPHA_EPSILON ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20)) +#endif + + + +#ifdef _MSC_VER +#define STBIR__UNUSED_PARAM(v) (void)(v) +#else +#define STBIR__UNUSED_PARAM(v) (void)sizeof(v) +#endif + +// must match stbir_datatype +static unsigned char stbir__type_size[] = { + 1, // STBIR_TYPE_UINT8 + 2, // STBIR_TYPE_UINT16 + 4, // STBIR_TYPE_UINT32 + 4, // STBIR_TYPE_FLOAT +}; + +// Kernel function centered at 0 +typedef float (stbir__kernel_fn)(float x, float scale); +typedef float (stbir__support_fn)(float scale); + +typedef struct +{ + stbir__kernel_fn* kernel; + stbir__support_fn* support; +} stbir__filter_info; + +// When upsampling, the contributors are which source pixels contribute. +// When downsampling, the contributors are which destination pixels are contributed to. +typedef struct +{ + int n0; // First contributing pixel + int n1; // Last contributing pixel +} stbir__contributors; + +typedef struct +{ + const void* input_data; + int input_w; + int input_h; + int input_stride_bytes; + + void* output_data; + int output_w; + int output_h; + int output_stride_bytes; + + float s0, t0, s1, t1; + + float horizontal_shift; // Units: output pixels + float vertical_shift; // Units: output pixels + float horizontal_scale; + float vertical_scale; + + int channels; + int alpha_channel; + stbir_uint32 flags; + stbir_datatype type; + stbir_filter horizontal_filter; + stbir_filter vertical_filter; + stbir_edge edge_horizontal; + stbir_edge edge_vertical; + stbir_colorspace colorspace; + + stbir__contributors* horizontal_contributors; + float* horizontal_coefficients; + + stbir__contributors* vertical_contributors; + float* vertical_coefficients; + + int decode_buffer_pixels; + float* decode_buffer; + + float* horizontal_buffer; + + // cache these because ceil/floor are inexplicably showing up in profile + int horizontal_coefficient_width; + int vertical_coefficient_width; + int horizontal_filter_pixel_width; + int vertical_filter_pixel_width; + int horizontal_filter_pixel_margin; + int vertical_filter_pixel_margin; + int horizontal_num_contributors; + int vertical_num_contributors; + + int ring_buffer_length_bytes; // The length of an individual entry in the ring buffer. The total number of ring buffers is stbir__get_filter_pixel_width(filter) + int ring_buffer_num_entries; // Total number of entries in the ring buffer. + int ring_buffer_first_scanline; + int ring_buffer_last_scanline; + int ring_buffer_begin_index; // first_scanline is at this index in the ring buffer + float* ring_buffer; + + float* encode_buffer; // A temporary buffer to store floats so we don't lose precision while we do multiply-adds. + + int horizontal_contributors_size; + int horizontal_coefficients_size; + int vertical_contributors_size; + int vertical_coefficients_size; + int decode_buffer_size; + int horizontal_buffer_size; + int ring_buffer_size; + int encode_buffer_size; +} stbir__info; + + +static const float stbir__max_uint8_as_float = 255.0f; +static const float stbir__max_uint16_as_float = 65535.0f; +static const double stbir__max_uint32_as_float = 4294967295.0; + + +static stbir__inline int stbir__min(int a, int b) +{ + return a < b ? a : b; +} + +static stbir__inline float stbir__saturate(float x) +{ + if (x < 0) + return 0; + + if (x > 1) + return 1; + + return x; +} + +#ifdef STBIR_SATURATE_INT +static stbir__inline stbir_uint8 stbir__saturate8(int x) +{ + if ((unsigned int) x <= 255) + return x; + + if (x < 0) + return 0; + + return 255; +} + +static stbir__inline stbir_uint16 stbir__saturate16(int x) +{ + if ((unsigned int) x <= 65535) + return x; + + if (x < 0) + return 0; + + return 65535; +} +#endif + +static float stbir__srgb_uchar_to_linear_float[256] = { + 0.000000f, 0.000304f, 0.000607f, 0.000911f, 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f, + 0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, 0.006049f, 0.006512f, 0.006995f, 0.007499f, + 0.008023f, 0.008568f, 0.009134f, 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f, 0.014444f, + 0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f, + 0.025187f, 0.026241f, 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, 0.035601f, 0.036889f, + 0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f, + 0.054480f, 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, 0.068478f, 0.070360f, 0.072272f, + 0.074214f, 0.076185f, 0.078187f, 0.080220f, 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f, + 0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, 0.114435f, 0.116971f, 0.119538f, 0.122139f, + 0.124772f, 0.127438f, 0.130136f, 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, 0.152926f, + 0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f, + 0.191202f, 0.194618f, 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, 0.223228f, 0.226966f, + 0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f, + 0.274677f, 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, 0.309469f, 0.313989f, 0.318547f, + 0.323143f, 0.327778f, 0.332452f, 0.337164f, 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f, + 0.376262f, 0.381326f, 0.386430f, 0.391573f, 0.396755f, 0.401978f, 0.407240f, 0.412543f, 0.417885f, 0.423268f, 0.428691f, + 0.434154f, 0.439657f, 0.445201f, 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473532f, 0.479320f, 0.485150f, 0.491021f, + 0.496933f, 0.502887f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, 0.533276f, 0.539480f, 0.545725f, 0.552011f, 0.558340f, + 0.564712f, 0.571125f, 0.577581f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, 0.623960f, 0.630757f, + 0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, 0.672443f, 0.679543f, 0.686685f, 0.693872f, 0.701102f, 0.708376f, + 0.715694f, 0.723055f, 0.730461f, 0.737911f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, 0.775822f, 0.783538f, 0.791298f, + 0.799103f, 0.806952f, 0.814847f, 0.822786f, 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f, + 0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, 0.947307f, 0.955974f, 0.964686f, 0.973445f, + 0.982251f, 0.991102f, 1.0f +}; + +static float stbir__srgb_to_linear(float f) +{ + if (f <= 0.04045f) + return f / 12.92f; + else + return (float)pow((f + 0.055f) / 1.055f, 2.4f); +} + +static float stbir__linear_to_srgb(float f) +{ + if (f <= 0.0031308f) + return f * 12.92f; + else + return 1.055f * (float)pow(f, 1 / 2.4f) - 0.055f; +} + +#ifndef STBIR_NON_IEEE_FLOAT +// From https://gist.github.com/rygorous/2203834 + +typedef union +{ + stbir_uint32 u; + float f; +} stbir__FP32; + +static const stbir_uint32 fp32_to_srgb8_tab4[104] = { + 0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d, + 0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a, + 0x010e0033, 0x01280033, 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033, + 0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067, + 0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5, + 0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2, + 0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143, + 0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af, + 0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240, + 0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300, + 0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401, + 0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559, + 0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723, +}; + +static stbir_uint8 stbir__linear_to_srgb_uchar(float in) +{ + static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps + static const stbir__FP32 minval = { (127-13) << 23 }; + stbir_uint32 tab,bias,scale,t; + stbir__FP32 f; + + // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively. + // The tests are carefully written so that NaNs map to 0, same as in the reference + // implementation. + if (!(in > minval.f)) // written this way to catch NaNs + in = minval.f; + if (in > almostone.f) + in = almostone.f; + + // Do the table lookup and unpack bias, scale + f.f = in; + tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20]; + bias = (tab >> 16) << 9; + scale = tab & 0xffff; + + // Grab next-highest mantissa bits and perform linear interpolation + t = (f.u >> 12) & 0xff; + return (unsigned char) ((bias + scale*t) >> 16); +} + +#else +// sRGB transition values, scaled by 1<<28 +static int stbir__srgb_offset_to_linear_scaled[256] = +{ + 0, 40738, 122216, 203693, 285170, 366648, 448125, 529603, + 611080, 692557, 774035, 855852, 942009, 1033024, 1128971, 1229926, + 1335959, 1447142, 1563542, 1685229, 1812268, 1944725, 2082664, 2226148, + 2375238, 2529996, 2690481, 2856753, 3028870, 3206888, 3390865, 3580856, + 3776916, 3979100, 4187460, 4402049, 4622919, 4850123, 5083710, 5323731, + 5570236, 5823273, 6082892, 6349140, 6622065, 6901714, 7188133, 7481369, + 7781466, 8088471, 8402427, 8723380, 9051372, 9386448, 9728650, 10078021, + 10434603, 10798439, 11169569, 11548036, 11933879, 12327139, 12727857, 13136073, + 13551826, 13975156, 14406100, 14844697, 15290987, 15745007, 16206795, 16676389, + 17153826, 17639142, 18132374, 18633560, 19142734, 19659934, 20185196, 20718552, + 21260042, 21809696, 22367554, 22933648, 23508010, 24090680, 24681686, 25281066, + 25888850, 26505076, 27129772, 27762974, 28404716, 29055026, 29713942, 30381490, + 31057708, 31742624, 32436272, 33138682, 33849884, 34569912, 35298800, 36036568, + 36783260, 37538896, 38303512, 39077136, 39859796, 40651528, 41452360, 42262316, + 43081432, 43909732, 44747252, 45594016, 46450052, 47315392, 48190064, 49074096, + 49967516, 50870356, 51782636, 52704392, 53635648, 54576432, 55526772, 56486700, + 57456236, 58435408, 59424248, 60422780, 61431036, 62449032, 63476804, 64514376, + 65561776, 66619028, 67686160, 68763192, 69850160, 70947088, 72053992, 73170912, + 74297864, 75434880, 76581976, 77739184, 78906536, 80084040, 81271736, 82469648, + 83677792, 84896192, 86124888, 87363888, 88613232, 89872928, 91143016, 92423512, + 93714432, 95015816, 96327688, 97650056, 98982952, 100326408, 101680440, 103045072, + 104420320, 105806224, 107202800, 108610064, 110028048, 111456776, 112896264, 114346544, + 115807632, 117279552, 118762328, 120255976, 121760536, 123276016, 124802440, 126339832, + 127888216, 129447616, 131018048, 132599544, 134192112, 135795792, 137410592, 139036528, + 140673648, 142321952, 143981456, 145652208, 147334208, 149027488, 150732064, 152447968, + 154175200, 155913792, 157663776, 159425168, 161197984, 162982240, 164777968, 166585184, + 168403904, 170234160, 172075968, 173929344, 175794320, 177670896, 179559120, 181458992, + 183370528, 185293776, 187228736, 189175424, 191133888, 193104112, 195086128, 197079968, + 199085648, 201103184, 203132592, 205173888, 207227120, 209292272, 211369392, 213458480, + 215559568, 217672656, 219797792, 221934976, 224084240, 226245600, 228419056, 230604656, + 232802400, 235012320, 237234432, 239468736, 241715280, 243974080, 246245120, 248528464, + 250824112, 253132064, 255452368, 257785040, 260130080, 262487520, 264857376, 267239664, +}; + +static stbir_uint8 stbir__linear_to_srgb_uchar(float f) +{ + int x = (int) (f * (1 << 28)); // has headroom so you don't need to clamp + int v = 0; + int i; + + // Refine the guess with a short binary search. + i = v + 128; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; + i = v + 64; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; + i = v + 32; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; + i = v + 16; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; + i = v + 8; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; + i = v + 4; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; + i = v + 2; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; + i = v + 1; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i; + + return (stbir_uint8) v; +} +#endif + +static float stbir__filter_trapezoid(float x, float scale) +{ + float halfscale = scale / 2; + float t = 0.5f + halfscale; + STBIR_ASSERT(scale <= 1); + + x = (float)fabs(x); + + if (x >= t) + return 0; + else + { + float r = 0.5f - halfscale; + if (x <= r) + return 1; + else + return (t - x) / scale; + } +} + +static float stbir__support_trapezoid(float scale) +{ + STBIR_ASSERT(scale <= 1); + return 0.5f + scale / 2; +} + +static float stbir__filter_triangle(float x, float s) +{ + STBIR__UNUSED_PARAM(s); + + x = (float)fabs(x); + + if (x <= 1.0f) + return 1 - x; + else + return 0; +} + +static float stbir__filter_cubic(float x, float s) +{ + STBIR__UNUSED_PARAM(s); + + x = (float)fabs(x); + + if (x < 1.0f) + return (4 + x*x*(3*x - 6))/6; + else if (x < 2.0f) + return (8 + x*(-12 + x*(6 - x)))/6; + + return (0.0f); +} + +static float stbir__filter_catmullrom(float x, float s) +{ + STBIR__UNUSED_PARAM(s); + + x = (float)fabs(x); + + if (x < 1.0f) + return 1 - x*x*(2.5f - 1.5f*x); + else if (x < 2.0f) + return 2 - x*(4 + x*(0.5f*x - 2.5f)); + + return (0.0f); +} + +static float stbir__filter_mitchell(float x, float s) +{ + STBIR__UNUSED_PARAM(s); + + x = (float)fabs(x); + + if (x < 1.0f) + return (16 + x*x*(21 * x - 36))/18; + else if (x < 2.0f) + return (32 + x*(-60 + x*(36 - 7*x)))/18; + + return (0.0f); +} + +static float stbir__support_zero(float s) +{ + STBIR__UNUSED_PARAM(s); + return 0; +} + +static float stbir__support_one(float s) +{ + STBIR__UNUSED_PARAM(s); + return 1; +} + +static float stbir__support_two(float s) +{ + STBIR__UNUSED_PARAM(s); + return 2; +} + +static stbir__filter_info stbir__filter_info_table[] = { + { NULL, stbir__support_zero }, + { stbir__filter_trapezoid, stbir__support_trapezoid }, + { stbir__filter_triangle, stbir__support_one }, + { stbir__filter_cubic, stbir__support_two }, + { stbir__filter_catmullrom, stbir__support_two }, + { stbir__filter_mitchell, stbir__support_two }, +}; + +stbir__inline static int stbir__use_upsampling(float ratio) +{ + return ratio > 1; +} + +stbir__inline static int stbir__use_width_upsampling(stbir__info* stbir_info) +{ + return stbir__use_upsampling(stbir_info->horizontal_scale); +} + +stbir__inline static int stbir__use_height_upsampling(stbir__info* stbir_info) +{ + return stbir__use_upsampling(stbir_info->vertical_scale); +} + +// This is the maximum number of input samples that can affect an output sample +// with the given filter +static int stbir__get_filter_pixel_width(stbir_filter filter, float scale) +{ + STBIR_ASSERT(filter != 0); + STBIR_ASSERT(filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); + + if (stbir__use_upsampling(scale)) + return (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2); + else + return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2 / scale); +} + +// This is how much to expand buffers to account for filters seeking outside +// the image boundaries. +static int stbir__get_filter_pixel_margin(stbir_filter filter, float scale) +{ + return stbir__get_filter_pixel_width(filter, scale) / 2; +} + +static int stbir__get_coefficient_width(stbir_filter filter, float scale) +{ + if (stbir__use_upsampling(scale)) + return (int)ceil(stbir__filter_info_table[filter].support(1 / scale) * 2); + else + return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2); +} + +static int stbir__get_contributors(float scale, stbir_filter filter, int input_size, int output_size) +{ + if (stbir__use_upsampling(scale)) + return output_size; + else + return (input_size + stbir__get_filter_pixel_margin(filter, scale) * 2); +} + +static int stbir__get_total_horizontal_coefficients(stbir__info* info) +{ + return info->horizontal_num_contributors + * stbir__get_coefficient_width (info->horizontal_filter, info->horizontal_scale); +} + +static int stbir__get_total_vertical_coefficients(stbir__info* info) +{ + return info->vertical_num_contributors + * stbir__get_coefficient_width (info->vertical_filter, info->vertical_scale); +} + +static stbir__contributors* stbir__get_contributor(stbir__contributors* contributors, int n) +{ + return &contributors[n]; +} + +// For perf reasons this code is duplicated in stbir__resample_horizontal_upsample/downsample, +// if you change it here change it there too. +static float* stbir__get_coefficient(float* coefficients, stbir_filter filter, float scale, int n, int c) +{ + int width = stbir__get_coefficient_width(filter, scale); + return &coefficients[width*n + c]; +} + +static int stbir__edge_wrap_slow(stbir_edge edge, int n, int max) +{ + switch (edge) + { + case STBIR_EDGE_ZERO: + return 0; // we'll decode the wrong pixel here, and then overwrite with 0s later + + case STBIR_EDGE_CLAMP: + if (n < 0) + return 0; + + if (n >= max) + return max - 1; + + return n; // NOTREACHED + + case STBIR_EDGE_REFLECT: + { + if (n < 0) + { + if (n < max) + return -n; + else + return max - 1; + } + + if (n >= max) + { + int max2 = max * 2; + if (n >= max2) + return 0; + else + return max2 - n - 1; + } + + return n; // NOTREACHED + } + + case STBIR_EDGE_WRAP: + if (n >= 0) + return (n % max); + else + { + int m = (-n) % max; + + if (m != 0) + m = max - m; + + return (m); + } + // NOTREACHED + + default: + STBIR_ASSERT(!"Unimplemented edge type"); + return 0; + } +} + +stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max) +{ + // avoid per-pixel switch + if (n >= 0 && n < max) + return n; + return stbir__edge_wrap_slow(edge, n, max); +} + +// What input pixels contribute to this output pixel? +static void stbir__calculate_sample_range_upsample(int n, float out_filter_radius, float scale_ratio, float out_shift, int* in_first_pixel, int* in_last_pixel, float* in_center_of_out) +{ + float out_pixel_center = (float)n + 0.5f; + float out_pixel_influence_lowerbound = out_pixel_center - out_filter_radius; + float out_pixel_influence_upperbound = out_pixel_center + out_filter_radius; + + float in_pixel_influence_lowerbound = (out_pixel_influence_lowerbound + out_shift) / scale_ratio; + float in_pixel_influence_upperbound = (out_pixel_influence_upperbound + out_shift) / scale_ratio; + + *in_center_of_out = (out_pixel_center + out_shift) / scale_ratio; + *in_first_pixel = (int)(floor(in_pixel_influence_lowerbound + 0.5)); + *in_last_pixel = (int)(floor(in_pixel_influence_upperbound - 0.5)); +} + +// What output pixels does this input pixel contribute to? +static void stbir__calculate_sample_range_downsample(int n, float in_pixels_radius, float scale_ratio, float out_shift, int* out_first_pixel, int* out_last_pixel, float* out_center_of_in) +{ + float in_pixel_center = (float)n + 0.5f; + float in_pixel_influence_lowerbound = in_pixel_center - in_pixels_radius; + float in_pixel_influence_upperbound = in_pixel_center + in_pixels_radius; + + float out_pixel_influence_lowerbound = in_pixel_influence_lowerbound * scale_ratio - out_shift; + float out_pixel_influence_upperbound = in_pixel_influence_upperbound * scale_ratio - out_shift; + + *out_center_of_in = in_pixel_center * scale_ratio - out_shift; + *out_first_pixel = (int)(floor(out_pixel_influence_lowerbound + 0.5)); + *out_last_pixel = (int)(floor(out_pixel_influence_upperbound - 0.5)); +} + +static void stbir__calculate_coefficients_upsample(stbir_filter filter, float scale, int in_first_pixel, int in_last_pixel, float in_center_of_out, stbir__contributors* contributor, float* coefficient_group) +{ + int i; + float total_filter = 0; + float filter_scale; + + STBIR_ASSERT(in_last_pixel - in_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical. + + contributor->n0 = in_first_pixel; + contributor->n1 = in_last_pixel; + + STBIR_ASSERT(contributor->n1 >= contributor->n0); + + for (i = 0; i <= in_last_pixel - in_first_pixel; i++) + { + float in_pixel_center = (float)(i + in_first_pixel) + 0.5f; + coefficient_group[i] = stbir__filter_info_table[filter].kernel(in_center_of_out - in_pixel_center, 1 / scale); + + // If the coefficient is zero, skip it. (Don't do the <0 check here, we want the influence of those outside pixels.) + if (i == 0 && !coefficient_group[i]) + { + contributor->n0 = ++in_first_pixel; + i--; + continue; + } + + total_filter += coefficient_group[i]; + } + + // NOTE(fg): Not actually true in general, nor is there any reason to expect it should be. + // It would be true in exact math but is at best approximately true in floating-point math, + // and it would not make sense to try and put actual bounds on this here because it depends + // on the image aspect ratio which can get pretty extreme. + //STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(in_last_pixel + 1) + 0.5f - in_center_of_out, 1/scale) == 0); + + STBIR_ASSERT(total_filter > 0.9); + STBIR_ASSERT(total_filter < 1.1f); // Make sure it's not way off. + + // Make sure the sum of all coefficients is 1. + filter_scale = 1 / total_filter; + + for (i = 0; i <= in_last_pixel - in_first_pixel; i++) + coefficient_group[i] *= filter_scale; + + for (i = in_last_pixel - in_first_pixel; i >= 0; i--) + { + if (coefficient_group[i]) + break; + + // This line has no weight. We can skip it. + contributor->n1 = contributor->n0 + i - 1; + } +} + +static void stbir__calculate_coefficients_downsample(stbir_filter filter, float scale_ratio, int out_first_pixel, int out_last_pixel, float out_center_of_in, stbir__contributors* contributor, float* coefficient_group) +{ + int i; + + STBIR_ASSERT(out_last_pixel - out_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(scale_ratio) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical. + + contributor->n0 = out_first_pixel; + contributor->n1 = out_last_pixel; + + STBIR_ASSERT(contributor->n1 >= contributor->n0); + + for (i = 0; i <= out_last_pixel - out_first_pixel; i++) + { + float out_pixel_center = (float)(i + out_first_pixel) + 0.5f; + float x = out_pixel_center - out_center_of_in; + coefficient_group[i] = stbir__filter_info_table[filter].kernel(x, scale_ratio) * scale_ratio; + } + + // NOTE(fg): Not actually true in general, nor is there any reason to expect it should be. + // It would be true in exact math but is at best approximately true in floating-point math, + // and it would not make sense to try and put actual bounds on this here because it depends + // on the image aspect ratio which can get pretty extreme. + //STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(out_last_pixel + 1) + 0.5f - out_center_of_in, scale_ratio) == 0); + + for (i = out_last_pixel - out_first_pixel; i >= 0; i--) + { + if (coefficient_group[i]) + break; + + // This line has no weight. We can skip it. + contributor->n1 = contributor->n0 + i - 1; + } +} + +static void stbir__normalize_downsample_coefficients(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, int input_size, int output_size) +{ + int num_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size); + int num_coefficients = stbir__get_coefficient_width(filter, scale_ratio); + int i, j; + int skip; + + for (i = 0; i < output_size; i++) + { + float scale; + float total = 0; + + for (j = 0; j < num_contributors; j++) + { + if (i >= contributors[j].n0 && i <= contributors[j].n1) + { + float coefficient = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0); + total += coefficient; + } + else if (i < contributors[j].n0) + break; + } + + STBIR_ASSERT(total > 0.9f); + STBIR_ASSERT(total < 1.1f); + + scale = 1 / total; + + for (j = 0; j < num_contributors; j++) + { + if (i >= contributors[j].n0 && i <= contributors[j].n1) + *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0) *= scale; + else if (i < contributors[j].n0) + break; + } + } + + // Optimize: Skip zero coefficients and contributions outside of image bounds. + // Do this after normalizing because normalization depends on the n0/n1 values. + for (j = 0; j < num_contributors; j++) + { + int range, max, width; + + skip = 0; + while (*stbir__get_coefficient(coefficients, filter, scale_ratio, j, skip) == 0) + skip++; + + contributors[j].n0 += skip; + + while (contributors[j].n0 < 0) + { + contributors[j].n0++; + skip++; + } + + range = contributors[j].n1 - contributors[j].n0 + 1; + max = stbir__min(num_coefficients, range); + + width = stbir__get_coefficient_width(filter, scale_ratio); + for (i = 0; i < max; i++) + { + if (i + skip >= width) + break; + + *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i) = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i + skip); + } + + continue; + } + + // Using min to avoid writing into invalid pixels. + for (i = 0; i < num_contributors; i++) + contributors[i].n1 = stbir__min(contributors[i].n1, output_size - 1); +} + +// Each scan line uses the same kernel values so we should calculate the kernel +// values once and then we can use them for every scan line. +static void stbir__calculate_filters(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, float shift, int input_size, int output_size) +{ + int n; + int total_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size); + + if (stbir__use_upsampling(scale_ratio)) + { + float out_pixels_radius = stbir__filter_info_table[filter].support(1 / scale_ratio) * scale_ratio; + + // Looping through out pixels + for (n = 0; n < total_contributors; n++) + { + float in_center_of_out; // Center of the current out pixel in the in pixel space + int in_first_pixel, in_last_pixel; + + stbir__calculate_sample_range_upsample(n, out_pixels_radius, scale_ratio, shift, &in_first_pixel, &in_last_pixel, &in_center_of_out); + + stbir__calculate_coefficients_upsample(filter, scale_ratio, in_first_pixel, in_last_pixel, in_center_of_out, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0)); + } + } + else + { + float in_pixels_radius = stbir__filter_info_table[filter].support(scale_ratio) / scale_ratio; + + // Looping through in pixels + for (n = 0; n < total_contributors; n++) + { + float out_center_of_in; // Center of the current out pixel in the in pixel space + int out_first_pixel, out_last_pixel; + int n_adjusted = n - stbir__get_filter_pixel_margin(filter, scale_ratio); + + stbir__calculate_sample_range_downsample(n_adjusted, in_pixels_radius, scale_ratio, shift, &out_first_pixel, &out_last_pixel, &out_center_of_in); + + stbir__calculate_coefficients_downsample(filter, scale_ratio, out_first_pixel, out_last_pixel, out_center_of_in, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0)); + } + + stbir__normalize_downsample_coefficients(contributors, coefficients, filter, scale_ratio, input_size, output_size); + } +} + +static float* stbir__get_decode_buffer(stbir__info* stbir_info) +{ + // The 0 index of the decode buffer starts after the margin. This makes + // it okay to use negative indexes on the decode buffer. + return &stbir_info->decode_buffer[stbir_info->horizontal_filter_pixel_margin * stbir_info->channels]; +} + +#define STBIR__DECODE(type, colorspace) ((int)(type) * (STBIR_MAX_COLORSPACES) + (int)(colorspace)) + +static void stbir__decode_scanline(stbir__info* stbir_info, int n) +{ + int c; + int channels = stbir_info->channels; + int alpha_channel = stbir_info->alpha_channel; + int type = stbir_info->type; + int colorspace = stbir_info->colorspace; + int input_w = stbir_info->input_w; + size_t input_stride_bytes = stbir_info->input_stride_bytes; + float* decode_buffer = stbir__get_decode_buffer(stbir_info); + stbir_edge edge_horizontal = stbir_info->edge_horizontal; + stbir_edge edge_vertical = stbir_info->edge_vertical; + size_t in_buffer_row_offset = stbir__edge_wrap(edge_vertical, n, stbir_info->input_h) * input_stride_bytes; + const void* input_data = (char *) stbir_info->input_data + in_buffer_row_offset; + int max_x = input_w + stbir_info->horizontal_filter_pixel_margin; + int decode = STBIR__DECODE(type, colorspace); + + int x = -stbir_info->horizontal_filter_pixel_margin; + + // special handling for STBIR_EDGE_ZERO because it needs to return an item that doesn't appear in the input, + // and we want to avoid paying overhead on every pixel if not STBIR_EDGE_ZERO + if (edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->input_h)) + { + for (; x < max_x; x++) + for (c = 0; c < channels; c++) + decode_buffer[x*channels + c] = 0; + return; + } + + switch (decode) + { + case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = ((float)((const unsigned char*)input_data)[input_pixel_index + c]) / stbir__max_uint8_as_float; + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = stbir__srgb_uchar_to_linear_float[((const unsigned char*)input_data)[input_pixel_index + c]]; + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned char*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint8_as_float; + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = ((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float; + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float); + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned short*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint16_as_float; + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / stbir__max_uint32_as_float); + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear((float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / stbir__max_uint32_as_float)); + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + decode_buffer[decode_pixel_index + alpha_channel] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint32_as_float); + } + break; + + case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = ((const float*)input_data)[input_pixel_index + c]; + } + break; + + case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB): + for (; x < max_x; x++) + { + int decode_pixel_index = x * channels; + int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels; + for (c = 0; c < channels; c++) + decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((const float*)input_data)[input_pixel_index + c]); + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + decode_buffer[decode_pixel_index + alpha_channel] = ((const float*)input_data)[input_pixel_index + alpha_channel]; + } + + break; + + default: + STBIR_ASSERT(!"Unknown type/colorspace/channels combination."); + break; + } + + if (!(stbir_info->flags & STBIR_FLAG_ALPHA_PREMULTIPLIED)) + { + for (x = -stbir_info->horizontal_filter_pixel_margin; x < max_x; x++) + { + int decode_pixel_index = x * channels; + + // If the alpha value is 0 it will clobber the color values. Make sure it's not. + float alpha = decode_buffer[decode_pixel_index + alpha_channel]; +#ifndef STBIR_NO_ALPHA_EPSILON + if (stbir_info->type != STBIR_TYPE_FLOAT) { + alpha += STBIR_ALPHA_EPSILON; + decode_buffer[decode_pixel_index + alpha_channel] = alpha; + } +#endif + for (c = 0; c < channels; c++) + { + if (c == alpha_channel) + continue; + + decode_buffer[decode_pixel_index + c] *= alpha; + } + } + } + + if (edge_horizontal == STBIR_EDGE_ZERO) + { + for (x = -stbir_info->horizontal_filter_pixel_margin; x < 0; x++) + { + for (c = 0; c < channels; c++) + decode_buffer[x*channels + c] = 0; + } + for (x = input_w; x < max_x; x++) + { + for (c = 0; c < channels; c++) + decode_buffer[x*channels + c] = 0; + } + } +} + +static float* stbir__get_ring_buffer_entry(float* ring_buffer, int index, int ring_buffer_length) +{ + return &ring_buffer[index * ring_buffer_length]; +} + +static float* stbir__add_empty_ring_buffer_entry(stbir__info* stbir_info, int n) +{ + int ring_buffer_index; + float* ring_buffer; + + stbir_info->ring_buffer_last_scanline = n; + + if (stbir_info->ring_buffer_begin_index < 0) + { + ring_buffer_index = stbir_info->ring_buffer_begin_index = 0; + stbir_info->ring_buffer_first_scanline = n; + } + else + { + ring_buffer_index = (stbir_info->ring_buffer_begin_index + (stbir_info->ring_buffer_last_scanline - stbir_info->ring_buffer_first_scanline)) % stbir_info->ring_buffer_num_entries; + STBIR_ASSERT(ring_buffer_index != stbir_info->ring_buffer_begin_index); + } + + ring_buffer = stbir__get_ring_buffer_entry(stbir_info->ring_buffer, ring_buffer_index, stbir_info->ring_buffer_length_bytes / sizeof(float)); + memset(ring_buffer, 0, stbir_info->ring_buffer_length_bytes); + + return ring_buffer; +} + + +static void stbir__resample_horizontal_upsample(stbir__info* stbir_info, float* output_buffer) +{ + int x, k; + int output_w = stbir_info->output_w; + int channels = stbir_info->channels; + float* decode_buffer = stbir__get_decode_buffer(stbir_info); + stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors; + float* horizontal_coefficients = stbir_info->horizontal_coefficients; + int coefficient_width = stbir_info->horizontal_coefficient_width; + + for (x = 0; x < output_w; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int out_pixel_index = x * channels; + int coefficient_group = coefficient_width * x; + int coefficient_counter = 0; + + STBIR_ASSERT(n1 >= n0); + STBIR_ASSERT(n0 >= -stbir_info->horizontal_filter_pixel_margin); + STBIR_ASSERT(n1 >= -stbir_info->horizontal_filter_pixel_margin); + STBIR_ASSERT(n0 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin); + STBIR_ASSERT(n1 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin); + + switch (channels) { + case 1: + for (k = n0; k <= n1; k++) + { + int in_pixel_index = k * 1; + float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++]; + STBIR_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + } + break; + case 2: + for (k = n0; k <= n1; k++) + { + int in_pixel_index = k * 2; + float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++]; + STBIR_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + } + break; + case 3: + for (k = n0; k <= n1; k++) + { + int in_pixel_index = k * 3; + float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++]; + STBIR_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient; + } + break; + case 4: + for (k = n0; k <= n1; k++) + { + int in_pixel_index = k * 4; + float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++]; + STBIR_ASSERT(coefficient != 0); + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient; + output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient; + } + break; + default: + for (k = n0; k <= n1; k++) + { + int in_pixel_index = k * channels; + float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++]; + int c; + STBIR_ASSERT(coefficient != 0); + for (c = 0; c < channels; c++) + output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient; + } + break; + } + } +} + +static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float* output_buffer) +{ + int x, k; + int input_w = stbir_info->input_w; + int channels = stbir_info->channels; + float* decode_buffer = stbir__get_decode_buffer(stbir_info); + stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors; + float* horizontal_coefficients = stbir_info->horizontal_coefficients; + int coefficient_width = stbir_info->horizontal_coefficient_width; + int filter_pixel_margin = stbir_info->horizontal_filter_pixel_margin; + int max_x = input_w + filter_pixel_margin * 2; + + STBIR_ASSERT(!stbir__use_width_upsampling(stbir_info)); + + switch (channels) { + case 1: + for (x = 0; x < max_x; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int in_x = x - filter_pixel_margin; + int in_pixel_index = in_x * 1; + int max_n = n1; + int coefficient_group = coefficient_width * x; + + for (k = n0; k <= max_n; k++) + { + int out_pixel_index = k * 1; + float coefficient = horizontal_coefficients[coefficient_group + k - n0]; + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + } + } + break; + + case 2: + for (x = 0; x < max_x; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int in_x = x - filter_pixel_margin; + int in_pixel_index = in_x * 2; + int max_n = n1; + int coefficient_group = coefficient_width * x; + + for (k = n0; k <= max_n; k++) + { + int out_pixel_index = k * 2; + float coefficient = horizontal_coefficients[coefficient_group + k - n0]; + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + } + } + break; + + case 3: + for (x = 0; x < max_x; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int in_x = x - filter_pixel_margin; + int in_pixel_index = in_x * 3; + int max_n = n1; + int coefficient_group = coefficient_width * x; + + for (k = n0; k <= max_n; k++) + { + int out_pixel_index = k * 3; + float coefficient = horizontal_coefficients[coefficient_group + k - n0]; + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient; + } + } + break; + + case 4: + for (x = 0; x < max_x; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int in_x = x - filter_pixel_margin; + int in_pixel_index = in_x * 4; + int max_n = n1; + int coefficient_group = coefficient_width * x; + + for (k = n0; k <= max_n; k++) + { + int out_pixel_index = k * 4; + float coefficient = horizontal_coefficients[coefficient_group + k - n0]; + output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient; + output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient; + output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient; + output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient; + } + } + break; + + default: + for (x = 0; x < max_x; x++) + { + int n0 = horizontal_contributors[x].n0; + int n1 = horizontal_contributors[x].n1; + + int in_x = x - filter_pixel_margin; + int in_pixel_index = in_x * channels; + int max_n = n1; + int coefficient_group = coefficient_width * x; + + for (k = n0; k <= max_n; k++) + { + int c; + int out_pixel_index = k * channels; + float coefficient = horizontal_coefficients[coefficient_group + k - n0]; + for (c = 0; c < channels; c++) + output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient; + } + } + break; + } +} + +static void stbir__decode_and_resample_upsample(stbir__info* stbir_info, int n) +{ + // Decode the nth scanline from the source image into the decode buffer. + stbir__decode_scanline(stbir_info, n); + + // Now resample it into the ring buffer. + if (stbir__use_width_upsampling(stbir_info)) + stbir__resample_horizontal_upsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n)); + else + stbir__resample_horizontal_downsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n)); + + // Now it's sitting in the ring buffer ready to be used as source for the vertical sampling. +} + +static void stbir__decode_and_resample_downsample(stbir__info* stbir_info, int n) +{ + // Decode the nth scanline from the source image into the decode buffer. + stbir__decode_scanline(stbir_info, n); + + memset(stbir_info->horizontal_buffer, 0, stbir_info->output_w * stbir_info->channels * sizeof(float)); + + // Now resample it into the horizontal buffer. + if (stbir__use_width_upsampling(stbir_info)) + stbir__resample_horizontal_upsample(stbir_info, stbir_info->horizontal_buffer); + else + stbir__resample_horizontal_downsample(stbir_info, stbir_info->horizontal_buffer); + + // Now it's sitting in the horizontal buffer ready to be distributed into the ring buffers. +} + +// Get the specified scan line from the ring buffer. +static float* stbir__get_ring_buffer_scanline(int get_scanline, float* ring_buffer, int begin_index, int first_scanline, int ring_buffer_num_entries, int ring_buffer_length) +{ + int ring_buffer_index = (begin_index + (get_scanline - first_scanline)) % ring_buffer_num_entries; + return stbir__get_ring_buffer_entry(ring_buffer, ring_buffer_index, ring_buffer_length); +} + + +static void stbir__encode_scanline(stbir__info* stbir_info, int num_pixels, void *output_buffer, float *encode_buffer, int channels, int alpha_channel, int decode) +{ + int x; + int n; + int num_nonalpha; + stbir_uint16 nonalpha[STBIR_MAX_CHANNELS]; + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)) + { + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + float alpha = encode_buffer[pixel_index + alpha_channel]; + float reciprocal_alpha = alpha ? 1.0f / alpha : 0; + + // unrolling this produced a 1% slowdown upscaling a large RGBA linear-space image on my machine - stb + for (n = 0; n < channels; n++) + if (n != alpha_channel) + encode_buffer[pixel_index + n] *= reciprocal_alpha; + + // We added in a small epsilon to prevent the color channel from being deleted with zero alpha. + // Because we only add it for integer types, it will automatically be discarded on integer + // conversion, so we don't need to subtract it back out (which would be problematic for + // numeric precision reasons). + } + } + + // build a table of all channels that need colorspace correction, so + // we don't perform colorspace correction on channels that don't need it. + for (x = 0, num_nonalpha = 0; x < channels; ++x) + { + if (x != alpha_channel || (stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE)) + { + nonalpha[num_nonalpha++] = (stbir_uint16)x; + } + } + + #define STBIR__ROUND_INT(f) ((int) ((f)+0.5)) + #define STBIR__ROUND_UINT(f) ((stbir_uint32) ((f)+0.5)) + + #ifdef STBIR__SATURATE_INT + #define STBIR__ENCODE_LINEAR8(f) stbir__saturate8 (STBIR__ROUND_INT((f) * stbir__max_uint8_as_float )) + #define STBIR__ENCODE_LINEAR16(f) stbir__saturate16(STBIR__ROUND_INT((f) * stbir__max_uint16_as_float)) + #else + #define STBIR__ENCODE_LINEAR8(f) (unsigned char ) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint8_as_float ) + #define STBIR__ENCODE_LINEAR16(f) (unsigned short) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint16_as_float) + #endif + + switch (decode) + { + case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < channels; n++) + { + int index = pixel_index + n; + ((unsigned char*)output_buffer)[index] = STBIR__ENCODE_LINEAR8(encode_buffer[index]); + } + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < num_nonalpha; n++) + { + int index = pixel_index + nonalpha[n]; + ((unsigned char*)output_buffer)[index] = stbir__linear_to_srgb_uchar(encode_buffer[index]); + } + + if (!(stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE)) + ((unsigned char *)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR8(encode_buffer[pixel_index+alpha_channel]); + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < channels; n++) + { + int index = pixel_index + n; + ((unsigned short*)output_buffer)[index] = STBIR__ENCODE_LINEAR16(encode_buffer[index]); + } + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < num_nonalpha; n++) + { + int index = pixel_index + nonalpha[n]; + ((unsigned short*)output_buffer)[index] = (unsigned short)STBIR__ROUND_INT(stbir__linear_to_srgb(stbir__saturate(encode_buffer[index])) * stbir__max_uint16_as_float); + } + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + ((unsigned short*)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR16(encode_buffer[pixel_index + alpha_channel]); + } + + break; + + case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < channels; n++) + { + int index = pixel_index + n; + ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__saturate(encode_buffer[index])) * stbir__max_uint32_as_float); + } + } + break; + + case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < num_nonalpha; n++) + { + int index = pixel_index + nonalpha[n]; + ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__linear_to_srgb(stbir__saturate(encode_buffer[index]))) * stbir__max_uint32_as_float); + } + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + ((unsigned int*)output_buffer)[pixel_index + alpha_channel] = (unsigned int)STBIR__ROUND_INT(((double)stbir__saturate(encode_buffer[pixel_index + alpha_channel])) * stbir__max_uint32_as_float); + } + break; + + case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < channels; n++) + { + int index = pixel_index + n; + ((float*)output_buffer)[index] = encode_buffer[index]; + } + } + break; + + case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB): + for (x=0; x < num_pixels; ++x) + { + int pixel_index = x*channels; + + for (n = 0; n < num_nonalpha; n++) + { + int index = pixel_index + nonalpha[n]; + ((float*)output_buffer)[index] = stbir__linear_to_srgb(encode_buffer[index]); + } + + if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)) + ((float*)output_buffer)[pixel_index + alpha_channel] = encode_buffer[pixel_index + alpha_channel]; + } + break; + + default: + STBIR_ASSERT(!"Unknown type/colorspace/channels combination."); + break; + } +} + +static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n) +{ + int x, k; + int output_w = stbir_info->output_w; + stbir__contributors* vertical_contributors = stbir_info->vertical_contributors; + float* vertical_coefficients = stbir_info->vertical_coefficients; + int channels = stbir_info->channels; + int alpha_channel = stbir_info->alpha_channel; + int type = stbir_info->type; + int colorspace = stbir_info->colorspace; + int ring_buffer_entries = stbir_info->ring_buffer_num_entries; + void* output_data = stbir_info->output_data; + float* encode_buffer = stbir_info->encode_buffer; + int decode = STBIR__DECODE(type, colorspace); + int coefficient_width = stbir_info->vertical_coefficient_width; + int coefficient_counter; + int contributor = n; + + float* ring_buffer = stbir_info->ring_buffer; + int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index; + int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline; + int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float); + + int n0,n1, output_row_start; + int coefficient_group = coefficient_width * contributor; + + n0 = vertical_contributors[contributor].n0; + n1 = vertical_contributors[contributor].n1; + + output_row_start = n * stbir_info->output_stride_bytes; + + STBIR_ASSERT(stbir__use_height_upsampling(stbir_info)); + + memset(encode_buffer, 0, output_w * sizeof(float) * channels); + + // I tried reblocking this for better cache usage of encode_buffer + // (using x_outer, k, x_inner), but it lost speed. -- stb + + coefficient_counter = 0; + switch (channels) { + case 1: + for (k = n0; k <= n1; k++) + { + int coefficient_index = coefficient_counter++; + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length); + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + for (x = 0; x < output_w; ++x) + { + int in_pixel_index = x * 1; + encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; + } + } + break; + case 2: + for (k = n0; k <= n1; k++) + { + int coefficient_index = coefficient_counter++; + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length); + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + for (x = 0; x < output_w; ++x) + { + int in_pixel_index = x * 2; + encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; + encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient; + } + } + break; + case 3: + for (k = n0; k <= n1; k++) + { + int coefficient_index = coefficient_counter++; + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length); + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + for (x = 0; x < output_w; ++x) + { + int in_pixel_index = x * 3; + encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; + encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient; + encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient; + } + } + break; + case 4: + for (k = n0; k <= n1; k++) + { + int coefficient_index = coefficient_counter++; + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length); + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + for (x = 0; x < output_w; ++x) + { + int in_pixel_index = x * 4; + encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; + encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient; + encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient; + encode_buffer[in_pixel_index + 3] += ring_buffer_entry[in_pixel_index + 3] * coefficient; + } + } + break; + default: + for (k = n0; k <= n1; k++) + { + int coefficient_index = coefficient_counter++; + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length); + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + for (x = 0; x < output_w; ++x) + { + int in_pixel_index = x * channels; + int c; + for (c = 0; c < channels; c++) + encode_buffer[in_pixel_index + c] += ring_buffer_entry[in_pixel_index + c] * coefficient; + } + } + break; + } + stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, encode_buffer, channels, alpha_channel, decode); +} + +static void stbir__resample_vertical_downsample(stbir__info* stbir_info, int n) +{ + int x, k; + int output_w = stbir_info->output_w; + stbir__contributors* vertical_contributors = stbir_info->vertical_contributors; + float* vertical_coefficients = stbir_info->vertical_coefficients; + int channels = stbir_info->channels; + int ring_buffer_entries = stbir_info->ring_buffer_num_entries; + float* horizontal_buffer = stbir_info->horizontal_buffer; + int coefficient_width = stbir_info->vertical_coefficient_width; + int contributor = n + stbir_info->vertical_filter_pixel_margin; + + float* ring_buffer = stbir_info->ring_buffer; + int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index; + int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline; + int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float); + int n0,n1; + + n0 = vertical_contributors[contributor].n0; + n1 = vertical_contributors[contributor].n1; + + STBIR_ASSERT(!stbir__use_height_upsampling(stbir_info)); + + for (k = n0; k <= n1; k++) + { + int coefficient_index = k - n0; + int coefficient_group = coefficient_width * contributor; + float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; + + float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length); + + switch (channels) { + case 1: + for (x = 0; x < output_w; x++) + { + int in_pixel_index = x * 1; + ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient; + } + break; + case 2: + for (x = 0; x < output_w; x++) + { + int in_pixel_index = x * 2; + ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient; + ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient; + } + break; + case 3: + for (x = 0; x < output_w; x++) + { + int in_pixel_index = x * 3; + ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient; + ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient; + ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient; + } + break; + case 4: + for (x = 0; x < output_w; x++) + { + int in_pixel_index = x * 4; + ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient; + ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient; + ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient; + ring_buffer_entry[in_pixel_index + 3] += horizontal_buffer[in_pixel_index + 3] * coefficient; + } + break; + default: + for (x = 0; x < output_w; x++) + { + int in_pixel_index = x * channels; + + int c; + for (c = 0; c < channels; c++) + ring_buffer_entry[in_pixel_index + c] += horizontal_buffer[in_pixel_index + c] * coefficient; + } + break; + } + } +} + +static void stbir__buffer_loop_upsample(stbir__info* stbir_info) +{ + int y; + float scale_ratio = stbir_info->vertical_scale; + float out_scanlines_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(1/scale_ratio) * scale_ratio; + + STBIR_ASSERT(stbir__use_height_upsampling(stbir_info)); + + for (y = 0; y < stbir_info->output_h; y++) + { + float in_center_of_out = 0; // Center of the current out scanline in the in scanline space + int in_first_scanline = 0, in_last_scanline = 0; + + stbir__calculate_sample_range_upsample(y, out_scanlines_radius, scale_ratio, stbir_info->vertical_shift, &in_first_scanline, &in_last_scanline, &in_center_of_out); + + STBIR_ASSERT(in_last_scanline - in_first_scanline + 1 <= stbir_info->ring_buffer_num_entries); + + if (stbir_info->ring_buffer_begin_index >= 0) + { + // Get rid of whatever we don't need anymore. + while (in_first_scanline > stbir_info->ring_buffer_first_scanline) + { + if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline) + { + // We just popped the last scanline off the ring buffer. + // Reset it to the empty state. + stbir_info->ring_buffer_begin_index = -1; + stbir_info->ring_buffer_first_scanline = 0; + stbir_info->ring_buffer_last_scanline = 0; + break; + } + else + { + stbir_info->ring_buffer_first_scanline++; + stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries; + } + } + } + + // Load in new ones. + if (stbir_info->ring_buffer_begin_index < 0) + stbir__decode_and_resample_upsample(stbir_info, in_first_scanline); + + while (in_last_scanline > stbir_info->ring_buffer_last_scanline) + stbir__decode_and_resample_upsample(stbir_info, stbir_info->ring_buffer_last_scanline + 1); + + // Now all buffers should be ready to write a row of vertical sampling. + stbir__resample_vertical_upsample(stbir_info, y); + + STBIR_PROGRESS_REPORT((float)y / stbir_info->output_h); + } +} + +static void stbir__empty_ring_buffer(stbir__info* stbir_info, int first_necessary_scanline) +{ + int output_stride_bytes = stbir_info->output_stride_bytes; + int channels = stbir_info->channels; + int alpha_channel = stbir_info->alpha_channel; + int type = stbir_info->type; + int colorspace = stbir_info->colorspace; + int output_w = stbir_info->output_w; + void* output_data = stbir_info->output_data; + int decode = STBIR__DECODE(type, colorspace); + + float* ring_buffer = stbir_info->ring_buffer; + int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float); + + if (stbir_info->ring_buffer_begin_index >= 0) + { + // Get rid of whatever we don't need anymore. + while (first_necessary_scanline > stbir_info->ring_buffer_first_scanline) + { + if (stbir_info->ring_buffer_first_scanline >= 0 && stbir_info->ring_buffer_first_scanline < stbir_info->output_h) + { + int output_row_start = stbir_info->ring_buffer_first_scanline * output_stride_bytes; + float* ring_buffer_entry = stbir__get_ring_buffer_entry(ring_buffer, stbir_info->ring_buffer_begin_index, ring_buffer_length); + stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, ring_buffer_entry, channels, alpha_channel, decode); + STBIR_PROGRESS_REPORT((float)stbir_info->ring_buffer_first_scanline / stbir_info->output_h); + } + + if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline) + { + // We just popped the last scanline off the ring buffer. + // Reset it to the empty state. + stbir_info->ring_buffer_begin_index = -1; + stbir_info->ring_buffer_first_scanline = 0; + stbir_info->ring_buffer_last_scanline = 0; + break; + } + else + { + stbir_info->ring_buffer_first_scanline++; + stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries; + } + } + } +} + +static void stbir__buffer_loop_downsample(stbir__info* stbir_info) +{ + int y; + float scale_ratio = stbir_info->vertical_scale; + int output_h = stbir_info->output_h; + float in_pixels_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(scale_ratio) / scale_ratio; + int pixel_margin = stbir_info->vertical_filter_pixel_margin; + int max_y = stbir_info->input_h + pixel_margin; + + STBIR_ASSERT(!stbir__use_height_upsampling(stbir_info)); + + for (y = -pixel_margin; y < max_y; y++) + { + float out_center_of_in; // Center of the current out scanline in the in scanline space + int out_first_scanline, out_last_scanline; + + stbir__calculate_sample_range_downsample(y, in_pixels_radius, scale_ratio, stbir_info->vertical_shift, &out_first_scanline, &out_last_scanline, &out_center_of_in); + + STBIR_ASSERT(out_last_scanline - out_first_scanline + 1 <= stbir_info->ring_buffer_num_entries); + + if (out_last_scanline < 0 || out_first_scanline >= output_h) + continue; + + stbir__empty_ring_buffer(stbir_info, out_first_scanline); + + stbir__decode_and_resample_downsample(stbir_info, y); + + // Load in new ones. + if (stbir_info->ring_buffer_begin_index < 0) + stbir__add_empty_ring_buffer_entry(stbir_info, out_first_scanline); + + while (out_last_scanline > stbir_info->ring_buffer_last_scanline) + stbir__add_empty_ring_buffer_entry(stbir_info, stbir_info->ring_buffer_last_scanline + 1); + + // Now the horizontal buffer is ready to write to all ring buffer rows. + stbir__resample_vertical_downsample(stbir_info, y); + } + + stbir__empty_ring_buffer(stbir_info, stbir_info->output_h); +} + +static void stbir__setup(stbir__info *info, int input_w, int input_h, int output_w, int output_h, int channels) +{ + info->input_w = input_w; + info->input_h = input_h; + info->output_w = output_w; + info->output_h = output_h; + info->channels = channels; +} + +static void stbir__calculate_transform(stbir__info *info, float s0, float t0, float s1, float t1, float *transform) +{ + info->s0 = s0; + info->t0 = t0; + info->s1 = s1; + info->t1 = t1; + + if (transform) + { + info->horizontal_scale = transform[0]; + info->vertical_scale = transform[1]; + info->horizontal_shift = transform[2]; + info->vertical_shift = transform[3]; + } + else + { + info->horizontal_scale = ((float)info->output_w / info->input_w) / (s1 - s0); + info->vertical_scale = ((float)info->output_h / info->input_h) / (t1 - t0); + + info->horizontal_shift = s0 * info->output_w / (s1 - s0); + info->vertical_shift = t0 * info->output_h / (t1 - t0); + } +} + +static void stbir__choose_filter(stbir__info *info, stbir_filter h_filter, stbir_filter v_filter) +{ + if (h_filter == 0) + h_filter = stbir__use_upsampling(info->horizontal_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE; + if (v_filter == 0) + v_filter = stbir__use_upsampling(info->vertical_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE; + info->horizontal_filter = h_filter; + info->vertical_filter = v_filter; +} + +static stbir_uint32 stbir__calculate_memory(stbir__info *info) +{ + int pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale); + int filter_height = stbir__get_filter_pixel_width(info->vertical_filter, info->vertical_scale); + + info->horizontal_num_contributors = stbir__get_contributors(info->horizontal_scale, info->horizontal_filter, info->input_w, info->output_w); + info->vertical_num_contributors = stbir__get_contributors(info->vertical_scale , info->vertical_filter , info->input_h, info->output_h); + + // One extra entry because floating point precision problems sometimes cause an extra to be necessary. + info->ring_buffer_num_entries = filter_height + 1; + + info->horizontal_contributors_size = info->horizontal_num_contributors * sizeof(stbir__contributors); + info->horizontal_coefficients_size = stbir__get_total_horizontal_coefficients(info) * sizeof(float); + info->vertical_contributors_size = info->vertical_num_contributors * sizeof(stbir__contributors); + info->vertical_coefficients_size = stbir__get_total_vertical_coefficients(info) * sizeof(float); + info->decode_buffer_size = (info->input_w + pixel_margin * 2) * info->channels * sizeof(float); + info->horizontal_buffer_size = info->output_w * info->channels * sizeof(float); + info->ring_buffer_size = info->output_w * info->channels * info->ring_buffer_num_entries * sizeof(float); + info->encode_buffer_size = info->output_w * info->channels * sizeof(float); + + STBIR_ASSERT(info->horizontal_filter != 0); + STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late + STBIR_ASSERT(info->vertical_filter != 0); + STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late + + if (stbir__use_height_upsampling(info)) + // The horizontal buffer is for when we're downsampling the height and we + // can't output the result of sampling the decode buffer directly into the + // ring buffers. + info->horizontal_buffer_size = 0; + else + // The encode buffer is to retain precision in the height upsampling method + // and isn't used when height downsampling. + info->encode_buffer_size = 0; + + return info->horizontal_contributors_size + info->horizontal_coefficients_size + + info->vertical_contributors_size + info->vertical_coefficients_size + + info->decode_buffer_size + info->horizontal_buffer_size + + info->ring_buffer_size + info->encode_buffer_size; +} + +static int stbir__resize_allocated(stbir__info *info, + const void* input_data, int input_stride_in_bytes, + void* output_data, int output_stride_in_bytes, + int alpha_channel, stbir_uint32 flags, stbir_datatype type, + stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace, + void* tempmem, size_t tempmem_size_in_bytes) +{ + size_t memory_required = stbir__calculate_memory(info); + + int width_stride_input = input_stride_in_bytes ? input_stride_in_bytes : info->channels * info->input_w * stbir__type_size[type]; + int width_stride_output = output_stride_in_bytes ? output_stride_in_bytes : info->channels * info->output_w * stbir__type_size[type]; + +#ifdef STBIR_DEBUG_OVERWRITE_TEST +#define OVERWRITE_ARRAY_SIZE 8 + unsigned char overwrite_output_before_pre[OVERWRITE_ARRAY_SIZE]; + unsigned char overwrite_tempmem_before_pre[OVERWRITE_ARRAY_SIZE]; + unsigned char overwrite_output_after_pre[OVERWRITE_ARRAY_SIZE]; + unsigned char overwrite_tempmem_after_pre[OVERWRITE_ARRAY_SIZE]; + + size_t begin_forbidden = width_stride_output * (info->output_h - 1) + info->output_w * info->channels * stbir__type_size[type]; + memcpy(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE); + memcpy(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE); + memcpy(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE); + memcpy(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE); +#endif + + STBIR_ASSERT(info->channels >= 0); + STBIR_ASSERT(info->channels <= STBIR_MAX_CHANNELS); + + if (info->channels < 0 || info->channels > STBIR_MAX_CHANNELS) + return 0; + + STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); + STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); + + if (info->horizontal_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table)) + return 0; + if (info->vertical_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table)) + return 0; + + if (alpha_channel < 0) + flags |= STBIR_FLAG_ALPHA_USES_COLORSPACE | STBIR_FLAG_ALPHA_PREMULTIPLIED; + + if (!(flags&STBIR_FLAG_ALPHA_USES_COLORSPACE) || !(flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)) { + STBIR_ASSERT(alpha_channel >= 0 && alpha_channel < info->channels); + } + + if (alpha_channel >= info->channels) + return 0; + + STBIR_ASSERT(tempmem); + + if (!tempmem) + return 0; + + STBIR_ASSERT(tempmem_size_in_bytes >= memory_required); + + if (tempmem_size_in_bytes < memory_required) + return 0; + + memset(tempmem, 0, tempmem_size_in_bytes); + + info->input_data = input_data; + info->input_stride_bytes = width_stride_input; + + info->output_data = output_data; + info->output_stride_bytes = width_stride_output; + + info->alpha_channel = alpha_channel; + info->flags = flags; + info->type = type; + info->edge_horizontal = edge_horizontal; + info->edge_vertical = edge_vertical; + info->colorspace = colorspace; + + info->horizontal_coefficient_width = stbir__get_coefficient_width (info->horizontal_filter, info->horizontal_scale); + info->vertical_coefficient_width = stbir__get_coefficient_width (info->vertical_filter , info->vertical_scale ); + info->horizontal_filter_pixel_width = stbir__get_filter_pixel_width (info->horizontal_filter, info->horizontal_scale); + info->vertical_filter_pixel_width = stbir__get_filter_pixel_width (info->vertical_filter , info->vertical_scale ); + info->horizontal_filter_pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale); + info->vertical_filter_pixel_margin = stbir__get_filter_pixel_margin(info->vertical_filter , info->vertical_scale ); + + info->ring_buffer_length_bytes = info->output_w * info->channels * sizeof(float); + info->decode_buffer_pixels = info->input_w + info->horizontal_filter_pixel_margin * 2; + +#define STBIR__NEXT_MEMPTR(current, newtype) (newtype*)(((unsigned char*)current) + current##_size) + + info->horizontal_contributors = (stbir__contributors *) tempmem; + info->horizontal_coefficients = STBIR__NEXT_MEMPTR(info->horizontal_contributors, float); + info->vertical_contributors = STBIR__NEXT_MEMPTR(info->horizontal_coefficients, stbir__contributors); + info->vertical_coefficients = STBIR__NEXT_MEMPTR(info->vertical_contributors, float); + info->decode_buffer = STBIR__NEXT_MEMPTR(info->vertical_coefficients, float); + + if (stbir__use_height_upsampling(info)) + { + info->horizontal_buffer = NULL; + info->ring_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float); + info->encode_buffer = STBIR__NEXT_MEMPTR(info->ring_buffer, float); + + STBIR_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->encode_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes); + } + else + { + info->horizontal_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float); + info->ring_buffer = STBIR__NEXT_MEMPTR(info->horizontal_buffer, float); + info->encode_buffer = NULL; + + STBIR_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->ring_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes); + } + +#undef STBIR__NEXT_MEMPTR + + // This signals that the ring buffer is empty + info->ring_buffer_begin_index = -1; + + stbir__calculate_filters(info->horizontal_contributors, info->horizontal_coefficients, info->horizontal_filter, info->horizontal_scale, info->horizontal_shift, info->input_w, info->output_w); + stbir__calculate_filters(info->vertical_contributors, info->vertical_coefficients, info->vertical_filter, info->vertical_scale, info->vertical_shift, info->input_h, info->output_h); + + STBIR_PROGRESS_REPORT(0); + + if (stbir__use_height_upsampling(info)) + stbir__buffer_loop_upsample(info); + else + stbir__buffer_loop_downsample(info); + + STBIR_PROGRESS_REPORT(1); + +#ifdef STBIR_DEBUG_OVERWRITE_TEST + STBIR_ASSERT(memcmp(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0); + STBIR_ASSERT(memcmp(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE) == 0); + STBIR_ASSERT(memcmp(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0); + STBIR_ASSERT(memcmp(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE) == 0); +#endif + + return 1; +} + + +static int stbir__resize_arbitrary( + void *alloc_context, + const void* input_data, int input_w, int input_h, int input_stride_in_bytes, + void* output_data, int output_w, int output_h, int output_stride_in_bytes, + float s0, float t0, float s1, float t1, float *transform, + int channels, int alpha_channel, stbir_uint32 flags, stbir_datatype type, + stbir_filter h_filter, stbir_filter v_filter, + stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace) +{ + stbir__info info; + int result; + size_t memory_required; + void* extra_memory; + + stbir__setup(&info, input_w, input_h, output_w, output_h, channels); + stbir__calculate_transform(&info, s0,t0,s1,t1,transform); + stbir__choose_filter(&info, h_filter, v_filter); + memory_required = stbir__calculate_memory(&info); + extra_memory = STBIR_MALLOC(memory_required, alloc_context); + + if (!extra_memory) + return 0; + + result = stbir__resize_allocated(&info, input_data, input_stride_in_bytes, + output_data, output_stride_in_bytes, + alpha_channel, flags, type, + edge_horizontal, edge_vertical, + colorspace, extra_memory, memory_required); + + STBIR_FREE(extra_memory, alloc_context); + + return result; +} + +STBIRDEF int stbir_resize_uint8( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels) +{ + return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + 0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT, + STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR); +} + +STBIRDEF int stbir_resize_float( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + float *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels) +{ + return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + 0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_FLOAT, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT, + STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR); +} + +STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags) +{ + return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT, + STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_SRGB); +} + +STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode) +{ + return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT, + edge_wrap_mode, edge_wrap_mode, STBIR_COLORSPACE_SRGB); +} + +STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, + void *alloc_context) +{ + return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, filter, filter, + edge_wrap_mode, edge_wrap_mode, space); +} + +STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, + void *alloc_context) +{ + return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT16, filter, filter, + edge_wrap_mode, edge_wrap_mode, space); +} + + +STBIRDEF int stbir_resize_float_generic( const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + float *output_pixels , int output_w, int output_h, int output_stride_in_bytes, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space, + void *alloc_context) +{ + return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + 0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_FLOAT, filter, filter, + edge_wrap_mode, edge_wrap_mode, space); +} + + +STBIRDEF int stbir_resize( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_datatype datatype, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, + stbir_filter filter_horizontal, stbir_filter filter_vertical, + stbir_colorspace space, void *alloc_context) +{ + return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + 0,0,1,1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical, + edge_mode_horizontal, edge_mode_vertical, space); +} + + +STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_datatype datatype, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, + stbir_filter filter_horizontal, stbir_filter filter_vertical, + stbir_colorspace space, void *alloc_context, + float x_scale, float y_scale, + float x_offset, float y_offset) +{ + float transform[4]; + transform[0] = x_scale; + transform[1] = y_scale; + transform[2] = x_offset; + transform[3] = y_offset; + return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + 0,0,1,1,transform,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical, + edge_mode_horizontal, edge_mode_vertical, space); +} + +STBIRDEF int stbir_resize_region( const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_datatype datatype, + int num_channels, int alpha_channel, int flags, + stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical, + stbir_filter filter_horizontal, stbir_filter filter_vertical, + stbir_colorspace space, void *alloc_context, + float s0, float t0, float s1, float t1) +{ + return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + s0,t0,s1,t1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical, + edge_mode_horizontal, edge_mode_vertical, space); +} + +#endif // STB_IMAGE_RESIZE_IMPLEMENTATION + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/externals/stb/stb_image_write.h b/externals/stb/stb_image_write.h new file mode 100644 index 000000000..e4b32ed1b --- /dev/null +++ b/externals/stb/stb_image_write.h @@ -0,0 +1,1724 @@ +/* stb_image_write - v1.16 - public domain - http://nothings.org/stb + writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 + no warranty implied; use at your own risk + + Before #including, + + #define STB_IMAGE_WRITE_IMPLEMENTATION + + in the file that you want to have the implementation. + + Will probably not work correctly with strict-aliasing optimizations. + +ABOUT: + + This header file is a library for writing images to C stdio or a callback. + + The PNG output is not optimal; it is 20-50% larger than the file + written by a decent optimizing implementation; though providing a custom + zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. + This library is designed for source code compactness and simplicity, + not optimal image file size or run-time performance. + +BUILDING: + + You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. + You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace + malloc,realloc,free. + You can #define STBIW_MEMMOVE() to replace memmove() + You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function + for PNG compression (instead of the builtin one), it must have the following signature: + unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); + The returned data will be freed with STBIW_FREE() (free() by default), + so it must be heap allocated with STBIW_MALLOC() (malloc() by default), + +UNICODE: + + If compiling for Windows and you wish to use Unicode filenames, compile + with + #define STBIW_WINDOWS_UTF8 + and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert + Windows wchar_t filenames to utf8. + +USAGE: + + There are five functions, one for each image file format: + + int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); + int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); + + void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically + + There are also five equivalent functions that use an arbitrary write function. You are + expected to open/close your file-equivalent before and after calling these: + + int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); + int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + + where the callback is: + void stbi_write_func(void *context, void *data, int size); + + You can configure it with these global variables: + int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE + int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression + int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode + + + You can define STBI_WRITE_NO_STDIO to disable the file variant of these + functions, so the library will not use stdio.h at all. However, this will + also disable HDR writing, because it requires stdio for formatted output. + + Each function returns 0 on failure and non-0 on success. + + The functions create an image file defined by the parameters. The image + is a rectangle of pixels stored from left-to-right, top-to-bottom. + Each pixel contains 'comp' channels of data stored interleaved with 8-bits + per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is + monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. + The *data pointer points to the first byte of the top-left-most pixel. + For PNG, "stride_in_bytes" is the distance in bytes from the first byte of + a row of pixels to the first byte of the next row of pixels. + + PNG creates output files with the same number of components as the input. + The BMP format expands Y to RGB in the file format and does not + output alpha. + + PNG supports writing rectangles of data even when the bytes storing rows of + data are not consecutive in memory (e.g. sub-rectangles of a larger image), + by supplying the stride between the beginning of adjacent rows. The other + formats do not. (Thus you cannot write a native-format BMP through the BMP + writer, both because it is in BGR order and because it may have padding + at the end of the line.) + + PNG allows you to set the deflate compression level by setting the global + variable 'stbi_write_png_compression_level' (it defaults to 8). + + HDR expects linear float data. Since the format is always 32-bit rgb(e) + data, alpha (if provided) is discarded, and for monochrome data it is + replicated across all three channels. + + TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed + data, set the global variable 'stbi_write_tga_with_rle' to 0. + + JPEG does ignore alpha channels in input data; quality is between 1 and 100. + Higher quality looks better but results in a bigger image. + JPEG baseline (no JPEG progressive). + +CREDITS: + + + Sean Barrett - PNG/BMP/TGA + Baldur Karlsson - HDR + Jean-Sebastien Guay - TGA monochrome + Tim Kelsey - misc enhancements + Alan Hickman - TGA RLE + Emmanuel Julien - initial file IO callback implementation + Jon Olick - original jo_jpeg.cpp code + Daniel Gibson - integrate JPEG, allow external zlib + Aarni Koskela - allow choosing PNG filter + + bugfixes: + github:Chribba + Guillaume Chereau + github:jry2 + github:romigrou + Sergio Gonzalez + Jonas Karlsson + Filip Wasil + Thatcher Ulrich + github:poppolopoppo + Patrick Boettcher + github:xeekworx + Cap Petschulat + Simon Rodriguez + Ivan Tikhonov + github:ignotion + Adam Schackart + Andrew Kensler + +LICENSE + + See end of file for license information. + +*/ + +#ifndef INCLUDE_STB_IMAGE_WRITE_H +#define INCLUDE_STB_IMAGE_WRITE_H + +#include <stdlib.h> + +// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' +#ifndef STBIWDEF +#ifdef STB_IMAGE_WRITE_STATIC +#define STBIWDEF static +#else +#ifdef __cplusplus +#define STBIWDEF extern "C" +#else +#define STBIWDEF extern +#endif +#endif +#endif + +#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations +STBIWDEF int stbi_write_tga_with_rle; +STBIWDEF int stbi_write_png_compression_level; +STBIWDEF int stbi_write_force_png_filter; +#endif + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); + +#ifdef STBIW_WINDOWS_UTF8 +STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); +#endif +#endif + +typedef void stbi_write_func(void *context, void *data, int size); + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + +STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); + +#endif//INCLUDE_STB_IMAGE_WRITE_H + +#ifdef STB_IMAGE_WRITE_IMPLEMENTATION + +#ifdef _WIN32 + #ifndef _CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif + #ifndef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #endif +#endif + +#ifndef STBI_WRITE_NO_STDIO +#include <stdio.h> +#endif // STBI_WRITE_NO_STDIO + +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) +// ok +#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." +#endif + +#ifndef STBIW_MALLOC +#define STBIW_MALLOC(sz) malloc(sz) +#define STBIW_REALLOC(p,newsz) realloc(p,newsz) +#define STBIW_FREE(p) free(p) +#endif + +#ifndef STBIW_REALLOC_SIZED +#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) +#endif + + +#ifndef STBIW_MEMMOVE +#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) +#endif + + +#ifndef STBIW_ASSERT +#include <assert.h> +#define STBIW_ASSERT(x) assert(x) +#endif + +#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) + +#ifdef STB_IMAGE_WRITE_STATIC +static int stbi_write_png_compression_level = 8; +static int stbi_write_tga_with_rle = 1; +static int stbi_write_force_png_filter = -1; +#else +int stbi_write_png_compression_level = 8; +int stbi_write_tga_with_rle = 1; +int stbi_write_force_png_filter = -1; +#endif + +static int stbi__flip_vertically_on_write = 0; + +STBIWDEF void stbi_flip_vertically_on_write(int flag) +{ + stbi__flip_vertically_on_write = flag; +} + +typedef struct +{ + stbi_write_func *func; + void *context; + unsigned char buffer[64]; + int buf_used; +} stbi__write_context; + +// initialize a callback-based context +static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) +{ + s->func = c; + s->context = context; +} + +#ifndef STBI_WRITE_NO_STDIO + +static void stbi__stdio_write(void *context, void *data, int size) +{ + fwrite(data,1,size,(FILE*) context); +} + +#if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8) +#ifdef __cplusplus +#define STBIW_EXTERN extern "C" +#else +#define STBIW_EXTERN extern +#endif +STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); +STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); + +STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) +{ + return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); +} +#endif + +static FILE *stbiw__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8) + wchar_t wMode[64]; + wchar_t wFilename[1024]; + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) + return 0; + + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) + return 0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != _wfopen_s(&f, wFilename, wMode)) + f = 0; +#else + f = _wfopen(wFilename, wMode); +#endif + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + +static int stbi__start_write_file(stbi__write_context *s, const char *filename) +{ + FILE *f = stbiw__fopen(filename, "wb"); + stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); + return f != NULL; +} + +static void stbi__end_write_file(stbi__write_context *s) +{ + fclose((FILE *)s->context); +} + +#endif // !STBI_WRITE_NO_STDIO + +typedef unsigned int stbiw_uint32; +typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; + +static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); + s->func(s->context,&x,1); + break; } + case '2': { int x = va_arg(v,int); + unsigned char b[2]; + b[0] = STBIW_UCHAR(x); + b[1] = STBIW_UCHAR(x>>8); + s->func(s->context,b,2); + break; } + case '4': { stbiw_uint32 x = va_arg(v,int); + unsigned char b[4]; + b[0]=STBIW_UCHAR(x); + b[1]=STBIW_UCHAR(x>>8); + b[2]=STBIW_UCHAR(x>>16); + b[3]=STBIW_UCHAR(x>>24); + s->func(s->context,b,4); + break; } + default: + STBIW_ASSERT(0); + return; + } + } +} + +static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) +{ + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); +} + +static void stbiw__write_flush(stbi__write_context *s) +{ + if (s->buf_used) { + s->func(s->context, &s->buffer, s->buf_used); + s->buf_used = 0; + } +} + +static void stbiw__putc(stbi__write_context *s, unsigned char c) +{ + s->func(s->context, &c, 1); +} + +static void stbiw__write1(stbi__write_context *s, unsigned char a) +{ + if ((size_t)s->buf_used + 1 > sizeof(s->buffer)) + stbiw__write_flush(s); + s->buffer[s->buf_used++] = a; +} + +static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) +{ + int n; + if ((size_t)s->buf_used + 3 > sizeof(s->buffer)) + stbiw__write_flush(s); + n = s->buf_used; + s->buf_used = n+3; + s->buffer[n+0] = a; + s->buffer[n+1] = b; + s->buffer[n+2] = c; +} + +static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) +{ + unsigned char bg[3] = { 255, 0, 255}, px[3]; + int k; + + if (write_alpha < 0) + stbiw__write1(s, d[comp - 1]); + + switch (comp) { + case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case + case 1: + if (expand_mono) + stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp + else + stbiw__write1(s, d[0]); // monochrome TGA + break; + case 4: + if (!write_alpha) { + // composite against pink background + for (k = 0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; + stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); + break; + } + if (write_alpha > 0) + stbiw__write1(s, d[comp - 1]); +} + +static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) +{ + stbiw_uint32 zero = 0; + int i,j, j_end; + + if (y <= 0) + return; + + if (stbi__flip_vertically_on_write) + vdir *= -1; + + if (vdir < 0) { + j_end = -1; j = y-1; + } else { + j_end = y; j = 0; + } + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + unsigned char *d = (unsigned char *) data + (j*x+i)*comp; + stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); + } + stbiw__write_flush(s); + s->func(s->context, &zero, scanline_pad); + } +} + +static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) +{ + if (y < 0 || x < 0) { + return 0; + } else { + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); + stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); + return 1; + } +} + +static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) +{ + if (comp != 4) { + // write RGB bitmap + int pad = (-x*3) & 3; + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header + } else { + // RGBA bitmaps need a v4 header + // use BI_BITFIELDS mode with 32bpp and alpha mask + // (straight BI_RGB with alpha mask doesn't work in most readers) + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *)data,1,0, + "11 4 22 4" "4 44 22 444444 4444 4 444 444 444 444", + 'B', 'M', 14+108+x*y*4, 0, 0, 14+108, // file header + 108, x,y, 1,32, 3,0,0,0,0,0, 0xff0000,0xff00,0xff,0xff000000u, 0, 0,0,0, 0,0,0, 0,0,0, 0,0,0); // bitmap V4 header + } +} + +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s = { 0 }; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_bmp_core(&s, x, y, comp, data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + stbi__write_context s = { 0 }; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_bmp_core(&s, x, y, comp, data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif //!STBI_WRITE_NO_STDIO + +static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) +{ + int has_alpha = (comp == 2 || comp == 4); + int colorbytes = has_alpha ? comp-1 : comp; + int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 + + if (y < 0 || x < 0) + return 0; + + if (!stbi_write_tga_with_rle) { + return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, + "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); + } else { + int i,j,k; + int jend, jdir; + + stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); + + if (stbi__flip_vertically_on_write) { + j = 0; + jend = y; + jdir = 1; + } else { + j = y-1; + jend = -1; + jdir = -1; + } + for (; j != jend; j += jdir) { + unsigned char *row = (unsigned char *) data + j * x * comp; + int len; + + for (i = 0; i < x; i += len) { + unsigned char *begin = row + i * comp; + int diff = 1; + len = 1; + + if (i < x - 1) { + ++len; + diff = memcmp(begin, row + (i + 1) * comp, comp); + if (diff) { + const unsigned char *prev = begin; + for (k = i + 2; k < x && len < 128; ++k) { + if (memcmp(prev, row + k * comp, comp)) { + prev += comp; + ++len; + } else { + --len; + break; + } + } + } else { + for (k = i + 2; k < x && len < 128; ++k) { + if (!memcmp(begin, row + k * comp, comp)) { + ++len; + } else { + break; + } + } + } + } + + if (diff) { + unsigned char header = STBIW_UCHAR(len - 1); + stbiw__write1(s, header); + for (k = 0; k < len; ++k) { + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); + } + } else { + unsigned char header = STBIW_UCHAR(len - 129); + stbiw__write1(s, header); + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); + } + } + } + stbiw__write_flush(s); + } + return 1; +} + +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s = { 0 }; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_tga_core(&s, x, y, comp, (void *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) +{ + stbi__write_context s = { 0 }; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR writer +// by Baldur Karlsson + +#define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) + +#ifndef STBI_WRITE_NO_STDIO + +static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) +{ + int exponent; + float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); + + if (maxcomp < 1e-32f) { + rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; + } else { + float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; + + rgbe[0] = (unsigned char)(linear[0] * normalize); + rgbe[1] = (unsigned char)(linear[1] * normalize); + rgbe[2] = (unsigned char)(linear[2] * normalize); + rgbe[3] = (unsigned char)(exponent + 128); + } +} + +static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) +{ + unsigned char lengthbyte = STBIW_UCHAR(length+128); + STBIW_ASSERT(length+128 <= 255); + s->func(s->context, &lengthbyte, 1); + s->func(s->context, &databyte, 1); +} + +static void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) +{ + unsigned char lengthbyte = STBIW_UCHAR(length); + STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code + s->func(s->context, &lengthbyte, 1); + s->func(s->context, data, length); +} + +static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) +{ + unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; + unsigned char rgbe[4]; + float linear[3]; + int x; + + scanlineheader[2] = (width&0xff00)>>8; + scanlineheader[3] = (width&0x00ff); + + /* skip RLE for images too small or large */ + if (width < 8 || width >= 32768) { + for (x=0; x < width; x++) { + switch (ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + s->func(s->context, rgbe, 4); + } + } else { + int c,r; + /* encode into scratch buffer */ + for (x=0; x < width; x++) { + switch(ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + scratch[x + width*0] = rgbe[0]; + scratch[x + width*1] = rgbe[1]; + scratch[x + width*2] = rgbe[2]; + scratch[x + width*3] = rgbe[3]; + } + + s->func(s->context, scanlineheader, 4); + + /* RLE each component separately */ + for (c=0; c < 4; c++) { + unsigned char *comp = &scratch[width*c]; + + x = 0; + while (x < width) { + // find first run + r = x; + while (r+2 < width) { + if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) + break; + ++r; + } + if (r+2 >= width) + r = width; + // dump up to first run + while (x < r) { + int len = r-x; + if (len > 128) len = 128; + stbiw__write_dump_data(s, len, &comp[x]); + x += len; + } + // if there's a run, output it + if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd + // find next byte after run + while (r < width && comp[r] == comp[x]) + ++r; + // output run up to r + while (x < r) { + int len = r-x; + if (len > 127) len = 127; + stbiw__write_run_data(s, len, comp[x]); + x += len; + } + } + } + } + } +} + +static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) +{ + if (y <= 0 || x <= 0 || data == NULL) + return 0; + else { + // Each component is stored separately. Allocate scratch space for full output scanline. + unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); + int i, len; + char buffer[128]; + char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; + s->func(s->context, header, sizeof(header)-1); + +#ifdef __STDC_LIB_EXT1__ + len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#else + len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#endif + s->func(s->context, buffer, len); + + for(i=0; i < y; i++) + stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)); + STBIW_FREE(scratch); + return 1; + } +} + +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) +{ + stbi__write_context s = { 0 }; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_hdr_core(&s, x, y, comp, (float *) data); +} + +STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) +{ + stbi__write_context s = { 0 }; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif // STBI_WRITE_NO_STDIO + + +////////////////////////////////////////////////////////////////////////////// +// +// PNG writer +// + +#ifndef STBIW_ZLIB_COMPRESS +// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() +#define stbiw__sbraw(a) ((int *) (void *) (a) - 2) +#define stbiw__sbm(a) stbiw__sbraw(a)[0] +#define stbiw__sbn(a) stbiw__sbraw(a)[1] + +#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) +#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) +#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) + +#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) +#define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) +#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) + +static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) +{ + int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; + void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); + STBIW_ASSERT(p); + if (p) { + if (!*arr) ((int *) p)[1] = 0; + *arr = (void *) ((int *) p + 2); + stbiw__sbm(*arr) = m; + } + return *arr; +} + +static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) +{ + while (*bitcount >= 8) { + stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); + *bitbuffer >>= 8; + *bitcount -= 8; + } + return data; +} + +static int stbiw__zlib_bitrev(int code, int codebits) +{ + int res=0; + while (codebits--) { + res = (res << 1) | (code & 1); + code >>= 1; + } + return res; +} + +static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) +{ + int i; + for (i=0; i < limit && i < 258; ++i) + if (a[i] != b[i]) break; + return i; +} + +static unsigned int stbiw__zhash(unsigned char *data) +{ + stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + return hash; +} + +#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) +#define stbiw__zlib_add(code,codebits) \ + (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) +#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) +// default huffman tables +#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) +#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) +#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) +#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) +#define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) +#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) + +#define stbiw__ZHASH 16384 + +#endif // STBIW_ZLIB_COMPRESS + +STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) +{ +#ifdef STBIW_ZLIB_COMPRESS + // user provided a zlib compress implementation, use that + return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); +#else // use builtin + static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; + static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; + static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; + unsigned int bitbuf=0; + int i,j, bitcount=0; + unsigned char *out = NULL; + unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**)); + if (hash_table == NULL) + return NULL; + if (quality < 5) quality = 5; + + stbiw__sbpush(out, 0x78); // DEFLATE 32K window + stbiw__sbpush(out, 0x5e); // FLEVEL = 1 + stbiw__zlib_add(1,1); // BFINAL = 1 + stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman + + for (i=0; i < stbiw__ZHASH; ++i) + hash_table[i] = NULL; + + i=0; + while (i < data_len-3) { + // hash next 3 bytes of data to be compressed + int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; + unsigned char *bestloc = 0; + unsigned char **hlist = hash_table[h]; + int n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32768) { // if entry lies within window + int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); + if (d >= best) { best=d; bestloc=hlist[j]; } + } + } + // when hash table entry is too long, delete half the entries + if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { + STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); + stbiw__sbn(hash_table[h]) = quality; + } + stbiw__sbpush(hash_table[h],data+i); + + if (bestloc) { + // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal + h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); + hlist = hash_table[h]; + n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32767) { + int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); + if (e > best) { // if next match is better, bail on current match + bestloc = NULL; + break; + } + } + } + } + + if (bestloc) { + int d = (int) (data+i - bestloc); // distance back + STBIW_ASSERT(d <= 32767 && best <= 258); + for (j=0; best > lengthc[j+1]-1; ++j); + stbiw__zlib_huff(j+257); + if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); + for (j=0; d > distc[j+1]-1; ++j); + stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); + if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); + i += best; + } else { + stbiw__zlib_huffb(data[i]); + ++i; + } + } + // write out final bytes + for (;i < data_len; ++i) + stbiw__zlib_huffb(data[i]); + stbiw__zlib_huff(256); // end of block + // pad with 0 bits to byte boundary + while (bitcount) + stbiw__zlib_add(0,1); + + for (i=0; i < stbiw__ZHASH; ++i) + (void) stbiw__sbfree(hash_table[i]); + STBIW_FREE(hash_table); + + // store uncompressed instead if compression was worse + if (stbiw__sbn(out) > data_len + 2 + ((data_len+32766)/32767)*5) { + stbiw__sbn(out) = 2; // truncate to DEFLATE 32K window and FLEVEL = 1 + for (j = 0; j < data_len;) { + int blocklen = data_len - j; + if (blocklen > 32767) blocklen = 32767; + stbiw__sbpush(out, data_len - j == blocklen); // BFINAL = ?, BTYPE = 0 -- no compression + stbiw__sbpush(out, STBIW_UCHAR(blocklen)); // LEN + stbiw__sbpush(out, STBIW_UCHAR(blocklen >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(~blocklen)); // NLEN + stbiw__sbpush(out, STBIW_UCHAR(~blocklen >> 8)); + memcpy(out+stbiw__sbn(out), data+j, blocklen); + stbiw__sbn(out) += blocklen; + j += blocklen; + } + } + + { + // compute adler32 on input + unsigned int s1=1, s2=0; + int blocklen = (int) (data_len % 5552); + j=0; + while (j < data_len) { + for (i=0; i < blocklen; ++i) { s1 += data[j+i]; s2 += s1; } + s1 %= 65521; s2 %= 65521; + j += blocklen; + blocklen = 5552; + } + stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s2)); + stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s1)); + } + *out_len = stbiw__sbn(out); + // make returned pointer freeable + STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); + return (unsigned char *) stbiw__sbraw(out); +#endif // STBIW_ZLIB_COMPRESS +} + +static unsigned int stbiw__crc32(unsigned char *buffer, int len) +{ +#ifdef STBIW_CRC32 + return STBIW_CRC32(buffer, len); +#else + static unsigned int crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + unsigned int crc = ~0u; + int i; + for (i=0; i < len; ++i) + crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; + return ~crc; +#endif +} + +#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) +#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); +#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) + +static void stbiw__wpcrc(unsigned char **data, int len) +{ + unsigned int crc = stbiw__crc32(*data - len - 4, len+4); + stbiw__wp32(*data, crc); +} + +static unsigned char stbiw__paeth(int a, int b, int c) +{ + int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); + if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); + if (pb <= pc) return STBIW_UCHAR(b); + return STBIW_UCHAR(c); +} + +// @OPTIMIZE: provide an option that always forces left-predict or paeth predict +static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) +{ + static int mapping[] = { 0,1,2,3,4 }; + static int firstmap[] = { 0,1,0,5,6 }; + int *mymap = (y != 0) ? mapping : firstmap; + int i; + int type = mymap[filter_type]; + unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); + int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; + + if (type==0) { + memcpy(line_buffer, z, width*n); + return; + } + + // first loop isn't optimized since it's just one pixel + for (i = 0; i < n; ++i) { + switch (type) { + case 1: line_buffer[i] = z[i]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; + case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; + case 5: line_buffer[i] = z[i]; break; + case 6: line_buffer[i] = z[i]; break; + } + } + switch (type) { + case 1: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-n]; break; + case 2: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; + case 4: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; + case 5: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - (z[i-n]>>1); break; + case 6: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; + } +} + +STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) +{ + int force_filter = stbi_write_force_png_filter; + int ctype[5] = { -1, 0, 4, 2, 6 }; + unsigned char sig[8] = { 137,80,78,71,13,10,26,10 }; + unsigned char *out,*o, *filt, *zlib; + signed char *line_buffer; + int j,zlen; + + if (stride_bytes == 0) + stride_bytes = x * n; + + if (force_filter >= 5) { + force_filter = -1; + } + + filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; + line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } + for (j=0; j < y; ++j) { + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + } + STBIW_FREE(line_buffer); + zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); + STBIW_FREE(filt); + if (!zlib) return 0; + + // each tag requires 12 bytes of overhead + out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); + if (!out) return 0; + *out_len = 8 + 12+13 + 12+zlen + 12; + + o=out; + STBIW_MEMMOVE(o,sig,8); o+= 8; + stbiw__wp32(o, 13); // header length + stbiw__wptag(o, "IHDR"); + stbiw__wp32(o, x); + stbiw__wp32(o, y); + *o++ = 8; + *o++ = STBIW_UCHAR(ctype[n]); + *o++ = 0; + *o++ = 0; + *o++ = 0; + stbiw__wpcrc(&o,13); + + stbiw__wp32(o, zlen); + stbiw__wptag(o, "IDAT"); + STBIW_MEMMOVE(o, zlib, zlen); + o += zlen; + STBIW_FREE(zlib); + stbiw__wpcrc(&o, zlen); + + stbiw__wp32(o,0); + stbiw__wptag(o, "IEND"); + stbiw__wpcrc(&o,0); + + STBIW_ASSERT(o == out + *out_len); + + return out; +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) +{ + FILE *f; + int len; + unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + + f = stbiw__fopen(filename, "wb"); + if (!f) { STBIW_FREE(png); return 0; } + fwrite(png, 1, len, f); + fclose(f); + STBIW_FREE(png); + return 1; +} +#endif + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) +{ + int len; + unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + func(context, png, len); + STBIW_FREE(png); + return 1; +} + + +/* *************************************************************************** + * + * JPEG writer + * + * This is based on Jon Olick's jo_jpeg.cpp: + * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html + */ + +static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, + 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; + +static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { + int bitBuf = *bitBufP, bitCnt = *bitCntP; + bitCnt += bs[1]; + bitBuf |= bs[0] << (24 - bitCnt); + while(bitCnt >= 8) { + unsigned char c = (bitBuf >> 16) & 255; + stbiw__putc(s, c); + if(c == 255) { + stbiw__putc(s, 0); + } + bitBuf <<= 8; + bitCnt -= 8; + } + *bitBufP = bitBuf; + *bitCntP = bitCnt; +} + +static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { + float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; + float z1, z2, z3, z4, z5, z11, z13; + + float tmp0 = d0 + d7; + float tmp7 = d0 - d7; + float tmp1 = d1 + d6; + float tmp6 = d1 - d6; + float tmp2 = d2 + d5; + float tmp5 = d2 - d5; + float tmp3 = d3 + d4; + float tmp4 = d3 - d4; + + // Even part + float tmp10 = tmp0 + tmp3; // phase 2 + float tmp13 = tmp0 - tmp3; + float tmp11 = tmp1 + tmp2; + float tmp12 = tmp1 - tmp2; + + d0 = tmp10 + tmp11; // phase 3 + d4 = tmp10 - tmp11; + + z1 = (tmp12 + tmp13) * 0.707106781f; // c4 + d2 = tmp13 + z1; // phase 5 + d6 = tmp13 - z1; + + // Odd part + tmp10 = tmp4 + tmp5; // phase 2 + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + // The rotator is modified from fig 4-8 to avoid extra negations. + z5 = (tmp10 - tmp12) * 0.382683433f; // c6 + z2 = tmp10 * 0.541196100f + z5; // c2-c6 + z4 = tmp12 * 1.306562965f + z5; // c2+c6 + z3 = tmp11 * 0.707106781f; // c4 + + z11 = tmp7 + z3; // phase 5 + z13 = tmp7 - z3; + + *d5p = z13 + z2; // phase 6 + *d3p = z13 - z2; + *d1p = z11 + z4; + *d7p = z11 - z4; + + *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; +} + +static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { + int tmp1 = val < 0 ? -val : val; + val = val < 0 ? val-1 : val; + bits[1] = 1; + while(tmp1 >>= 1) { + ++bits[1]; + } + bits[0] = val & ((1<<bits[1])-1); +} + +static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt, float *CDU, int du_stride, float *fdtbl, int DC, const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) { + const unsigned short EOB[2] = { HTAC[0x00][0], HTAC[0x00][1] }; + const unsigned short M16zeroes[2] = { HTAC[0xF0][0], HTAC[0xF0][1] }; + int dataOff, i, j, n, diff, end0pos, x, y; + int DU[64]; + + // DCT rows + for(dataOff=0, n=du_stride*8; dataOff<n; dataOff+=du_stride) { + stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+1], &CDU[dataOff+2], &CDU[dataOff+3], &CDU[dataOff+4], &CDU[dataOff+5], &CDU[dataOff+6], &CDU[dataOff+7]); + } + // DCT columns + for(dataOff=0; dataOff<8; ++dataOff) { + stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+du_stride], &CDU[dataOff+du_stride*2], &CDU[dataOff+du_stride*3], &CDU[dataOff+du_stride*4], + &CDU[dataOff+du_stride*5], &CDU[dataOff+du_stride*6], &CDU[dataOff+du_stride*7]); + } + // Quantize/descale/zigzag the coefficients + for(y = 0, j=0; y < 8; ++y) { + for(x = 0; x < 8; ++x,++j) { + float v; + i = y*du_stride+x; + v = CDU[i]*fdtbl[j]; + // DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? ceilf(v - 0.5f) : floorf(v + 0.5f)); + // ceilf() and floorf() are C99, not C89, but I /think/ they're not needed here anyway? + DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? v - 0.5f : v + 0.5f); + } + } + + // Encode DC + diff = DU[0] - DC; + if (diff == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]); + } else { + unsigned short bits[2]; + stbiw__jpg_calcBits(diff, bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + // Encode ACs + end0pos = 63; + for(; (end0pos>0)&&(DU[end0pos]==0); --end0pos) { + } + // end0pos = first element in reverse order !=0 + if(end0pos == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + return DU[0]; + } + for(i = 1; i <= end0pos; ++i) { + int startpos = i; + int nrzeroes; + unsigned short bits[2]; + for (; DU[i]==0 && i<=end0pos; ++i) { + } + nrzeroes = i-startpos; + if ( nrzeroes >= 16 ) { + int lng = nrzeroes>>4; + int nrmarker; + for (nrmarker=1; nrmarker <= lng; ++nrmarker) + stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); + nrzeroes &= 15; + } + stbiw__jpg_calcBits(DU[i], bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + if(end0pos != 63) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + } + return DU[0]; +} + +static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { + // Constants that don't pollute global namespace + static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; + static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; + static const unsigned char std_ac_luminance_values[] = { + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, + 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, + 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, + 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, + 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; + static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; + static const unsigned char std_ac_chrominance_values[] = { + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, + 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, + 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, + 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, + 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, + 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + // Huffman tables + static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; + static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; + static const unsigned short YAC_HT[256][2] = { + {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const unsigned short UVAC_HT[256][2] = { + {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, + 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; + static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, + 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; + static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, + 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; + + int row, col, i, k, subsample; + float fdtbl_Y[64], fdtbl_UV[64]; + unsigned char YTable[64], UVTable[64]; + + if(!data || !width || !height || comp > 4 || comp < 1) { + return 0; + } + + quality = quality ? quality : 90; + subsample = quality <= 90 ? 1 : 0; + quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; + quality = quality < 50 ? 5000 / quality : 200 - quality * 2; + + for(i = 0; i < 64; ++i) { + int uvti, yti = (YQT[i]*quality+50)/100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti); + uvti = (UVQT[i]*quality+50)/100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + } + + for(row = 0, k = 0; row < 8; ++row) { + for(col = 0; col < 8; ++col, ++k) { + fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + } + } + + // Write Headers + { + static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; + static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; + const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), + 3,1,(unsigned char)(subsample?0x22:0x11),0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; + s->func(s->context, (void*)head0, sizeof(head0)); + s->func(s->context, (void*)YTable, sizeof(YTable)); + stbiw__putc(s, 1); + s->func(s->context, UVTable, sizeof(UVTable)); + s->func(s->context, (void*)head1, sizeof(head1)); + s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); + s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); + stbiw__putc(s, 0x10); // HTYACinfo + s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); + s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); + stbiw__putc(s, 1); // HTUDCinfo + s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); + stbiw__putc(s, 0x11); // HTUACinfo + s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); + s->func(s->context, (void*)head2, sizeof(head2)); + } + + // Encode 8x8 macroblocks + { + static const unsigned short fillBits[] = {0x7F, 7}; + int DCY=0, DCU=0, DCV=0; + int bitBuf=0, bitCnt=0; + // comp == 2 is grey+alpha (alpha is ignored) + int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + const unsigned char *dataR = (const unsigned char *)data; + const unsigned char *dataG = dataR + ofsG; + const unsigned char *dataB = dataR + ofsB; + int x, y, pos; + if(subsample) { + for(y = 0; y < height; y += 16) { + for(x = 0; x < width; x += 16) { + float Y[256], U[256], V[256]; + for(row = y, pos = 0; row < y+16; ++row) { + // row >= height => use last input row + int clamped_row = (row < height) ? row : height - 1; + int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; + for(col = x; col < x+16; ++col, ++pos) { + // if col >= width => use pixel from last input column + int p = base_p + ((col < width) ? col : (width-1))*comp; + float r = dataR[p], g = dataG[p], b = dataB[p]; + Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; + U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b; + V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b; + } + } + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); + + // subsample U,V + { + float subU[64], subV[64]; + int yy, xx; + for(yy = 0, pos = 0; yy < 8; ++yy) { + for(xx = 0; xx < 8; ++xx, ++pos) { + int j = yy*32+xx*2; + subU[pos] = (U[j+0] + U[j+1] + U[j+16] + U[j+17]) * 0.25f; + subV[pos] = (V[j+0] + V[j+1] + V[j+16] + V[j+17]) * 0.25f; + } + } + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + } + } else { + for(y = 0; y < height; y += 8) { + for(x = 0; x < width; x += 8) { + float Y[64], U[64], V[64]; + for(row = y, pos = 0; row < y+8; ++row) { + // row >= height => use last input row + int clamped_row = (row < height) ? row : height - 1; + int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; + for(col = x; col < x+8; ++col, ++pos) { + // if col >= width => use pixel from last input column + int p = base_p + ((col < width) ? col : (width-1))*comp; + float r = dataR[p], g = dataG[p], b = dataB[p]; + Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; + U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b; + V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b; + } + } + + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + } + + // Do the bit alignment of the EOI marker + stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); + } + + // EOI + stbiw__putc(s, 0xFF); + stbiw__putc(s, 0xD9); + + return 1; +} + +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s = { 0 }; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); +} + + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s = { 0 }; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif + +#endif // STB_IMAGE_WRITE_IMPLEMENTATION + +/* Revision history + 1.16 (2021-07-11) + make Deflate code emit uncompressed blocks when it would otherwise expand + support writing BMPs with alpha channel + 1.15 (2020-07-13) unknown + 1.14 (2020-02-02) updated JPEG writer to downsample chroma channels + 1.13 + 1.12 + 1.11 (2019-08-11) + + 1.10 (2019-02-07) + support utf8 filenames in Windows; fix warnings and platform ifdefs + 1.09 (2018-02-11) + fix typo in zlib quality API, improve STB_I_W_STATIC in C++ + 1.08 (2018-01-29) + add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter + 1.07 (2017-07-24) + doc fix + 1.06 (2017-07-23) + writing JPEG (using Jon Olick's code) + 1.05 ??? + 1.04 (2017-03-03) + monochrome BMP expansion + 1.03 ??? + 1.02 (2016-04-02) + avoid allocating large structures on the stack + 1.01 (2016-01-16) + STBIW_REALLOC_SIZED: support allocators with no realloc support + avoid race-condition in crc initialization + minor compile issues + 1.00 (2015-09-14) + installable file IO function + 0.99 (2015-09-13) + warning fixes; TGA rle support + 0.98 (2015-04-08) + added STBIW_MALLOC, STBIW_ASSERT etc + 0.97 (2015-01-18) + fixed HDR asserts, rewrote HDR rle logic + 0.96 (2015-01-17) + add HDR output + fix monochrome BMP + 0.95 (2014-08-17) + add monochrome TGA output + 0.94 (2014-05-31) + rename private functions to avoid conflicts with stb_image.h + 0.93 (2014-05-27) + warning fixes + 0.92 (2010-08-01) + casts to unsigned char to fix warnings + 0.91 (2010-07-17) + first public release + 0.90 first internal release +*/ + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/src/android/app/build.gradle.kts b/src/android/app/build.gradle.kts index 84a3308b7..ac43d84b7 100644 --- a/src/android/app/build.gradle.kts +++ b/src/android/app/build.gradle.kts @@ -27,7 +27,7 @@ android { namespace = "org.yuzu.yuzu_emu" compileSdkVersion = "android-34" - ndkVersion = "25.2.9519653" + ndkVersion = "26.1.10909125" buildFeatures { viewBinding = true @@ -203,23 +203,23 @@ ktlint { } dependencies { - implementation("androidx.core:core-ktx:1.10.1") + implementation("androidx.core:core-ktx:1.12.0") implementation("androidx.appcompat:appcompat:1.6.1") - implementation("androidx.recyclerview:recyclerview:1.3.0") + implementation("androidx.recyclerview:recyclerview:1.3.1") implementation("androidx.constraintlayout:constraintlayout:2.1.4") - implementation("androidx.fragment:fragment-ktx:1.6.0") + implementation("androidx.fragment:fragment-ktx:1.6.1") implementation("androidx.documentfile:documentfile:1.0.1") implementation("com.google.android.material:material:1.9.0") - implementation("androidx.preference:preference:1.2.0") - implementation("androidx.lifecycle:lifecycle-viewmodel-ktx:2.6.1") + implementation("androidx.preference:preference-ktx:1.2.1") + implementation("androidx.lifecycle:lifecycle-viewmodel-ktx:2.6.2") implementation("io.coil-kt:coil:2.2.2") implementation("androidx.core:core-splashscreen:1.0.1") implementation("androidx.window:window:1.2.0-beta03") implementation("org.ini4j:ini4j:0.5.4") implementation("androidx.constraintlayout:constraintlayout:2.1.4") implementation("androidx.swiperefreshlayout:swiperefreshlayout:1.1.0") - implementation("androidx.navigation:navigation-fragment-ktx:2.6.0") - implementation("androidx.navigation:navigation-ui-ktx:2.6.0") + implementation("androidx.navigation:navigation-fragment-ktx:2.7.4") + implementation("androidx.navigation:navigation-ui-ktx:2.7.4") implementation("info.debatty:java-string-similarity:2.0.0") implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.5.0") } diff --git a/src/android/app/src/main/AndroidManifest.xml b/src/android/app/src/main/AndroidManifest.xml index 832c08e15..a67351727 100644 --- a/src/android/app/src/main/AndroidManifest.xml +++ b/src/android/app/src/main/AndroidManifest.xml @@ -28,7 +28,6 @@ SPDX-License-Identifier: GPL-3.0-or-later android:appCategory="game" android:localeConfig="@xml/locales_config" android:banner="@drawable/tv_banner" - android:extractNativeLibs="true" android:fullBackupContent="@xml/data_extraction_rules" android:dataExtractionRules="@xml/data_extraction_rules_api_31" android:enableOnBackInvokedCallback="true"> diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt index 6e39e542b..115f72710 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt @@ -15,13 +15,9 @@ import androidx.annotation.Keep import androidx.fragment.app.DialogFragment import com.google.android.material.dialog.MaterialAlertDialogBuilder import java.lang.ref.WeakReference -import org.yuzu.yuzu_emu.YuzuApplication.Companion.appContext import org.yuzu.yuzu_emu.activities.EmulationActivity import org.yuzu.yuzu_emu.utils.DocumentsTree.Companion.isNativePath -import org.yuzu.yuzu_emu.utils.FileUtil.exists -import org.yuzu.yuzu_emu.utils.FileUtil.getFileSize -import org.yuzu.yuzu_emu.utils.FileUtil.isDirectory -import org.yuzu.yuzu_emu.utils.FileUtil.openContentUri +import org.yuzu.yuzu_emu.utils.FileUtil import org.yuzu.yuzu_emu.utils.Log import org.yuzu.yuzu_emu.utils.SerializableHelper.serializable @@ -75,7 +71,7 @@ object NativeLibrary { return if (isNativePath(path!!)) { YuzuApplication.documentsTree!!.openContentUri(path, openmode) } else { - openContentUri(appContext, path, openmode) + FileUtil.openContentUri(path, openmode) } } @@ -85,7 +81,7 @@ object NativeLibrary { return if (isNativePath(path!!)) { YuzuApplication.documentsTree!!.getFileSize(path) } else { - getFileSize(appContext, path) + FileUtil.getFileSize(path) } } @@ -95,7 +91,7 @@ object NativeLibrary { return if (isNativePath(path!!)) { YuzuApplication.documentsTree!!.exists(path) } else { - exists(appContext, path) + FileUtil.exists(path) } } @@ -105,7 +101,7 @@ object NativeLibrary { return if (isNativePath(path!!)) { YuzuApplication.documentsTree!!.isDirectory(path) } else { - isDirectory(appContext, path) + FileUtil.isDirectory(path) } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/YuzuApplication.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/YuzuApplication.kt index 9561748cb..8c053670c 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/YuzuApplication.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/YuzuApplication.kt @@ -47,7 +47,7 @@ class YuzuApplication : Application() { application = this documentsTree = DocumentsTree() DirectoryInitialization.start() - GpuDriverHelper.initializeDriverParameters(applicationContext) + GpuDriverHelper.initializeDriverParameters() NativeLibrary.logDeviceInfo() createNotificationChannels() diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/DriverAdapter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/DriverAdapter.kt new file mode 100644 index 000000000..0e818cab9 --- /dev/null +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/DriverAdapter.kt @@ -0,0 +1,117 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +package org.yuzu.yuzu_emu.adapters + +import android.text.TextUtils +import android.view.LayoutInflater +import android.view.View +import android.view.ViewGroup +import androidx.recyclerview.widget.AsyncDifferConfig +import androidx.recyclerview.widget.DiffUtil +import androidx.recyclerview.widget.ListAdapter +import androidx.recyclerview.widget.RecyclerView +import org.yuzu.yuzu_emu.R +import org.yuzu.yuzu_emu.databinding.CardDriverOptionBinding +import org.yuzu.yuzu_emu.model.DriverViewModel +import org.yuzu.yuzu_emu.utils.GpuDriverHelper +import org.yuzu.yuzu_emu.utils.GpuDriverMetadata + +class DriverAdapter(private val driverViewModel: DriverViewModel) : + ListAdapter<Pair<String, GpuDriverMetadata>, DriverAdapter.DriverViewHolder>( + AsyncDifferConfig.Builder(DiffCallback()).build() + ) { + override fun onCreateViewHolder(parent: ViewGroup, viewType: Int): DriverViewHolder { + val binding = + CardDriverOptionBinding.inflate(LayoutInflater.from(parent.context), parent, false) + return DriverViewHolder(binding) + } + + override fun getItemCount(): Int = currentList.size + + override fun onBindViewHolder(holder: DriverViewHolder, position: Int) = + holder.bind(currentList[position]) + + private fun onSelectDriver(position: Int) { + driverViewModel.setSelectedDriverIndex(position) + notifyItemChanged(driverViewModel.previouslySelectedDriver) + notifyItemChanged(driverViewModel.selectedDriver) + } + + private fun onDeleteDriver(driverData: Pair<String, GpuDriverMetadata>, position: Int) { + if (driverViewModel.selectedDriver > position) { + driverViewModel.setSelectedDriverIndex(driverViewModel.selectedDriver - 1) + } + if (GpuDriverHelper.customDriverData == driverData.second) { + driverViewModel.setSelectedDriverIndex(0) + } + driverViewModel.driversToDelete.add(driverData.first) + driverViewModel.removeDriver(driverData) + notifyItemRemoved(position) + notifyItemChanged(driverViewModel.selectedDriver) + } + + inner class DriverViewHolder(val binding: CardDriverOptionBinding) : + RecyclerView.ViewHolder(binding.root) { + private lateinit var driverData: Pair<String, GpuDriverMetadata> + + fun bind(driverData: Pair<String, GpuDriverMetadata>) { + this.driverData = driverData + val driver = driverData.second + + binding.apply { + radioButton.isChecked = driverViewModel.selectedDriver == bindingAdapterPosition + root.setOnClickListener { + onSelectDriver(bindingAdapterPosition) + } + buttonDelete.setOnClickListener { + onDeleteDriver(driverData, bindingAdapterPosition) + } + + // Delay marquee by 3s + title.postDelayed( + { + title.isSelected = true + title.ellipsize = TextUtils.TruncateAt.MARQUEE + version.isSelected = true + version.ellipsize = TextUtils.TruncateAt.MARQUEE + description.isSelected = true + description.ellipsize = TextUtils.TruncateAt.MARQUEE + }, + 3000 + ) + if (driver.name == null) { + title.setText(R.string.system_gpu_driver) + description.text = "" + version.text = "" + version.visibility = View.GONE + description.visibility = View.GONE + buttonDelete.visibility = View.GONE + } else { + title.text = driver.name + version.text = driver.version + description.text = driver.description + version.visibility = View.VISIBLE + description.visibility = View.VISIBLE + buttonDelete.visibility = View.VISIBLE + } + } + } + } + + private class DiffCallback : DiffUtil.ItemCallback<Pair<String, GpuDriverMetadata>>() { + override fun areItemsTheSame( + oldItem: Pair<String, GpuDriverMetadata>, + newItem: Pair<String, GpuDriverMetadata> + ): Boolean { + return oldItem.first == newItem.first + } + + override fun areContentsTheSame( + oldItem: Pair<String, GpuDriverMetadata>, + newItem: Pair<String, GpuDriverMetadata> + ): Boolean { + return oldItem.second == newItem.second + } + } +} diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/DriverManagerFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/DriverManagerFragment.kt new file mode 100644 index 000000000..df21d74b2 --- /dev/null +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/DriverManagerFragment.kt @@ -0,0 +1,186 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +package org.yuzu.yuzu_emu.fragments + +import android.os.Bundle +import android.view.LayoutInflater +import android.view.View +import android.view.ViewGroup +import androidx.activity.result.contract.ActivityResultContracts +import androidx.core.view.ViewCompat +import androidx.core.view.WindowInsetsCompat +import androidx.core.view.updatePadding +import androidx.fragment.app.Fragment +import androidx.fragment.app.activityViewModels +import androidx.lifecycle.lifecycleScope +import androidx.navigation.findNavController +import androidx.recyclerview.widget.GridLayoutManager +import com.google.android.material.transition.MaterialSharedAxis +import kotlinx.coroutines.flow.collectLatest +import kotlinx.coroutines.launch +import org.yuzu.yuzu_emu.R +import org.yuzu.yuzu_emu.adapters.DriverAdapter +import org.yuzu.yuzu_emu.databinding.FragmentDriverManagerBinding +import org.yuzu.yuzu_emu.model.DriverViewModel +import org.yuzu.yuzu_emu.model.HomeViewModel +import org.yuzu.yuzu_emu.utils.FileUtil +import org.yuzu.yuzu_emu.utils.GpuDriverHelper +import java.io.File +import java.io.IOException + +class DriverManagerFragment : Fragment() { + private var _binding: FragmentDriverManagerBinding? = null + private val binding get() = _binding!! + + private val homeViewModel: HomeViewModel by activityViewModels() + private val driverViewModel: DriverViewModel by activityViewModels() + + override fun onCreate(savedInstanceState: Bundle?) { + super.onCreate(savedInstanceState) + enterTransition = MaterialSharedAxis(MaterialSharedAxis.X, true) + returnTransition = MaterialSharedAxis(MaterialSharedAxis.X, false) + reenterTransition = MaterialSharedAxis(MaterialSharedAxis.X, false) + } + + override fun onCreateView( + inflater: LayoutInflater, + container: ViewGroup?, + savedInstanceState: Bundle? + ): View { + _binding = FragmentDriverManagerBinding.inflate(inflater) + return binding.root + } + + override fun onViewCreated(view: View, savedInstanceState: Bundle?) { + super.onViewCreated(view, savedInstanceState) + homeViewModel.setNavigationVisibility(visible = false, animated = true) + homeViewModel.setStatusBarShadeVisibility(visible = false) + + if (!driverViewModel.isInteractionAllowed) { + DriversLoadingDialogFragment().show( + childFragmentManager, + DriversLoadingDialogFragment.TAG + ) + } + + binding.toolbarDrivers.setNavigationOnClickListener { + binding.root.findNavController().popBackStack() + } + + binding.buttonInstall.setOnClickListener { + getDriver.launch(arrayOf("application/zip")) + } + + binding.listDrivers.apply { + layoutManager = GridLayoutManager( + requireContext(), + resources.getInteger(R.integer.grid_columns) + ) + adapter = DriverAdapter(driverViewModel) + } + + viewLifecycleOwner.lifecycleScope.apply { + launch { + driverViewModel.driverList.collectLatest { + (binding.listDrivers.adapter as DriverAdapter).submitList(it) + } + } + launch { + driverViewModel.newDriverInstalled.collect { + if (_binding != null && it) { + (binding.listDrivers.adapter as DriverAdapter).apply { + notifyItemChanged(driverViewModel.previouslySelectedDriver) + notifyItemChanged(driverViewModel.selectedDriver) + driverViewModel.setNewDriverInstalled(false) + } + } + } + } + } + + setInsets() + } + + // Start installing requested driver + override fun onStop() { + super.onStop() + driverViewModel.onCloseDriverManager() + } + + private fun setInsets() = + ViewCompat.setOnApplyWindowInsetsListener( + binding.root + ) { _: View, windowInsets: WindowInsetsCompat -> + val barInsets = windowInsets.getInsets(WindowInsetsCompat.Type.systemBars()) + val cutoutInsets = windowInsets.getInsets(WindowInsetsCompat.Type.displayCutout()) + + val leftInsets = barInsets.left + cutoutInsets.left + val rightInsets = barInsets.right + cutoutInsets.right + + val mlpAppBar = binding.toolbarDrivers.layoutParams as ViewGroup.MarginLayoutParams + mlpAppBar.leftMargin = leftInsets + mlpAppBar.rightMargin = rightInsets + binding.toolbarDrivers.layoutParams = mlpAppBar + + val mlplistDrivers = binding.listDrivers.layoutParams as ViewGroup.MarginLayoutParams + mlplistDrivers.leftMargin = leftInsets + mlplistDrivers.rightMargin = rightInsets + binding.listDrivers.layoutParams = mlplistDrivers + + val fabSpacing = resources.getDimensionPixelSize(R.dimen.spacing_fab) + val mlpFab = + binding.buttonInstall.layoutParams as ViewGroup.MarginLayoutParams + mlpFab.leftMargin = leftInsets + fabSpacing + mlpFab.rightMargin = rightInsets + fabSpacing + mlpFab.bottomMargin = barInsets.bottom + fabSpacing + binding.buttonInstall.layoutParams = mlpFab + + binding.listDrivers.updatePadding( + bottom = barInsets.bottom + + resources.getDimensionPixelSize(R.dimen.spacing_bottom_list_fab) + ) + + windowInsets + } + + private val getDriver = + registerForActivityResult(ActivityResultContracts.OpenDocument()) { result -> + if (result == null) { + return@registerForActivityResult + } + + IndeterminateProgressDialogFragment.newInstance( + requireActivity(), + R.string.installing_driver, + false + ) { + val driverPath = + "${GpuDriverHelper.driverStoragePath}/${FileUtil.getFilename(result)}" + val driverFile = File(driverPath) + + // Ignore file exceptions when a user selects an invalid zip + try { + if (!GpuDriverHelper.copyDriverToInternalStorage(result)) { + throw IOException("Driver failed validation!") + } + } catch (_: IOException) { + if (driverFile.exists()) { + driverFile.delete() + } + return@newInstance getString(R.string.select_gpu_driver_error) + } + + val driverData = GpuDriverHelper.getMetadataFromZip(driverFile) + val driverInList = + driverViewModel.driverList.value.firstOrNull { it.second == driverData } + if (driverInList != null) { + return@newInstance getString(R.string.driver_already_installed) + } else { + driverViewModel.addDriver(Pair(driverPath, driverData)) + driverViewModel.setNewDriverInstalled(true) + } + return@newInstance Any() + }.show(childFragmentManager, IndeterminateProgressDialogFragment.TAG) + } +} diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/DriversLoadingDialogFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/DriversLoadingDialogFragment.kt new file mode 100644 index 000000000..f8c34346a --- /dev/null +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/DriversLoadingDialogFragment.kt @@ -0,0 +1,75 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +package org.yuzu.yuzu_emu.fragments + +import android.app.Dialog +import android.os.Bundle +import android.view.LayoutInflater +import android.view.View +import android.view.ViewGroup +import androidx.fragment.app.DialogFragment +import androidx.fragment.app.activityViewModels +import androidx.lifecycle.Lifecycle +import androidx.lifecycle.lifecycleScope +import androidx.lifecycle.repeatOnLifecycle +import com.google.android.material.dialog.MaterialAlertDialogBuilder +import kotlinx.coroutines.launch +import org.yuzu.yuzu_emu.R +import org.yuzu.yuzu_emu.databinding.DialogProgressBarBinding +import org.yuzu.yuzu_emu.model.DriverViewModel + +class DriversLoadingDialogFragment : DialogFragment() { + private val driverViewModel: DriverViewModel by activityViewModels() + + private lateinit var binding: DialogProgressBarBinding + + override fun onCreateDialog(savedInstanceState: Bundle?): Dialog { + binding = DialogProgressBarBinding.inflate(layoutInflater) + binding.progressBar.isIndeterminate = true + + isCancelable = false + + return MaterialAlertDialogBuilder(requireContext()) + .setTitle(R.string.loading) + .setView(binding.root) + .create() + } + + override fun onCreateView( + inflater: LayoutInflater, + container: ViewGroup?, + savedInstanceState: Bundle? + ): View = binding.root + + override fun onViewCreated(view: View, savedInstanceState: Bundle?) { + super.onViewCreated(view, savedInstanceState) + viewLifecycleOwner.lifecycleScope.apply { + launch { + repeatOnLifecycle(Lifecycle.State.RESUMED) { + driverViewModel.areDriversLoading.collect { checkForDismiss() } + } + } + launch { + repeatOnLifecycle(Lifecycle.State.RESUMED) { + driverViewModel.isDriverReady.collect { checkForDismiss() } + } + } + launch { + repeatOnLifecycle(Lifecycle.State.RESUMED) { + driverViewModel.isDeletingDrivers.collect { checkForDismiss() } + } + } + } + } + + private fun checkForDismiss() { + if (driverViewModel.isInteractionAllowed) { + dismiss() + } + } + + companion object { + const val TAG = "DriversLoadingDialogFragment" + } +} diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt index e6ad2aa77..598a9d42b 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt @@ -39,6 +39,7 @@ import androidx.window.layout.WindowLayoutInfo import com.google.android.material.dialog.MaterialAlertDialogBuilder import com.google.android.material.slider.Slider import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.flow.collect import kotlinx.coroutines.flow.collectLatest import kotlinx.coroutines.launch import org.yuzu.yuzu_emu.HomeNavigationDirections @@ -50,6 +51,7 @@ import org.yuzu.yuzu_emu.databinding.DialogOverlayAdjustBinding import org.yuzu.yuzu_emu.databinding.FragmentEmulationBinding import org.yuzu.yuzu_emu.features.settings.model.IntSetting import org.yuzu.yuzu_emu.features.settings.model.Settings +import org.yuzu.yuzu_emu.model.DriverViewModel import org.yuzu.yuzu_emu.model.Game import org.yuzu.yuzu_emu.model.EmulationViewModel import org.yuzu.yuzu_emu.overlay.InputOverlay @@ -70,6 +72,7 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { private lateinit var game: Game private val emulationViewModel: EmulationViewModel by activityViewModels() + private val driverViewModel: DriverViewModel by activityViewModels() private var isInFoldableLayout = false @@ -299,6 +302,21 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } } } + launch { + repeatOnLifecycle(Lifecycle.State.RESUMED) { + driverViewModel.isDriverReady.collect { + if (it && !emulationState.isRunning) { + if (!DirectoryInitialization.areDirectoriesReady) { + DirectoryInitialization.start() + } + + updateScreenLayout() + + emulationState.run(emulationActivity!!.isActivityRecreated) + } + } + } + } } } @@ -332,17 +350,6 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } } - override fun onResume() { - super.onResume() - if (!DirectoryInitialization.areDirectoriesReady) { - DirectoryInitialization.start() - } - - updateScreenLayout() - - emulationState.run(emulationActivity!!.isActivityRecreated) - } - override fun onPause() { if (emulationState.isRunning && emulationActivity?.isInPictureInPictureMode != true) { emulationState.pause() diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt index 8923c0ea2..f273c880a 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt @@ -5,7 +5,6 @@ package org.yuzu.yuzu_emu.fragments import android.Manifest import android.content.ActivityNotFoundException -import android.content.DialogInterface import android.content.Intent import android.content.pm.PackageManager import android.os.Bundle @@ -27,8 +26,7 @@ import androidx.fragment.app.Fragment import androidx.fragment.app.activityViewModels import androidx.navigation.findNavController import androidx.navigation.fragment.findNavController -import androidx.recyclerview.widget.LinearLayoutManager -import com.google.android.material.dialog.MaterialAlertDialogBuilder +import androidx.recyclerview.widget.GridLayoutManager import com.google.android.material.transition.MaterialSharedAxis import org.yuzu.yuzu_emu.BuildConfig import org.yuzu.yuzu_emu.HomeNavigationDirections @@ -37,6 +35,7 @@ import org.yuzu.yuzu_emu.adapters.HomeSettingAdapter import org.yuzu.yuzu_emu.databinding.FragmentHomeSettingsBinding import org.yuzu.yuzu_emu.features.DocumentProvider import org.yuzu.yuzu_emu.features.settings.model.Settings +import org.yuzu.yuzu_emu.model.DriverViewModel import org.yuzu.yuzu_emu.model.HomeSetting import org.yuzu.yuzu_emu.model.HomeViewModel import org.yuzu.yuzu_emu.ui.main.MainActivity @@ -50,6 +49,7 @@ class HomeSettingsFragment : Fragment() { private lateinit var mainActivity: MainActivity private val homeViewModel: HomeViewModel by activityViewModels() + private val driverViewModel: DriverViewModel by activityViewModels() override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) @@ -107,13 +107,17 @@ class HomeSettingsFragment : Fragment() { ) add( HomeSetting( - R.string.install_gpu_driver, + R.string.gpu_driver_manager, R.string.install_gpu_driver_description, - R.drawable.ic_exit, - { driverInstaller() }, + R.drawable.ic_build, + { + binding.root.findNavController() + .navigate(R.id.action_homeSettingsFragment_to_driverManagerFragment) + }, { GpuDriverHelper.supportsCustomDriverLoading() }, R.string.custom_driver_not_supported, - R.string.custom_driver_not_supported_description + R.string.custom_driver_not_supported_description, + driverViewModel.selectedDriverMetadata ) ) add( @@ -182,7 +186,8 @@ class HomeSettingsFragment : Fragment() { } binding.homeSettingsList.apply { - layoutManager = LinearLayoutManager(requireContext()) + layoutManager = + GridLayoutManager(requireContext(), resources.getInteger(R.integer.grid_columns)) adapter = HomeSettingAdapter( requireActivity() as AppCompatActivity, viewLifecycleOwner, @@ -292,31 +297,6 @@ class HomeSettingsFragment : Fragment() { } } - private fun driverInstaller() { - // Get the driver name for the dialog message. - var driverName = GpuDriverHelper.customDriverName - if (driverName == null) { - driverName = getString(R.string.system_gpu_driver) - } - - MaterialAlertDialogBuilder(requireContext()) - .setTitle(getString(R.string.select_gpu_driver_title)) - .setMessage(driverName) - .setNegativeButton(android.R.string.cancel, null) - .setNeutralButton(R.string.select_gpu_driver_default) { _: DialogInterface?, _: Int -> - GpuDriverHelper.installDefaultDriver(requireContext()) - Toast.makeText( - requireContext(), - R.string.select_gpu_driver_use_default, - Toast.LENGTH_SHORT - ).show() - } - .setPositiveButton(R.string.select_gpu_driver_install) { _: DialogInterface?, _: Int -> - mainActivity.getDriver.launch(arrayOf("application/zip")) - } - .show() - } - private fun shareLog() { val file = DocumentFile.fromSingleUri( mainActivity, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/IndeterminateProgressDialogFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/IndeterminateProgressDialogFragment.kt index f128deda8..7e467814d 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/IndeterminateProgressDialogFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/IndeterminateProgressDialogFragment.kt @@ -10,8 +10,8 @@ import android.view.View import android.view.ViewGroup import android.widget.Toast import androidx.appcompat.app.AlertDialog -import androidx.appcompat.app.AppCompatActivity import androidx.fragment.app.DialogFragment +import androidx.fragment.app.FragmentActivity import androidx.fragment.app.activityViewModels import androidx.lifecycle.Lifecycle import androidx.lifecycle.ViewModelProvider @@ -78,6 +78,10 @@ class IndeterminateProgressDialogFragment : DialogFragment() { requireActivity().supportFragmentManager, MessageDialogFragment.TAG ) + + else -> { + // Do nothing + } } taskViewModel.clear() } @@ -115,7 +119,7 @@ class IndeterminateProgressDialogFragment : DialogFragment() { private const val CANCELLABLE = "Cancellable" fun newInstance( - activity: AppCompatActivity, + activity: FragmentActivity, titleId: Int, cancellable: Boolean = false, task: () -> Any diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/DriverViewModel.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/DriverViewModel.kt new file mode 100644 index 000000000..62945ad65 --- /dev/null +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/DriverViewModel.kt @@ -0,0 +1,158 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +package org.yuzu.yuzu_emu.model + +import androidx.lifecycle.ViewModel +import androidx.lifecycle.viewModelScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.StateFlow +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext +import org.yuzu.yuzu_emu.R +import org.yuzu.yuzu_emu.YuzuApplication +import org.yuzu.yuzu_emu.utils.FileUtil +import org.yuzu.yuzu_emu.utils.GpuDriverHelper +import org.yuzu.yuzu_emu.utils.GpuDriverMetadata +import java.io.BufferedOutputStream +import java.io.File + +class DriverViewModel : ViewModel() { + private val _areDriversLoading = MutableStateFlow(false) + val areDriversLoading: StateFlow<Boolean> get() = _areDriversLoading + + private val _isDriverReady = MutableStateFlow(true) + val isDriverReady: StateFlow<Boolean> get() = _isDriverReady + + private val _isDeletingDrivers = MutableStateFlow(false) + val isDeletingDrivers: StateFlow<Boolean> get() = _isDeletingDrivers + + private val _driverList = MutableStateFlow(mutableListOf<Pair<String, GpuDriverMetadata>>()) + val driverList: StateFlow<MutableList<Pair<String, GpuDriverMetadata>>> get() = _driverList + + var previouslySelectedDriver = 0 + var selectedDriver = -1 + + private val _selectedDriverMetadata = + MutableStateFlow( + GpuDriverHelper.customDriverData.name + ?: YuzuApplication.appContext.getString(R.string.system_gpu_driver) + ) + val selectedDriverMetadata: StateFlow<String> get() = _selectedDriverMetadata + + private val _newDriverInstalled = MutableStateFlow(false) + val newDriverInstalled: StateFlow<Boolean> get() = _newDriverInstalled + + val driversToDelete = mutableListOf<String>() + + val isInteractionAllowed + get() = !areDriversLoading.value && isDriverReady.value && !isDeletingDrivers.value + + init { + _areDriversLoading.value = true + viewModelScope.launch { + withContext(Dispatchers.IO) { + val drivers = GpuDriverHelper.getDrivers() + val currentDriverMetadata = GpuDriverHelper.customDriverData + for (i in drivers.indices) { + if (drivers[i].second == currentDriverMetadata) { + setSelectedDriverIndex(i) + break + } + } + + // If a user had installed a driver before the manager was implemented, this zips + // the installed driver to UserData/gpu_drivers/CustomDriver.zip so that it can + // be indexed and exported as expected. + if (selectedDriver == -1) { + val driverToSave = + File(GpuDriverHelper.driverStoragePath, "CustomDriver.zip") + driverToSave.createNewFile() + FileUtil.zipFromInternalStorage( + File(GpuDriverHelper.driverInstallationPath!!), + GpuDriverHelper.driverInstallationPath!!, + BufferedOutputStream(driverToSave.outputStream()) + ) + drivers.add(Pair(driverToSave.path, currentDriverMetadata)) + setSelectedDriverIndex(drivers.size - 1) + } + + _driverList.value = drivers + _areDriversLoading.value = false + } + } + } + + fun setSelectedDriverIndex(value: Int) { + if (selectedDriver != -1) { + previouslySelectedDriver = selectedDriver + } + selectedDriver = value + } + + fun setNewDriverInstalled(value: Boolean) { + _newDriverInstalled.value = value + } + + fun addDriver(driverData: Pair<String, GpuDriverMetadata>) { + val driverIndex = _driverList.value.indexOfFirst { it == driverData } + if (driverIndex == -1) { + setSelectedDriverIndex(_driverList.value.size) + _driverList.value.add(driverData) + _selectedDriverMetadata.value = driverData.second.name + ?: YuzuApplication.appContext.getString(R.string.system_gpu_driver) + } else { + setSelectedDriverIndex(driverIndex) + } + } + + fun removeDriver(driverData: Pair<String, GpuDriverMetadata>) { + _driverList.value.remove(driverData) + } + + fun onCloseDriverManager() { + _isDeletingDrivers.value = true + viewModelScope.launch { + withContext(Dispatchers.IO) { + driversToDelete.forEach { + val driver = File(it) + if (driver.exists()) { + driver.delete() + } + } + driversToDelete.clear() + _isDeletingDrivers.value = false + } + } + + if (GpuDriverHelper.customDriverData == driverList.value[selectedDriver].second) { + return + } + + _isDriverReady.value = false + viewModelScope.launch { + withContext(Dispatchers.IO) { + if (selectedDriver == 0) { + GpuDriverHelper.installDefaultDriver() + setDriverReady() + return@withContext + } + + val driverToInstall = File(driverList.value[selectedDriver].first) + if (driverToInstall.exists()) { + GpuDriverHelper.installCustomDriver(driverToInstall) + } else { + GpuDriverHelper.installDefaultDriver() + } + setDriverReady() + } + } + } + + private fun setDriverReady() { + _isDriverReady.value = true + _selectedDriverMetadata.value = GpuDriverHelper.customDriverData.name + ?: YuzuApplication.appContext.getString(R.string.system_gpu_driver) + } +} diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt index 0fa5df5e5..233aa4101 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt @@ -29,12 +29,10 @@ import androidx.navigation.fragment.NavHostFragment import androidx.navigation.ui.setupWithNavController import androidx.preference.PreferenceManager import com.google.android.material.color.MaterialColors -import com.google.android.material.dialog.MaterialAlertDialogBuilder import com.google.android.material.navigation.NavigationBarView import kotlinx.coroutines.CoroutineScope import java.io.File import java.io.FilenameFilter -import java.io.IOException import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.launch import kotlinx.coroutines.withContext @@ -43,7 +41,6 @@ import org.yuzu.yuzu_emu.NativeLibrary import org.yuzu.yuzu_emu.R import org.yuzu.yuzu_emu.activities.EmulationActivity import org.yuzu.yuzu_emu.databinding.ActivityMainBinding -import org.yuzu.yuzu_emu.databinding.DialogProgressBarBinding import org.yuzu.yuzu_emu.features.DocumentProvider import org.yuzu.yuzu_emu.features.settings.model.Settings import org.yuzu.yuzu_emu.fragments.IndeterminateProgressDialogFragment @@ -343,11 +340,10 @@ class MainActivity : AppCompatActivity(), ThemeProvider { val dstPath = DirectoryInitialization.userDirectory + "/keys/" if (FileUtil.copyUriToInternalStorage( - applicationContext, result, dstPath, "prod.keys" - ) + ) != null ) { if (NativeLibrary.reloadKeys()) { Toast.makeText( @@ -446,11 +442,10 @@ class MainActivity : AppCompatActivity(), ThemeProvider { val dstPath = DirectoryInitialization.userDirectory + "/keys/" if (FileUtil.copyUriToInternalStorage( - applicationContext, result, dstPath, "key_retail.bin" - ) + ) != null ) { if (NativeLibrary.reloadKeys()) { Toast.makeText( @@ -469,59 +464,6 @@ class MainActivity : AppCompatActivity(), ThemeProvider { } } - val getDriver = - registerForActivityResult(ActivityResultContracts.OpenDocument()) { result -> - if (result == null) { - return@registerForActivityResult - } - - val takeFlags = - Intent.FLAG_GRANT_WRITE_URI_PERMISSION or Intent.FLAG_GRANT_READ_URI_PERMISSION - contentResolver.takePersistableUriPermission( - result, - takeFlags - ) - - val progressBinding = DialogProgressBarBinding.inflate(layoutInflater) - progressBinding.progressBar.isIndeterminate = true - val installationDialog = MaterialAlertDialogBuilder(this) - .setTitle(R.string.installing_driver) - .setView(progressBinding.root) - .show() - - lifecycleScope.launch { - withContext(Dispatchers.IO) { - // Ignore file exceptions when a user selects an invalid zip - try { - GpuDriverHelper.installCustomDriver(applicationContext, result) - } catch (_: IOException) { - } - - withContext(Dispatchers.Main) { - installationDialog.dismiss() - - val driverName = GpuDriverHelper.customDriverName - if (driverName != null) { - Toast.makeText( - applicationContext, - getString( - R.string.select_gpu_driver_install_success, - driverName - ), - Toast.LENGTH_SHORT - ).show() - } else { - Toast.makeText( - applicationContext, - R.string.select_gpu_driver_error, - Toast.LENGTH_LONG - ).show() - } - } - } - } - } - val installGameUpdate = registerForActivityResult( ActivityResultContracts.OpenMultipleDocuments() ) { documents: List<Uri> -> diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DocumentsTree.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DocumentsTree.kt index cf226ad94..eafcf9e42 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DocumentsTree.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DocumentsTree.kt @@ -7,7 +7,6 @@ import android.net.Uri import androidx.documentfile.provider.DocumentFile import java.io.File import java.util.* -import org.yuzu.yuzu_emu.YuzuApplication import org.yuzu.yuzu_emu.model.MinimalDocumentFile class DocumentsTree { @@ -22,7 +21,7 @@ class DocumentsTree { fun openContentUri(filepath: String, openMode: String?): Int { val node = resolvePath(filepath) ?: return -1 - return FileUtil.openContentUri(YuzuApplication.appContext, node.uri.toString(), openMode) + return FileUtil.openContentUri(node.uri.toString(), openMode) } fun getFileSize(filepath: String): Long { @@ -30,7 +29,7 @@ class DocumentsTree { return if (node == null || node.isDirectory) { 0 } else { - FileUtil.getFileSize(YuzuApplication.appContext, node.uri.toString()) + FileUtil.getFileSize(node.uri.toString()) } } @@ -67,7 +66,7 @@ class DocumentsTree { * @param parent parent node of this level */ private fun structTree(parent: DocumentsNode) { - val documents = FileUtil.listFiles(YuzuApplication.appContext, parent.uri!!) + val documents = FileUtil.listFiles(parent.uri!!) for (document in documents) { val node = DocumentsNode(document) node.parent = parent diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/FileUtil.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/FileUtil.kt index c3f53f1c5..5ee74a52c 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/FileUtil.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/FileUtil.kt @@ -3,7 +3,6 @@ package org.yuzu.yuzu_emu.utils -import android.content.Context import android.database.Cursor import android.net.Uri import android.provider.DocumentsContract @@ -11,7 +10,6 @@ import androidx.documentfile.provider.DocumentFile import kotlinx.coroutines.flow.StateFlow import java.io.BufferedInputStream import java.io.File -import java.io.FileOutputStream import java.io.IOException import java.io.InputStream import java.net.URLDecoder @@ -21,6 +19,8 @@ import org.yuzu.yuzu_emu.YuzuApplication import org.yuzu.yuzu_emu.model.MinimalDocumentFile import org.yuzu.yuzu_emu.model.TaskState import java.io.BufferedOutputStream +import java.lang.NullPointerException +import java.nio.charset.StandardCharsets import java.util.zip.ZipOutputStream object FileUtil { @@ -29,6 +29,8 @@ object FileUtil { const val APPLICATION_OCTET_STREAM = "application/octet-stream" const val TEXT_PLAIN = "text/plain" + private val context get() = YuzuApplication.appContext + /** * Create a file from directory with filename. * @param context Application context @@ -36,11 +38,11 @@ object FileUtil { * @param filename file display name. * @return boolean */ - fun createFile(context: Context?, directory: String?, filename: String): DocumentFile? { + fun createFile(directory: String?, filename: String): DocumentFile? { var decodedFilename = filename try { val directoryUri = Uri.parse(directory) - val parent = DocumentFile.fromTreeUri(context!!, directoryUri) ?: return null + val parent = DocumentFile.fromTreeUri(context, directoryUri) ?: return null decodedFilename = URLDecoder.decode(decodedFilename, DECODE_METHOD) var mimeType = APPLICATION_OCTET_STREAM if (decodedFilename.endsWith(".txt")) { @@ -56,16 +58,15 @@ object FileUtil { /** * Create a directory from directory with filename. - * @param context Application context * @param directory parent path for directory. * @param directoryName directory display name. * @return boolean */ - fun createDir(context: Context?, directory: String?, directoryName: String?): DocumentFile? { + fun createDir(directory: String?, directoryName: String?): DocumentFile? { var decodedDirectoryName = directoryName try { val directoryUri = Uri.parse(directory) - val parent = DocumentFile.fromTreeUri(context!!, directoryUri) ?: return null + val parent = DocumentFile.fromTreeUri(context, directoryUri) ?: return null decodedDirectoryName = URLDecoder.decode(decodedDirectoryName, DECODE_METHOD) val isExist = parent.findFile(decodedDirectoryName) return isExist ?: parent.createDirectory(decodedDirectoryName) @@ -77,13 +78,12 @@ object FileUtil { /** * Open content uri and return file descriptor to JNI. - * @param context Application context * @param path Native content uri path * @param openMode will be one of "r", "r", "rw", "wa", "rwa" * @return file descriptor */ @JvmStatic - fun openContentUri(context: Context, path: String, openMode: String?): Int { + fun openContentUri(path: String, openMode: String?): Int { try { val uri = Uri.parse(path) val parcelFileDescriptor = context.contentResolver.openFileDescriptor(uri, openMode!!) @@ -103,11 +103,10 @@ object FileUtil { /** * Reference: https://stackoverflow.com/questions/42186820/documentfile-is-very-slow * This function will be faster than DoucmentFile.listFiles - * @param context Application context * @param uri Directory uri. * @return CheapDocument lists. */ - fun listFiles(context: Context, uri: Uri): Array<MinimalDocumentFile> { + fun listFiles(uri: Uri): Array<MinimalDocumentFile> { val resolver = context.contentResolver val columns = arrayOf( DocumentsContract.Document.COLUMN_DOCUMENT_ID, @@ -145,7 +144,7 @@ object FileUtil { * @param path Native content uri path * @return bool */ - fun exists(context: Context, path: String?): Boolean { + fun exists(path: String?): Boolean { var c: Cursor? = null try { val mUri = Uri.parse(path) @@ -165,7 +164,7 @@ object FileUtil { * @param path content uri path * @return bool */ - fun isDirectory(context: Context, path: String): Boolean { + fun isDirectory(path: String): Boolean { val resolver = context.contentResolver val columns = arrayOf( DocumentsContract.Document.COLUMN_MIME_TYPE @@ -210,10 +209,10 @@ object FileUtil { return filename } - fun getFilesName(context: Context, path: String): Array<String> { + fun getFilesName(path: String): Array<String> { val uri = Uri.parse(path) val files: MutableList<String> = ArrayList() - for (file in listFiles(context, uri)) { + for (file in listFiles(uri)) { files.add(file.filename) } return files.toTypedArray() @@ -225,7 +224,7 @@ object FileUtil { * @return long file size */ @JvmStatic - fun getFileSize(context: Context, path: String): Long { + fun getFileSize(path: String): Long { val resolver = context.contentResolver val columns = arrayOf( DocumentsContract.Document.COLUMN_SIZE @@ -245,44 +244,38 @@ object FileUtil { return size } + /** + * Creates an input stream with a given [Uri] and copies its data to the given path. This will + * overwrite any pre-existing files. + * + * @param sourceUri The [Uri] to copy data from + * @param destinationParentPath Destination directory + * @param destinationFilename Optionally renames the file once copied + */ fun copyUriToInternalStorage( - context: Context, - sourceUri: Uri?, + sourceUri: Uri, destinationParentPath: String, - destinationFilename: String - ): Boolean { - var input: InputStream? = null - var output: FileOutputStream? = null + destinationFilename: String = "" + ): File? = try { - input = context.contentResolver.openInputStream(sourceUri!!) - output = FileOutputStream("$destinationParentPath/$destinationFilename") - val buffer = ByteArray(1024) - var len: Int - while (input!!.read(buffer).also { len = it } != -1) { - output.write(buffer, 0, len) - } - output.flush() - return true - } catch (e: Exception) { - Log.error("[FileUtil]: Cannot copy file, error: " + e.message) - } finally { - if (input != null) { - try { - input.close() - } catch (e: IOException) { - Log.error("[FileUtil]: Cannot close input file, error: " + e.message) - } + val fileName = + if (destinationFilename == "") getFilename(sourceUri) else "/$destinationFilename" + val inputStream = context.contentResolver.openInputStream(sourceUri)!! + + val destinationFile = File("$destinationParentPath$fileName") + if (destinationFile.exists()) { + destinationFile.delete() } - if (output != null) { - try { - output.close() - } catch (e: IOException) { - Log.error("[FileUtil]: Cannot close output file, error: " + e.message) - } + + destinationFile.outputStream().use { fos -> + inputStream.use { it.copyTo(fos) } } + destinationFile + } catch (e: IOException) { + null + } catch (e: NullPointerException) { + null } - return false - } /** * Extracts the given zip file into the given directory. @@ -368,4 +361,12 @@ object FileUtil { return fileName.substring(fileName.lastIndexOf(".") + 1) .lowercase() } + + @Throws(IOException::class) + fun getStringFromFile(file: File): String = + String(file.readBytes(), StandardCharsets.UTF_8) + + @Throws(IOException::class) + fun getStringFromInputStream(stream: InputStream): String = + String(stream.readBytes(), StandardCharsets.UTF_8) } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt index e0ee29c9b..9001ca9ab 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt @@ -30,7 +30,7 @@ object GameHelper { // Ensure keys are loaded so that ROM metadata can be decrypted. NativeLibrary.reloadKeys() - addGamesRecursive(games, FileUtil.listFiles(context, gamesUri), 3) + addGamesRecursive(games, FileUtil.listFiles(gamesUri), 3) // Cache list of games found on disk val serializedGames = mutableSetOf<String>() @@ -58,7 +58,7 @@ object GameHelper { if (it.isDirectory) { addGamesRecursive( games, - FileUtil.listFiles(YuzuApplication.appContext, it.uri), + FileUtil.listFiles(it.uri), depth - 1 ) } else { diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt index 1d4695a2a..f6882ce6c 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt @@ -3,64 +3,33 @@ package org.yuzu.yuzu_emu.utils -import android.content.Context import android.net.Uri +import android.os.Build import java.io.BufferedInputStream import java.io.File -import java.io.FileInputStream -import java.io.FileOutputStream import java.io.IOException -import java.util.zip.ZipInputStream import org.yuzu.yuzu_emu.NativeLibrary -import org.yuzu.yuzu_emu.utils.FileUtil.copyUriToInternalStorage +import org.yuzu.yuzu_emu.YuzuApplication +import java.util.zip.ZipException +import java.util.zip.ZipFile object GpuDriverHelper { private const val META_JSON_FILENAME = "meta.json" - private const val DRIVER_INTERNAL_FILENAME = "gpu_driver.zip" private var fileRedirectionPath: String? = null - private var driverInstallationPath: String? = null + var driverInstallationPath: String? = null private var hookLibPath: String? = null - @Throws(IOException::class) - private fun unzip(zipFilePath: String, destDir: String) { - val dir = File(destDir) - - // Create output directory if it doesn't exist - if (!dir.exists()) dir.mkdirs() - - // Unpack the files. - val inputStream = FileInputStream(zipFilePath) - val zis = ZipInputStream(BufferedInputStream(inputStream)) - val buffer = ByteArray(1024) - var ze = zis.nextEntry - while (ze != null) { - val newFile = File(destDir, ze.name) - val canonicalPath = newFile.canonicalPath - if (!canonicalPath.startsWith(destDir + ze.name)) { - throw SecurityException("Zip file attempted path traversal! " + ze.name) - } - - newFile.parentFile!!.mkdirs() - val fos = FileOutputStream(newFile) - var len: Int - while (zis.read(buffer).also { len = it } > 0) { - fos.write(buffer, 0, len) - } - fos.close() - zis.closeEntry() - ze = zis.nextEntry - } - zis.closeEntry() - } + val driverStoragePath get() = DirectoryInitialization.userDirectory!! + "/gpu_drivers/" - fun initializeDriverParameters(context: Context) { + fun initializeDriverParameters() { try { // Initialize the file redirection directory. - fileRedirectionPath = - context.getExternalFilesDir(null)!!.canonicalPath + "/gpu/vk_file_redirect/" + fileRedirectionPath = YuzuApplication.appContext + .getExternalFilesDir(null)!!.canonicalPath + "/gpu/vk_file_redirect/" // Initialize the driver installation directory. - driverInstallationPath = context.filesDir.canonicalPath + "/gpu_driver/" + driverInstallationPath = YuzuApplication.appContext + .filesDir.canonicalPath + "/gpu_driver/" } catch (e: IOException) { throw RuntimeException(e) } @@ -69,68 +38,169 @@ object GpuDriverHelper { initializeDirectories() // Initialize hook libraries directory. - hookLibPath = context.applicationInfo.nativeLibraryDir + "/" + hookLibPath = YuzuApplication.appContext.applicationInfo.nativeLibraryDir + "/" // Initialize GPU driver. NativeLibrary.initializeGpuDriver( hookLibPath, driverInstallationPath, - customDriverLibraryName, + customDriverData.libraryName, fileRedirectionPath ) } - fun installDefaultDriver(context: Context) { + fun getDrivers(): MutableList<Pair<String, GpuDriverMetadata>> { + val driverZips = File(driverStoragePath).listFiles() + val drivers: MutableList<Pair<String, GpuDriverMetadata>> = + driverZips + ?.mapNotNull { + val metadata = getMetadataFromZip(it) + metadata.name?.let { _ -> Pair(it.path, metadata) } + } + ?.sortedByDescending { it: Pair<String, GpuDriverMetadata> -> it.second.name } + ?.distinct() + ?.toMutableList() ?: mutableListOf() + + // TODO: Get system driver information + drivers.add(0, Pair("", GpuDriverMetadata())) + return drivers + } + + fun installDefaultDriver() { // Removing the installed driver will result in the backend using the default system driver. - val driverInstallationDir = File(driverInstallationPath!!) - deleteRecursive(driverInstallationDir) - initializeDriverParameters(context) + File(driverInstallationPath!!).deleteRecursively() + initializeDriverParameters() + } + + fun copyDriverToInternalStorage(driverUri: Uri): Boolean { + // Ensure we have directories. + initializeDirectories() + + // Copy the zip file URI to user data + val copiedFile = + FileUtil.copyUriToInternalStorage(driverUri, driverStoragePath) ?: return false + + // Validate driver + val metadata = getMetadataFromZip(copiedFile) + if (metadata.name == null) { + copiedFile.delete() + return false + } + + if (metadata.minApi > Build.VERSION.SDK_INT) { + copiedFile.delete() + return false + } + return true } - fun installCustomDriver(context: Context, driverPathUri: Uri?) { + /** + * Copies driver zip into user data directory so that it can be exported along with + * other user data and also unzipped into the installation directory + */ + fun installCustomDriver(driverUri: Uri): Boolean { // Revert to system default in the event the specified driver is bad. - installDefaultDriver(context) + installDefaultDriver() // Ensure we have directories. initializeDirectories() - // Copy the zip file URI into our private storage. - copyUriToInternalStorage( - context, - driverPathUri, - driverInstallationPath!!, - DRIVER_INTERNAL_FILENAME - ) + // Copy the zip file URI to user data + val copiedFile = + FileUtil.copyUriToInternalStorage(driverUri, driverStoragePath) ?: return false + + // Validate driver + val metadata = getMetadataFromZip(copiedFile) + if (metadata.name == null) { + copiedFile.delete() + return false + } + + if (metadata.minApi > Build.VERSION.SDK_INT) { + copiedFile.delete() + return false + } // Unzip the driver. try { - unzip(driverInstallationPath + DRIVER_INTERNAL_FILENAME, driverInstallationPath!!) + FileUtil.unzipToInternalStorage( + BufferedInputStream(copiedFile.inputStream()), + File(driverInstallationPath!!) + ) } catch (e: SecurityException) { - return + return false } // Initialize the driver parameters. - initializeDriverParameters(context) + initializeDriverParameters() + + return true } - external fun supportsCustomDriverLoading(): Boolean + /** + * Unzips driver into installation directory + */ + fun installCustomDriver(driver: File): Boolean { + // Revert to system default in the event the specified driver is bad. + installDefaultDriver() - // Parse the custom driver metadata to retrieve the name. - val customDriverName: String? - get() { - val metadata = GpuDriverMetadata(driverInstallationPath + META_JSON_FILENAME) - return metadata.name + // Ensure we have directories. + initializeDirectories() + + // Validate driver + val metadata = getMetadataFromZip(driver) + if (metadata.name == null) { + driver.delete() + return false } - // Parse the custom driver metadata to retrieve the library name. - private val customDriverLibraryName: String? - get() { - // Parse the custom driver metadata to retrieve the library name. - val metadata = GpuDriverMetadata(driverInstallationPath + META_JSON_FILENAME) - return metadata.libraryName + // Unzip the driver to the private installation directory + try { + FileUtil.unzipToInternalStorage( + BufferedInputStream(driver.inputStream()), + File(driverInstallationPath!!) + ) + } catch (e: SecurityException) { + return false } - private fun initializeDirectories() { + // Initialize the driver parameters. + initializeDriverParameters() + + return true + } + + /** + * Takes in a zip file and reads the meta.json file for presentation to the UI + * + * @param driver Zip containing driver and meta.json file + * @return A non-null [GpuDriverMetadata] instance that may have null members + */ + fun getMetadataFromZip(driver: File): GpuDriverMetadata { + try { + ZipFile(driver).use { zf -> + val entries = zf.entries() + while (entries.hasMoreElements()) { + val entry = entries.nextElement() + if (!entry.isDirectory && entry.name.lowercase().contains(".json")) { + zf.getInputStream(entry).use { + return GpuDriverMetadata(it, entry.size) + } + } + } + } + } catch (_: ZipException) { + } + return GpuDriverMetadata() + } + + external fun supportsCustomDriverLoading(): Boolean + + // Parse the custom driver metadata to retrieve the name. + val customDriverData: GpuDriverMetadata + get() = GpuDriverMetadata(File(driverInstallationPath + META_JSON_FILENAME)) + + fun initializeDirectories() { // Ensure the file redirection directory exists. val fileRedirectionDir = File(fileRedirectionPath!!) if (!fileRedirectionDir.exists()) { @@ -141,14 +211,10 @@ object GpuDriverHelper { if (!driverInstallationDir.exists()) { driverInstallationDir.mkdirs() } - } - - private fun deleteRecursive(fileOrDirectory: File) { - if (fileOrDirectory.isDirectory) { - for (child in fileOrDirectory.listFiles()!!) { - deleteRecursive(child) - } + // Ensure the driver storage directory exists + val driverStorageDirectory = File(driverStoragePath) + if (!driverStorageDirectory.exists()) { + driverStorageDirectory.mkdirs() } - fileOrDirectory.delete() } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverMetadata.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverMetadata.kt index a4e64070a..511a4171a 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverMetadata.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverMetadata.kt @@ -4,29 +4,29 @@ package org.yuzu.yuzu_emu.utils import java.io.IOException -import java.nio.charset.StandardCharsets -import java.nio.file.Files -import java.nio.file.Paths import org.json.JSONException import org.json.JSONObject +import java.io.File +import java.io.InputStream -class GpuDriverMetadata(metadataFilePath: String) { - var name: String? = null - var description: String? = null - var author: String? = null - var vendor: String? = null - var driverVersion: String? = null - var minApi = 0 - var libraryName: String? = null +class GpuDriverMetadata { + /** + * Tries to get driver metadata information from a meta.json [File] + * + * @param metadataFile meta.json file provided with a GPU driver + */ + constructor(metadataFile: File) { + if (metadataFile.length() > MAX_META_SIZE_BYTES) { + return + } - init { try { - val json = JSONObject(getStringFromFile(metadataFilePath)) + val json = JSONObject(FileUtil.getStringFromFile(metadataFile)) name = json.getString("name") description = json.getString("description") author = json.getString("author") vendor = json.getString("vendor") - driverVersion = json.getString("driverVersion") + version = json.getString("driverVersion") minApi = json.getInt("minApi") libraryName = json.getString("libraryName") } catch (e: JSONException) { @@ -36,12 +36,84 @@ class GpuDriverMetadata(metadataFilePath: String) { } } - companion object { - @Throws(IOException::class) - private fun getStringFromFile(filePath: String): String { - val path = Paths.get(filePath) - val bytes = Files.readAllBytes(path) - return String(bytes, StandardCharsets.UTF_8) + /** + * Tries to get driver metadata information from an input stream that's intended to be + * from a zip file + * + * @param metadataStream ZipEntry input stream + * @param size Size of the file in bytes + */ + constructor(metadataStream: InputStream, size: Long) { + if (size > MAX_META_SIZE_BYTES) { + return } + + try { + val json = JSONObject(FileUtil.getStringFromInputStream(metadataStream)) + name = json.getString("name") + description = json.getString("description") + author = json.getString("author") + vendor = json.getString("vendor") + version = json.getString("driverVersion") + minApi = json.getInt("minApi") + libraryName = json.getString("libraryName") + } catch (e: JSONException) { + // JSON is malformed, ignore and treat as unsupported metadata. + } catch (e: IOException) { + // File is inaccessible, ignore and treat as unsupported metadata. + } + } + + /** + * Creates an empty metadata instance + */ + constructor() + + override fun equals(other: Any?): Boolean { + if (other !is GpuDriverMetadata) { + return false + } + + return other.name == name && + other.description == description && + other.author == author && + other.vendor == vendor && + other.version == version && + other.minApi == minApi && + other.libraryName == libraryName + } + + override fun hashCode(): Int { + var result = name?.hashCode() ?: 0 + result = 31 * result + (description?.hashCode() ?: 0) + result = 31 * result + (author?.hashCode() ?: 0) + result = 31 * result + (vendor?.hashCode() ?: 0) + result = 31 * result + (version?.hashCode() ?: 0) + result = 31 * result + minApi + result = 31 * result + (libraryName?.hashCode() ?: 0) + return result + } + + override fun toString(): String = + """ + Name - $name + Description - $description + Author - $author + Vendor - $vendor + Version - $version + Min API - $minApi + Library Name - $libraryName + """.trimMargin().trimIndent() + + var name: String? = null + var description: String? = null + var author: String? = null + var vendor: String? = null + var version: String? = null + var minApi = 0 + var libraryName: String? = null + + companion object { + private const val MAX_META_SIZE_BYTES = 500000 } } diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index 9cf71680c..598f4e8bf 100644 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -218,7 +218,6 @@ public: return; } m_window->OnSurfaceChanged(m_native_window); - m_system.Renderer().NotifySurfaceChanged(); } void ConfigureFilesystemProvider(const std::string& filepath) { diff --git a/src/android/app/src/main/res/drawable/ic_build.xml b/src/android/app/src/main/res/drawable/ic_build.xml new file mode 100644 index 000000000..91d52f1b8 --- /dev/null +++ b/src/android/app/src/main/res/drawable/ic_build.xml @@ -0,0 +1,9 @@ +<vector xmlns:android="http://schemas.android.com/apk/res/android" + android:width="24dp" + android:height="24dp" + android:viewportWidth="24" + android:viewportHeight="24"> + <path + android:fillColor="?attr/colorControlNormal" + android:pathData="M22.7,19l-9.1,-9.1c0.9,-2.3 0.4,-5 -1.5,-6.9 -2,-2 -5,-2.4 -7.4,-1.3L9,6 6,9 1.6,4.7C0.4,7.1 0.9,10.1 2.9,12.1c1.9,1.9 4.6,2.4 6.9,1.5l9.1,9.1c0.4,0.4 1,0.4 1.4,0l2.3,-2.3c0.5,-0.4 0.5,-1.1 0.1,-1.4z" /> +</vector> diff --git a/src/android/app/src/main/res/drawable/ic_delete.xml b/src/android/app/src/main/res/drawable/ic_delete.xml new file mode 100644 index 000000000..d26a79711 --- /dev/null +++ b/src/android/app/src/main/res/drawable/ic_delete.xml @@ -0,0 +1,9 @@ +<vector xmlns:android="http://schemas.android.com/apk/res/android" + android:width="24dp" + android:height="24dp" + android:viewportWidth="24" + android:viewportHeight="24"> + <path + android:fillColor="?attr/colorControlNormal" + android:pathData="M6,19c0,1.1 0.9,2 2,2h8c1.1,0 2,-0.9 2,-2V7H6v12zM19,4h-3.5l-1,-1h-5l-1,1H5v2h14V4z" /> +</vector> diff --git a/src/android/app/src/main/res/layout/card_driver_option.xml b/src/android/app/src/main/res/layout/card_driver_option.xml new file mode 100644 index 000000000..1dd9a6d7d --- /dev/null +++ b/src/android/app/src/main/res/layout/card_driver_option.xml @@ -0,0 +1,89 @@ +<?xml version="1.0" encoding="utf-8"?> +<com.google.android.material.card.MaterialCardView xmlns:android="http://schemas.android.com/apk/res/android" + xmlns:app="http://schemas.android.com/apk/res-auto" + xmlns:tools="http://schemas.android.com/tools" + style="?attr/materialCardViewOutlinedStyle" + android:layout_width="match_parent" + android:layout_height="wrap_content" + android:layout_marginHorizontal="16dp" + android:layout_marginVertical="12dp" + android:background="?attr/selectableItemBackground" + android:clickable="true" + android:focusable="true"> + + <LinearLayout + android:layout_width="match_parent" + android:layout_height="wrap_content" + android:orientation="horizontal" + android:layout_gravity="center" + android:padding="16dp"> + + <RadioButton + android:id="@+id/radio_button" + android:layout_width="wrap_content" + android:layout_height="wrap_content" + android:layout_gravity="center_vertical" + android:clickable="false" + android:checked="false" /> + + <LinearLayout + android:layout_width="0dp" + android:layout_height="wrap_content" + android:layout_weight="1" + android:orientation="vertical" + android:layout_gravity="center_vertical"> + + <com.google.android.material.textview.MaterialTextView + android:id="@+id/title" + style="@style/TextAppearance.Material3.TitleMedium" + android:layout_width="match_parent" + android:layout_height="wrap_content" + android:ellipsize="none" + android:marqueeRepeatLimit="marquee_forever" + android:requiresFadingEdge="horizontal" + android:singleLine="true" + android:textAlignment="viewStart" + tools:text="@string/select_gpu_driver_default" /> + + <com.google.android.material.textview.MaterialTextView + android:id="@+id/version" + style="@style/TextAppearance.Material3.BodyMedium" + android:layout_width="match_parent" + android:layout_height="wrap_content" + android:layout_marginTop="6dp" + android:ellipsize="none" + android:marqueeRepeatLimit="marquee_forever" + android:requiresFadingEdge="horizontal" + android:singleLine="true" + android:textAlignment="viewStart" + tools:text="@string/install_gpu_driver_description" /> + + <com.google.android.material.textview.MaterialTextView + android:id="@+id/description" + style="@style/TextAppearance.Material3.BodyMedium" + android:layout_width="match_parent" + android:layout_height="wrap_content" + android:layout_marginTop="6dp" + android:ellipsize="none" + android:marqueeRepeatLimit="marquee_forever" + android:requiresFadingEdge="horizontal" + android:singleLine="true" + android:textAlignment="viewStart" + tools:text="@string/install_gpu_driver_description" /> + + </LinearLayout> + + <Button + android:id="@+id/button_delete" + style="@style/Widget.Material3.Button.IconButton" + android:layout_width="wrap_content" + android:layout_height="wrap_content" + android:layout_gravity="center_vertical" + android:contentDescription="@string/delete" + android:tooltipText="@string/delete" + app:icon="@drawable/ic_delete" + app:iconTint="?attr/colorControlNormal" /> + + </LinearLayout> + +</com.google.android.material.card.MaterialCardView> diff --git a/src/android/app/src/main/res/layout/card_home_option.xml b/src/android/app/src/main/res/layout/card_home_option.xml index f9f1d89fb..6e8a232f9 100644 --- a/src/android/app/src/main/res/layout/card_home_option.xml +++ b/src/android/app/src/main/res/layout/card_home_option.xml @@ -16,7 +16,8 @@ <LinearLayout android:id="@+id/option_layout" android:layout_width="match_parent" - android:layout_height="wrap_content"> + android:layout_height="wrap_content" + android:layout_gravity="center_vertical"> <ImageView android:id="@+id/option_icon" diff --git a/src/android/app/src/main/res/layout/fragment_driver_manager.xml b/src/android/app/src/main/res/layout/fragment_driver_manager.xml new file mode 100644 index 000000000..6cea2d164 --- /dev/null +++ b/src/android/app/src/main/res/layout/fragment_driver_manager.xml @@ -0,0 +1,48 @@ +<?xml version="1.0" encoding="utf-8"?> +<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android" + xmlns:app="http://schemas.android.com/apk/res-auto" + android:id="@+id/coordinator_licenses" + android:layout_width="match_parent" + android:layout_height="match_parent" + android:background="?attr/colorSurface"> + + <androidx.coordinatorlayout.widget.CoordinatorLayout + android:layout_width="match_parent" + android:layout_height="match_parent"> + + <com.google.android.material.appbar.AppBarLayout + android:id="@+id/appbar_drivers" + android:layout_width="match_parent" + android:layout_height="wrap_content" + android:fitsSystemWindows="true" + app:liftOnScrollTargetViewId="@id/list_drivers"> + + <com.google.android.material.appbar.MaterialToolbar + android:id="@+id/toolbar_drivers" + android:layout_width="match_parent" + android:layout_height="?attr/actionBarSize" + app:navigationIcon="@drawable/ic_back" + app:title="@string/gpu_driver_manager" /> + + </com.google.android.material.appbar.AppBarLayout> + + <androidx.recyclerview.widget.RecyclerView + android:id="@+id/list_drivers" + android:layout_width="match_parent" + android:layout_height="match_parent" + android:clipToPadding="false" + app:layout_behavior="@string/appbar_scrolling_view_behavior" /> + + </androidx.coordinatorlayout.widget.CoordinatorLayout> + + <com.google.android.material.floatingactionbutton.ExtendedFloatingActionButton + android:id="@+id/button_install" + android:layout_width="wrap_content" + android:layout_height="wrap_content" + android:layout_gravity="bottom|end" + android:text="@string/install" + app:icon="@drawable/ic_add" + app:layout_constraintBottom_toBottomOf="parent" + app:layout_constraintEnd_toEndOf="parent" /> + +</androidx.constraintlayout.widget.ConstraintLayout> diff --git a/src/android/app/src/main/res/navigation/home_navigation.xml b/src/android/app/src/main/res/navigation/home_navigation.xml index 2356b802b..82749359d 100644 --- a/src/android/app/src/main/res/navigation/home_navigation.xml +++ b/src/android/app/src/main/res/navigation/home_navigation.xml @@ -22,6 +22,9 @@ <action android:id="@+id/action_homeSettingsFragment_to_installableFragment" app:destination="@id/installableFragment" /> + <action + android:id="@+id/action_homeSettingsFragment_to_driverManagerFragment" + app:destination="@id/driverManagerFragment" /> </fragment> <fragment @@ -95,5 +98,9 @@ android:id="@+id/installableFragment" android:name="org.yuzu.yuzu_emu.fragments.InstallableFragment" android:label="InstallableFragment" /> + <fragment + android:id="@+id/driverManagerFragment" + android:name="org.yuzu.yuzu_emu.fragments.DriverManagerFragment" + android:label="DriverManagerFragment" /> </navigation> diff --git a/src/android/app/src/main/res/values-de/strings.xml b/src/android/app/src/main/res/values-de/strings.xml index dd0f36392..72a47fbdb 100644 --- a/src/android/app/src/main/res/values-de/strings.xml +++ b/src/android/app/src/main/res/values-de/strings.xml @@ -168,9 +168,7 @@ <string name="select_gpu_driver_title">Möchtest du deinen aktuellen GPU-Treiber ersetzen?</string> <string name="select_gpu_driver_install">Installieren</string> <string name="select_gpu_driver_default">Standard</string> - <string name="select_gpu_driver_install_success">%s wurde installiert</string> <string name="select_gpu_driver_use_default">Standard GPU-Treiber wird verwendet</string> - <string name="select_gpu_driver_error">Ungültiger Treiber ausgewählt, Standard-Treiber wird verwendet!</string> <string name="system_gpu_driver">System GPU-Treiber</string> <string name="installing_driver">Treiber wird installiert...</string> diff --git a/src/android/app/src/main/res/values-es/strings.xml b/src/android/app/src/main/res/values-es/strings.xml index d398f862f..e5bdd5889 100644 --- a/src/android/app/src/main/res/values-es/strings.xml +++ b/src/android/app/src/main/res/values-es/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">¿Quiere reemplazar el driver de GPU actual?</string> <string name="select_gpu_driver_install">Instalar</string> <string name="select_gpu_driver_default">Predeterminado</string> - <string name="select_gpu_driver_install_success">Instalado %s</string> <string name="select_gpu_driver_use_default">Usando el driver de GPU por defecto </string> - <string name="select_gpu_driver_error">¡Driver no válido, utilizando el predeterminado del sistema!</string> <string name="system_gpu_driver">Driver GPU del sistema</string> <string name="installing_driver">Instalando driver...</string> diff --git a/src/android/app/src/main/res/values-fr/strings.xml b/src/android/app/src/main/res/values-fr/strings.xml index a7abd9077..1e02828aa 100644 --- a/src/android/app/src/main/res/values-fr/strings.xml +++ b/src/android/app/src/main/res/values-fr/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">Souhaitez vous remplacer votre pilote actuel ?</string> <string name="select_gpu_driver_install">Installer</string> <string name="select_gpu_driver_default">Défaut</string> - <string name="select_gpu_driver_install_success">%s Installé</string> <string name="select_gpu_driver_use_default">Utilisation du pilote de GPU par défaut</string> - <string name="select_gpu_driver_error">Pilote non valide sélectionné, utilisation du paramètre par défaut du système !</string> <string name="system_gpu_driver">Pilote du GPU du système</string> <string name="installing_driver">Installation du pilote...</string> diff --git a/src/android/app/src/main/res/values-it/strings.xml b/src/android/app/src/main/res/values-it/strings.xml index b18161801..09c9345b0 100644 --- a/src/android/app/src/main/res/values-it/strings.xml +++ b/src/android/app/src/main/res/values-it/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">Vuoi sostituire il driver della tua GPU attuale?</string> <string name="select_gpu_driver_install">Installa</string> <string name="select_gpu_driver_default">Predefinito</string> - <string name="select_gpu_driver_install_success">Installato%s</string> <string name="select_gpu_driver_use_default">Utilizza il driver predefinito della GPU.</string> - <string name="select_gpu_driver_error">Il driver selezionato è invalido, è in utilizzo quello predefinito di sistema!</string> <string name="system_gpu_driver">Driver GPU del sistema</string> <string name="installing_driver">Installando i driver...</string> diff --git a/src/android/app/src/main/res/values-ja/strings.xml b/src/android/app/src/main/res/values-ja/strings.xml index 88fa5a0bb..a0ea78bef 100644 --- a/src/android/app/src/main/res/values-ja/strings.xml +++ b/src/android/app/src/main/res/values-ja/strings.xml @@ -170,9 +170,7 @@ <string name="select_gpu_driver_title">現在のGPUドライバーを置き換えますか?</string> <string name="select_gpu_driver_install">インストール</string> <string name="select_gpu_driver_default">デフォルト</string> - <string name="select_gpu_driver_install_success">%s をインストールしました</string> <string name="select_gpu_driver_use_default">デフォルトのGPUドライバーを使用します</string> - <string name="select_gpu_driver_error">選択されたドライバが無効なため、システムのデフォルトを使用します!</string> <string name="system_gpu_driver">システムのGPUドライバ</string> <string name="installing_driver">インストール中…</string> diff --git a/src/android/app/src/main/res/values-ko/strings.xml b/src/android/app/src/main/res/values-ko/strings.xml index 4b658255c..214f95706 100644 --- a/src/android/app/src/main/res/values-ko/strings.xml +++ b/src/android/app/src/main/res/values-ko/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">현재 사용 중인 GPU 드라이버를 교체하겠습니까?</string> <string name="select_gpu_driver_install">설치</string> <string name="select_gpu_driver_default">기본값</string> - <string name="select_gpu_driver_install_success">설치된 %s</string> <string name="select_gpu_driver_use_default">기본 GPU 드라이버 사용</string> - <string name="select_gpu_driver_error">시스템 기본값을 사용하여 잘못된 드라이버를 선택했습니다!</string> <string name="system_gpu_driver">시스템 GPU 드라이버</string> <string name="installing_driver">드라이버 설치 중...</string> diff --git a/src/android/app/src/main/res/values-nb/strings.xml b/src/android/app/src/main/res/values-nb/strings.xml index dd602a389..5443cef42 100644 --- a/src/android/app/src/main/res/values-nb/strings.xml +++ b/src/android/app/src/main/res/values-nb/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">Ønsker du å bytte ut din nåværende GPU-driver?</string> <string name="select_gpu_driver_install">Installer</string> <string name="select_gpu_driver_default">Standard</string> - <string name="select_gpu_driver_install_success">Installert %s</string> <string name="select_gpu_driver_use_default">Bruk av standard GPU-driver</string> - <string name="select_gpu_driver_error">Ugyldig driver valgt, bruker systemstandard!</string> <string name="system_gpu_driver">Systemets GPU-driver</string> <string name="installing_driver">Installerer driver...</string> diff --git a/src/android/app/src/main/res/values-pl/strings.xml b/src/android/app/src/main/res/values-pl/strings.xml index 2fdd1f952..899e233d0 100644 --- a/src/android/app/src/main/res/values-pl/strings.xml +++ b/src/android/app/src/main/res/values-pl/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">Chcesz zastąpić obecny sterownik układu graficznego?</string> <string name="select_gpu_driver_install">Zainstaluj</string> <string name="select_gpu_driver_default">Domyślne</string> - <string name="select_gpu_driver_install_success">Zainstalowano %s</string> <string name="select_gpu_driver_use_default">Aktywny domyślny sterownik GPU</string> - <string name="select_gpu_driver_error">Wybrano błędny sterownik, powrót do domyślnego. </string> <string name="system_gpu_driver">Systemowy sterownik GPU</string> <string name="installing_driver">Instalowanie sterownika...</string> diff --git a/src/android/app/src/main/res/values-pt-rBR/strings.xml b/src/android/app/src/main/res/values-pt-rBR/strings.xml index 2f26367fe..caa095364 100644 --- a/src/android/app/src/main/res/values-pt-rBR/strings.xml +++ b/src/android/app/src/main/res/values-pt-rBR/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">Queres substituir o driver do GPU atual? </string> <string name="select_gpu_driver_install">Instalar</string> <string name="select_gpu_driver_default">Padrão</string> - <string name="select_gpu_driver_install_success">Instalado%s</string> <string name="select_gpu_driver_use_default">Usar o driver padrão do GPU</string> - <string name="select_gpu_driver_error">Driver selecionado inválido, a usar o padrão do sistema!</string> <string name="system_gpu_driver">Driver do GPU padrão</string> <string name="installing_driver">A instalar o Driver...</string> diff --git a/src/android/app/src/main/res/values-pt-rPT/strings.xml b/src/android/app/src/main/res/values-pt-rPT/strings.xml index 4e1eb4cd7..0a1a47fbb 100644 --- a/src/android/app/src/main/res/values-pt-rPT/strings.xml +++ b/src/android/app/src/main/res/values-pt-rPT/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">Queres substituir o driver do GPU atual? </string> <string name="select_gpu_driver_install">Instalar</string> <string name="select_gpu_driver_default">Padrão</string> - <string name="select_gpu_driver_install_success">Instalado%s</string> <string name="select_gpu_driver_use_default">Usar o driver padrão do GPU</string> - <string name="select_gpu_driver_error">Driver selecionado inválido, a usar o padrão do sistema!</string> <string name="system_gpu_driver">Driver do GPU padrão</string> <string name="installing_driver">A instalar o Driver...</string> diff --git a/src/android/app/src/main/res/values-ru/strings.xml b/src/android/app/src/main/res/values-ru/strings.xml index f5695dc93..0bef035d6 100644 --- a/src/android/app/src/main/res/values-ru/strings.xml +++ b/src/android/app/src/main/res/values-ru/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">Хотите заменить текущий драйвер ГП?</string> <string name="select_gpu_driver_install">Установить</string> <string name="select_gpu_driver_default">По умолчанию</string> - <string name="select_gpu_driver_install_success">Установлено %s</string> <string name="select_gpu_driver_use_default">Используется стандартный драйвер ГП </string> - <string name="select_gpu_driver_error">Выбран неверный драйвер, используется стандартный системный!</string> <string name="system_gpu_driver">Системный драйвер ГП</string> <string name="installing_driver">Установка драйвера...</string> diff --git a/src/android/app/src/main/res/values-uk/strings.xml b/src/android/app/src/main/res/values-uk/strings.xml index 061bc6f04..5b789ee98 100644 --- a/src/android/app/src/main/res/values-uk/strings.xml +++ b/src/android/app/src/main/res/values-uk/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">Хочете замінити поточний драйвер ГП?</string> <string name="select_gpu_driver_install">Встановити</string> <string name="select_gpu_driver_default">За замовчуванням</string> - <string name="select_gpu_driver_install_success">Встановлено %s</string> <string name="select_gpu_driver_use_default">Використовується стандартний драйвер ГП</string> - <string name="select_gpu_driver_error">Обрано неправильний драйвер, використовується стандартний системний!</string> <string name="system_gpu_driver">Системний драйвер ГП</string> <string name="installing_driver">Встановлення драйвера...</string> diff --git a/src/android/app/src/main/res/values-zh-rCN/strings.xml b/src/android/app/src/main/res/values-zh-rCN/strings.xml index fe6dd5eaa..c0e885751 100644 --- a/src/android/app/src/main/res/values-zh-rCN/strings.xml +++ b/src/android/app/src/main/res/values-zh-rCN/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">要取代您当前的 GPU 驱动程序吗?</string> <string name="select_gpu_driver_install">安装</string> <string name="select_gpu_driver_default">系统默认</string> - <string name="select_gpu_driver_install_success">已安装 %s</string> <string name="select_gpu_driver_use_default">使用默认 GPU 驱动程序</string> - <string name="select_gpu_driver_error">选择的驱动程序无效,将使用系统默认的驱动程序!</string> <string name="system_gpu_driver">系统 GPU 驱动程序</string> <string name="installing_driver">正在安装驱动程序…</string> diff --git a/src/android/app/src/main/res/values-zh-rTW/strings.xml b/src/android/app/src/main/res/values-zh-rTW/strings.xml index 9b3e54224..4a21bf893 100644 --- a/src/android/app/src/main/res/values-zh-rTW/strings.xml +++ b/src/android/app/src/main/res/values-zh-rTW/strings.xml @@ -171,9 +171,7 @@ <string name="select_gpu_driver_title">要取代您目前的 GPU 驅動程式嗎?</string> <string name="select_gpu_driver_install">安裝</string> <string name="select_gpu_driver_default">預設</string> - <string name="select_gpu_driver_install_success">已安裝 %s</string> <string name="select_gpu_driver_use_default">使用預設 GPU 驅動程式</string> - <string name="select_gpu_driver_error">選取的驅動程式無效,將使用系統預設驅動程式!</string> <string name="system_gpu_driver">系統 GPU 驅動程式</string> <string name="installing_driver">正在安裝驅動程式…</string> diff --git a/src/android/app/src/main/res/values/dimens.xml b/src/android/app/src/main/res/values/dimens.xml index 7b2296d95..ef855ea6f 100644 --- a/src/android/app/src/main/res/values/dimens.xml +++ b/src/android/app/src/main/res/values/dimens.xml @@ -13,6 +13,8 @@ <dimen name="menu_width">256dp</dimen> <dimen name="card_width">165dp</dimen> <dimen name="icon_inset">24dp</dimen> + <dimen name="spacing_bottom_list_fab">72dp</dimen> + <dimen name="spacing_fab">24dp</dimen> <dimen name="dialog_margin">20dp</dimen> <dimen name="elevated_app_bar">3dp</dimen> diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index e51edf872..9e4854221 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -72,6 +72,7 @@ <string name="invalid_keys_error">Invalid encryption keys</string> <string name="dumping_keys_quickstart_link">https://yuzu-emu.org/help/quickstart/#dumping-decryption-keys</string> <string name="install_keys_failure_description">The selected file is incorrect or corrupt. Please redump your keys.</string> + <string name="gpu_driver_manager">GPU Driver Manager</string> <string name="install_gpu_driver">Install GPU driver</string> <string name="install_gpu_driver_description">Install alternative drivers for potentially better performance or accuracy</string> <string name="advanced_settings">Advanced settings</string> @@ -234,15 +235,17 @@ <string name="export_failed">Export failed</string> <string name="import_failed">Import failed</string> <string name="cancelling">Cancelling</string> + <string name="install">Install</string> + <string name="delete">Delete</string> <!-- GPU driver installation --> <string name="select_gpu_driver">Select GPU driver</string> <string name="select_gpu_driver_title">Would you like to replace your current GPU driver?</string> <string name="select_gpu_driver_install">Install</string> <string name="select_gpu_driver_default">Default</string> - <string name="select_gpu_driver_install_success">Installed %s</string> <string name="select_gpu_driver_use_default">Using default GPU driver</string> - <string name="select_gpu_driver_error">Invalid driver selected, using system default!</string> + <string name="select_gpu_driver_error">Invalid driver selected</string> + <string name="driver_already_installed">Driver already installed</string> <string name="system_gpu_driver">System GPU driver</string> <string name="installing_driver">Installing driver…</string> diff --git a/src/android/build.gradle.kts b/src/android/build.gradle.kts index 80f370c16..51e559321 100644 --- a/src/android/build.gradle.kts +++ b/src/android/build.gradle.kts @@ -3,8 +3,8 @@ // Top-level build file where you can add configuration options common to all sub-projects/modules. plugins { - id("com.android.application") version "8.0.2" apply false - id("com.android.library") version "8.0.2" apply false + id("com.android.application") version "8.1.2" apply false + id("com.android.library") version "8.1.2" apply false id("org.jetbrains.kotlin.android") version "1.8.21" apply false } diff --git a/src/audio_core/adsp/apps/audio_renderer/audio_renderer.cpp b/src/audio_core/adsp/apps/audio_renderer/audio_renderer.cpp index 972d5e45b..ef301d8b4 100644 --- a/src/audio_core/adsp/apps/audio_renderer/audio_renderer.cpp +++ b/src/audio_core/adsp/apps/audio_renderer/audio_renderer.cpp @@ -77,6 +77,7 @@ void AudioRenderer::Wait() { "{}, got {}", Message::RenderResponse, msg); } + PostDSPClearCommandBuffer(); } void AudioRenderer::Send(Direction dir, u32 message) { @@ -96,6 +97,14 @@ void AudioRenderer::SetCommandBuffer(s32 session_id, CpuAddr buffer, u64 size, u command_buffers[session_id].reset_buffer = reset; } +void AudioRenderer::PostDSPClearCommandBuffer() noexcept { + for (auto& buffer : command_buffers) { + buffer.buffer = 0; + buffer.size = 0; + buffer.reset_buffer = false; + } +} + u32 AudioRenderer::GetRemainCommandCount(s32 session_id) const noexcept { return command_buffers[session_id].remaining_command_count; } diff --git a/src/audio_core/adsp/apps/audio_renderer/audio_renderer.h b/src/audio_core/adsp/apps/audio_renderer/audio_renderer.h index 85874d88a..57b89d9fe 100644 --- a/src/audio_core/adsp/apps/audio_renderer/audio_renderer.h +++ b/src/audio_core/adsp/apps/audio_renderer/audio_renderer.h @@ -85,6 +85,8 @@ private: */ void CreateSinkStreams(); + void PostDSPClearCommandBuffer() noexcept; + /// Core system Core::System& system; /// The output sink the AudioRenderer will send samples to diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp index 6081352a2..d66d04fae 100644 --- a/src/audio_core/sink/sink_stream.cpp +++ b/src/audio_core/sink/sink_stream.cpp @@ -204,6 +204,10 @@ void SinkStream::ProcessAudioOutAndRender(std::span<s16> output_buffer, std::siz // paused and we'll desync, so just play silence. if (system.IsPaused() || system.IsShuttingDown()) { if (system.IsShuttingDown()) { + { + std::scoped_lock lk{release_mutex}; + queued_buffers.store(0); + } release_cv.notify_one(); } diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 416203c59..e216eb3de 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -120,6 +120,8 @@ add_library(common STATIC socket_types.h spin_lock.cpp spin_lock.h + stb.cpp + stb.h steady_clock.cpp steady_clock.h stream.cpp @@ -189,6 +191,14 @@ if(ARCHITECTURE_x86_64) target_link_libraries(common PRIVATE xbyak::xbyak) endif() +if (ARCHITECTURE_arm64 AND (ANDROID OR LINUX)) + target_sources(common + PRIVATE + arm64/native_clock.cpp + arm64/native_clock.h + ) +endif() + if (MSVC) target_compile_definitions(common PRIVATE # The standard library doesn't provide any replacement for codecvt yet @@ -200,6 +210,8 @@ if (MSVC) /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data /we4800 # Implicit conversion from 'type' to bool. Possible information loss ) +else() + set_source_files_properties(stb.cpp PROPERTIES COMPILE_OPTIONS "-Wno-implicit-fallthrough;-Wno-missing-declarations;-Wno-missing-field-initializers") endif() if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") @@ -215,7 +227,7 @@ endif() create_target_directory_groups(common) -target_link_libraries(common PUBLIC Boost::context Boost::headers fmt::fmt microprofile Threads::Threads) +target_link_libraries(common PUBLIC Boost::context Boost::headers fmt::fmt microprofile stb::headers Threads::Threads) target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd LLVM::Demangle) if (ANDROID) diff --git a/src/common/arm64/native_clock.cpp b/src/common/arm64/native_clock.cpp new file mode 100644 index 000000000..88fdba527 --- /dev/null +++ b/src/common/arm64/native_clock.cpp @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/arm64/native_clock.h" + +namespace Common::Arm64 { + +namespace { + +NativeClock::FactorType GetFixedPointFactor(u64 num, u64 den) { + return (static_cast<NativeClock::FactorType>(num) << 64) / den; +} + +u64 MultiplyHigh(u64 m, NativeClock::FactorType factor) { + return static_cast<u64>((m * factor) >> 64); +} + +} // namespace + +NativeClock::NativeClock() { + const u64 host_cntfrq = GetHostCNTFRQ(); + ns_cntfrq_factor = GetFixedPointFactor(NsRatio::den, host_cntfrq); + us_cntfrq_factor = GetFixedPointFactor(UsRatio::den, host_cntfrq); + ms_cntfrq_factor = GetFixedPointFactor(MsRatio::den, host_cntfrq); + guest_cntfrq_factor = GetFixedPointFactor(CNTFRQ, host_cntfrq); + gputick_cntfrq_factor = GetFixedPointFactor(GPUTickFreq, host_cntfrq); +} + +std::chrono::nanoseconds NativeClock::GetTimeNS() const { + return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_cntfrq_factor)}; +} + +std::chrono::microseconds NativeClock::GetTimeUS() const { + return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_cntfrq_factor)}; +} + +std::chrono::milliseconds NativeClock::GetTimeMS() const { + return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_cntfrq_factor)}; +} + +u64 NativeClock::GetCNTPCT() const { + return MultiplyHigh(GetHostTicksElapsed(), guest_cntfrq_factor); +} + +u64 NativeClock::GetGPUTick() const { + return MultiplyHigh(GetHostTicksElapsed(), gputick_cntfrq_factor); +} + +u64 NativeClock::GetHostTicksNow() const { + u64 cntvct_el0 = 0; + asm volatile("dsb ish\n\t" + "mrs %[cntvct_el0], cntvct_el0\n\t" + "dsb ish\n\t" + : [cntvct_el0] "=r"(cntvct_el0)); + return cntvct_el0; +} + +u64 NativeClock::GetHostTicksElapsed() const { + return GetHostTicksNow(); +} + +bool NativeClock::IsNative() const { + return true; +} + +u64 NativeClock::GetHostCNTFRQ() { + u64 cntfrq_el0 = 0; + asm("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0)); + return cntfrq_el0; +} + +} // namespace Common::Arm64 diff --git a/src/common/arm64/native_clock.h b/src/common/arm64/native_clock.h new file mode 100644 index 000000000..a28b419f2 --- /dev/null +++ b/src/common/arm64/native_clock.h @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/wall_clock.h" + +namespace Common::Arm64 { + +class NativeClock final : public WallClock { +public: + explicit NativeClock(); + + std::chrono::nanoseconds GetTimeNS() const override; + + std::chrono::microseconds GetTimeUS() const override; + + std::chrono::milliseconds GetTimeMS() const override; + + u64 GetCNTPCT() const override; + + u64 GetGPUTick() const override; + + u64 GetHostTicksNow() const override; + + u64 GetHostTicksElapsed() const override; + + bool IsNative() const override; + + static u64 GetHostCNTFRQ(); + +public: + using FactorType = unsigned __int128; + + FactorType GetGuestCNTFRQFactor() const { + return guest_cntfrq_factor; + } + +private: + FactorType ns_cntfrq_factor; + FactorType us_cntfrq_factor; + FactorType ms_cntfrq_factor; + FactorType guest_cntfrq_factor; + FactorType gputick_cntfrq_factor; +}; + +} // namespace Common::Arm64 diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 0dad9338a..47d028d48 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -39,8 +39,12 @@ #define Crash() exit(1) #endif +#define LTO_NOINLINE __attribute__((noinline)) + #else // _MSC_VER +#define LTO_NOINLINE + // Locale Cross-Compatibility #define locale_t _locale_t diff --git a/src/common/elf.h b/src/common/elf.h index 14a5e9597..0b728dc54 100644 --- a/src/common/elf.h +++ b/src/common/elf.h @@ -211,6 +211,11 @@ struct Elf64_Rela { Elf64_Sxword r_addend; /* Addend */ }; +/* RELR relocation table entry */ + +using Elf32_Relr = Elf32_Word; +using Elf64_Relr = Elf64_Xword; + /* How to extract and insert information held in the r_info field. */ static inline u32 Elf32RelSymIndex(Elf32_Word r_info) { @@ -328,6 +333,9 @@ constexpr u32 ElfDtFiniArray = 26; /* Array with addresses of fini fct */ constexpr u32 ElfDtInitArraySz = 27; /* Size in bytes of DT_INIT_ARRAY */ constexpr u32 ElfDtFiniArraySz = 28; /* Size in bytes of DT_FINI_ARRAY */ constexpr u32 ElfDtSymtabShndx = 34; /* Address of SYMTAB_SHNDX section */ +constexpr u32 ElfDtRelrsz = 35; /* Size of RELR relative relocations */ +constexpr u32 ElfDtRelr = 36; /* Address of RELR relative relocations */ +constexpr u32 ElfDtRelrent = 37; /* Size of one RELR relative relocation */ } // namespace ELF } // namespace Common diff --git a/src/common/fs/fs_paths.h b/src/common/fs/fs_paths.h index 1a3f6ab45..bcf447089 100644 --- a/src/common/fs/fs_paths.h +++ b/src/common/fs/fs_paths.h @@ -19,10 +19,12 @@ #define LOAD_DIR "load" #define LOG_DIR "log" #define NAND_DIR "nand" +#define PLAY_TIME_DIR "play_time" #define SCREENSHOTS_DIR "screenshots" #define SDMC_DIR "sdmc" #define SHADER_DIR "shader" #define TAS_DIR "tas" +#define ICONS_DIR "icons" // yuzu-specific files diff --git a/src/common/fs/path_util.cpp b/src/common/fs/path_util.cpp index fbac4d80c..0c4c88cde 100644 --- a/src/common/fs/path_util.cpp +++ b/src/common/fs/path_util.cpp @@ -125,10 +125,12 @@ public: GenerateYuzuPath(YuzuPath::LoadDir, yuzu_path / LOAD_DIR); GenerateYuzuPath(YuzuPath::LogDir, yuzu_path / LOG_DIR); GenerateYuzuPath(YuzuPath::NANDDir, yuzu_path / NAND_DIR); + GenerateYuzuPath(YuzuPath::PlayTimeDir, yuzu_path / PLAY_TIME_DIR); GenerateYuzuPath(YuzuPath::ScreenshotsDir, yuzu_path / SCREENSHOTS_DIR); GenerateYuzuPath(YuzuPath::SDMCDir, yuzu_path / SDMC_DIR); GenerateYuzuPath(YuzuPath::ShaderDir, yuzu_path / SHADER_DIR); GenerateYuzuPath(YuzuPath::TASDir, yuzu_path / TAS_DIR); + GenerateYuzuPath(YuzuPath::IconsDir, yuzu_path / ICONS_DIR); } private: diff --git a/src/common/fs/path_util.h b/src/common/fs/path_util.h index 036e475aa..2874ea738 100644 --- a/src/common/fs/path_util.h +++ b/src/common/fs/path_util.h @@ -21,10 +21,12 @@ enum class YuzuPath { LoadDir, // Where cheat/mod files are stored. LogDir, // Where log files are stored. NANDDir, // Where the emulated NAND is stored. + PlayTimeDir, // Where play time data is stored. ScreenshotsDir, // Where yuzu screenshots are stored. SDMCDir, // Where the emulated SDMC is stored. ShaderDir, // Where shaders are stored. TASDir, // Where TAS scripts are stored. + IconsDir, // Where Icons for Windows shortcuts are stored. }; /** diff --git a/src/common/nvidia_flags.cpp b/src/common/nvidia_flags.cpp index 7ed7690ee..fa3747782 100644 --- a/src/common/nvidia_flags.cpp +++ b/src/common/nvidia_flags.cpp @@ -25,6 +25,7 @@ void ConfigureNvidiaEnvironmentFlags() { void(_putenv(fmt::format("__GL_SHADER_DISK_CACHE_PATH={}", windows_path_string).c_str())); void(_putenv("__GL_SHADER_DISK_CACHE_SKIP_CLEANUP=1")); + void(_putenv("__GL_THREADED_OPTIMIZATIONS=1")); #endif } diff --git a/src/common/polyfill_thread.h b/src/common/polyfill_thread.h index 41cbb9ed5..12e59a893 100644 --- a/src/common/polyfill_thread.h +++ b/src/common/polyfill_thread.h @@ -15,12 +15,13 @@ #include <condition_variable> #include <stop_token> #include <thread> +#include <utility> namespace Common { template <typename Condvar, typename Lock, typename Pred> void CondvarWait(Condvar& cv, std::unique_lock<Lock>& lk, std::stop_token token, Pred&& pred) { - cv.wait(lk, token, std::move(pred)); + cv.wait(lk, token, std::forward<Pred>(pred)); } template <typename Rep, typename Period> @@ -109,7 +110,7 @@ public: // Insert the callback. stop_state_callback ret = ++m_next_callback; - m_callbacks.emplace(ret, move(f)); + m_callbacks.emplace(ret, std::move(f)); return ret; } @@ -162,7 +163,7 @@ private: friend class stop_source; template <typename Callback> friend class stop_callback; - stop_token(shared_ptr<polyfill::stop_state> stop_state) : m_stop_state(move(stop_state)) {} + stop_token(shared_ptr<polyfill::stop_state> stop_state) : m_stop_state(std::move(stop_state)) {} private: shared_ptr<polyfill::stop_state> m_stop_state; @@ -198,7 +199,7 @@ public: private: friend class jthread; explicit stop_source(shared_ptr<polyfill::stop_state> stop_state) - : m_stop_state(move(stop_state)) {} + : m_stop_state(std::move(stop_state)) {} private: shared_ptr<polyfill::stop_state> m_stop_state; @@ -218,16 +219,16 @@ public: C&& cb) noexcept(is_nothrow_constructible_v<Callback, C>) : m_stop_state(st.m_stop_state) { if (m_stop_state) { - m_callback = m_stop_state->insert_callback(move(cb)); + m_callback = m_stop_state->insert_callback(std::move(cb)); } } template <typename C> requires constructible_from<Callback, C> explicit stop_callback(stop_token&& st, C&& cb) noexcept(is_nothrow_constructible_v<Callback, C>) - : m_stop_state(move(st.m_stop_state)) { + : m_stop_state(std::move(st.m_stop_state)) { if (m_stop_state) { - m_callback = m_stop_state->insert_callback(move(cb)); + m_callback = m_stop_state->insert_callback(std::move(cb)); } } ~stop_callback() { @@ -260,7 +261,7 @@ public: typename = enable_if_t<!is_same_v<remove_cvref_t<F>, jthread>>> explicit jthread(F&& f, Args&&... args) : m_stop_state(make_shared<polyfill::stop_state>()), - m_thread(make_thread(move(f), move(args)...)) {} + m_thread(make_thread(std::forward<F>(f), std::forward<Args>(args)...)) {} ~jthread() { if (joinable()) { @@ -317,9 +318,9 @@ private: template <typename F, typename... Args> thread make_thread(F&& f, Args&&... args) { if constexpr (is_invocable_v<decay_t<F>, stop_token, decay_t<Args>...>) { - return thread(move(f), get_stop_token(), move(args)...); + return thread(std::forward<F>(f), get_stop_token(), std::forward<Args>(args)...); } else { - return thread(move(f), move(args)...); + return thread(std::forward<F>(f), std::forward<Args>(args)...); } } diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 3fde3cae6..98b43e49c 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -45,6 +45,7 @@ SWITCHABLE(CpuAccuracy, true); SWITCHABLE(FullscreenMode, true); SWITCHABLE(GpuAccuracy, true); SWITCHABLE(Language, true); +SWITCHABLE(MemoryLayout, true); SWITCHABLE(NvdecEmulation, false); SWITCHABLE(Region, true); SWITCHABLE(RendererBackend, true); @@ -61,6 +62,10 @@ SWITCHABLE(u32, false); SWITCHABLE(u8, false); SWITCHABLE(u8, true); +// Used in UISettings +// TODO see if we can move this to uisettings.cpp +SWITCHABLE(ConfirmStop, true); + #undef SETTING #undef SWITCHABLE #endif diff --git a/src/common/settings.h b/src/common/settings.h index 6a3fe47c9..9317075f7 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -67,6 +67,7 @@ SWITCHABLE(CpuAccuracy, true); SWITCHABLE(FullscreenMode, true); SWITCHABLE(GpuAccuracy, true); SWITCHABLE(Language, true); +SWITCHABLE(MemoryLayout, true); SWITCHABLE(NvdecEmulation, false); SWITCHABLE(Region, true); SWITCHABLE(RendererBackend, true); @@ -83,6 +84,10 @@ SWITCHABLE(u32, false); SWITCHABLE(u8, false); SWITCHABLE(u8, true); +// Used in UISettings +// TODO see if we can move this to uisettings.h +SWITCHABLE(ConfirmStop, true); + #undef SETTING #undef SWITCHABLE #endif diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index 815cafe15..11429d7a8 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -133,6 +133,8 @@ ENUM(CpuAccuracy, Auto, Accurate, Unsafe, Paranoid); ENUM(MemoryLayout, Memory_4Gb, Memory_6Gb, Memory_8Gb); +ENUM(ConfirmStop, Ask_Always, Ask_Based_On_Game, Ask_Never); + ENUM(FullscreenMode, Borderless, Exclusive); ENUM(NvdecEmulation, Off, Cpu, Gpu); diff --git a/src/common/stb.cpp b/src/common/stb.cpp new file mode 100644 index 000000000..d3b16665d --- /dev/null +++ b/src/common/stb.cpp @@ -0,0 +1,8 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#define STB_IMAGE_IMPLEMENTATION +#define STB_IMAGE_RESIZE_IMPLEMENTATION +#define STB_IMAGE_WRITE_IMPLEMENTATION + +#include "common/stb.h" diff --git a/src/common/stb.h b/src/common/stb.h new file mode 100644 index 000000000..e5c197c11 --- /dev/null +++ b/src/common/stb.h @@ -0,0 +1,8 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <stb_image.h> +#include <stb_image_resize.h> +#include <stb_image_write.h> diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index feab1653d..4c7aba3f5 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -135,6 +135,11 @@ std::u16string UTF8ToUTF16(std::string_view input) { return convert.from_bytes(input.data(), input.data() + input.size()); } +std::u32string UTF8ToUTF32(std::string_view input) { + std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> convert; + return convert.from_bytes(input.data(), input.data() + input.size()); +} + #ifdef _WIN32 static std::wstring CPToUTF16(u32 code_page, std::string_view input) { const auto size = diff --git a/src/common/string_util.h b/src/common/string_util.h index c351f1a0c..9da1ca4e9 100644 --- a/src/common/string_util.h +++ b/src/common/string_util.h @@ -38,6 +38,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _ [[nodiscard]] std::string UTF16ToUTF8(std::u16string_view input); [[nodiscard]] std::u16string UTF8ToUTF16(std::string_view input); +[[nodiscard]] std::u32string UTF8ToUTF32(std::string_view input); #ifdef _WIN32 [[nodiscard]] std::string UTF16ToUTF8(std::wstring_view input); diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 919e33af9..34cc1527b 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -11,6 +11,7 @@ #include <mach/mach.h> #elif defined(_WIN32) #include <windows.h> +#include "common/string_util.h" #else #if defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__) #include <pthread_np.h> @@ -82,29 +83,8 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { #ifdef _MSC_VER // Sets the debugger-visible name of the current thread. -// Uses trick documented in: -// https://docs.microsoft.com/en-us/visualstudio/debugger/how-to-set-a-thread-name-in-native-code void SetCurrentThreadName(const char* name) { - static const DWORD MS_VC_EXCEPTION = 0x406D1388; - -#pragma pack(push, 8) - struct THREADNAME_INFO { - DWORD dwType; // must be 0x1000 - LPCSTR szName; // pointer to name (in user addr space) - DWORD dwThreadID; // thread ID (-1=caller thread) - DWORD dwFlags; // reserved for future use, must be zero - } info; -#pragma pack(pop) - - info.dwType = 0x1000; - info.szName = name; - info.dwThreadID = std::numeric_limits<DWORD>::max(); - info.dwFlags = 0; - - __try { - RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), (ULONG_PTR*)&info); - } __except (EXCEPTION_CONTINUE_EXECUTION) { - } + SetThreadDescription(GetCurrentThread(), UTF8ToUTF16W(name).data()); } #else // !MSVC_VER, so must be POSIX threads diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 71e15ab4c..caca9a123 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -10,6 +10,10 @@ #include "common/x64/rdtsc.h" #endif +#if defined(ARCHITECTURE_arm64) && defined(__linux__) +#include "common/arm64/native_clock.h" +#endif + namespace Common { class StandardWallClock final : public WallClock { @@ -53,7 +57,7 @@ private: }; std::unique_ptr<WallClock> CreateOptimalClock() { -#ifdef ARCHITECTURE_x86_64 +#if defined(ARCHITECTURE_x86_64) const auto& caps = GetCPUCaps(); if (caps.invariant_tsc && caps.tsc_frequency >= std::nano::den) { @@ -64,6 +68,8 @@ std::unique_ptr<WallClock> CreateOptimalClock() { // - Is not more precise than 1 GHz (1ns resolution) return std::make_unique<StandardWallClock>(); } +#elif defined(ARCHITECTURE_arm64) && defined(__linux__) + return std::make_unique<Arm64::NativeClock>(); #else return std::make_unique<StandardWallClock>(); #endif diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e02ededfc..e4f499135 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -466,14 +466,18 @@ add_library(core STATIC hle/service/caps/caps_a.h hle/service/caps/caps_c.cpp hle/service/caps/caps_c.h - hle/service/caps/caps_u.cpp - hle/service/caps/caps_u.h + hle/service/caps/caps_manager.cpp + hle/service/caps/caps_manager.h + hle/service/caps/caps_result.h hle/service/caps/caps_sc.cpp hle/service/caps/caps_sc.h hle/service/caps/caps_ss.cpp hle/service/caps/caps_ss.h hle/service/caps/caps_su.cpp hle/service/caps/caps_su.h + hle/service/caps/caps_types.h + hle/service/caps/caps_u.cpp + hle/service/caps/caps_u.h hle/service/erpt/erpt.cpp hle/service/erpt/erpt.h hle/service/es/es.cpp diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp index 0c012f094..5e27dde58 100644 --- a/src/core/arm/arm_interface.cpp +++ b/src/core/arm/arm_interface.cpp @@ -86,9 +86,9 @@ void ARM_Interface::SymbolicateBacktrace(Core::System& system, std::vector<Backt std::map<std::string, Symbols::Symbols> symbols; for (const auto& module : modules) { - symbols.insert_or_assign( - module.second, Symbols::GetSymbols(module.first, system.ApplicationMemory(), - system.ApplicationProcess()->Is64BitProcess())); + symbols.insert_or_assign(module.second, + Symbols::GetSymbols(module.first, system.ApplicationMemory(), + system.ApplicationProcess()->Is64Bit())); } for (auto& entry : out) { diff --git a/src/core/core.cpp b/src/core/core.cpp index 08cbb8978..14d6c8c27 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -116,11 +116,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, } } - if (concat.empty()) { - return nullptr; - } - - return FileSys::ConcatenatedVfsFile::MakeConcatenatedFile(concat, dir->GetName()); + return FileSys::ConcatenatedVfsFile::MakeConcatenatedFile(dir->GetName(), + std::move(concat)); } if (Common::FS::IsDir(path)) { @@ -312,17 +309,10 @@ struct System::Impl { telemetry_session->AddInitialInfo(*app_loader, fs_controller, *content_provider); - // Create a resource limit for the process. - const auto physical_memory_size = - kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application); - auto* resource_limit = Kernel::CreateResourceLimitForProcess(system, physical_memory_size); - // Create the process. auto main_process = Kernel::KProcess::Create(system.Kernel()); - ASSERT(Kernel::KProcess::Initialize(main_process, system, "main", - Kernel::KProcess::ProcessType::Userland, resource_limit) - .IsSuccess()); Kernel::KProcess::Register(system.Kernel(), main_process); + kernel.AppendNewProcess(main_process); kernel.MakeApplicationProcess(main_process); const auto [load_result, load_parameters] = app_loader->Load(*main_process, system); if (load_result != Loader::ResultStatus::Success) { @@ -421,6 +411,7 @@ struct System::Impl { services->KillNVNFlinger(); } kernel.CloseServices(); + kernel.ShutdownCores(); services.reset(); service_manager.reset(); cheat_engine.reset(); @@ -432,7 +423,6 @@ struct System::Impl { gpu_core.reset(); host1x_core.reset(); perf_stats.reset(); - kernel.ShutdownCores(); cpu_manager.Shutdown(); debugger.reset(); kernel.Shutdown(); @@ -1078,6 +1068,10 @@ void System::ApplySettings() { impl->RefreshTime(); if (IsPoweredOn()) { + if (Settings::values.custom_rtc_enabled) { + const s64 posix_time{Settings::values.custom_rtc.GetValue()}; + GetTimeManager().UpdateLocalSystemClockTime(posix_time); + } Renderer().RefreshBaseSettings(); } } diff --git a/src/core/debugger/debugger.cpp b/src/core/debugger/debugger.cpp index a1589fecb..0e270eb50 100644 --- a/src/core/debugger/debugger.cpp +++ b/src/core/debugger/debugger.cpp @@ -258,20 +258,20 @@ private: Kernel::KScopedSchedulerLock sl{system.Kernel()}; // Put all threads to sleep on next scheduler round. - for (auto* thread : ThreadList()) { - thread->RequestSuspend(Kernel::SuspendType::Debug); + for (auto& thread : ThreadList()) { + thread.RequestSuspend(Kernel::SuspendType::Debug); } } void ResumeEmulation(Kernel::KThread* except = nullptr) { // Wake up all threads. - for (auto* thread : ThreadList()) { - if (thread == except) { + for (auto& thread : ThreadList()) { + if (std::addressof(thread) == except) { continue; } - thread->SetStepState(Kernel::StepState::NotStepping); - thread->Resume(Kernel::SuspendType::Debug); + thread.SetStepState(Kernel::StepState::NotStepping); + thread.Resume(Kernel::SuspendType::Debug); } } @@ -283,13 +283,17 @@ private: } void UpdateActiveThread() { - const auto& threads{ThreadList()}; - if (std::find(threads.begin(), threads.end(), state->active_thread) == threads.end()) { - state->active_thread = threads.front(); + auto& threads{ThreadList()}; + for (auto& thread : threads) { + if (std::addressof(thread) == state->active_thread) { + // Thread is still alive, no need to update. + return; + } } + state->active_thread = std::addressof(threads.front()); } - const std::list<Kernel::KThread*>& ThreadList() { + Kernel::KProcess::ThreadList& ThreadList() { return system.ApplicationProcess()->GetThreadList(); } diff --git a/src/core/debugger/gdbstub.cpp b/src/core/debugger/gdbstub.cpp index e55831f27..6f5f5156b 100644 --- a/src/core/debugger/gdbstub.cpp +++ b/src/core/debugger/gdbstub.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <atomic> +#include <codecvt> +#include <locale> #include <numeric> #include <optional> #include <thread> @@ -12,6 +14,7 @@ #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "common/string_util.h" #include "core/arm/arm_interface.h" #include "core/core.h" #include "core/debugger/gdbstub.h" @@ -68,10 +71,16 @@ static std::string EscapeGDB(std::string_view data) { } static std::string EscapeXML(std::string_view data) { + std::u32string converted = U"[Encoding error]"; + try { + converted = Common::UTF8ToUTF32(data); + } catch (std::range_error&) { + } + std::string escaped; escaped.reserve(data.size()); - for (char c : data) { + for (char32_t c : converted) { switch (c) { case '&': escaped += "&"; @@ -86,7 +95,11 @@ static std::string EscapeXML(std::string_view data) { escaped += ">"; break; default: - escaped += c; + if (c > 0x7f) { + escaped += fmt::format("&#{};", static_cast<u32>(c)); + } else { + escaped += static_cast<char>(c); + } break; } } @@ -96,7 +109,7 @@ static std::string EscapeXML(std::string_view data) { GDBStub::GDBStub(DebuggerBackend& backend_, Core::System& system_) : DebuggerFrontend(backend_), system{system_} { - if (system.ApplicationProcess()->Is64BitProcess()) { + if (system.ApplicationProcess()->Is64Bit()) { arch = std::make_unique<GDBStubA64>(); } else { arch = std::make_unique<GDBStubA32>(); @@ -433,10 +446,10 @@ void GDBStub::HandleBreakpointRemove(std::string_view command) { // See osdbg_thread_local_region.os.horizon.hpp and osdbg_thread_type.os.horizon.hpp static std::optional<std::string> GetNameFromThreadType32(Core::Memory::Memory& memory, - const Kernel::KThread* thread) { + const Kernel::KThread& thread) { // Read thread type from TLS - const VAddr tls_thread_type{memory.Read32(thread->GetTlsAddress() + 0x1fc)}; - const VAddr argument_thread_type{thread->GetArgument()}; + const VAddr tls_thread_type{memory.Read32(thread.GetTlsAddress() + 0x1fc)}; + const VAddr argument_thread_type{thread.GetArgument()}; if (argument_thread_type && tls_thread_type != argument_thread_type) { // Probably not created by nnsdk, no name available. @@ -464,10 +477,10 @@ static std::optional<std::string> GetNameFromThreadType32(Core::Memory::Memory& } static std::optional<std::string> GetNameFromThreadType64(Core::Memory::Memory& memory, - const Kernel::KThread* thread) { + const Kernel::KThread& thread) { // Read thread type from TLS - const VAddr tls_thread_type{memory.Read64(thread->GetTlsAddress() + 0x1f8)}; - const VAddr argument_thread_type{thread->GetArgument()}; + const VAddr tls_thread_type{memory.Read64(thread.GetTlsAddress() + 0x1f8)}; + const VAddr argument_thread_type{thread.GetArgument()}; if (argument_thread_type && tls_thread_type != argument_thread_type) { // Probably not created by nnsdk, no name available. @@ -495,16 +508,16 @@ static std::optional<std::string> GetNameFromThreadType64(Core::Memory::Memory& } static std::optional<std::string> GetThreadName(Core::System& system, - const Kernel::KThread* thread) { - if (system.ApplicationProcess()->Is64BitProcess()) { + const Kernel::KThread& thread) { + if (system.ApplicationProcess()->Is64Bit()) { return GetNameFromThreadType64(system.ApplicationMemory(), thread); } else { return GetNameFromThreadType32(system.ApplicationMemory(), thread); } } -static std::string_view GetThreadWaitReason(const Kernel::KThread* thread) { - switch (thread->GetWaitReasonForDebugging()) { +static std::string_view GetThreadWaitReason(const Kernel::KThread& thread) { + switch (thread.GetWaitReasonForDebugging()) { case Kernel::ThreadWaitReasonForDebugging::Sleep: return "Sleep"; case Kernel::ThreadWaitReasonForDebugging::IPC: @@ -522,8 +535,8 @@ static std::string_view GetThreadWaitReason(const Kernel::KThread* thread) { } } -static std::string GetThreadState(const Kernel::KThread* thread) { - switch (thread->GetState()) { +static std::string GetThreadState(const Kernel::KThread& thread) { + switch (thread.GetState()) { case Kernel::ThreadState::Initialized: return "Initialized"; case Kernel::ThreadState::Waiting: @@ -591,7 +604,7 @@ void GDBStub::HandleQuery(std::string_view command) { const auto& threads = system.ApplicationProcess()->GetThreadList(); std::vector<std::string> thread_ids; for (const auto& thread : threads) { - thread_ids.push_back(fmt::format("{:x}", thread->GetThreadId())); + thread_ids.push_back(fmt::format("{:x}", thread.GetThreadId())); } SendReply(fmt::format("m{}", fmt::join(thread_ids, ","))); } else if (command.starts_with("sThreadInfo")) { @@ -603,14 +616,14 @@ void GDBStub::HandleQuery(std::string_view command) { buffer += "<threads>"; const auto& threads = system.ApplicationProcess()->GetThreadList(); - for (const auto* thread : threads) { + for (const auto& thread : threads) { auto thread_name{GetThreadName(system, thread)}; if (!thread_name) { - thread_name = fmt::format("Thread {:d}", thread->GetThreadId()); + thread_name = fmt::format("Thread {:d}", thread.GetThreadId()); } buffer += fmt::format(R"(<thread id="{:x}" core="{:d}" name="{}">{}</thread>)", - thread->GetThreadId(), thread->GetActiveCore(), + thread.GetThreadId(), thread.GetActiveCore(), EscapeXML(*thread_name), GetThreadState(thread)); } @@ -809,11 +822,13 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) { const char i = True(mem_info.attribute & MemoryAttribute::IpcLocked) ? 'I' : '-'; const char d = True(mem_info.attribute & MemoryAttribute::DeviceShared) ? 'D' : '-'; const char u = True(mem_info.attribute & MemoryAttribute::Uncached) ? 'U' : '-'; + const char p = + True(mem_info.attribute & MemoryAttribute::PermissionLocked) ? 'P' : '-'; - reply += - fmt::format(" {:#012x} - {:#012x} {} {} {}{}{}{} [{}, {}]\n", - mem_info.base_address, mem_info.base_address + mem_info.size - 1, - perm, state, l, i, d, u, mem_info.ipc_count, mem_info.device_count); + reply += fmt::format(" {:#012x} - {:#012x} {} {} {}{}{}{}{} [{}, {}]\n", + mem_info.base_address, + mem_info.base_address + mem_info.size - 1, perm, state, l, i, + d, u, p, mem_info.ipc_count, mem_info.device_count); } const uintptr_t next_address = mem_info.base_address + mem_info.size; @@ -835,10 +850,10 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) { } Kernel::KThread* GDBStub::GetThreadByID(u64 thread_id) { - const auto& threads{system.ApplicationProcess()->GetThreadList()}; - for (auto* thread : threads) { - if (thread->GetThreadId() == thread_id) { - return thread; + auto& threads{system.ApplicationProcess()->GetThreadList()}; + for (auto& thread : threads) { + if (thread.GetThreadId() == thread_id) { + return std::addressof(thread); } } diff --git a/src/core/file_sys/fsmitm_romfsbuild.cpp b/src/core/file_sys/fsmitm_romfsbuild.cpp index e39c7b62b..f1d3e4129 100644 --- a/src/core/file_sys/fsmitm_romfsbuild.cpp +++ b/src/core/file_sys/fsmitm_romfsbuild.cpp @@ -107,62 +107,56 @@ static u64 romfs_get_hash_table_count(u64 num_entries) { void RomFSBuildContext::VisitDirectory(VirtualDir romfs_dir, VirtualDir ext_dir, std::shared_ptr<RomFSBuildDirectoryContext> parent) { - std::vector<std::shared_ptr<RomFSBuildDirectoryContext>> child_dirs; + for (auto& child_romfs_file : romfs_dir->GetFiles()) { + const auto name = child_romfs_file->GetName(); + const auto child = std::make_shared<RomFSBuildFileContext>(); + // Set child's path. + child->cur_path_ofs = parent->path_len + 1; + child->path_len = child->cur_path_ofs + static_cast<u32>(name.size()); + child->path = parent->path + "/" + name; + + if (ext_dir != nullptr && ext_dir->GetFile(name + ".stub") != nullptr) { + continue; + } - const auto entries = romfs_dir->GetEntries(); + // Sanity check on path_len + ASSERT(child->path_len < FS_MAX_PATH); - for (const auto& kv : entries) { - if (kv.second == VfsEntryType::Directory) { - const auto child = std::make_shared<RomFSBuildDirectoryContext>(); - // Set child's path. - child->cur_path_ofs = parent->path_len + 1; - child->path_len = child->cur_path_ofs + static_cast<u32>(kv.first.size()); - child->path = parent->path + "/" + kv.first; + child->source = std::move(child_romfs_file); - if (ext_dir != nullptr && ext_dir->GetFile(kv.first + ".stub") != nullptr) { - continue; + if (ext_dir != nullptr) { + if (const auto ips = ext_dir->GetFile(name + ".ips")) { + if (auto patched = PatchIPS(child->source, ips)) { + child->source = std::move(patched); + } } + } - // Sanity check on path_len - ASSERT(child->path_len < FS_MAX_PATH); - - if (AddDirectory(parent, child)) { - child_dirs.push_back(child); - } - } else { - const auto child = std::make_shared<RomFSBuildFileContext>(); - // Set child's path. - child->cur_path_ofs = parent->path_len + 1; - child->path_len = child->cur_path_ofs + static_cast<u32>(kv.first.size()); - child->path = parent->path + "/" + kv.first; - - if (ext_dir != nullptr && ext_dir->GetFile(kv.first + ".stub") != nullptr) { - continue; - } + child->size = child->source->GetSize(); - // Sanity check on path_len - ASSERT(child->path_len < FS_MAX_PATH); + AddFile(parent, child); + } - child->source = romfs_dir->GetFile(kv.first); + for (auto& child_romfs_dir : romfs_dir->GetSubdirectories()) { + const auto name = child_romfs_dir->GetName(); + const auto child = std::make_shared<RomFSBuildDirectoryContext>(); + // Set child's path. + child->cur_path_ofs = parent->path_len + 1; + child->path_len = child->cur_path_ofs + static_cast<u32>(name.size()); + child->path = parent->path + "/" + name; - if (ext_dir != nullptr) { - if (const auto ips = ext_dir->GetFile(kv.first + ".ips")) { - if (auto patched = PatchIPS(child->source, ips)) { - child->source = std::move(patched); - } - } - } + if (ext_dir != nullptr && ext_dir->GetFile(name + ".stub") != nullptr) { + continue; + } - child->size = child->source->GetSize(); + // Sanity check on path_len + ASSERT(child->path_len < FS_MAX_PATH); - AddFile(parent, child); + if (!AddDirectory(parent, child)) { + continue; } - } - for (auto& child : child_dirs) { - auto subdir_name = std::string_view(child->path).substr(child->cur_path_ofs); - auto child_romfs_dir = romfs_dir->GetSubdirectory(subdir_name); - auto child_ext_dir = ext_dir != nullptr ? ext_dir->GetSubdirectory(subdir_name) : nullptr; + auto child_ext_dir = ext_dir != nullptr ? ext_dir->GetSubdirectory(name) : nullptr; this->VisitDirectory(child_romfs_dir, child_ext_dir, child); } } @@ -293,7 +287,7 @@ std::multimap<u64, VirtualFile> RomFSBuildContext::Build() { cur_entry.name_size = name_size; - out.emplace(cur_file->offset + ROMFS_FILEPARTITION_OFS, cur_file->source); + out.emplace(cur_file->offset + ROMFS_FILEPARTITION_OFS, std::move(cur_file->source)); std::memcpy(file_table.data() + cur_file->entry_offset, &cur_entry, sizeof(RomFSFileEntry)); std::memset(file_table.data() + cur_file->entry_offset + sizeof(RomFSFileEntry), 0, Common::AlignUp(cur_entry.name_size, 4)); diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp index 8e475f25a..0bca05587 100644 --- a/src/core/file_sys/patch_manager.cpp +++ b/src/core/file_sys/patch_manager.cpp @@ -377,16 +377,16 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t auto romfs_dir = FindSubdirectoryCaseless(subdir, "romfs"); if (romfs_dir != nullptr) - layers.push_back(std::make_shared<CachedVfsDirectory>(romfs_dir)); + layers.emplace_back(std::make_shared<CachedVfsDirectory>(std::move(romfs_dir))); auto ext_dir = FindSubdirectoryCaseless(subdir, "romfs_ext"); if (ext_dir != nullptr) - layers_ext.push_back(std::make_shared<CachedVfsDirectory>(ext_dir)); + layers_ext.emplace_back(std::make_shared<CachedVfsDirectory>(std::move(ext_dir))); if (type == ContentRecordType::HtmlDocument) { auto manual_dir = FindSubdirectoryCaseless(subdir, "manual_html"); if (manual_dir != nullptr) - layers.push_back(std::make_shared<CachedVfsDirectory>(manual_dir)); + layers.emplace_back(std::make_shared<CachedVfsDirectory>(std::move(manual_dir))); } } @@ -400,7 +400,7 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t return; } - layers.push_back(std::move(extracted)); + layers.emplace_back(std::move(extracted)); auto layered = LayeredVfsDirectory::MakeLayeredDirectory(std::move(layers)); if (layered == nullptr) { diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp index f00479bd3..763a44fee 100644 --- a/src/core/file_sys/program_metadata.cpp +++ b/src/core/file_sys/program_metadata.cpp @@ -5,6 +5,7 @@ #include <vector> #include "common/logging/log.h" +#include "common/scope_exit.h" #include "core/file_sys/program_metadata.h" #include "core/file_sys/vfs.h" #include "core/loader/loader.h" @@ -95,17 +96,24 @@ Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) { return Loader::ResultStatus::Success; } +Loader::ResultStatus ProgramMetadata::Reload(VirtualFile file) { + const u64 original_program_id = aci_header.title_id; + SCOPE_EXIT({ aci_header.title_id = original_program_id; }); + + return this->Load(file); +} + /*static*/ ProgramMetadata ProgramMetadata::GetDefault() { - // Allow use of cores 0~3 and thread priorities 1~63. - constexpr u32 default_thread_info_capability = 0x30007F7; + // Allow use of cores 0~3 and thread priorities 16~63. + constexpr u32 default_thread_info_capability = 0x30043F7; ProgramMetadata result; result.LoadManual( true /*is_64_bit*/, FileSys::ProgramAddressSpaceType::Is39Bit /*address_space*/, - 0x2c /*main_thread_prio*/, 0 /*main_thread_core*/, 0x00100000 /*main_thread_stack_size*/, - 0 /*title_id*/, 0xFFFFFFFFFFFFFFFF /*filesystem_permissions*/, - 0x1FE00000 /*system_resource_size*/, {default_thread_info_capability} /*capabilities*/); + 0x2c /*main_thread_prio*/, 0 /*main_thread_core*/, 0x100000 /*main_thread_stack_size*/, + 0 /*title_id*/, 0xFFFFFFFFFFFFFFFF /*filesystem_permissions*/, 0 /*system_resource_size*/, + {default_thread_info_capability} /*capabilities*/); return result; } diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h index 2e8960b07..76ee97d78 100644 --- a/src/core/file_sys/program_metadata.h +++ b/src/core/file_sys/program_metadata.h @@ -56,6 +56,7 @@ public: static ProgramMetadata GetDefault(); Loader::ResultStatus Load(VirtualFile file); + Loader::ResultStatus Reload(VirtualFile file); /// Load from parameters instead of NPDM file, used for KIP void LoadManual(bool is_64_bit, ProgramAddressSpaceType address_space, s32 main_thread_prio, @@ -72,6 +73,9 @@ public: u64 GetFilesystemPermissions() const; u32 GetSystemResourceSize() const; const KernelCapabilityDescriptors& GetKernelCapabilities() const; + const std::array<u8, 0x10>& GetName() const { + return npdm_header.application_name; + } void Print() const; @@ -163,14 +167,14 @@ private: u32_le unk_size_2; }; - Header npdm_header; - AciHeader aci_header; - AcidHeader acid_header; + Header npdm_header{}; + AciHeader aci_header{}; + AcidHeader acid_header{}; - FileAccessControl acid_file_access; - FileAccessHeader aci_file_access; + FileAccessControl acid_file_access{}; + FileAccessHeader aci_file_access{}; - KernelCapabilityDescriptors aci_kernel_capabilities; + KernelCapabilityDescriptors aci_kernel_capabilities{}; }; } // namespace FileSys diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp index 04da93d5c..1cc77ad14 100644 --- a/src/core/file_sys/registered_cache.cpp +++ b/src/core/file_sys/registered_cache.cpp @@ -322,7 +322,8 @@ VirtualFile RegisteredCache::OpenFileOrDirectoryConcat(const VirtualDir& open_di return nullptr; } - return ConcatenatedVfsFile::MakeConcatenatedFile(concat, concat.front()->GetName()); + auto name = concat.front()->GetName(); + return ConcatenatedVfsFile::MakeConcatenatedFile(std::move(name), std::move(concat)); } VirtualFile RegisteredCache::GetFileAtID(NcaID id) const { diff --git a/src/core/file_sys/romfs.cpp b/src/core/file_sys/romfs.cpp index 614da2130..1c580de57 100644 --- a/src/core/file_sys/romfs.cpp +++ b/src/core/file_sys/romfs.cpp @@ -133,7 +133,7 @@ VirtualDir ExtractRomFS(VirtualFile file, RomFSExtractionType type) { out = out->GetSubdirectories().front(); } - return std::make_shared<CachedVfsDirectory>(out); + return std::make_shared<CachedVfsDirectory>(std::move(out)); } VirtualFile CreateRomFS(VirtualDir dir, VirtualDir ext) { @@ -141,8 +141,7 @@ VirtualFile CreateRomFS(VirtualDir dir, VirtualDir ext) { return nullptr; RomFSBuildContext ctx{dir, ext}; - auto file_map = ctx.Build(); - return ConcatenatedVfsFile::MakeConcatenatedFile(0, file_map, dir->GetName()); + return ConcatenatedVfsFile::MakeConcatenatedFile(0, dir->GetName(), ctx.Build()); } } // namespace FileSys diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp index bd493ecca..e4751c2b4 100644 --- a/src/core/file_sys/system_archive/system_version.cpp +++ b/src/core/file_sys/system_archive/system_version.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/logging/log.h" #include "core/file_sys/system_archive/system_version.h" #include "core/file_sys/vfs_vector.h" #include "core/hle/api_version.h" @@ -12,6 +13,9 @@ std::string GetLongDisplayVersion() { } VirtualDir SystemVersion() { + LOG_WARNING(Common_Filesystem, "called - Using hardcoded firmware version '{}'", + GetLongDisplayVersion()); + VirtualFile file = std::make_shared<VectorVfsFile>(std::vector<u8>(0x100), "file"); file->WriteObject(HLE::ApiVersion::HOS_VERSION_MAJOR, 0); file->WriteObject(HLE::ApiVersion::HOS_VERSION_MINOR, 1); diff --git a/src/core/file_sys/vfs_cached.cpp b/src/core/file_sys/vfs_cached.cpp index c3154ee81..7ee5300e5 100644 --- a/src/core/file_sys/vfs_cached.cpp +++ b/src/core/file_sys/vfs_cached.cpp @@ -6,13 +6,13 @@ namespace FileSys { -CachedVfsDirectory::CachedVfsDirectory(VirtualDir& source_dir) +CachedVfsDirectory::CachedVfsDirectory(VirtualDir&& source_dir) : name(source_dir->GetName()), parent(source_dir->GetParentDirectory()) { for (auto& dir : source_dir->GetSubdirectories()) { - dirs.emplace(dir->GetName(), std::make_shared<CachedVfsDirectory>(dir)); + dirs.emplace(dir->GetName(), std::make_shared<CachedVfsDirectory>(std::move(dir))); } for (auto& file : source_dir->GetFiles()) { - files.emplace(file->GetName(), file); + files.emplace(file->GetName(), std::move(file)); } } diff --git a/src/core/file_sys/vfs_cached.h b/src/core/file_sys/vfs_cached.h index 113acac12..1e5300784 100644 --- a/src/core/file_sys/vfs_cached.h +++ b/src/core/file_sys/vfs_cached.h @@ -11,7 +11,7 @@ namespace FileSys { class CachedVfsDirectory : public ReadOnlyVfsDirectory { public: - CachedVfsDirectory(VirtualDir& source_directory); + CachedVfsDirectory(VirtualDir&& source_directory); ~CachedVfsDirectory() override; VirtualFile GetFile(std::string_view file_name) const override; diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp index 311a59e5f..168b9cbec 100644 --- a/src/core/file_sys/vfs_concat.cpp +++ b/src/core/file_sys/vfs_concat.cpp @@ -10,7 +10,7 @@ namespace FileSys { -ConcatenatedVfsFile::ConcatenatedVfsFile(ConcatenationMap&& concatenation_map_, std::string&& name_) +ConcatenatedVfsFile::ConcatenatedVfsFile(std::string&& name_, ConcatenationMap&& concatenation_map_) : concatenation_map(std::move(concatenation_map_)), name(std::move(name_)) { DEBUG_ASSERT(this->VerifyContinuity()); } @@ -30,8 +30,8 @@ bool ConcatenatedVfsFile::VerifyContinuity() const { ConcatenatedVfsFile::~ConcatenatedVfsFile() = default; -VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(const std::vector<VirtualFile>& files, - std::string&& name) { +VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(std::string&& name, + std::vector<VirtualFile>&& files) { // Fold trivial cases. if (files.empty()) { return nullptr; @@ -46,20 +46,21 @@ VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(const std::vector<VirtualF u64 last_offset = 0; for (auto& file : files) { + const auto size = file->GetSize(); + concatenation_map.emplace_back(ConcatenationEntry{ .offset = last_offset, - .file = file, + .file = std::move(file), }); - last_offset += file->GetSize(); + last_offset += size; } - return VirtualFile(new ConcatenatedVfsFile(std::move(concatenation_map), std::move(name))); + return VirtualFile(new ConcatenatedVfsFile(std::move(name), std::move(concatenation_map))); } -VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, - const std::multimap<u64, VirtualFile>& files, - std::string&& name) { +VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, std::string&& name, + std::multimap<u64, VirtualFile>&& files) { // Fold trivial cases. if (files.empty()) { return nullptr; @@ -76,6 +77,8 @@ VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, // Iteration of a multimap is ordered, so offset will be strictly non-decreasing. for (auto& [offset, file] : files) { + const auto size = file->GetSize(); + if (offset > last_offset) { concatenation_map.emplace_back(ConcatenationEntry{ .offset = last_offset, @@ -85,13 +88,13 @@ VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, concatenation_map.emplace_back(ConcatenationEntry{ .offset = offset, - .file = file, + .file = std::move(file), }); - last_offset = offset + file->GetSize(); + last_offset = offset + size; } - return VirtualFile(new ConcatenatedVfsFile(std::move(concatenation_map), std::move(name))); + return VirtualFile(new ConcatenatedVfsFile(std::move(name), std::move(concatenation_map))); } std::string ConcatenatedVfsFile::GetName() const { diff --git a/src/core/file_sys/vfs_concat.h b/src/core/file_sys/vfs_concat.h index 6b329d545..cbddd12bd 100644 --- a/src/core/file_sys/vfs_concat.h +++ b/src/core/file_sys/vfs_concat.h @@ -24,22 +24,20 @@ private: }; using ConcatenationMap = std::vector<ConcatenationEntry>; - explicit ConcatenatedVfsFile(std::vector<ConcatenationEntry>&& concatenation_map, - std::string&& name); + explicit ConcatenatedVfsFile(std::string&& name, + std::vector<ConcatenationEntry>&& concatenation_map); bool VerifyContinuity() const; public: ~ConcatenatedVfsFile() override; /// Wrapper function to allow for more efficient handling of files.size() == 0, 1 cases. - static VirtualFile MakeConcatenatedFile(const std::vector<VirtualFile>& files, - std::string&& name); + static VirtualFile MakeConcatenatedFile(std::string&& name, std::vector<VirtualFile>&& files); /// Convenience function that turns a map of offsets to files into a concatenated file, filling /// gaps with a given filler byte. - static VirtualFile MakeConcatenatedFile(u8 filler_byte, - const std::multimap<u64, VirtualFile>& files, - std::string&& name); + static VirtualFile MakeConcatenatedFile(u8 filler_byte, std::string&& name, + std::multimap<u64, VirtualFile>&& files); std::string GetName() const override; std::size_t GetSize() const override; diff --git a/src/core/file_sys/vfs_layered.cpp b/src/core/file_sys/vfs_layered.cpp index 3e6426afc..08daca397 100644 --- a/src/core/file_sys/vfs_layered.cpp +++ b/src/core/file_sys/vfs_layered.cpp @@ -38,7 +38,7 @@ VirtualDir LayeredVfsDirectory::GetDirectoryRelative(std::string_view path) cons for (const auto& layer : dirs) { auto dir = layer->GetDirectoryRelative(path); if (dir != nullptr) { - out.push_back(std::move(dir)); + out.emplace_back(std::move(dir)); } } @@ -62,11 +62,11 @@ std::vector<VirtualFile> LayeredVfsDirectory::GetFiles() const { std::set<std::string, std::less<>> out_names; for (const auto& layer : dirs) { - for (const auto& file : layer->GetFiles()) { + for (auto& file : layer->GetFiles()) { auto file_name = file->GetName(); if (!out_names.contains(file_name)) { out_names.emplace(std::move(file_name)); - out.push_back(file); + out.emplace_back(std::move(file)); } } } @@ -86,7 +86,7 @@ std::vector<VirtualDir> LayeredVfsDirectory::GetSubdirectories() const { std::vector<VirtualDir> out; out.reserve(names.size()); for (const auto& subdir : names) - out.push_back(GetSubdirectory(subdir)); + out.emplace_back(GetSubdirectory(subdir)); return out; } diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp index 4cfdf4558..59364efa1 100644 --- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp +++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp @@ -8,7 +8,11 @@ #include "core/hle/kernel/board/nintendo/nx/k_system_control.h" #include "core/hle/kernel/board/nintendo/nx/secure_monitor.h" +#include "core/hle/kernel/k_memory_manager.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_trace.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/svc_results.h" namespace Kernel::Board::Nintendo::Nx { @@ -30,6 +34,8 @@ constexpr const std::size_t RequiredNonSecureSystemMemorySize = constexpr const std::size_t RequiredNonSecureSystemMemorySizeWithFatal = RequiredNonSecureSystemMemorySize + impl::RequiredNonSecureSystemMemorySizeViFatal; +constexpr const std::size_t SecureAlignment = 128_KiB; + namespace { using namespace Common::Literals; @@ -183,4 +189,57 @@ u64 KSystemControl::GenerateRandomRange(u64 min, u64 max) { return GenerateUniformRange(min, max, GenerateRandomU64); } +size_t KSystemControl::CalculateRequiredSecureMemorySize(size_t size, u32 pool) { + if (pool == static_cast<u32>(KMemoryManager::Pool::Applet)) { + return 0; + } else { + // return KSystemControlBase::CalculateRequiredSecureMemorySize(size, pool); + return size; + } +} + +Result KSystemControl::AllocateSecureMemory(KernelCore& kernel, KVirtualAddress* out, size_t size, + u32 pool) { + // Applet secure memory is handled separately. + UNIMPLEMENTED_IF(pool == static_cast<u32>(KMemoryManager::Pool::Applet)); + + // Ensure the size is aligned. + const size_t alignment = + (pool == static_cast<u32>(KMemoryManager::Pool::System) ? PageSize : SecureAlignment); + R_UNLESS(Common::IsAligned(size, alignment), ResultInvalidSize); + + // Allocate the memory. + const size_t num_pages = size / PageSize; + const KPhysicalAddress paddr = kernel.MemoryManager().AllocateAndOpenContinuous( + num_pages, alignment / PageSize, + KMemoryManager::EncodeOption(static_cast<KMemoryManager::Pool>(pool), + KMemoryManager::Direction::FromFront)); + R_UNLESS(paddr != 0, ResultOutOfMemory); + + // Ensure we don't leak references to the memory on error. + ON_RESULT_FAILURE { + kernel.MemoryManager().Close(paddr, num_pages); + }; + + // We succeeded. + *out = KPageTable::GetHeapVirtualAddress(kernel.MemoryLayout(), paddr); + R_SUCCEED(); +} + +void KSystemControl::FreeSecureMemory(KernelCore& kernel, KVirtualAddress address, size_t size, + u32 pool) { + // Applet secure memory is handled separately. + UNIMPLEMENTED_IF(pool == static_cast<u32>(KMemoryManager::Pool::Applet)); + + // Ensure the size is aligned. + const size_t alignment = + (pool == static_cast<u32>(KMemoryManager::Pool::System) ? PageSize : SecureAlignment); + ASSERT(Common::IsAligned(GetInteger(address), alignment)); + ASSERT(Common::IsAligned(size, alignment)); + + // Close the secure region's pages. + kernel.MemoryManager().Close(KPageTable::GetHeapPhysicalAddress(kernel.MemoryLayout(), address), + size / PageSize); +} + } // namespace Kernel::Board::Nintendo::Nx diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h index b477e8193..ff1feec70 100644 --- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h +++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h @@ -4,6 +4,11 @@ #pragma once #include "core/hle/kernel/k_typed_address.h" +#include "core/hle/result.h" + +namespace Kernel { +class KernelCore; +} namespace Kernel::Board::Nintendo::Nx { @@ -25,8 +30,16 @@ public: static std::size_t GetMinimumNonSecureSystemPoolSize(); }; + // Randomness. static u64 GenerateRandomRange(u64 min, u64 max); static u64 GenerateRandomU64(); + + // Secure Memory. + static size_t CalculateRequiredSecureMemorySize(size_t size, u32 pool); + static Result AllocateSecureMemory(KernelCore& kernel, KVirtualAddress* out, size_t size, + u32 pool); + static void FreeSecureMemory(KernelCore& kernel, KVirtualAddress address, size_t size, + u32 pool); }; } // namespace Kernel::Board::Nintendo::Nx diff --git a/src/core/hle/kernel/init/init_slab_setup.cpp b/src/core/hle/kernel/init/init_slab_setup.cpp index 1f2db673c..a0e20bbbb 100644 --- a/src/core/hle/kernel/init/init_slab_setup.cpp +++ b/src/core/hle/kernel/init/init_slab_setup.cpp @@ -106,7 +106,7 @@ static_assert(KernelPageBufferAdditionalSize == /// memory. static KPhysicalAddress TranslateSlabAddrToPhysical(KMemoryLayout& memory_layout, KVirtualAddress slab_addr) { - slab_addr -= GetInteger(memory_layout.GetSlabRegionAddress()); + slab_addr -= memory_layout.GetSlabRegion().GetAddress(); return GetInteger(slab_addr) + Core::DramMemoryMap::SlabHeapBase; } @@ -196,7 +196,12 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) { auto& kernel = system.Kernel(); // Get the start of the slab region, since that's where we'll be working. - KVirtualAddress address = memory_layout.GetSlabRegionAddress(); + const KMemoryRegion& slab_region = memory_layout.GetSlabRegion(); + KVirtualAddress address = slab_region.GetAddress(); + + // Clear the slab region. + // TODO: implement access to kernel VAs. + // std::memset(device_ptr, 0, slab_region.GetSize()); // Initialize slab type array to be in sorted order. std::array<KSlabType, KSlabType_Count> slab_types; diff --git a/src/core/hle/kernel/initial_process.h b/src/core/hle/kernel/initial_process.h index 82195f4f7..2c95269fc 100644 --- a/src/core/hle/kernel/initial_process.h +++ b/src/core/hle/kernel/initial_process.h @@ -19,4 +19,8 @@ static inline KPhysicalAddress GetInitialProcessBinaryPhysicalAddress() { MainMemoryAddress); } +static inline size_t GetInitialProcessBinarySize() { + return InitialProcessBinarySizeMax; +} + } // namespace Kernel diff --git a/src/core/hle/kernel/k_capabilities.h b/src/core/hle/kernel/k_capabilities.h index de766c811..ebd4eedb1 100644 --- a/src/core/hle/kernel/k_capabilities.h +++ b/src/core/hle/kernel/k_capabilities.h @@ -200,8 +200,8 @@ private: RawCapabilityValue raw; BitField<0, 15, CapabilityType> id; - BitField<15, 4, u32> major_version; - BitField<19, 13, u32> minor_version; + BitField<15, 4, u32> minor_version; + BitField<19, 13, u32> major_version; }; union HandleTable { diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp index efbac0e6a..7633a51fb 100644 --- a/src/core/hle/kernel/k_condition_variable.cpp +++ b/src/core/hle/kernel/k_condition_variable.cpp @@ -107,12 +107,12 @@ KConditionVariable::KConditionVariable(Core::System& system) KConditionVariable::~KConditionVariable() = default; -Result KConditionVariable::SignalToAddress(KProcessAddress addr) { - KThread* owner_thread = GetCurrentThreadPointer(m_kernel); +Result KConditionVariable::SignalToAddress(KernelCore& kernel, KProcessAddress addr) { + KThread* owner_thread = GetCurrentThreadPointer(kernel); // Signal the address. { - KScopedSchedulerLock sl(m_kernel); + KScopedSchedulerLock sl(kernel); // Remove waiter thread. bool has_waiters{}; @@ -133,7 +133,7 @@ Result KConditionVariable::SignalToAddress(KProcessAddress addr) { // Write the value to userspace. Result result{ResultSuccess}; - if (WriteToUser(m_kernel, addr, std::addressof(next_value))) [[likely]] { + if (WriteToUser(kernel, addr, std::addressof(next_value))) [[likely]] { result = ResultSuccess; } else { result = ResultInvalidCurrentMemory; @@ -148,28 +148,28 @@ Result KConditionVariable::SignalToAddress(KProcessAddress addr) { } } -Result KConditionVariable::WaitForAddress(Handle handle, KProcessAddress addr, u32 value) { - KThread* cur_thread = GetCurrentThreadPointer(m_kernel); - ThreadQueueImplForKConditionVariableWaitForAddress wait_queue(m_kernel); +Result KConditionVariable::WaitForAddress(KernelCore& kernel, Handle handle, KProcessAddress addr, + u32 value) { + KThread* cur_thread = GetCurrentThreadPointer(kernel); + ThreadQueueImplForKConditionVariableWaitForAddress wait_queue(kernel); // Wait for the address. KThread* owner_thread{}; { - KScopedSchedulerLock sl(m_kernel); + KScopedSchedulerLock sl(kernel); // Check if the thread should terminate. R_UNLESS(!cur_thread->IsTerminationRequested(), ResultTerminationRequested); // Read the tag from userspace. u32 test_tag{}; - R_UNLESS(ReadFromUser(m_kernel, std::addressof(test_tag), addr), - ResultInvalidCurrentMemory); + R_UNLESS(ReadFromUser(kernel, std::addressof(test_tag), addr), ResultInvalidCurrentMemory); // If the tag isn't the handle (with wait mask), we're done. R_SUCCEED_IF(test_tag != (handle | Svc::HandleWaitMask)); // Get the lock owner thread. - owner_thread = GetCurrentProcess(m_kernel) + owner_thread = GetCurrentProcess(kernel) .GetHandleTable() .GetObjectWithoutPseudoHandle<KThread>(handle) .ReleasePointerUnsafe(); diff --git a/src/core/hle/kernel/k_condition_variable.h b/src/core/hle/kernel/k_condition_variable.h index 8c2f3ae51..2620c8e39 100644 --- a/src/core/hle/kernel/k_condition_variable.h +++ b/src/core/hle/kernel/k_condition_variable.h @@ -24,11 +24,12 @@ public: explicit KConditionVariable(Core::System& system); ~KConditionVariable(); - // Arbitration - Result SignalToAddress(KProcessAddress addr); - Result WaitForAddress(Handle handle, KProcessAddress addr, u32 value); + // Arbitration. + static Result SignalToAddress(KernelCore& kernel, KProcessAddress addr); + static Result WaitForAddress(KernelCore& kernel, Handle handle, KProcessAddress addr, + u32 value); - // Condition variable + // Condition variable. void Signal(u64 cv_key, s32 count); Result Wait(KProcessAddress addr, u64 key, u32 value, s64 timeout); diff --git a/src/core/hle/kernel/k_interrupt_manager.cpp b/src/core/hle/kernel/k_interrupt_manager.cpp index fe6a20168..22d79569a 100644 --- a/src/core/hle/kernel/k_interrupt_manager.cpp +++ b/src/core/hle/kernel/k_interrupt_manager.cpp @@ -22,7 +22,7 @@ void HandleInterrupt(KernelCore& kernel, s32 core_id) { KScopedSchedulerLock sl{kernel}; // Pin the current thread. - process->PinCurrentThread(core_id); + process->PinCurrentThread(); // Set the interrupt flag for the thread. GetCurrentThread(kernel).SetInterruptFlag(); diff --git a/src/core/hle/kernel/k_memory_block.h b/src/core/hle/kernel/k_memory_block.h index 41a29da24..ef3f61321 100644 --- a/src/core/hle/kernel/k_memory_block.h +++ b/src/core/hle/kernel/k_memory_block.h @@ -36,6 +36,7 @@ enum class KMemoryState : u32 { FlagCanChangeAttribute = (1 << 24), FlagCanCodeMemory = (1 << 25), FlagLinearMapped = (1 << 26), + FlagCanPermissionLock = (1 << 27), FlagsData = FlagCanReprotect | FlagCanUseIpc | FlagCanUseNonDeviceIpc | FlagCanUseNonSecureIpc | FlagMapped | FlagCanAlias | FlagCanTransfer | FlagCanQueryPhysical | @@ -50,12 +51,16 @@ enum class KMemoryState : u32 { FlagLinearMapped, Free = static_cast<u32>(Svc::MemoryState::Free), - Io = static_cast<u32>(Svc::MemoryState::Io) | FlagMapped | FlagCanDeviceMap | - FlagCanAlignedDeviceMap, + + IoMemory = static_cast<u32>(Svc::MemoryState::Io) | FlagMapped | FlagCanDeviceMap | + FlagCanAlignedDeviceMap, + IoRegister = + static_cast<u32>(Svc::MemoryState::Io) | FlagCanDeviceMap | FlagCanAlignedDeviceMap, + Static = static_cast<u32>(Svc::MemoryState::Static) | FlagMapped | FlagCanQueryPhysical, Code = static_cast<u32>(Svc::MemoryState::Code) | FlagsCode | FlagCanMapProcess, CodeData = static_cast<u32>(Svc::MemoryState::CodeData) | FlagsData | FlagCanMapProcess | - FlagCanCodeMemory, + FlagCanCodeMemory | FlagCanPermissionLock, Normal = static_cast<u32>(Svc::MemoryState::Normal) | FlagsData | FlagCanCodeMemory, Shared = static_cast<u32>(Svc::MemoryState::Shared) | FlagMapped | FlagReferenceCounted | FlagLinearMapped, @@ -65,7 +70,8 @@ enum class KMemoryState : u32 { AliasCode = static_cast<u32>(Svc::MemoryState::AliasCode) | FlagsCode | FlagCanMapProcess | FlagCanCodeAlias, AliasCodeData = static_cast<u32>(Svc::MemoryState::AliasCodeData) | FlagsData | - FlagCanMapProcess | FlagCanCodeAlias | FlagCanCodeMemory, + FlagCanMapProcess | FlagCanCodeAlias | FlagCanCodeMemory | + FlagCanPermissionLock, Ipc = static_cast<u32>(Svc::MemoryState::Ipc) | FlagsMisc | FlagCanAlignedDeviceMap | FlagCanUseIpc | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, @@ -73,7 +79,7 @@ enum class KMemoryState : u32 { Stack = static_cast<u32>(Svc::MemoryState::Stack) | FlagsMisc | FlagCanAlignedDeviceMap | FlagCanUseIpc | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, - ThreadLocal = static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagLinearMapped, + ThreadLocal = static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagLinearMapped, Transfered = static_cast<u32>(Svc::MemoryState::Transfered) | FlagsMisc | FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc | @@ -94,7 +100,7 @@ enum class KMemoryState : u32 { NonDeviceIpc = static_cast<u32>(Svc::MemoryState::NonDeviceIpc) | FlagsMisc | FlagCanUseNonDeviceIpc, - Kernel = static_cast<u32>(Svc::MemoryState::Kernel) | FlagMapped, + Kernel = static_cast<u32>(Svc::MemoryState::Kernel), GeneratedCode = static_cast<u32>(Svc::MemoryState::GeneratedCode) | FlagMapped | FlagReferenceCounted | FlagCanDebug | FlagLinearMapped, @@ -105,34 +111,36 @@ enum class KMemoryState : u32 { Insecure = static_cast<u32>(Svc::MemoryState::Insecure) | FlagMapped | FlagReferenceCounted | FlagLinearMapped | FlagCanChangeAttribute | FlagCanDeviceMap | - FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, + FlagCanAlignedDeviceMap | FlagCanQueryPhysical | FlagCanUseNonSecureIpc | + FlagCanUseNonDeviceIpc, }; DECLARE_ENUM_FLAG_OPERATORS(KMemoryState); static_assert(static_cast<u32>(KMemoryState::Free) == 0x00000000); -static_assert(static_cast<u32>(KMemoryState::Io) == 0x00182001); +static_assert(static_cast<u32>(KMemoryState::IoMemory) == 0x00182001); +static_assert(static_cast<u32>(KMemoryState::IoRegister) == 0x00180001); static_assert(static_cast<u32>(KMemoryState::Static) == 0x00042002); static_assert(static_cast<u32>(KMemoryState::Code) == 0x04DC7E03); -static_assert(static_cast<u32>(KMemoryState::CodeData) == 0x07FEBD04); +static_assert(static_cast<u32>(KMemoryState::CodeData) == 0x0FFEBD04); static_assert(static_cast<u32>(KMemoryState::Normal) == 0x077EBD05); static_assert(static_cast<u32>(KMemoryState::Shared) == 0x04402006); static_assert(static_cast<u32>(KMemoryState::AliasCode) == 0x04DD7E08); -static_assert(static_cast<u32>(KMemoryState::AliasCodeData) == 0x07FFBD09); +static_assert(static_cast<u32>(KMemoryState::AliasCodeData) == 0x0FFFBD09); static_assert(static_cast<u32>(KMemoryState::Ipc) == 0x045C3C0A); static_assert(static_cast<u32>(KMemoryState::Stack) == 0x045C3C0B); -static_assert(static_cast<u32>(KMemoryState::ThreadLocal) == 0x0400200C); +static_assert(static_cast<u32>(KMemoryState::ThreadLocal) == 0x0400000C); static_assert(static_cast<u32>(KMemoryState::Transfered) == 0x055C3C0D); static_assert(static_cast<u32>(KMemoryState::SharedTransfered) == 0x045C380E); static_assert(static_cast<u32>(KMemoryState::SharedCode) == 0x0440380F); static_assert(static_cast<u32>(KMemoryState::Inaccessible) == 0x00000010); static_assert(static_cast<u32>(KMemoryState::NonSecureIpc) == 0x045C3811); static_assert(static_cast<u32>(KMemoryState::NonDeviceIpc) == 0x044C2812); -static_assert(static_cast<u32>(KMemoryState::Kernel) == 0x00002013); +static_assert(static_cast<u32>(KMemoryState::Kernel) == 0x00000013); static_assert(static_cast<u32>(KMemoryState::GeneratedCode) == 0x04402214); static_assert(static_cast<u32>(KMemoryState::CodeOut) == 0x04402015); static_assert(static_cast<u32>(KMemoryState::Coverage) == 0x00002016); -static_assert(static_cast<u32>(KMemoryState::Insecure) == 0x05583817); +static_assert(static_cast<u32>(KMemoryState::Insecure) == 0x055C3817); enum class KMemoryPermission : u8 { None = 0, @@ -182,8 +190,9 @@ enum class KMemoryAttribute : u8 { IpcLocked = static_cast<u8>(Svc::MemoryAttribute::IpcLocked), DeviceShared = static_cast<u8>(Svc::MemoryAttribute::DeviceShared), Uncached = static_cast<u8>(Svc::MemoryAttribute::Uncached), + PermissionLocked = static_cast<u8>(Svc::MemoryAttribute::PermissionLocked), - SetMask = Uncached, + SetMask = Uncached | PermissionLocked, }; DECLARE_ENUM_FLAG_OPERATORS(KMemoryAttribute); @@ -261,6 +270,10 @@ struct KMemoryInfo { return m_state; } + constexpr Svc::MemoryState GetSvcState() const { + return static_cast<Svc::MemoryState>(m_state & KMemoryState::Mask); + } + constexpr KMemoryPermission GetPermission() const { return m_permission; } @@ -326,6 +339,10 @@ public: return this->GetEndAddress() - 1; } + constexpr KMemoryState GetState() const { + return m_memory_state; + } + constexpr u16 GetIpcLockCount() const { return m_ipc_lock_count; } @@ -443,6 +460,13 @@ public: } } + constexpr void UpdateAttribute(KMemoryAttribute mask, KMemoryAttribute attr) { + ASSERT(False(mask & KMemoryAttribute::IpcLocked)); + ASSERT(False(mask & KMemoryAttribute::DeviceShared)); + + m_attribute = (m_attribute & ~mask) | attr; + } + constexpr void Split(KMemoryBlock* block, KProcessAddress addr) { ASSERT(this->GetAddress() < addr); ASSERT(this->Contains(addr)); diff --git a/src/core/hle/kernel/k_memory_block_manager.cpp b/src/core/hle/kernel/k_memory_block_manager.cpp index ab75f550e..58a1e7216 100644 --- a/src/core/hle/kernel/k_memory_block_manager.cpp +++ b/src/core/hle/kernel/k_memory_block_manager.cpp @@ -160,8 +160,8 @@ void KMemoryBlockManager::Update(KMemoryBlockManagerUpdateAllocator* allocator, } // Update block state. - it->Update(state, perm, attr, cur_address == address, static_cast<u8>(set_disable_attr), - static_cast<u8>(clear_disable_attr)); + it->Update(state, perm, attr, it->GetAddress() == address, + static_cast<u8>(set_disable_attr), static_cast<u8>(clear_disable_attr)); cur_address += cur_info.GetSize(); remaining_pages -= cur_info.GetNumPages(); } @@ -175,7 +175,9 @@ void KMemoryBlockManager::UpdateIfMatch(KMemoryBlockManagerUpdateAllocator* allo KProcessAddress address, size_t num_pages, KMemoryState test_state, KMemoryPermission test_perm, KMemoryAttribute test_attr, KMemoryState state, - KMemoryPermission perm, KMemoryAttribute attr) { + KMemoryPermission perm, KMemoryAttribute attr, + KMemoryBlockDisableMergeAttribute set_disable_attr, + KMemoryBlockDisableMergeAttribute clear_disable_attr) { // Ensure for auditing that we never end up with an invalid tree. KScopedMemoryBlockManagerAuditor auditor(this); ASSERT(Common::IsAligned(GetInteger(address), PageSize)); @@ -214,7 +216,8 @@ void KMemoryBlockManager::UpdateIfMatch(KMemoryBlockManagerUpdateAllocator* allo } // Update block state. - it->Update(state, perm, attr, false, 0, 0); + it->Update(state, perm, attr, false, static_cast<u8>(set_disable_attr), + static_cast<u8>(clear_disable_attr)); cur_address += cur_info.GetSize(); remaining_pages -= cur_info.GetNumPages(); } else { @@ -284,6 +287,65 @@ void KMemoryBlockManager::UpdateLock(KMemoryBlockManagerUpdateAllocator* allocat this->CoalesceForUpdate(allocator, address, num_pages); } +void KMemoryBlockManager::UpdateAttribute(KMemoryBlockManagerUpdateAllocator* allocator, + KProcessAddress address, size_t num_pages, + KMemoryAttribute mask, KMemoryAttribute attr) { + // Ensure for auditing that we never end up with an invalid tree. + KScopedMemoryBlockManagerAuditor auditor(this); + ASSERT(Common::IsAligned(GetInteger(address), PageSize)); + + KProcessAddress cur_address = address; + size_t remaining_pages = num_pages; + iterator it = this->FindIterator(address); + + while (remaining_pages > 0) { + const size_t remaining_size = remaining_pages * PageSize; + KMemoryInfo cur_info = it->GetMemoryInfo(); + + if ((it->GetAttribute() & mask) != attr) { + // If we need to, create a new block before and insert it. + if (cur_info.GetAddress() != GetInteger(cur_address)) { + KMemoryBlock* new_block = allocator->Allocate(); + + it->Split(new_block, cur_address); + it = m_memory_block_tree.insert(*new_block); + it++; + + cur_info = it->GetMemoryInfo(); + cur_address = cur_info.GetAddress(); + } + + // If we need to, create a new block after and insert it. + if (cur_info.GetSize() > remaining_size) { + KMemoryBlock* new_block = allocator->Allocate(); + + it->Split(new_block, cur_address + remaining_size); + it = m_memory_block_tree.insert(*new_block); + + cur_info = it->GetMemoryInfo(); + } + + // Update block state. + it->UpdateAttribute(mask, attr); + cur_address += cur_info.GetSize(); + remaining_pages -= cur_info.GetNumPages(); + } else { + // If we already have the right attributes, just advance. + if (cur_address + remaining_size < cur_info.GetEndAddress()) { + remaining_pages = 0; + cur_address += remaining_size; + } else { + remaining_pages = + (cur_address + remaining_size - cur_info.GetEndAddress()) / PageSize; + cur_address = cur_info.GetEndAddress(); + } + } + it++; + } + + this->CoalesceForUpdate(allocator, address, num_pages); +} + // Debug. bool KMemoryBlockManager::CheckState() const { // Loop over every block, ensuring that we are sorted and coalesced. diff --git a/src/core/hle/kernel/k_memory_block_manager.h b/src/core/hle/kernel/k_memory_block_manager.h index 96496e990..cb7b6f430 100644 --- a/src/core/hle/kernel/k_memory_block_manager.h +++ b/src/core/hle/kernel/k_memory_block_manager.h @@ -115,7 +115,11 @@ public: void UpdateIfMatch(KMemoryBlockManagerUpdateAllocator* allocator, KProcessAddress address, size_t num_pages, KMemoryState test_state, KMemoryPermission test_perm, KMemoryAttribute test_attr, KMemoryState state, KMemoryPermission perm, - KMemoryAttribute attr); + KMemoryAttribute attr, KMemoryBlockDisableMergeAttribute set_disable_attr, + KMemoryBlockDisableMergeAttribute clear_disable_attr); + + void UpdateAttribute(KMemoryBlockManagerUpdateAllocator* allocator, KProcessAddress address, + size_t num_pages, KMemoryAttribute mask, KMemoryAttribute attr); iterator FindIterator(KProcessAddress address) const { return m_memory_block_tree.find(KMemoryBlock( diff --git a/src/core/hle/kernel/k_memory_layout.h b/src/core/hle/kernel/k_memory_layout.h index 54a71df56..c8122644f 100644 --- a/src/core/hle/kernel/k_memory_layout.h +++ b/src/core/hle/kernel/k_memory_layout.h @@ -137,11 +137,9 @@ public: return GetStackTopAddress(core_id, KMemoryRegionType_KernelMiscExceptionStack); } - KVirtualAddress GetSlabRegionAddress() const { - return Dereference(GetVirtualMemoryRegionTree().FindByType(KMemoryRegionType_KernelSlab)) - .GetAddress(); + const KMemoryRegion& GetSlabRegion() const { + return Dereference(GetVirtualMemoryRegionTree().FindByType(KMemoryRegionType_KernelSlab)); } - const KMemoryRegion& GetDeviceRegion(KMemoryRegionType type) const { return Dereference(GetPhysicalMemoryRegionTree().FindFirstDerived(type)); } diff --git a/src/core/hle/kernel/k_memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp index 74d8169e0..cdc5572d8 100644 --- a/src/core/hle/kernel/k_memory_manager.cpp +++ b/src/core/hle/kernel/k_memory_manager.cpp @@ -11,6 +11,7 @@ #include "core/hle/kernel/initial_process.h" #include "core/hle/kernel/k_memory_manager.h" #include "core/hle/kernel/k_page_group.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/svc_results.h" @@ -119,7 +120,8 @@ void KMemoryManager::Initialize(KVirtualAddress management_region, size_t manage // Free each region to its corresponding heap. size_t reserved_sizes[MaxManagerCount] = {}; const KPhysicalAddress ini_start = GetInitialProcessBinaryPhysicalAddress(); - const KPhysicalAddress ini_end = ini_start + InitialProcessBinarySizeMax; + const size_t ini_size = GetInitialProcessBinarySize(); + const KPhysicalAddress ini_end = ini_start + ini_size; const KPhysicalAddress ini_last = ini_end - 1; for (const auto& it : m_system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) { if (it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) { @@ -137,13 +139,13 @@ void KMemoryManager::Initialize(KVirtualAddress management_region, size_t manage } // Open/reserve the ini memory. - manager.OpenFirst(ini_start, InitialProcessBinarySizeMax / PageSize); - reserved_sizes[it.GetAttributes()] += InitialProcessBinarySizeMax; + manager.OpenFirst(ini_start, ini_size / PageSize); + reserved_sizes[it.GetAttributes()] += ini_size; // Free memory after the ini to the heap. if (ini_last != cur_last) { ASSERT(cur_end != 0); - manager.Free(ini_end, cur_end - ini_end); + manager.Free(ini_end, (cur_end - ini_end) / PageSize); } } else { // Ensure there's no partial overlap with the ini image. @@ -167,11 +169,37 @@ void KMemoryManager::Initialize(KVirtualAddress management_region, size_t manage } Result KMemoryManager::InitializeOptimizedMemory(u64 process_id, Pool pool) { - UNREACHABLE(); + const u32 pool_index = static_cast<u32>(pool); + + // Lock the pool. + KScopedLightLock lk(m_pool_locks[pool_index]); + + // Check that we don't already have an optimized process. + R_UNLESS(!m_has_optimized_process[pool_index], ResultBusy); + + // Set the optimized process id. + m_optimized_process_ids[pool_index] = process_id; + m_has_optimized_process[pool_index] = true; + + // Clear the management area for the optimized process. + for (auto* manager = this->GetFirstManager(pool, Direction::FromFront); manager != nullptr; + manager = this->GetNextManager(manager, Direction::FromFront)) { + manager->InitializeOptimizedMemory(m_system.Kernel()); + } + + R_SUCCEED(); } void KMemoryManager::FinalizeOptimizedMemory(u64 process_id, Pool pool) { - UNREACHABLE(); + const u32 pool_index = static_cast<u32>(pool); + + // Lock the pool. + KScopedLightLock lk(m_pool_locks[pool_index]); + + // If the process was optimized, clear it. + if (m_has_optimized_process[pool_index] && m_optimized_process_ids[pool_index] == process_id) { + m_has_optimized_process[pool_index] = false; + } } KPhysicalAddress KMemoryManager::AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, @@ -206,7 +234,7 @@ KPhysicalAddress KMemoryManager::AllocateAndOpenContinuous(size_t num_pages, siz // Maintain the optimized memory bitmap, if we should. if (m_has_optimized_process[static_cast<size_t>(pool)]) { - UNIMPLEMENTED(); + chosen_manager->TrackUnoptimizedAllocation(m_system.Kernel(), allocated_block, num_pages); } // Open the first reference to the pages. @@ -254,7 +282,8 @@ Result KMemoryManager::AllocatePageGroupImpl(KPageGroup* out, size_t num_pages, // Maintain the optimized memory bitmap, if we should. if (unoptimized) { - UNIMPLEMENTED(); + cur_manager->TrackUnoptimizedAllocation(m_system.Kernel(), allocated_block, + pages_per_alloc); } num_pages -= pages_per_alloc; @@ -357,8 +386,8 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32 // Process part or all of the block. const size_t cur_pages = std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address)); - any_new = - manager.ProcessOptimizedAllocation(cur_address, cur_pages, fill_pattern); + any_new = manager.ProcessOptimizedAllocation(m_system.Kernel(), cur_address, + cur_pages, fill_pattern); // Advance. cur_address += cur_pages * PageSize; @@ -381,7 +410,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32 // Track some or all of the current pages. const size_t cur_pages = std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address)); - manager.TrackOptimizedAllocation(cur_address, cur_pages); + manager.TrackOptimizedAllocation(m_system.Kernel(), cur_address, cur_pages); // Advance. cur_address += cur_pages * PageSize; @@ -426,17 +455,86 @@ size_t KMemoryManager::Impl::Initialize(KPhysicalAddress address, size_t size, return total_management_size; } -void KMemoryManager::Impl::TrackUnoptimizedAllocation(KPhysicalAddress block, size_t num_pages) { - UNREACHABLE(); +void KMemoryManager::Impl::InitializeOptimizedMemory(KernelCore& kernel) { + auto optimize_pa = + KPageTable::GetHeapPhysicalAddress(kernel.MemoryLayout(), m_management_region); + auto* optimize_map = kernel.System().DeviceMemory().GetPointer<u64>(optimize_pa); + + std::memset(optimize_map, 0, CalculateOptimizedProcessOverheadSize(m_heap.GetSize())); } -void KMemoryManager::Impl::TrackOptimizedAllocation(KPhysicalAddress block, size_t num_pages) { - UNREACHABLE(); +void KMemoryManager::Impl::TrackUnoptimizedAllocation(KernelCore& kernel, KPhysicalAddress block, + size_t num_pages) { + auto optimize_pa = + KPageTable::GetHeapPhysicalAddress(kernel.MemoryLayout(), m_management_region); + auto* optimize_map = kernel.System().DeviceMemory().GetPointer<u64>(optimize_pa); + + // Get the range we're tracking. + size_t offset = this->GetPageOffset(block); + const size_t last = offset + num_pages - 1; + + // Track. + while (offset <= last) { + // Mark the page as not being optimized-allocated. + optimize_map[offset / Common::BitSize<u64>()] &= + ~(u64(1) << (offset % Common::BitSize<u64>())); + + offset++; + } +} + +void KMemoryManager::Impl::TrackOptimizedAllocation(KernelCore& kernel, KPhysicalAddress block, + size_t num_pages) { + auto optimize_pa = + KPageTable::GetHeapPhysicalAddress(kernel.MemoryLayout(), m_management_region); + auto* optimize_map = kernel.System().DeviceMemory().GetPointer<u64>(optimize_pa); + + // Get the range we're tracking. + size_t offset = this->GetPageOffset(block); + const size_t last = offset + num_pages - 1; + + // Track. + while (offset <= last) { + // Mark the page as being optimized-allocated. + optimize_map[offset / Common::BitSize<u64>()] |= + (u64(1) << (offset % Common::BitSize<u64>())); + + offset++; + } } -bool KMemoryManager::Impl::ProcessOptimizedAllocation(KPhysicalAddress block, size_t num_pages, - u8 fill_pattern) { - UNREACHABLE(); +bool KMemoryManager::Impl::ProcessOptimizedAllocation(KernelCore& kernel, KPhysicalAddress block, + size_t num_pages, u8 fill_pattern) { + auto& device_memory = kernel.System().DeviceMemory(); + auto optimize_pa = + KPageTable::GetHeapPhysicalAddress(kernel.MemoryLayout(), m_management_region); + auto* optimize_map = device_memory.GetPointer<u64>(optimize_pa); + + // We want to return whether any pages were newly allocated. + bool any_new = false; + + // Get the range we're processing. + size_t offset = this->GetPageOffset(block); + const size_t last = offset + num_pages - 1; + + // Process. + while (offset <= last) { + // Check if the page has been optimized-allocated before. + if ((optimize_map[offset / Common::BitSize<u64>()] & + (u64(1) << (offset % Common::BitSize<u64>()))) == 0) { + // If not, it's new. + any_new = true; + + // Fill the page. + auto* ptr = device_memory.GetPointer<u8>(m_heap.GetAddress()); + std::memset(ptr + offset * PageSize, fill_pattern, PageSize); + } + + offset++; + } + + // Return the number of pages we processed. + return any_new; } size_t KMemoryManager::Impl::CalculateManagementOverheadSize(size_t region_size) { diff --git a/src/core/hle/kernel/k_memory_manager.h b/src/core/hle/kernel/k_memory_manager.h index 7e4b41319..c5a487af9 100644 --- a/src/core/hle/kernel/k_memory_manager.h +++ b/src/core/hle/kernel/k_memory_manager.h @@ -216,14 +216,14 @@ private: m_heap.SetInitialUsedSize(reserved_size); } - void InitializeOptimizedMemory() { - UNIMPLEMENTED(); - } + void InitializeOptimizedMemory(KernelCore& kernel); - void TrackUnoptimizedAllocation(KPhysicalAddress block, size_t num_pages); - void TrackOptimizedAllocation(KPhysicalAddress block, size_t num_pages); + void TrackUnoptimizedAllocation(KernelCore& kernel, KPhysicalAddress block, + size_t num_pages); + void TrackOptimizedAllocation(KernelCore& kernel, KPhysicalAddress block, size_t num_pages); - bool ProcessOptimizedAllocation(KPhysicalAddress block, size_t num_pages, u8 fill_pattern); + bool ProcessOptimizedAllocation(KernelCore& kernel, KPhysicalAddress block, + size_t num_pages, u8 fill_pattern); constexpr Pool GetPool() const { return m_pool; diff --git a/src/core/hle/kernel/k_memory_region_type.h b/src/core/hle/kernel/k_memory_region_type.h index e5630c1ac..bcbf450f0 100644 --- a/src/core/hle/kernel/k_memory_region_type.h +++ b/src/core/hle/kernel/k_memory_region_type.h @@ -190,9 +190,15 @@ static_assert(KMemoryRegionType_DramKernelInitPt.GetValue() == constexpr inline auto KMemoryRegionType_DramKernelSecureAppletMemory = KMemoryRegionType_DramKernelBase.DeriveSparse(1, 3, 0).SetAttribute( KMemoryRegionAttr_LinearMapped); +constexpr inline const auto KMemoryRegionType_DramKernelSecureUnknown = + KMemoryRegionType_DramKernelBase.DeriveSparse(1, 3, 1).SetAttribute( + KMemoryRegionAttr_LinearMapped); static_assert(KMemoryRegionType_DramKernelSecureAppletMemory.GetValue() == (0x18E | KMemoryRegionAttr_CarveoutProtected | KMemoryRegionAttr_NoUserMap | KMemoryRegionAttr_LinearMapped)); +static_assert(KMemoryRegionType_DramKernelSecureUnknown.GetValue() == + (0x28E | KMemoryRegionAttr_CarveoutProtected | KMemoryRegionAttr_NoUserMap | + KMemoryRegionAttr_LinearMapped)); constexpr inline auto KMemoryRegionType_DramReservedEarly = KMemoryRegionType_DramReservedBase.DeriveAttribute(KMemoryRegionAttr_NoUserMap); @@ -217,16 +223,18 @@ constexpr inline auto KMemoryRegionType_DramPoolPartition = static_assert(KMemoryRegionType_DramPoolPartition.GetValue() == (0x26 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap)); -constexpr inline auto KMemoryRegionType_DramPoolManagement = - KMemoryRegionType_DramPoolPartition.DeriveTransition(0, 2).DeriveTransition().SetAttribute( +// UNUSED: .Derive(4, 1); +// UNUSED: .Derive(4, 2); +constexpr inline const auto KMemoryRegionType_DramPoolManagement = + KMemoryRegionType_DramPoolPartition.Derive(4, 0).SetAttribute( KMemoryRegionAttr_CarveoutProtected); -constexpr inline auto KMemoryRegionType_DramUserPool = - KMemoryRegionType_DramPoolPartition.DeriveTransition(1, 2).DeriveTransition(); +constexpr inline const auto KMemoryRegionType_DramUserPool = + KMemoryRegionType_DramPoolPartition.Derive(4, 3); static_assert(KMemoryRegionType_DramPoolManagement.GetValue() == - (0x166 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap | + (0xE6 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap | KMemoryRegionAttr_CarveoutProtected)); static_assert(KMemoryRegionType_DramUserPool.GetValue() == - (0x1A6 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap)); + (0x266 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap)); constexpr inline auto KMemoryRegionType_DramApplicationPool = KMemoryRegionType_DramUserPool.Derive(4, 0); @@ -237,60 +245,63 @@ constexpr inline auto KMemoryRegionType_DramSystemNonSecurePool = constexpr inline auto KMemoryRegionType_DramSystemPool = KMemoryRegionType_DramUserPool.Derive(4, 3).SetAttribute(KMemoryRegionAttr_CarveoutProtected); static_assert(KMemoryRegionType_DramApplicationPool.GetValue() == - (0x7A6 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap)); + (0xE66 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap)); static_assert(KMemoryRegionType_DramAppletPool.GetValue() == - (0xBA6 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap)); + (0x1666 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap)); static_assert(KMemoryRegionType_DramSystemNonSecurePool.GetValue() == - (0xDA6 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap)); + (0x1A66 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap)); static_assert(KMemoryRegionType_DramSystemPool.GetValue() == - (0x13A6 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap | + (0x2666 | KMemoryRegionAttr_LinearMapped | KMemoryRegionAttr_NoUserMap | KMemoryRegionAttr_CarveoutProtected)); constexpr inline auto KMemoryRegionType_VirtualDramHeapBase = - KMemoryRegionType_Dram.DeriveSparse(1, 3, 0); + KMemoryRegionType_Dram.DeriveSparse(1, 4, 0); constexpr inline auto KMemoryRegionType_VirtualDramKernelPtHeap = - KMemoryRegionType_Dram.DeriveSparse(1, 3, 1); + KMemoryRegionType_Dram.DeriveSparse(1, 4, 1); constexpr inline auto KMemoryRegionType_VirtualDramKernelTraceBuffer = - KMemoryRegionType_Dram.DeriveSparse(1, 3, 2); + KMemoryRegionType_Dram.DeriveSparse(1, 4, 2); static_assert(KMemoryRegionType_VirtualDramHeapBase.GetValue() == 0x1A); static_assert(KMemoryRegionType_VirtualDramKernelPtHeap.GetValue() == 0x2A); static_assert(KMemoryRegionType_VirtualDramKernelTraceBuffer.GetValue() == 0x4A); -// UNUSED: .DeriveSparse(2, 2, 0); -constexpr inline auto KMemoryRegionType_VirtualDramUnknownDebug = - KMemoryRegionType_Dram.DeriveSparse(2, 2, 1); -static_assert(KMemoryRegionType_VirtualDramUnknownDebug.GetValue() == (0x52)); - -constexpr inline auto KMemoryRegionType_VirtualDramKernelSecureAppletMemory = - KMemoryRegionType_Dram.DeriveSparse(3, 1, 0); -static_assert(KMemoryRegionType_VirtualDramKernelSecureAppletMemory.GetValue() == (0x62)); - -constexpr inline auto KMemoryRegionType_VirtualDramKernelInitPt = - KMemoryRegionType_VirtualDramHeapBase.Derive(3, 0); -constexpr inline auto KMemoryRegionType_VirtualDramPoolManagement = - KMemoryRegionType_VirtualDramHeapBase.Derive(3, 1); -constexpr inline auto KMemoryRegionType_VirtualDramUserPool = - KMemoryRegionType_VirtualDramHeapBase.Derive(3, 2); -static_assert(KMemoryRegionType_VirtualDramKernelInitPt.GetValue() == 0x19A); -static_assert(KMemoryRegionType_VirtualDramPoolManagement.GetValue() == 0x29A); -static_assert(KMemoryRegionType_VirtualDramUserPool.GetValue() == 0x31A); +// UNUSED: .Derive(4, 2); +constexpr inline const auto KMemoryRegionType_VirtualDramUnknownDebug = + KMemoryRegionType_Dram.Advance(2).Derive(4, 0); +constexpr inline const auto KMemoryRegionType_VirtualDramKernelSecureAppletMemory = + KMemoryRegionType_Dram.Advance(2).Derive(4, 1); +constexpr inline const auto KMemoryRegionType_VirtualDramKernelSecureUnknown = + KMemoryRegionType_Dram.Advance(2).Derive(4, 3); +static_assert(KMemoryRegionType_VirtualDramUnknownDebug.GetValue() == (0x32)); +static_assert(KMemoryRegionType_VirtualDramKernelSecureAppletMemory.GetValue() == (0x52)); +static_assert(KMemoryRegionType_VirtualDramKernelSecureUnknown.GetValue() == (0x92)); + +// UNUSED: .Derive(4, 3); +constexpr inline const auto KMemoryRegionType_VirtualDramKernelInitPt = + KMemoryRegionType_VirtualDramHeapBase.Derive(4, 0); +constexpr inline const auto KMemoryRegionType_VirtualDramPoolManagement = + KMemoryRegionType_VirtualDramHeapBase.Derive(4, 1); +constexpr inline const auto KMemoryRegionType_VirtualDramUserPool = + KMemoryRegionType_VirtualDramHeapBase.Derive(4, 2); +static_assert(KMemoryRegionType_VirtualDramKernelInitPt.GetValue() == 0x31A); +static_assert(KMemoryRegionType_VirtualDramPoolManagement.GetValue() == 0x51A); +static_assert(KMemoryRegionType_VirtualDramUserPool.GetValue() == 0x61A); // NOTE: For unknown reason, the pools are derived out-of-order here. // It's worth eventually trying to understand why Nintendo made this choice. // UNUSED: .Derive(6, 0); // UNUSED: .Derive(6, 1); -constexpr inline auto KMemoryRegionType_VirtualDramAppletPool = - KMemoryRegionType_VirtualDramUserPool.Derive(6, 2); -constexpr inline auto KMemoryRegionType_VirtualDramApplicationPool = - KMemoryRegionType_VirtualDramUserPool.Derive(6, 3); -constexpr inline auto KMemoryRegionType_VirtualDramSystemNonSecurePool = - KMemoryRegionType_VirtualDramUserPool.Derive(6, 4); -constexpr inline auto KMemoryRegionType_VirtualDramSystemPool = - KMemoryRegionType_VirtualDramUserPool.Derive(6, 5); -static_assert(KMemoryRegionType_VirtualDramAppletPool.GetValue() == 0x1B1A); -static_assert(KMemoryRegionType_VirtualDramApplicationPool.GetValue() == 0x271A); -static_assert(KMemoryRegionType_VirtualDramSystemNonSecurePool.GetValue() == 0x2B1A); -static_assert(KMemoryRegionType_VirtualDramSystemPool.GetValue() == 0x331A); +constexpr inline const auto KMemoryRegionType_VirtualDramApplicationPool = + KMemoryRegionType_VirtualDramUserPool.Derive(4, 0); +constexpr inline const auto KMemoryRegionType_VirtualDramAppletPool = + KMemoryRegionType_VirtualDramUserPool.Derive(4, 1); +constexpr inline const auto KMemoryRegionType_VirtualDramSystemNonSecurePool = + KMemoryRegionType_VirtualDramUserPool.Derive(4, 2); +constexpr inline const auto KMemoryRegionType_VirtualDramSystemPool = + KMemoryRegionType_VirtualDramUserPool.Derive(4, 3); +static_assert(KMemoryRegionType_VirtualDramApplicationPool.GetValue() == 0x361A); +static_assert(KMemoryRegionType_VirtualDramAppletPool.GetValue() == 0x561A); +static_assert(KMemoryRegionType_VirtualDramSystemNonSecurePool.GetValue() == 0x661A); +static_assert(KMemoryRegionType_VirtualDramSystemPool.GetValue() == 0x961A); constexpr inline auto KMemoryRegionType_ArchDeviceBase = KMemoryRegionType_Kernel.DeriveTransition(0, 1).SetSparseOnly(); @@ -354,12 +365,14 @@ constexpr inline auto KMemoryRegionType_KernelTemp = static_assert(KMemoryRegionType_KernelTemp.GetValue() == 0x31); constexpr KMemoryRegionType GetTypeForVirtualLinearMapping(u32 type_id) { - if (KMemoryRegionType_KernelTraceBuffer.IsAncestorOf(type_id)) { - return KMemoryRegionType_VirtualDramKernelTraceBuffer; - } else if (KMemoryRegionType_DramKernelPtHeap.IsAncestorOf(type_id)) { + if (KMemoryRegionType_DramKernelPtHeap.IsAncestorOf(type_id)) { return KMemoryRegionType_VirtualDramKernelPtHeap; } else if (KMemoryRegionType_DramKernelSecureAppletMemory.IsAncestorOf(type_id)) { return KMemoryRegionType_VirtualDramKernelSecureAppletMemory; + } else if (KMemoryRegionType_DramKernelSecureUnknown.IsAncestorOf(type_id)) { + return KMemoryRegionType_VirtualDramKernelSecureUnknown; + } else if (KMemoryRegionType_KernelTraceBuffer.IsAncestorOf(type_id)) { + return KMemoryRegionType_VirtualDramKernelTraceBuffer; } else if ((type_id | KMemoryRegionAttr_ShouldKernelMap) == type_id) { return KMemoryRegionType_VirtualDramUnknownDebug; } else { diff --git a/src/core/hle/kernel/k_page_group.h b/src/core/hle/kernel/k_page_group.h index b32909f05..de9d63a8d 100644 --- a/src/core/hle/kernel/k_page_group.h +++ b/src/core/hle/kernel/k_page_group.h @@ -183,12 +183,17 @@ private: class KScopedPageGroup { public: - explicit KScopedPageGroup(const KPageGroup* gp) : m_pg(gp) { + explicit KScopedPageGroup(const KPageGroup* gp, bool not_first = true) : m_pg(gp) { if (m_pg) { - m_pg->Open(); + if (not_first) { + m_pg->Open(); + } else { + m_pg->OpenFirst(); + } } } - explicit KScopedPageGroup(const KPageGroup& gp) : KScopedPageGroup(std::addressof(gp)) {} + explicit KScopedPageGroup(const KPageGroup& gp, bool not_first = true) + : KScopedPageGroup(std::addressof(gp), not_first) {} ~KScopedPageGroup() { if (m_pg) { m_pg->Close(); diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp index 5b51edf30..1d47bdf6b 100644 --- a/src/core/hle/kernel/k_page_table.cpp +++ b/src/core/hle/kernel/k_page_table.cpp @@ -82,14 +82,14 @@ public: using namespace Common::Literals; -constexpr size_t GetAddressSpaceWidthFromType(FileSys::ProgramAddressSpaceType as_type) { +constexpr size_t GetAddressSpaceWidthFromType(Svc::CreateProcessFlag as_type) { switch (as_type) { - case FileSys::ProgramAddressSpaceType::Is32Bit: - case FileSys::ProgramAddressSpaceType::Is32BitNoMap: + case Svc::CreateProcessFlag::AddressSpace32Bit: + case Svc::CreateProcessFlag::AddressSpace32BitWithoutAlias: return 32; - case FileSys::ProgramAddressSpaceType::Is36Bit: + case Svc::CreateProcessFlag::AddressSpace64BitDeprecated: return 36; - case FileSys::ProgramAddressSpaceType::Is39Bit: + case Svc::CreateProcessFlag::AddressSpace64Bit: return 39; default: ASSERT(false); @@ -105,7 +105,7 @@ KPageTable::KPageTable(Core::System& system_) KPageTable::~KPageTable() = default; -Result KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_type, bool enable_aslr, +Result KPageTable::InitializeForProcess(Svc::CreateProcessFlag as_type, bool enable_aslr, bool enable_das_merge, bool from_back, KMemoryManager::Pool pool, KProcessAddress code_addr, size_t code_size, KSystemResource* system_resource, @@ -133,7 +133,7 @@ Result KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_type ASSERT(code_addr + code_size - 1 <= end - 1); // Adjust heap/alias size if we don't have an alias region - if (as_type == FileSys::ProgramAddressSpaceType::Is32BitNoMap) { + if (as_type == Svc::CreateProcessFlag::AddressSpace32BitWithoutAlias) { heap_region_size += alias_region_size; alias_region_size = 0; } @@ -505,7 +505,7 @@ Result KPageTable::UnmapCodeMemory(KProcessAddress dst_address, KProcessAddress R_TRY(this->CheckMemoryStateContiguous( std::addressof(num_dst_allocator_blocks), dst_address, size, KMemoryState::FlagCanCodeAlias, KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None, - KMemoryAttribute::All, KMemoryAttribute::None)); + KMemoryAttribute::All & ~KMemoryAttribute::PermissionLocked, KMemoryAttribute::None)); // Determine whether any pages being unmapped are code. bool any_code_pages = false; @@ -1724,29 +1724,43 @@ Result KPageTable::MapPhysicalMemory(KProcessAddress address, size_t size) { PageSize; // While we have pages to map, map them. - while (map_pages > 0) { - // Check if we're at the end of the physical block. - if (pg_pages == 0) { - // Ensure there are more pages to map. - ASSERT(pg_it != pg.end()); - - // Advance our physical block. - ++pg_it; - pg_phys_addr = pg_it->GetAddress(); - pg_pages = pg_it->GetNumPages(); + { + // Create a page group for the current mapping range. + KPageGroup cur_pg(m_kernel, m_block_info_manager); + { + ON_RESULT_FAILURE_2 { + cur_pg.OpenFirst(); + cur_pg.Close(); + }; + + size_t remain_pages = map_pages; + while (remain_pages > 0) { + // Check if we're at the end of the physical block. + if (pg_pages == 0) { + // Ensure there are more pages to map. + ASSERT(pg_it != pg.end()); + + // Advance our physical block. + ++pg_it; + pg_phys_addr = pg_it->GetAddress(); + pg_pages = pg_it->GetNumPages(); + } + + // Add whatever we can to the current block. + const size_t cur_pages = std::min(pg_pages, remain_pages); + R_TRY(cur_pg.AddBlock(pg_phys_addr + + ((pg_pages - cur_pages) * PageSize), + cur_pages)); + + // Advance. + remain_pages -= cur_pages; + pg_pages -= cur_pages; + } } - // Map whatever we can. - const size_t cur_pages = std::min(pg_pages, map_pages); - R_TRY(Operate(cur_address, cur_pages, KMemoryPermission::UserReadWrite, - OperationType::MapFirst, pg_phys_addr)); - - // Advance. - cur_address += cur_pages * PageSize; - map_pages -= cur_pages; - - pg_phys_addr += cur_pages * PageSize; - pg_pages -= cur_pages; + // Map the pages. + R_TRY(this->Operate(cur_address, map_pages, cur_pg, + OperationType::MapFirstGroup)); } } @@ -1770,7 +1784,11 @@ Result KPageTable::MapPhysicalMemory(KProcessAddress address, size_t size) { m_memory_block_manager.UpdateIfMatch( std::addressof(allocator), address, size / PageSize, KMemoryState::Free, KMemoryPermission::None, KMemoryAttribute::None, KMemoryState::Normal, - KMemoryPermission::UserReadWrite, KMemoryAttribute::None); + KMemoryPermission::UserReadWrite, KMemoryAttribute::None, + address == this->GetAliasRegionStart() + ? KMemoryBlockDisableMergeAttribute::Normal + : KMemoryBlockDisableMergeAttribute::None, + KMemoryBlockDisableMergeAttribute::None); R_SUCCEED(); } @@ -1868,6 +1886,13 @@ Result KPageTable::UnmapPhysicalMemory(KProcessAddress address, size_t size) { // Iterate over the memory, unmapping as we go. auto it = m_memory_block_manager.FindIterator(cur_address); + + const auto clear_merge_attr = + (it->GetState() == KMemoryState::Normal && + it->GetAddress() == this->GetAliasRegionStart() && it->GetAddress() == address) + ? KMemoryBlockDisableMergeAttribute::Normal + : KMemoryBlockDisableMergeAttribute::None; + while (true) { // Check that the iterator is valid. ASSERT(it != m_memory_block_manager.end()); @@ -1905,7 +1930,7 @@ Result KPageTable::UnmapPhysicalMemory(KProcessAddress address, size_t size) { m_memory_block_manager.Update(std::addressof(allocator), address, size / PageSize, KMemoryState::Free, KMemoryPermission::None, KMemoryAttribute::None, KMemoryBlockDisableMergeAttribute::None, - KMemoryBlockDisableMergeAttribute::None); + clear_merge_attr); // We succeeded. R_SUCCEED(); @@ -2379,8 +2404,7 @@ Result KPageTable::MapPageGroup(KProcessAddress* out_addr, const KPageGroup& pg, KScopedPageTableUpdater updater(this); // Perform mapping operation. - const KPageProperties properties = {perm, state == KMemoryState::Io, false, - DisableMergeAttribute::DisableHead}; + const KPageProperties properties = {perm, false, false, DisableMergeAttribute::DisableHead}; R_TRY(this->MapPageGroupImpl(updater.GetPageList(), addr, pg, properties, false)); // Update the blocks. @@ -2422,8 +2446,7 @@ Result KPageTable::MapPageGroup(KProcessAddress addr, const KPageGroup& pg, KMem KScopedPageTableUpdater updater(this); // Perform mapping operation. - const KPageProperties properties = {perm, state == KMemoryState::Io, false, - DisableMergeAttribute::DisableHead}; + const KPageProperties properties = {perm, false, false, DisableMergeAttribute::DisableHead}; R_TRY(this->MapPageGroupImpl(updater.GetPageList(), addr, pg, properties, false)); // Update the blocks. @@ -2652,11 +2675,18 @@ Result KPageTable::SetMemoryAttribute(KProcessAddress addr, size_t size, u32 mas size_t num_allocator_blocks; constexpr auto AttributeTestMask = ~(KMemoryAttribute::SetMask | KMemoryAttribute::DeviceShared); - R_TRY(this->CheckMemoryState( - std::addressof(old_state), std::addressof(old_perm), std::addressof(old_attr), - std::addressof(num_allocator_blocks), addr, size, KMemoryState::FlagCanChangeAttribute, - KMemoryState::FlagCanChangeAttribute, KMemoryPermission::None, KMemoryPermission::None, - AttributeTestMask, KMemoryAttribute::None, ~AttributeTestMask)); + const KMemoryState state_test_mask = + static_cast<KMemoryState>(((mask & static_cast<u32>(KMemoryAttribute::Uncached)) + ? static_cast<u32>(KMemoryState::FlagCanChangeAttribute) + : 0) | + ((mask & static_cast<u32>(KMemoryAttribute::PermissionLocked)) + ? static_cast<u32>(KMemoryState::FlagCanPermissionLock) + : 0)); + R_TRY(this->CheckMemoryState(std::addressof(old_state), std::addressof(old_perm), + std::addressof(old_attr), std::addressof(num_allocator_blocks), + addr, size, state_test_mask, state_test_mask, + KMemoryPermission::None, KMemoryPermission::None, + AttributeTestMask, KMemoryAttribute::None, ~AttributeTestMask)); // Create an update allocator. Result allocator_result{ResultSuccess}; @@ -2664,18 +2694,17 @@ Result KPageTable::SetMemoryAttribute(KProcessAddress addr, size_t size, u32 mas m_memory_block_slab_manager, num_allocator_blocks); R_TRY(allocator_result); - // Determine the new attribute. - const KMemoryAttribute new_attr = - static_cast<KMemoryAttribute>(((old_attr & static_cast<KMemoryAttribute>(~mask)) | - static_cast<KMemoryAttribute>(attr & mask))); - - // Perform operation. - this->Operate(addr, num_pages, old_perm, OperationType::ChangePermissionsAndRefresh); + // If we need to, perform a change attribute operation. + if (True(KMemoryAttribute::Uncached & static_cast<KMemoryAttribute>(mask))) { + // Perform operation. + R_TRY(this->Operate(addr, num_pages, old_perm, + OperationType::ChangePermissionsAndRefreshAndFlush, 0)); + } // Update the blocks. - m_memory_block_manager.Update(std::addressof(allocator), addr, num_pages, old_state, old_perm, - new_attr, KMemoryBlockDisableMergeAttribute::None, - KMemoryBlockDisableMergeAttribute::None); + m_memory_block_manager.UpdateAttribute(std::addressof(allocator), addr, num_pages, + static_cast<KMemoryAttribute>(mask), + static_cast<KMemoryAttribute>(attr)); R_SUCCEED(); } @@ -2863,7 +2892,8 @@ Result KPageTable::LockForMapDeviceAddressSpace(bool* out_is_io, KProcessAddress &KMemoryBlock::ShareToDevice, KMemoryPermission::None); // Set whether the locked memory was io. - *out_is_io = old_state == KMemoryState::Io; + *out_is_io = + static_cast<Svc::MemoryState>(old_state & KMemoryState::Mask) == Svc::MemoryState::Io; R_SUCCEED(); } @@ -2949,6 +2979,23 @@ Result KPageTable::UnlockForIpcUserBuffer(KProcessAddress address, size_t size) KMemoryAttribute::Locked, nullptr)); } +Result KPageTable::LockForTransferMemory(KPageGroup* out, KProcessAddress address, size_t size, + KMemoryPermission perm) { + R_RETURN(this->LockMemoryAndOpen(out, nullptr, address, size, KMemoryState::FlagCanTransfer, + KMemoryState::FlagCanTransfer, KMemoryPermission::All, + KMemoryPermission::UserReadWrite, KMemoryAttribute::All, + KMemoryAttribute::None, perm, KMemoryAttribute::Locked)); +} + +Result KPageTable::UnlockForTransferMemory(KProcessAddress address, size_t size, + const KPageGroup& pg) { + R_RETURN(this->UnlockMemory(address, size, KMemoryState::FlagCanTransfer, + KMemoryState::FlagCanTransfer, KMemoryPermission::None, + KMemoryPermission::None, KMemoryAttribute::All, + KMemoryAttribute::Locked, KMemoryPermission::UserReadWrite, + KMemoryAttribute::Locked, std::addressof(pg))); +} + Result KPageTable::LockForCodeMemory(KPageGroup* out, KProcessAddress addr, size_t size) { R_RETURN(this->LockMemoryAndOpen( out, nullptr, addr, size, KMemoryState::FlagCanCodeMemory, KMemoryState::FlagCanCodeMemory, @@ -3004,9 +3051,10 @@ Result KPageTable::Operate(KProcessAddress addr, size_t num_pages, const KPageGr ASSERT(num_pages == page_group.GetNumPages()); switch (operation) { - case OperationType::MapGroup: { + case OperationType::MapGroup: + case OperationType::MapFirstGroup: { // We want to maintain a new reference to every page in the group. - KScopedPageGroup spg(page_group); + KScopedPageGroup spg(page_group, operation != OperationType::MapFirstGroup); for (const auto& node : page_group) { const size_t size{node.GetNumPages() * PageSize}; @@ -3048,7 +3096,6 @@ Result KPageTable::Operate(KProcessAddress addr, size_t num_pages, KMemoryPermis m_memory->UnmapRegion(*m_page_table_impl, addr, num_pages * PageSize); break; } - case OperationType::MapFirst: case OperationType::Map: { ASSERT(map_addr); ASSERT(Common::IsAligned(GetInteger(map_addr), PageSize)); @@ -3056,11 +3103,7 @@ Result KPageTable::Operate(KProcessAddress addr, size_t num_pages, KMemoryPermis // Open references to pages, if we should. if (IsHeapPhysicalAddress(m_kernel.MemoryLayout(), map_addr)) { - if (operation == OperationType::MapFirst) { - m_kernel.MemoryManager().OpenFirst(map_addr, num_pages); - } else { - m_kernel.MemoryManager().Open(map_addr, num_pages); - } + m_kernel.MemoryManager().Open(map_addr, num_pages); } break; } @@ -3070,6 +3113,7 @@ Result KPageTable::Operate(KProcessAddress addr, size_t num_pages, KMemoryPermis } case OperationType::ChangePermissions: case OperationType::ChangePermissionsAndRefresh: + case OperationType::ChangePermissionsAndRefreshAndFlush: break; default: ASSERT(false); @@ -3089,79 +3133,79 @@ void KPageTable::FinalizeUpdate(PageLinkedList* page_list) { } } -KProcessAddress KPageTable::GetRegionAddress(KMemoryState state) const { +KProcessAddress KPageTable::GetRegionAddress(Svc::MemoryState state) const { switch (state) { - case KMemoryState::Free: - case KMemoryState::Kernel: + case Svc::MemoryState::Free: + case Svc::MemoryState::Kernel: return m_address_space_start; - case KMemoryState::Normal: + case Svc::MemoryState::Normal: return m_heap_region_start; - case KMemoryState::Ipc: - case KMemoryState::NonSecureIpc: - case KMemoryState::NonDeviceIpc: + case Svc::MemoryState::Ipc: + case Svc::MemoryState::NonSecureIpc: + case Svc::MemoryState::NonDeviceIpc: return m_alias_region_start; - case KMemoryState::Stack: + case Svc::MemoryState::Stack: return m_stack_region_start; - case KMemoryState::Static: - case KMemoryState::ThreadLocal: + case Svc::MemoryState::Static: + case Svc::MemoryState::ThreadLocal: return m_kernel_map_region_start; - case KMemoryState::Io: - case KMemoryState::Shared: - case KMemoryState::AliasCode: - case KMemoryState::AliasCodeData: - case KMemoryState::Transfered: - case KMemoryState::SharedTransfered: - case KMemoryState::SharedCode: - case KMemoryState::GeneratedCode: - case KMemoryState::CodeOut: - case KMemoryState::Coverage: - case KMemoryState::Insecure: + case Svc::MemoryState::Io: + case Svc::MemoryState::Shared: + case Svc::MemoryState::AliasCode: + case Svc::MemoryState::AliasCodeData: + case Svc::MemoryState::Transfered: + case Svc::MemoryState::SharedTransfered: + case Svc::MemoryState::SharedCode: + case Svc::MemoryState::GeneratedCode: + case Svc::MemoryState::CodeOut: + case Svc::MemoryState::Coverage: + case Svc::MemoryState::Insecure: return m_alias_code_region_start; - case KMemoryState::Code: - case KMemoryState::CodeData: + case Svc::MemoryState::Code: + case Svc::MemoryState::CodeData: return m_code_region_start; default: UNREACHABLE(); } } -size_t KPageTable::GetRegionSize(KMemoryState state) const { +size_t KPageTable::GetRegionSize(Svc::MemoryState state) const { switch (state) { - case KMemoryState::Free: - case KMemoryState::Kernel: + case Svc::MemoryState::Free: + case Svc::MemoryState::Kernel: return m_address_space_end - m_address_space_start; - case KMemoryState::Normal: + case Svc::MemoryState::Normal: return m_heap_region_end - m_heap_region_start; - case KMemoryState::Ipc: - case KMemoryState::NonSecureIpc: - case KMemoryState::NonDeviceIpc: + case Svc::MemoryState::Ipc: + case Svc::MemoryState::NonSecureIpc: + case Svc::MemoryState::NonDeviceIpc: return m_alias_region_end - m_alias_region_start; - case KMemoryState::Stack: + case Svc::MemoryState::Stack: return m_stack_region_end - m_stack_region_start; - case KMemoryState::Static: - case KMemoryState::ThreadLocal: + case Svc::MemoryState::Static: + case Svc::MemoryState::ThreadLocal: return m_kernel_map_region_end - m_kernel_map_region_start; - case KMemoryState::Io: - case KMemoryState::Shared: - case KMemoryState::AliasCode: - case KMemoryState::AliasCodeData: - case KMemoryState::Transfered: - case KMemoryState::SharedTransfered: - case KMemoryState::SharedCode: - case KMemoryState::GeneratedCode: - case KMemoryState::CodeOut: - case KMemoryState::Coverage: - case KMemoryState::Insecure: + case Svc::MemoryState::Io: + case Svc::MemoryState::Shared: + case Svc::MemoryState::AliasCode: + case Svc::MemoryState::AliasCodeData: + case Svc::MemoryState::Transfered: + case Svc::MemoryState::SharedTransfered: + case Svc::MemoryState::SharedCode: + case Svc::MemoryState::GeneratedCode: + case Svc::MemoryState::CodeOut: + case Svc::MemoryState::Coverage: + case Svc::MemoryState::Insecure: return m_alias_code_region_end - m_alias_code_region_start; - case KMemoryState::Code: - case KMemoryState::CodeData: + case Svc::MemoryState::Code: + case Svc::MemoryState::CodeData: return m_code_region_end - m_code_region_start; default: UNREACHABLE(); } } -bool KPageTable::CanContain(KProcessAddress addr, size_t size, KMemoryState state) const { +bool KPageTable::CanContain(KProcessAddress addr, size_t size, Svc::MemoryState state) const { const KProcessAddress end = addr + size; const KProcessAddress last = end - 1; @@ -3175,32 +3219,32 @@ bool KPageTable::CanContain(KProcessAddress addr, size_t size, KMemoryState stat const bool is_in_alias = !(end <= m_alias_region_start || m_alias_region_end <= addr || m_alias_region_start == m_alias_region_end); switch (state) { - case KMemoryState::Free: - case KMemoryState::Kernel: + case Svc::MemoryState::Free: + case Svc::MemoryState::Kernel: return is_in_region; - case KMemoryState::Io: - case KMemoryState::Static: - case KMemoryState::Code: - case KMemoryState::CodeData: - case KMemoryState::Shared: - case KMemoryState::AliasCode: - case KMemoryState::AliasCodeData: - case KMemoryState::Stack: - case KMemoryState::ThreadLocal: - case KMemoryState::Transfered: - case KMemoryState::SharedTransfered: - case KMemoryState::SharedCode: - case KMemoryState::GeneratedCode: - case KMemoryState::CodeOut: - case KMemoryState::Coverage: - case KMemoryState::Insecure: + case Svc::MemoryState::Io: + case Svc::MemoryState::Static: + case Svc::MemoryState::Code: + case Svc::MemoryState::CodeData: + case Svc::MemoryState::Shared: + case Svc::MemoryState::AliasCode: + case Svc::MemoryState::AliasCodeData: + case Svc::MemoryState::Stack: + case Svc::MemoryState::ThreadLocal: + case Svc::MemoryState::Transfered: + case Svc::MemoryState::SharedTransfered: + case Svc::MemoryState::SharedCode: + case Svc::MemoryState::GeneratedCode: + case Svc::MemoryState::CodeOut: + case Svc::MemoryState::Coverage: + case Svc::MemoryState::Insecure: return is_in_region && !is_in_heap && !is_in_alias; - case KMemoryState::Normal: + case Svc::MemoryState::Normal: ASSERT(is_in_heap); return is_in_region && !is_in_alias; - case KMemoryState::Ipc: - case KMemoryState::NonSecureIpc: - case KMemoryState::NonDeviceIpc: + case Svc::MemoryState::Ipc: + case Svc::MemoryState::NonSecureIpc: + case Svc::MemoryState::NonDeviceIpc: ASSERT(is_in_alias); return is_in_region && !is_in_heap; default: @@ -3264,21 +3308,16 @@ Result KPageTable::CheckMemoryStateContiguous(size_t* out_blocks_needed, KProces Result KPageTable::CheckMemoryState(KMemoryState* out_state, KMemoryPermission* out_perm, KMemoryAttribute* out_attr, size_t* out_blocks_needed, - KProcessAddress addr, size_t size, KMemoryState state_mask, + KMemoryBlockManager::const_iterator it, + KProcessAddress last_addr, KMemoryState state_mask, KMemoryState state, KMemoryPermission perm_mask, KMemoryPermission perm, KMemoryAttribute attr_mask, KMemoryAttribute attr, KMemoryAttribute ignore_attr) const { ASSERT(this->IsLockedByCurrentThread()); // Get information about the first block. - const KProcessAddress last_addr = addr + size - 1; - KMemoryBlockManager::const_iterator it = m_memory_block_manager.FindIterator(addr); KMemoryInfo info = it->GetMemoryInfo(); - // If the start address isn't aligned, we need a block. - const size_t blocks_for_start_align = - (Common::AlignDown(GetInteger(addr), PageSize) != info.GetAddress()) ? 1 : 0; - // Validate all blocks in the range have correct state. const KMemoryState first_state = info.m_state; const KMemoryPermission first_perm = info.m_permission; @@ -3304,10 +3343,6 @@ Result KPageTable::CheckMemoryState(KMemoryState* out_state, KMemoryPermission* info = it->GetMemoryInfo(); } - // If the end address isn't aligned, we need a block. - const size_t blocks_for_end_align = - (Common::AlignUp(GetInteger(addr) + size, PageSize) != info.GetEndAddress()) ? 1 : 0; - // Write output state. if (out_state != nullptr) { *out_state = first_state; @@ -3318,9 +3353,39 @@ Result KPageTable::CheckMemoryState(KMemoryState* out_state, KMemoryPermission* if (out_attr != nullptr) { *out_attr = static_cast<KMemoryAttribute>(first_attr & ~ignore_attr); } + + // If the end address isn't aligned, we need a block. if (out_blocks_needed != nullptr) { - *out_blocks_needed = blocks_for_start_align + blocks_for_end_align; + const size_t blocks_for_end_align = + (Common::AlignDown(GetInteger(last_addr), PageSize) + PageSize != info.GetEndAddress()) + ? 1 + : 0; + *out_blocks_needed = blocks_for_end_align; } + + R_SUCCEED(); +} + +Result KPageTable::CheckMemoryState(KMemoryState* out_state, KMemoryPermission* out_perm, + KMemoryAttribute* out_attr, size_t* out_blocks_needed, + KProcessAddress addr, size_t size, KMemoryState state_mask, + KMemoryState state, KMemoryPermission perm_mask, + KMemoryPermission perm, KMemoryAttribute attr_mask, + KMemoryAttribute attr, KMemoryAttribute ignore_attr) const { + ASSERT(this->IsLockedByCurrentThread()); + + // Check memory state. + const KProcessAddress last_addr = addr + size - 1; + KMemoryBlockManager::const_iterator it = m_memory_block_manager.FindIterator(addr); + R_TRY(this->CheckMemoryState(out_state, out_perm, out_attr, out_blocks_needed, it, last_addr, + state_mask, state, perm_mask, perm, attr_mask, attr, ignore_attr)); + + // If the start address isn't aligned, we need a block. + if (out_blocks_needed != nullptr && + Common::AlignDown(GetInteger(addr), PageSize) != it->GetAddress()) { + ++(*out_blocks_needed); + } + R_SUCCEED(); } @@ -3388,6 +3453,11 @@ Result KPageTable::LockMemoryAndOpen(KPageGroup* out_pg, KPhysicalAddress* out_K new_attr, KMemoryBlockDisableMergeAttribute::Locked, KMemoryBlockDisableMergeAttribute::None); + // If we have an output page group, open. + if (out_pg) { + out_pg->Open(); + } + R_SUCCEED(); } diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h index b9e8c6042..66f16faaf 100644 --- a/src/core/hle/kernel/k_page_table.h +++ b/src/core/hle/kernel/k_page_table.h @@ -63,7 +63,7 @@ public: explicit KPageTable(Core::System& system_); ~KPageTable(); - Result InitializeForProcess(FileSys::ProgramAddressSpaceType as_type, bool enable_aslr, + Result InitializeForProcess(Svc::CreateProcessFlag as_type, bool enable_aslr, bool enable_das_merge, bool from_back, KMemoryManager::Pool pool, KProcessAddress code_addr, size_t code_size, KSystemResource* system_resource, KResourceLimit* resource_limit, @@ -104,6 +104,9 @@ public: Result CleanupForIpcServer(KProcessAddress address, size_t size, KMemoryState dst_state); Result CleanupForIpcClient(KProcessAddress address, size_t size, KMemoryState dst_state); + Result LockForTransferMemory(KPageGroup* out, KProcessAddress address, size_t size, + KMemoryPermission perm); + Result UnlockForTransferMemory(KProcessAddress address, size_t size, const KPageGroup& pg); Result LockForCodeMemory(KPageGroup* out, KProcessAddress addr, size_t size); Result UnlockForCodeMemory(KProcessAddress addr, size_t size, const KPageGroup& pg); Result MakeAndOpenPageGroup(KPageGroup* out, KProcessAddress address, size_t num_pages, @@ -123,8 +126,6 @@ public: return m_block_info_manager; } - bool CanContain(KProcessAddress addr, size_t size, KMemoryState state) const; - Result MapPages(KProcessAddress* out_addr, size_t num_pages, size_t alignment, KPhysicalAddress phys_addr, KProcessAddress region_start, size_t region_num_pages, KMemoryState state, KMemoryPermission perm) { @@ -159,6 +160,21 @@ public: void RemapPageGroup(PageLinkedList* page_list, KProcessAddress address, size_t size, const KPageGroup& pg); + KProcessAddress GetRegionAddress(Svc::MemoryState state) const; + size_t GetRegionSize(Svc::MemoryState state) const; + bool CanContain(KProcessAddress addr, size_t size, Svc::MemoryState state) const; + + KProcessAddress GetRegionAddress(KMemoryState state) const { + return this->GetRegionAddress(static_cast<Svc::MemoryState>(state & KMemoryState::Mask)); + } + size_t GetRegionSize(KMemoryState state) const { + return this->GetRegionSize(static_cast<Svc::MemoryState>(state & KMemoryState::Mask)); + } + bool CanContain(KProcessAddress addr, size_t size, KMemoryState state) const { + return this->CanContain(addr, size, + static_cast<Svc::MemoryState>(state & KMemoryState::Mask)); + } + protected: struct PageLinkedList { private: @@ -201,12 +217,13 @@ protected: private: enum class OperationType : u32 { Map = 0, - MapFirst = 1, - MapGroup = 2, + MapGroup = 1, + MapFirstGroup = 2, Unmap = 3, ChangePermissions = 4, ChangePermissionsAndRefresh = 5, - Separate = 6, + ChangePermissionsAndRefreshAndFlush = 6, + Separate = 7, }; static constexpr KMemoryAttribute DefaultMemoryIgnoreAttr = @@ -225,8 +242,6 @@ private: Result Operate(KProcessAddress addr, size_t num_pages, KMemoryPermission perm, OperationType operation, KPhysicalAddress map_addr = 0); void FinalizeUpdate(PageLinkedList* page_list); - KProcessAddress GetRegionAddress(KMemoryState state) const; - size_t GetRegionSize(KMemoryState state) const; KProcessAddress FindFreeArea(KProcessAddress region_start, size_t region_num_pages, size_t num_pages, size_t alignment, size_t offset, @@ -249,6 +264,13 @@ private: KMemoryAttribute attr_mask, KMemoryAttribute attr) const; Result CheckMemoryState(KMemoryState* out_state, KMemoryPermission* out_perm, KMemoryAttribute* out_attr, size_t* out_blocks_needed, + KMemoryBlockManager::const_iterator it, KProcessAddress last_addr, + KMemoryState state_mask, KMemoryState state, + KMemoryPermission perm_mask, KMemoryPermission perm, + KMemoryAttribute attr_mask, KMemoryAttribute attr, + KMemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr) const; + Result CheckMemoryState(KMemoryState* out_state, KMemoryPermission* out_perm, + KMemoryAttribute* out_attr, size_t* out_blocks_needed, KProcessAddress addr, size_t size, KMemoryState state_mask, KMemoryState state, KMemoryPermission perm_mask, KMemoryPermission perm, KMemoryAttribute attr_mask, KMemoryAttribute attr, @@ -378,7 +400,7 @@ public: constexpr size_t GetAliasCodeRegionSize() const { return m_alias_code_region_end - m_alias_code_region_start; } - size_t GetNormalMemorySize() { + size_t GetNormalMemorySize() const { KScopedLightLock lk(m_general_lock); return GetHeapSize() + m_mapped_physical_memory_size; } diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 4a099286b..1f4b0755d 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -1,515 +1,598 @@ -// SPDX-FileCopyrightText: 2015 Citra Emulator Project +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include <algorithm> -#include <bitset> -#include <ctime> -#include <memory> #include <random> -#include "common/alignment.h" -#include "common/assert.h" -#include "common/logging/log.h" #include "common/scope_exit.h" #include "common/settings.h" #include "core/core.h" -#include "core/file_sys/program_metadata.h" -#include "core/hle/kernel/code_set.h" -#include "core/hle/kernel/k_memory_block_manager.h" -#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" -#include "core/hle/kernel/k_resource_limit.h" -#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_scoped_resource_reservation.h" #include "core/hle/kernel/k_shared_memory.h" #include "core/hle/kernel/k_shared_memory_info.h" -#include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/svc_results.h" -#include "core/memory.h" +#include "core/hle/kernel/k_thread_local_page.h" +#include "core/hle/kernel/k_thread_queue.h" +#include "core/hle/kernel/k_worker_task_manager.h" namespace Kernel { -namespace { -/** - * Sets up the primary application thread - * - * @param system The system instance to create the main thread under. - * @param owner_process The parent process for the main thread - * @param priority The priority to give the main thread - */ -void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority, - KProcessAddress stack_top) { - const KProcessAddress entry_point = owner_process.GetEntryPoint(); - ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::ThreadCountMax, 1)); - - KThread* thread = KThread::Create(system.Kernel()); - SCOPE_EXIT({ thread->Close(); }); - - ASSERT(KThread::InitializeUserThread(system, thread, entry_point, 0, stack_top, priority, - owner_process.GetIdealCoreId(), - std::addressof(owner_process)) - .IsSuccess()); - - // Register 1 must be a handle to the main thread - Handle thread_handle{}; - owner_process.GetHandleTable().Add(std::addressof(thread_handle), thread); - - thread->GetContext32().cpu_registers[0] = 0; - thread->GetContext64().cpu_registers[0] = 0; - thread->GetContext32().cpu_registers[1] = thread_handle; - thread->GetContext64().cpu_registers[1] = thread_handle; - - if (system.DebuggerEnabled()) { - thread->RequestSuspend(SuspendType::Debug); - } - // Run our thread. - void(thread->Run()); -} -} // Anonymous namespace +namespace { -Result KProcess::Initialize(KProcess* process, Core::System& system, std::string process_name, - ProcessType type, KResourceLimit* res_limit) { - auto& kernel = system.Kernel(); +Result TerminateChildren(KernelCore& kernel, KProcess* process, + const KThread* thread_to_not_terminate) { + // Request that all children threads terminate. + { + KScopedLightLock proc_lk(process->GetListLock()); + KScopedSchedulerLock sl(kernel); + + if (thread_to_not_terminate != nullptr && + process->GetPinnedThread(GetCurrentCoreId(kernel)) == thread_to_not_terminate) { + // NOTE: Here Nintendo unpins the current thread instead of the thread_to_not_terminate. + // This is valid because the only caller which uses non-nullptr as argument uses + // GetCurrentThreadPointer(), but it's still notable because it seems incorrect at + // first glance. + process->UnpinCurrentThread(); + } - process->name = std::move(process_name); - process->m_resource_limit = res_limit; - process->m_system_resource_address = 0; - process->m_state = State::Created; - process->m_program_id = 0; - process->m_process_id = type == ProcessType::KernelInternal ? kernel.CreateNewKernelProcessID() - : kernel.CreateNewUserProcessID(); - process->m_capabilities.InitializeForMetadatalessProcess(); - process->m_is_initialized = true; + auto& thread_list = process->GetThreadList(); + for (auto it = thread_list.begin(); it != thread_list.end(); ++it) { + if (KThread* thread = std::addressof(*it); thread != thread_to_not_terminate) { + if (thread->GetState() != ThreadState::Terminated) { + thread->RequestTerminate(); + } + } + } + } - std::mt19937 rng(Settings::values.rng_seed_enabled ? Settings::values.rng_seed.GetValue() - : static_cast<u32>(std::time(nullptr))); - std::uniform_int_distribution<u64> distribution; - std::generate(process->m_random_entropy.begin(), process->m_random_entropy.end(), - [&] { return distribution(rng); }); + // Wait for all children threads to terminate. + while (true) { + // Get the next child. + KThread* cur_child = nullptr; + { + KScopedLightLock proc_lk(process->GetListLock()); + + auto& thread_list = process->GetThreadList(); + for (auto it = thread_list.begin(); it != thread_list.end(); ++it) { + if (KThread* thread = std::addressof(*it); thread != thread_to_not_terminate) { + if (thread->GetState() != ThreadState::Terminated) { + if (thread->Open()) { + cur_child = thread; + break; + } + } + } + } + } - kernel.AppendNewProcess(process); + // If we didn't find any non-terminated children, we're done. + if (cur_child == nullptr) { + break; + } - // Clear remaining fields. - process->m_num_running_threads = 0; - process->m_is_signaled = false; - process->m_exception_thread = nullptr; - process->m_is_suspended = false; - process->m_schedule_count = 0; - process->m_is_handle_table_initialized = false; - process->m_is_hbl = false; + // Terminate and close the thread. + SCOPE_EXIT({ cur_child->Close(); }); - // Open a reference to the resource limit. - process->m_resource_limit->Open(); + if (const Result terminate_result = cur_child->Terminate(); + ResultTerminationRequested == terminate_result) { + R_THROW(terminate_result); + } + } R_SUCCEED(); } -void KProcess::DoWorkerTaskImpl() { - UNIMPLEMENTED(); -} - -KResourceLimit* KProcess::GetResourceLimit() const { - return m_resource_limit; -} +class ThreadQueueImplForKProcessEnterUserException final : public KThreadQueue { +private: + KThread** m_exception_thread; -void KProcess::IncrementRunningThreadCount() { - ASSERT(m_num_running_threads.load() >= 0); - ++m_num_running_threads; -} +public: + explicit ThreadQueueImplForKProcessEnterUserException(KernelCore& kernel, KThread** t) + : KThreadQueue(kernel), m_exception_thread(t) {} -void KProcess::DecrementRunningThreadCount() { - ASSERT(m_num_running_threads.load() > 0); + virtual void EndWait(KThread* waiting_thread, Result wait_result) override { + // Set the exception thread. + *m_exception_thread = waiting_thread; - if (const auto prev = m_num_running_threads--; prev == 1) { - // TODO(bunnei): Process termination to be implemented when multiprocess is supported. + // Invoke the base end wait handler. + KThreadQueue::EndWait(waiting_thread, wait_result); } -} -u64 KProcess::GetTotalPhysicalMemoryAvailable() { - const u64 capacity{m_resource_limit->GetFreeValue(LimitableResource::PhysicalMemoryMax) + - m_page_table.GetNormalMemorySize() + GetSystemResourceSize() + m_image_size + - m_main_thread_stack_size}; - if (const auto pool_size = m_kernel.MemoryManager().GetSize(KMemoryManager::Pool::Application); - capacity != pool_size) { - LOG_WARNING(Kernel, "capacity {} != application pool size {}", capacity, pool_size); - } - if (capacity < m_memory_usage_capacity) { - return capacity; + virtual void CancelWait(KThread* waiting_thread, Result wait_result, + bool cancel_timer_task) override { + // Remove the thread as a waiter on its mutex owner. + waiting_thread->GetLockOwner()->RemoveWaiter(waiting_thread); + + // Invoke the base cancel wait handler. + KThreadQueue::CancelWait(waiting_thread, wait_result, cancel_timer_task); } - return m_memory_usage_capacity; -} +}; -u64 KProcess::GetTotalPhysicalMemoryAvailableWithoutSystemResource() { - return this->GetTotalPhysicalMemoryAvailable() - this->GetSystemResourceSize(); +void GenerateRandom(std::span<u64> out_random) { + std::mt19937 rng(Settings::values.rng_seed_enabled ? Settings::values.rng_seed.GetValue() + : static_cast<u32>(std::time(nullptr))); + std::uniform_int_distribution<u64> distribution; + std::generate(out_random.begin(), out_random.end(), [&] { return distribution(rng); }); } -u64 KProcess::GetTotalPhysicalMemoryUsed() { - return m_image_size + m_main_thread_stack_size + m_page_table.GetNormalMemorySize() + - this->GetSystemResourceSize(); -} +} // namespace -u64 KProcess::GetTotalPhysicalMemoryUsedWithoutSystemResource() { - return this->GetTotalPhysicalMemoryUsed() - this->GetSystemResourceUsage(); -} +void KProcess::Finalize() { + // Delete the process local region. + this->DeleteThreadLocalRegion(m_plr_address); -bool KProcess::ReleaseUserException(KThread* thread) { - KScopedSchedulerLock sl{m_kernel}; + // Get the used memory size. + const size_t used_memory_size = this->GetUsedNonSystemUserPhysicalMemorySize(); - if (m_exception_thread == thread) { - m_exception_thread = nullptr; + // Finalize the page table. + m_page_table.Finalize(); - // Remove waiter thread. - bool has_waiters{}; - if (KThread* next = thread->RemoveKernelWaiterByKey( - std::addressof(has_waiters), - reinterpret_cast<uintptr_t>(std::addressof(m_exception_thread))); - next != nullptr) { - next->EndWait(ResultSuccess); + // Finish using our system resource. + if (m_system_resource) { + if (m_system_resource->IsSecureResource()) { + // Finalize optimized memory. If memory wasn't optimized, this is a no-op. + m_kernel.MemoryManager().FinalizeOptimizedMemory(this->GetId(), m_memory_pool); } - KScheduler::SetSchedulerUpdateNeeded(m_kernel); - - return true; - } else { - return false; + m_system_resource->Close(); + m_system_resource = nullptr; } -} - -void KProcess::PinCurrentThread(s32 core_id) { - ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel)); - // Get the current thread. - KThread* cur_thread = - m_kernel.Scheduler(static_cast<std::size_t>(core_id)).GetSchedulerCurrentThread(); + // Free all shared memory infos. + { + auto it = m_shared_memory_list.begin(); + while (it != m_shared_memory_list.end()) { + KSharedMemoryInfo* info = std::addressof(*it); + KSharedMemory* shmem = info->GetSharedMemory(); - // If the thread isn't terminated, pin it. - if (!cur_thread->IsTerminationRequested()) { - // Pin it. - this->PinThread(core_id, cur_thread); - cur_thread->Pin(core_id); + while (!info->Close()) { + shmem->Close(); + } + shmem->Close(); - // An update is needed. - KScheduler::SetSchedulerUpdateNeeded(m_kernel); + it = m_shared_memory_list.erase(it); + KSharedMemoryInfo::Free(m_kernel, info); + } } -} -void KProcess::UnpinCurrentThread(s32 core_id) { - ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel)); - - // Get the current thread. - KThread* cur_thread = - m_kernel.Scheduler(static_cast<std::size_t>(core_id)).GetSchedulerCurrentThread(); + // Our thread local page list must be empty at this point. + ASSERT(m_partially_used_tlp_tree.empty()); + ASSERT(m_fully_used_tlp_tree.empty()); - // Unpin it. - cur_thread->Unpin(); - this->UnpinThread(core_id, cur_thread); + // Release memory to the resource limit. + if (m_resource_limit != nullptr) { + ASSERT(used_memory_size >= m_memory_release_hint); + m_resource_limit->Release(Svc::LimitableResource::PhysicalMemoryMax, used_memory_size, + used_memory_size - m_memory_release_hint); + m_resource_limit->Close(); + } - // An update is needed. - KScheduler::SetSchedulerUpdateNeeded(m_kernel); + // Perform inherited finalization. + KSynchronizationObject::Finalize(); } -void KProcess::UnpinThread(KThread* thread) { - ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel)); - - // Get the thread's core id. - const auto core_id = thread->GetActiveCore(); +Result KProcess::Initialize(const Svc::CreateProcessParameter& params, KResourceLimit* res_limit, + bool is_real) { + // TODO: remove this special case + if (is_real) { + // Create and clear the process local region. + R_TRY(this->CreateThreadLocalRegion(std::addressof(m_plr_address))); + this->GetMemory().ZeroBlock(m_plr_address, Svc::ThreadLocalRegionSize); + } - // Unpin it. - this->UnpinThread(core_id, thread); - thread->Unpin(); + // Copy in the name from parameters. + static_assert(sizeof(params.name) < sizeof(m_name)); + std::memcpy(m_name.data(), params.name.data(), sizeof(params.name)); + m_name[sizeof(params.name)] = 0; + + // Set misc fields. + m_state = State::Created; + m_main_thread_stack_size = 0; + m_used_kernel_memory_size = 0; + m_ideal_core_id = 0; + m_flags = params.flags; + m_version = params.version; + m_program_id = params.program_id; + m_code_address = params.code_address; + m_code_size = params.code_num_pages * PageSize; + m_is_application = True(params.flags & Svc::CreateProcessFlag::IsApplication); + + // Set thread fields. + for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { + m_running_threads[i] = nullptr; + m_pinned_threads[i] = nullptr; + m_running_thread_idle_counts[i] = 0; + m_running_thread_switch_counts[i] = 0; + } - // An update is needed. - KScheduler::SetSchedulerUpdateNeeded(m_kernel); -} + // Set max memory based on address space type. + switch ((params.flags & Svc::CreateProcessFlag::AddressSpaceMask)) { + case Svc::CreateProcessFlag::AddressSpace32Bit: + case Svc::CreateProcessFlag::AddressSpace64BitDeprecated: + case Svc::CreateProcessFlag::AddressSpace64Bit: + m_max_process_memory = m_page_table.GetHeapRegionSize(); + break; + case Svc::CreateProcessFlag::AddressSpace32BitWithoutAlias: + m_max_process_memory = m_page_table.GetHeapRegionSize() + m_page_table.GetAliasRegionSize(); + break; + default: + UNREACHABLE(); + } -Result KProcess::AddSharedMemory(KSharedMemory* shmem, [[maybe_unused]] KProcessAddress address, - [[maybe_unused]] size_t size) { - // Lock ourselves, to prevent concurrent access. - KScopedLightLock lk(m_state_lock); + // Generate random entropy. + GenerateRandom(m_entropy); - // Try to find an existing info for the memory. - KSharedMemoryInfo* shemen_info = nullptr; - const auto iter = std::find_if( - m_shared_memory_list.begin(), m_shared_memory_list.end(), - [shmem](const KSharedMemoryInfo* info) { return info->GetSharedMemory() == shmem; }); - if (iter != m_shared_memory_list.end()) { - shemen_info = *iter; - } + // Clear remaining fields. + m_num_running_threads = 0; + m_num_process_switches = 0; + m_num_thread_switches = 0; + m_num_fpu_switches = 0; + m_num_supervisor_calls = 0; + m_num_ipc_messages = 0; - if (shemen_info == nullptr) { - shemen_info = KSharedMemoryInfo::Allocate(m_kernel); - R_UNLESS(shemen_info != nullptr, ResultOutOfMemory); + m_is_signaled = false; + m_exception_thread = nullptr; + m_is_suspended = false; + m_memory_release_hint = 0; + m_schedule_count = 0; + m_is_handle_table_initialized = false; - shemen_info->Initialize(shmem); - m_shared_memory_list.push_back(shemen_info); - } + // Open a reference to our resource limit. + m_resource_limit = res_limit; + m_resource_limit->Open(); - // Open a reference to the shared memory and its info. - shmem->Open(); - shemen_info->Open(); + // We're initialized! + m_is_initialized = true; R_SUCCEED(); } -void KProcess::RemoveSharedMemory(KSharedMemory* shmem, [[maybe_unused]] KProcessAddress address, - [[maybe_unused]] size_t size) { - // Lock ourselves, to prevent concurrent access. - KScopedLightLock lk(m_state_lock); +Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPageGroup& pg, + std::span<const u32> caps, KResourceLimit* res_limit, + KMemoryManager::Pool pool, bool immortal) { + ASSERT(res_limit != nullptr); + ASSERT((params.code_num_pages * PageSize) / PageSize == + static_cast<size_t>(params.code_num_pages)); + + // Set members. + m_memory_pool = pool; + m_is_default_application_system_resource = false; + m_is_immortal = immortal; + + // Setup our system resource. + if (const size_t system_resource_num_pages = params.system_resource_num_pages; + system_resource_num_pages != 0) { + // Create a secure system resource. + KSecureSystemResource* secure_resource = KSecureSystemResource::Create(m_kernel); + R_UNLESS(secure_resource != nullptr, ResultOutOfResource); + + ON_RESULT_FAILURE { + secure_resource->Close(); + }; + + // Initialize the secure resource. + R_TRY(secure_resource->Initialize(system_resource_num_pages * PageSize, res_limit, + m_memory_pool)); + + // Set our system resource. + m_system_resource = secure_resource; + } else { + // Use the system-wide system resource. + const bool is_app = True(params.flags & Svc::CreateProcessFlag::IsApplication); + m_system_resource = std::addressof(is_app ? m_kernel.GetAppSystemResource() + : m_kernel.GetSystemSystemResource()); - KSharedMemoryInfo* shemen_info = nullptr; - const auto iter = std::find_if( - m_shared_memory_list.begin(), m_shared_memory_list.end(), - [shmem](const KSharedMemoryInfo* info) { return info->GetSharedMemory() == shmem; }); - if (iter != m_shared_memory_list.end()) { - shemen_info = *iter; + m_is_default_application_system_resource = is_app; + + // Open reference to the system resource. + m_system_resource->Open(); } - ASSERT(shemen_info != nullptr); + // Ensure we clean up our secure resource, if we fail. + ON_RESULT_FAILURE { + m_system_resource->Close(); + m_system_resource = nullptr; + }; - if (shemen_info->Close()) { - m_shared_memory_list.erase(iter); - KSharedMemoryInfo::Free(m_kernel, shemen_info); + // Setup page table. + { + const auto as_type = params.flags & Svc::CreateProcessFlag::AddressSpaceMask; + const bool enable_aslr = True(params.flags & Svc::CreateProcessFlag::EnableAslr); + const bool enable_das_merge = + False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge); + R_TRY(m_page_table.InitializeForProcess( + as_type, enable_aslr, enable_das_merge, !enable_aslr, pool, params.code_address, + params.code_num_pages * PageSize, m_system_resource, res_limit, this->GetMemory())); } + ON_RESULT_FAILURE_2 { + m_page_table.Finalize(); + }; - // Close a reference to the shared memory. - shmem->Close(); -} + // Ensure we can insert the code region. + R_UNLESS(m_page_table.CanContain(params.code_address, params.code_num_pages * PageSize, + KMemoryState::Code), + ResultInvalidMemoryRegion); -void KProcess::RegisterThread(KThread* thread) { - KScopedLightLock lk{m_list_lock}; + // Map the code region. + R_TRY(m_page_table.MapPageGroup(params.code_address, pg, KMemoryState::Code, + KMemoryPermission::KernelRead)); - m_thread_list.push_back(thread); -} + // Initialize capabilities. + R_TRY(m_capabilities.InitializeForKip(caps, std::addressof(m_page_table))); -void KProcess::UnregisterThread(KThread* thread) { - KScopedLightLock lk{m_list_lock}; + // Initialize the process id. + m_process_id = m_kernel.CreateNewUserProcessID(); + ASSERT(InitialProcessIdMin <= m_process_id); + ASSERT(m_process_id <= InitialProcessIdMax); - m_thread_list.remove(thread); -} + // Initialize the rest of the process. + R_TRY(this->Initialize(params, res_limit, true)); -u64 KProcess::GetFreeThreadCount() const { - if (m_resource_limit != nullptr) { - const auto current_value = - m_resource_limit->GetCurrentValue(LimitableResource::ThreadCountMax); - const auto limit_value = m_resource_limit->GetLimitValue(LimitableResource::ThreadCountMax); - return limit_value - current_value; - } else { - return 0; - } + // We succeeded! + R_SUCCEED(); } -Result KProcess::Reset() { - // Lock the process and the scheduler. - KScopedLightLock lk(m_state_lock); - KScopedSchedulerLock sl{m_kernel}; +Result KProcess::Initialize(const Svc::CreateProcessParameter& params, + std::span<const u32> user_caps, KResourceLimit* res_limit, + KMemoryManager::Pool pool) { + ASSERT(res_limit != nullptr); - // Validate that we're in a state that we can reset. - R_UNLESS(m_state != State::Terminated, ResultInvalidState); - R_UNLESS(m_is_signaled, ResultInvalidState); + // Set members. + m_memory_pool = pool; + m_is_default_application_system_resource = false; + m_is_immortal = false; - // Clear signaled. - m_is_signaled = false; - R_SUCCEED(); -} + // Get the memory sizes. + const size_t code_num_pages = params.code_num_pages; + const size_t system_resource_num_pages = params.system_resource_num_pages; + const size_t code_size = code_num_pages * PageSize; + const size_t system_resource_size = system_resource_num_pages * PageSize; -Result KProcess::SetActivity(ProcessActivity activity) { - // Lock ourselves and the scheduler. - KScopedLightLock lk{m_state_lock}; - KScopedLightLock list_lk{m_list_lock}; - KScopedSchedulerLock sl{m_kernel}; + // Reserve memory for our code resource. + KScopedResourceReservation memory_reservation( + res_limit, Svc::LimitableResource::PhysicalMemoryMax, code_size); + R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); - // Validate our state. - R_UNLESS(m_state != State::Terminating, ResultInvalidState); - R_UNLESS(m_state != State::Terminated, ResultInvalidState); + // Setup our system resource. + if (system_resource_num_pages != 0) { + // Create a secure system resource. + KSecureSystemResource* secure_resource = KSecureSystemResource::Create(m_kernel); + R_UNLESS(secure_resource != nullptr, ResultOutOfResource); - // Either pause or resume. - if (activity == ProcessActivity::Paused) { - // Verify that we're not suspended. - R_UNLESS(!m_is_suspended, ResultInvalidState); + ON_RESULT_FAILURE { + secure_resource->Close(); + }; - // Suspend all threads. - for (auto* thread : this->GetThreadList()) { - thread->RequestSuspend(SuspendType::Process); - } + // Initialize the secure resource. + R_TRY(secure_resource->Initialize(system_resource_size, res_limit, m_memory_pool)); + + // Set our system resource. + m_system_resource = secure_resource; - // Set ourselves as suspended. - this->SetSuspended(true); } else { - ASSERT(activity == ProcessActivity::Runnable); + // Use the system-wide system resource. + const bool is_app = True(params.flags & Svc::CreateProcessFlag::IsApplication); + m_system_resource = std::addressof(is_app ? m_kernel.GetAppSystemResource() + : m_kernel.GetSystemSystemResource()); - // Verify that we're suspended. - R_UNLESS(m_is_suspended, ResultInvalidState); + m_is_default_application_system_resource = is_app; - // Resume all threads. - for (auto* thread : this->GetThreadList()) { - thread->Resume(SuspendType::Process); - } + // Open reference to the system resource. + m_system_resource->Open(); + } - // Set ourselves as resumed. - this->SetSuspended(false); + // Ensure we clean up our secure resource, if we fail. + ON_RESULT_FAILURE { + m_system_resource->Close(); + m_system_resource = nullptr; + }; + + // Setup page table. + { + const auto as_type = params.flags & Svc::CreateProcessFlag::AddressSpaceMask; + const bool enable_aslr = True(params.flags & Svc::CreateProcessFlag::EnableAslr); + const bool enable_das_merge = + False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge); + R_TRY(m_page_table.InitializeForProcess(as_type, enable_aslr, enable_das_merge, + !enable_aslr, pool, params.code_address, code_size, + m_system_resource, res_limit, this->GetMemory())); + } + ON_RESULT_FAILURE_2 { + m_page_table.Finalize(); + }; + + // Ensure we can insert the code region. + R_UNLESS(m_page_table.CanContain(params.code_address, code_size, KMemoryState::Code), + ResultInvalidMemoryRegion); + + // Map the code region. + R_TRY(m_page_table.MapPages(params.code_address, code_num_pages, KMemoryState::Code, + KMemoryPermission::KernelRead | KMemoryPermission::NotMapped)); + + // Initialize capabilities. + R_TRY(m_capabilities.InitializeForUser(user_caps, std::addressof(m_page_table))); + + // Initialize the process id. + m_process_id = m_kernel.CreateNewUserProcessID(); + ASSERT(ProcessIdMin <= m_process_id); + ASSERT(m_process_id <= ProcessIdMax); + + // If we should optimize memory allocations, do so. + if (m_system_resource->IsSecureResource() && + True(params.flags & Svc::CreateProcessFlag::OptimizeMemoryAllocation)) { + R_TRY(m_kernel.MemoryManager().InitializeOptimizedMemory(m_process_id, pool)); } + // Initialize the rest of the process. + R_TRY(this->Initialize(params, res_limit, true)); + + // We succeeded, so commit our memory reservation. + memory_reservation.Commit(); R_SUCCEED(); } -Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, - bool is_hbl) { - m_program_id = metadata.GetTitleID(); - m_ideal_core = metadata.GetMainThreadCore(); - m_is_64bit_process = metadata.Is64BitProgram(); - m_system_resource_size = metadata.GetSystemResourceSize(); - m_image_size = code_size; - m_is_hbl = is_hbl; +void KProcess::DoWorkerTaskImpl() { + // Terminate child threads. + TerminateChildren(m_kernel, this, nullptr); - if (metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is39Bit) { - // For 39-bit processes, the ASLR region starts at 0x800'0000 and is ~512GiB large. - // However, some (buggy) programs/libraries like skyline incorrectly depend on the - // existence of ASLR pages before the entry point, so we will adjust the load address - // to point to about 2GiB into the ASLR region. - m_code_address = 0x8000'0000; - } else { - // All other processes can be mapped at the beginning of the code region. - if (metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is36Bit) { - m_code_address = 0x800'0000; - } else { - m_code_address = 0x20'0000; - } + // Finalize the handle table, if we're not immortal. + if (!m_is_immortal && m_is_handle_table_initialized) { + this->FinalizeHandleTable(); } - KScopedResourceReservation memory_reservation( - m_resource_limit, LimitableResource::PhysicalMemoryMax, code_size + m_system_resource_size); - if (!memory_reservation.Succeeded()) { - LOG_ERROR(Kernel, "Could not reserve process memory requirements of size {:X} bytes", - code_size + m_system_resource_size); - R_RETURN(ResultLimitReached); - } - // Initialize process address space - if (const Result result{m_page_table.InitializeForProcess( - metadata.GetAddressSpaceType(), false, false, false, KMemoryManager::Pool::Application, - this->GetEntryPoint(), code_size, std::addressof(m_kernel.GetAppSystemResource()), - m_resource_limit, m_kernel.System().ApplicationMemory())}; - result.IsError()) { - R_RETURN(result); - } - - // Map process code region - if (const Result result{m_page_table.MapProcessCode(this->GetEntryPoint(), code_size / PageSize, - KMemoryState::Code, - KMemoryPermission::None)}; - result.IsError()) { - R_RETURN(result); - } - - // Initialize process capabilities - const auto& caps{metadata.GetKernelCapabilities()}; - if (const Result result{ - m_capabilities.InitializeForUserProcess(caps.data(), caps.size(), m_page_table)}; - result.IsError()) { - R_RETURN(result); - } - - // Set memory usage capacity - switch (metadata.GetAddressSpaceType()) { - case FileSys::ProgramAddressSpaceType::Is32Bit: - case FileSys::ProgramAddressSpaceType::Is36Bit: - case FileSys::ProgramAddressSpaceType::Is39Bit: - m_memory_usage_capacity = - m_page_table.GetHeapRegionEnd() - m_page_table.GetHeapRegionStart(); - break; + // Finish termination. + this->FinishTermination(); +} - case FileSys::ProgramAddressSpaceType::Is32BitNoMap: - m_memory_usage_capacity = - (m_page_table.GetHeapRegionEnd() - m_page_table.GetHeapRegionStart()) + - (m_page_table.GetAliasRegionEnd() - m_page_table.GetAliasRegionStart()); - break; +Result KProcess::StartTermination() { + // Finalize the handle table when we're done, if the process isn't immortal. + SCOPE_EXIT({ + if (!m_is_immortal) { + this->FinalizeHandleTable(); + } + }); - default: - ASSERT(false); - break; - } + // Terminate child threads other than the current one. + R_RETURN(TerminateChildren(m_kernel, this, GetCurrentThreadPointer(m_kernel))); +} - // Create TLS region - R_TRY(this->CreateThreadLocalRegion(std::addressof(m_plr_address))); - memory_reservation.Commit(); +void KProcess::FinishTermination() { + // Only allow termination to occur if the process isn't immortal. + if (!m_is_immortal) { + // Release resource limit hint. + if (m_resource_limit != nullptr) { + m_memory_release_hint = this->GetUsedNonSystemUserPhysicalMemorySize(); + m_resource_limit->Release(Svc::LimitableResource::PhysicalMemoryMax, 0, + m_memory_release_hint); + } + + // Change state. + { + KScopedSchedulerLock sl(m_kernel); + this->ChangeState(State::Terminated); + } - R_RETURN(m_handle_table.Initialize(m_capabilities.GetHandleTableSize())); + // Close. + this->Close(); + } } -void KProcess::Run(s32 main_thread_priority, u64 stack_size) { - ASSERT(this->AllocateMainThreadStack(stack_size) == ResultSuccess); - m_resource_limit->Reserve(LimitableResource::ThreadCountMax, 1); +void KProcess::Exit() { + // Determine whether we need to start terminating + bool needs_terminate = false; + { + KScopedLightLock lk(m_state_lock); + KScopedSchedulerLock sl(m_kernel); + + ASSERT(m_state != State::Created); + ASSERT(m_state != State::CreatedAttached); + ASSERT(m_state != State::Crashed); + ASSERT(m_state != State::Terminated); + if (m_state == State::Running || m_state == State::RunningAttached || + m_state == State::DebugBreak) { + this->ChangeState(State::Terminating); + needs_terminate = true; + } + } - const std::size_t heap_capacity{m_memory_usage_capacity - - (m_main_thread_stack_size + m_image_size)}; - ASSERT(!m_page_table.SetMaxHeapSize(heap_capacity).IsError()); + // If we need to start termination, do so. + if (needs_terminate) { + this->StartTermination(); - this->ChangeState(State::Running); + // Register the process as a work task. + m_kernel.WorkerTaskManager().AddTask(m_kernel, KWorkerTaskManager::WorkerType::Exit, this); + } - SetupMainThread(m_kernel.System(), *this, main_thread_priority, m_main_thread_stack_top); + // Exit the current thread. + GetCurrentThread(m_kernel).Exit(); } -void KProcess::PrepareForTermination() { - this->ChangeState(State::Terminating); +Result KProcess::Terminate() { + // Determine whether we need to start terminating. + bool needs_terminate = false; + { + KScopedLightLock lk(m_state_lock); - const auto stop_threads = [this](const std::vector<KThread*>& in_thread_list) { - for (auto* thread : in_thread_list) { - if (thread->GetOwnerProcess() != this) - continue; + // Check whether we're allowed to terminate. + R_UNLESS(m_state != State::Created, ResultInvalidState); + R_UNLESS(m_state != State::CreatedAttached, ResultInvalidState); - if (thread == GetCurrentThreadPointer(m_kernel)) - continue; + KScopedSchedulerLock sl(m_kernel); - // TODO(Subv): When are the other running/ready threads terminated? - ASSERT_MSG(thread->GetState() == ThreadState::Waiting, - "Exiting processes with non-waiting threads is currently unimplemented"); + if (m_state == State::Running || m_state == State::RunningAttached || + m_state == State::Crashed || m_state == State::DebugBreak) { + this->ChangeState(State::Terminating); + needs_terminate = true; + } + } - thread->Exit(); + // If we need to terminate, do so. + if (needs_terminate) { + // Start termination. + if (R_SUCCEEDED(this->StartTermination())) { + // Finish termination. + this->FinishTermination(); + } else { + // Register the process as a work task. + m_kernel.WorkerTaskManager().AddTask(m_kernel, KWorkerTaskManager::WorkerType::Exit, + this); } - }; + } - stop_threads(m_kernel.System().GlobalSchedulerContext().GetThreadList()); + R_SUCCEED(); +} - this->DeleteThreadLocalRegion(m_plr_address); - m_plr_address = 0; +Result KProcess::AddSharedMemory(KSharedMemory* shmem, KProcessAddress address, size_t size) { + // Lock ourselves, to prevent concurrent access. + KScopedLightLock lk(m_state_lock); - if (m_resource_limit) { - m_resource_limit->Release(LimitableResource::PhysicalMemoryMax, - m_main_thread_stack_size + m_image_size); + // Try to find an existing info for the memory. + KSharedMemoryInfo* info = nullptr; + for (auto it = m_shared_memory_list.begin(); it != m_shared_memory_list.end(); ++it) { + if (it->GetSharedMemory() == shmem) { + info = std::addressof(*it); + break; + } } - this->ChangeState(State::Terminated); -} + // If we didn't find an info, create one. + if (info == nullptr) { + // Allocate a new info. + info = KSharedMemoryInfo::Allocate(m_kernel); + R_UNLESS(info != nullptr, ResultOutOfResource); -void KProcess::Finalize() { - // Free all shared memory infos. - { - auto it = m_shared_memory_list.begin(); - while (it != m_shared_memory_list.end()) { - KSharedMemoryInfo* info = *it; - KSharedMemory* shmem = info->GetSharedMemory(); + // Initialize the info and add it to our list. + info->Initialize(shmem); + m_shared_memory_list.push_back(*info); + } - while (!info->Close()) { - shmem->Close(); - } + // Open a reference to the shared memory and its info. + shmem->Open(); + info->Open(); - shmem->Close(); + R_SUCCEED(); +} - it = m_shared_memory_list.erase(it); - KSharedMemoryInfo::Free(m_kernel, info); +void KProcess::RemoveSharedMemory(KSharedMemory* shmem, KProcessAddress address, size_t size) { + // Lock ourselves, to prevent concurrent access. + KScopedLightLock lk(m_state_lock); + + // Find an existing info for the memory. + KSharedMemoryInfo* info = nullptr; + auto it = m_shared_memory_list.begin(); + for (; it != m_shared_memory_list.end(); ++it) { + if (it->GetSharedMemory() == shmem) { + info = std::addressof(*it); + break; } } + ASSERT(info != nullptr); - // Release memory to the resource limit. - if (m_resource_limit != nullptr) { - m_resource_limit->Close(); - m_resource_limit = nullptr; + // Close a reference to the info and its memory. + if (info->Close()) { + m_shared_memory_list.erase(it); + KSharedMemoryInfo::Free(m_kernel, info); } - // Finalize the page table. - m_page_table.Finalize(); - - // Perform inherited finalization. - KSynchronizationObject::Finalize(); + shmem->Close(); } Result KProcess::CreateThreadLocalRegion(KProcessAddress* out) { @@ -518,7 +601,7 @@ Result KProcess::CreateThreadLocalRegion(KProcessAddress* out) { // See if we can get a region from a partially used TLP. { - KScopedSchedulerLock sl{m_kernel}; + KScopedSchedulerLock sl(m_kernel); if (auto it = m_partially_used_tlp_tree.begin(); it != m_partially_used_tlp_tree.end()) { tlr = it->Reserve(); @@ -538,7 +621,9 @@ Result KProcess::CreateThreadLocalRegion(KProcessAddress* out) { // Allocate a new page. tlp = KThreadLocalPage::Allocate(m_kernel); R_UNLESS(tlp != nullptr, ResultOutOfMemory); - auto tlp_guard = SCOPE_GUARD({ KThreadLocalPage::Free(m_kernel, tlp); }); + ON_RESULT_FAILURE { + KThreadLocalPage::Free(m_kernel, tlp); + }; // Initialize the new page. R_TRY(tlp->Initialize(m_kernel, this)); @@ -549,7 +634,7 @@ Result KProcess::CreateThreadLocalRegion(KProcessAddress* out) { // Insert into our tree. { - KScopedSchedulerLock sl{m_kernel}; + KScopedSchedulerLock sl(m_kernel); if (tlp->IsAllUsed()) { m_fully_used_tlp_tree.insert(*tlp); } else { @@ -558,7 +643,6 @@ Result KProcess::CreateThreadLocalRegion(KProcessAddress* out) { } // We succeeded! - tlp_guard.Cancel(); *out = tlr; R_SUCCEED(); } @@ -568,7 +652,7 @@ Result KProcess::DeleteThreadLocalRegion(KProcessAddress addr) { // Release the region. { - KScopedSchedulerLock sl{m_kernel}; + KScopedSchedulerLock sl(m_kernel); // Try to find the page in the partially used list. auto it = m_partially_used_tlp_tree.find_key(Common::AlignDown(GetInteger(addr), PageSize)); @@ -611,95 +695,213 @@ Result KProcess::DeleteThreadLocalRegion(KProcessAddress addr) { R_SUCCEED(); } -bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) { - const auto watch{std::find_if(m_watchpoints.begin(), m_watchpoints.end(), [&](const auto& wp) { - return wp.type == DebugWatchpointType::None; - })}; +bool KProcess::ReserveResource(Svc::LimitableResource which, s64 value) { + if (KResourceLimit* rl = this->GetResourceLimit(); rl != nullptr) { + return rl->Reserve(which, value); + } else { + return true; + } +} - if (watch == m_watchpoints.end()) { - return false; +bool KProcess::ReserveResource(Svc::LimitableResource which, s64 value, s64 timeout) { + if (KResourceLimit* rl = this->GetResourceLimit(); rl != nullptr) { + return rl->Reserve(which, value, timeout); + } else { + return true; } +} - watch->start_address = addr; - watch->end_address = addr + size; - watch->type = type; +void KProcess::ReleaseResource(Svc::LimitableResource which, s64 value) { + if (KResourceLimit* rl = this->GetResourceLimit(); rl != nullptr) { + rl->Release(which, value); + } +} - for (KProcessAddress page = Common::AlignDown(GetInteger(addr), PageSize); page < addr + size; - page += PageSize) { - m_debug_page_refcounts[page]++; - this->GetMemory().MarkRegionDebug(page, PageSize, true); +void KProcess::ReleaseResource(Svc::LimitableResource which, s64 value, s64 hint) { + if (KResourceLimit* rl = this->GetResourceLimit(); rl != nullptr) { + rl->Release(which, value, hint); } +} - return true; +void KProcess::IncrementRunningThreadCount() { + ASSERT(m_num_running_threads.load() >= 0); + + ++m_num_running_threads; } -bool KProcess::RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) { - const auto watch{std::find_if(m_watchpoints.begin(), m_watchpoints.end(), [&](const auto& wp) { - return wp.start_address == addr && wp.end_address == addr + size && wp.type == type; - })}; +void KProcess::DecrementRunningThreadCount() { + ASSERT(m_num_running_threads.load() > 0); - if (watch == m_watchpoints.end()) { + if (const auto prev = m_num_running_threads--; prev == 1) { + this->Terminate(); + } +} + +bool KProcess::EnterUserException() { + // Get the current thread. + KThread* cur_thread = GetCurrentThreadPointer(m_kernel); + ASSERT(this == cur_thread->GetOwnerProcess()); + + // Check that we haven't already claimed the exception thread. + if (m_exception_thread == cur_thread) { return false; } - watch->start_address = 0; - watch->end_address = 0; - watch->type = DebugWatchpointType::None; + // Create the wait queue we'll be using. + ThreadQueueImplForKProcessEnterUserException wait_queue(m_kernel, + std::addressof(m_exception_thread)); - for (KProcessAddress page = Common::AlignDown(GetInteger(addr), PageSize); page < addr + size; - page += PageSize) { - m_debug_page_refcounts[page]--; - if (!m_debug_page_refcounts[page]) { - this->GetMemory().MarkRegionDebug(page, PageSize, false); + // Claim the exception thread. + { + // Lock the scheduler. + KScopedSchedulerLock sl(m_kernel); + + // Check that we're not terminating. + if (cur_thread->IsTerminationRequested()) { + return false; + } + + // If we don't have an exception thread, we can just claim it directly. + if (m_exception_thread == nullptr) { + m_exception_thread = cur_thread; + KScheduler::SetSchedulerUpdateNeeded(m_kernel); + return true; } + + // Otherwise, we need to wait until we don't have an exception thread. + + // Add the current thread as a waiter on the current exception thread. + cur_thread->SetKernelAddressKey( + reinterpret_cast<uintptr_t>(std::addressof(m_exception_thread)) | 1); + m_exception_thread->AddWaiter(cur_thread); + + // Wait to claim the exception thread. + cur_thread->BeginWait(std::addressof(wait_queue)); } - return true; + // If our wait didn't end due to thread termination, we succeeded. + return ResultTerminationRequested != cur_thread->GetWaitResult(); } -void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) { - const auto ReprotectSegment = [&](const CodeSet::Segment& segment, - Svc::MemoryPermission permission) { - m_page_table.SetProcessMemoryPermission(segment.addr + base_addr, segment.size, permission); - }; +bool KProcess::LeaveUserException() { + return this->ReleaseUserException(GetCurrentThreadPointer(m_kernel)); +} - this->GetMemory().WriteBlock(base_addr, code_set.memory.data(), code_set.memory.size()); +bool KProcess::ReleaseUserException(KThread* thread) { + KScopedSchedulerLock sl(m_kernel); - ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute); - ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read); - ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); + if (m_exception_thread == thread) { + m_exception_thread = nullptr; + + // Remove waiter thread. + bool has_waiters; + if (KThread* next = thread->RemoveKernelWaiterByKey( + std::addressof(has_waiters), + reinterpret_cast<uintptr_t>(std::addressof(m_exception_thread)) | 1); + next != nullptr) { + next->EndWait(ResultSuccess); + } + + KScheduler::SetSchedulerUpdateNeeded(m_kernel); + + return true; + } else { + return false; + } } -bool KProcess::IsSignaled() const { - ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel)); - return m_is_signaled; +void KProcess::RegisterThread(KThread* thread) { + KScopedLightLock lk(m_list_lock); + + m_thread_list.push_back(*thread); } -KProcess::KProcess(KernelCore& kernel) - : KAutoObjectWithSlabHeapAndContainer{kernel}, m_page_table{m_kernel.System()}, - m_handle_table{m_kernel}, m_address_arbiter{m_kernel.System()}, - m_condition_var{m_kernel.System()}, m_state_lock{m_kernel}, m_list_lock{m_kernel} {} +void KProcess::UnregisterThread(KThread* thread) { + KScopedLightLock lk(m_list_lock); -KProcess::~KProcess() = default; + m_thread_list.erase(m_thread_list.iterator_to(*thread)); +} + +size_t KProcess::GetUsedUserPhysicalMemorySize() const { + const size_t norm_size = m_page_table.GetNormalMemorySize(); + const size_t other_size = m_code_size + m_main_thread_stack_size; + const size_t sec_size = this->GetRequiredSecureMemorySizeNonDefault(); -void KProcess::ChangeState(State new_state) { - if (m_state == new_state) { - return; + return norm_size + other_size + sec_size; +} + +size_t KProcess::GetTotalUserPhysicalMemorySize() const { + // Get the amount of free and used size. + const size_t free_size = + m_resource_limit->GetFreeValue(Svc::LimitableResource::PhysicalMemoryMax); + const size_t max_size = m_max_process_memory; + + // Determine used size. + // NOTE: This does *not* check this->IsDefaultApplicationSystemResource(), unlike + // GetUsedUserPhysicalMemorySize(). + const size_t norm_size = m_page_table.GetNormalMemorySize(); + const size_t other_size = m_code_size + m_main_thread_stack_size; + const size_t sec_size = this->GetRequiredSecureMemorySize(); + const size_t used_size = norm_size + other_size + sec_size; + + // NOTE: These function calls will recalculate, introducing a race...it is unclear why Nintendo + // does it this way. + if (used_size + free_size > max_size) { + return max_size; + } else { + return free_size + this->GetUsedUserPhysicalMemorySize(); } +} - m_state = new_state; - m_is_signaled = true; - this->NotifyAvailable(); +size_t KProcess::GetUsedNonSystemUserPhysicalMemorySize() const { + const size_t norm_size = m_page_table.GetNormalMemorySize(); + const size_t other_size = m_code_size + m_main_thread_stack_size; + + return norm_size + other_size; +} + +size_t KProcess::GetTotalNonSystemUserPhysicalMemorySize() const { + // Get the amount of free and used size. + const size_t free_size = + m_resource_limit->GetFreeValue(Svc::LimitableResource::PhysicalMemoryMax); + const size_t max_size = m_max_process_memory; + + // Determine used size. + // NOTE: This does *not* check this->IsDefaultApplicationSystemResource(), unlike + // GetUsedUserPhysicalMemorySize(). + const size_t norm_size = m_page_table.GetNormalMemorySize(); + const size_t other_size = m_code_size + m_main_thread_stack_size; + const size_t sec_size = this->GetRequiredSecureMemorySize(); + const size_t used_size = norm_size + other_size + sec_size; + + // NOTE: These function calls will recalculate, introducing a race...it is unclear why Nintendo + // does it this way. + if (used_size + free_size > max_size) { + return max_size - this->GetRequiredSecureMemorySizeNonDefault(); + } else { + return free_size + this->GetUsedNonSystemUserPhysicalMemorySize(); + } } -Result KProcess::AllocateMainThreadStack(std::size_t stack_size) { +Result KProcess::Run(s32 priority, size_t stack_size) { + // Lock ourselves, to prevent concurrent access. + KScopedLightLock lk(m_state_lock); + + // Validate that we're in a state where we can initialize. + const auto state = m_state; + R_UNLESS(state == State::Created || state == State::CreatedAttached, ResultInvalidState); + + // Place a tentative reservation of a thread for this process. + KScopedResourceReservation thread_reservation(this, Svc::LimitableResource::ThreadCountMax); + R_UNLESS(thread_reservation.Succeeded(), ResultLimitReached); + // Ensure that we haven't already allocated stack. ASSERT(m_main_thread_stack_size == 0); // Ensure that we're allocating a valid stack. stack_size = Common::AlignUp(stack_size, PageSize); - // R_UNLESS(stack_size + image_size <= m_max_process_memory, ResultOutOfMemory); - R_UNLESS(stack_size + m_image_size >= m_image_size, ResultOutOfMemory); + R_UNLESS(stack_size + m_code_size <= m_max_process_memory, ResultOutOfMemory); + R_UNLESS(stack_size + m_code_size >= m_code_size, ResultOutOfMemory); // Place a tentative reservation of memory for our new stack. KScopedResourceReservation mem_reservation(this, Svc::LimitableResource::PhysicalMemoryMax, @@ -707,21 +909,359 @@ Result KProcess::AllocateMainThreadStack(std::size_t stack_size) { R_UNLESS(mem_reservation.Succeeded(), ResultLimitReached); // Allocate and map our stack. + KProcessAddress stack_top = 0; if (stack_size) { KProcessAddress stack_bottom; R_TRY(m_page_table.MapPages(std::addressof(stack_bottom), stack_size / PageSize, KMemoryState::Stack, KMemoryPermission::UserReadWrite)); - m_main_thread_stack_top = stack_bottom + stack_size; + stack_top = stack_bottom + stack_size; m_main_thread_stack_size = stack_size; } + // Ensure our stack is safe to clean up on exit. + ON_RESULT_FAILURE { + if (m_main_thread_stack_size) { + ASSERT(R_SUCCEEDED(m_page_table.UnmapPages(stack_top - m_main_thread_stack_size, + m_main_thread_stack_size / PageSize, + KMemoryState::Stack))); + m_main_thread_stack_size = 0; + } + }; + + // Set our maximum heap size. + R_TRY(m_page_table.SetMaxHeapSize(m_max_process_memory - + (m_main_thread_stack_size + m_code_size))); + + // Initialize our handle table. + R_TRY(this->InitializeHandleTable(m_capabilities.GetHandleTableSize())); + ON_RESULT_FAILURE_2 { + this->FinalizeHandleTable(); + }; + + // Create a new thread for the process. + KThread* main_thread = KThread::Create(m_kernel); + R_UNLESS(main_thread != nullptr, ResultOutOfResource); + SCOPE_EXIT({ main_thread->Close(); }); + + // Initialize the thread. + R_TRY(KThread::InitializeUserThread(m_kernel.System(), main_thread, this->GetEntryPoint(), 0, + stack_top, priority, m_ideal_core_id, this)); + + // Register the thread, and commit our reservation. + KThread::Register(m_kernel, main_thread); + thread_reservation.Commit(); + + // Add the thread to our handle table. + Handle thread_handle; + R_TRY(m_handle_table.Add(std::addressof(thread_handle), main_thread)); + + // Set the thread arguments. + main_thread->GetContext32().cpu_registers[0] = 0; + main_thread->GetContext64().cpu_registers[0] = 0; + main_thread->GetContext32().cpu_registers[1] = thread_handle; + main_thread->GetContext64().cpu_registers[1] = thread_handle; + + // Update our state. + this->ChangeState((state == State::Created) ? State::Running : State::RunningAttached); + ON_RESULT_FAILURE_2 { + this->ChangeState(state); + }; + + // Suspend for debug, if we should. + if (m_kernel.System().DebuggerEnabled()) { + main_thread->RequestSuspend(SuspendType::Debug); + } + + // Run our thread. + R_TRY(main_thread->Run()); + + // Open a reference to represent that we're running. + this->Open(); + // We succeeded! Commit our memory reservation. mem_reservation.Commit(); R_SUCCEED(); } +Result KProcess::Reset() { + // Lock the process and the scheduler. + KScopedLightLock lk(m_state_lock); + KScopedSchedulerLock sl(m_kernel); + + // Validate that we're in a state that we can reset. + R_UNLESS(m_state != State::Terminated, ResultInvalidState); + R_UNLESS(m_is_signaled, ResultInvalidState); + + // Clear signaled. + m_is_signaled = false; + R_SUCCEED(); +} + +Result KProcess::SetActivity(Svc::ProcessActivity activity) { + // Lock ourselves and the scheduler. + KScopedLightLock lk(m_state_lock); + KScopedLightLock list_lk(m_list_lock); + KScopedSchedulerLock sl(m_kernel); + + // Validate our state. + R_UNLESS(m_state != State::Terminating, ResultInvalidState); + R_UNLESS(m_state != State::Terminated, ResultInvalidState); + + // Either pause or resume. + if (activity == Svc::ProcessActivity::Paused) { + // Verify that we're not suspended. + R_UNLESS(!m_is_suspended, ResultInvalidState); + + // Suspend all threads. + auto end = this->GetThreadList().end(); + for (auto it = this->GetThreadList().begin(); it != end; ++it) { + it->RequestSuspend(SuspendType::Process); + } + + // Set ourselves as suspended. + this->SetSuspended(true); + } else { + ASSERT(activity == Svc::ProcessActivity::Runnable); + + // Verify that we're suspended. + R_UNLESS(m_is_suspended, ResultInvalidState); + + // Resume all threads. + auto end = this->GetThreadList().end(); + for (auto it = this->GetThreadList().begin(); it != end; ++it) { + it->Resume(SuspendType::Process); + } + + // Set ourselves as resumed. + this->SetSuspended(false); + } + + R_SUCCEED(); +} + +void KProcess::PinCurrentThread() { + ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel)); + + // Get the current thread. + const s32 core_id = GetCurrentCoreId(m_kernel); + KThread* cur_thread = GetCurrentThreadPointer(m_kernel); + + // If the thread isn't terminated, pin it. + if (!cur_thread->IsTerminationRequested()) { + // Pin it. + this->PinThread(core_id, cur_thread); + cur_thread->Pin(core_id); + + // An update is needed. + KScheduler::SetSchedulerUpdateNeeded(m_kernel); + } +} + +void KProcess::UnpinCurrentThread() { + ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel)); + + // Get the current thread. + const s32 core_id = GetCurrentCoreId(m_kernel); + KThread* cur_thread = GetCurrentThreadPointer(m_kernel); + + // Unpin it. + cur_thread->Unpin(); + this->UnpinThread(core_id, cur_thread); + + // An update is needed. + KScheduler::SetSchedulerUpdateNeeded(m_kernel); +} + +void KProcess::UnpinThread(KThread* thread) { + ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel)); + + // Get the thread's core id. + const auto core_id = thread->GetActiveCore(); + + // Unpin it. + this->UnpinThread(core_id, thread); + thread->Unpin(); + + // An update is needed. + KScheduler::SetSchedulerUpdateNeeded(m_kernel); +} + +Result KProcess::GetThreadList(s32* out_num_threads, KProcessAddress out_thread_ids, + s32 max_out_count) { + // TODO: use current memory reference + auto& memory = m_kernel.System().ApplicationMemory(); + + // Lock the list. + KScopedLightLock lk(m_list_lock); + + // Iterate over the list. + s32 count = 0; + auto end = this->GetThreadList().end(); + for (auto it = this->GetThreadList().begin(); it != end; ++it) { + // If we're within array bounds, write the id. + if (count < max_out_count) { + // Get the thread id. + KThread* thread = std::addressof(*it); + const u64 id = thread->GetId(); + + // Copy the id to userland. + memory.Write64(out_thread_ids + count * sizeof(u64), id); + } + + // Increment the count. + ++count; + } + + // We successfully iterated the list. + *out_num_threads = count; + R_SUCCEED(); +} + +void KProcess::Switch(KProcess* cur_process, KProcess* next_process) {} + +KProcess::KProcess(KernelCore& kernel) + : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel.System()}, + m_state_lock{kernel}, m_list_lock{kernel}, m_cond_var{kernel.System()}, + m_address_arbiter{kernel.System()}, m_handle_table{kernel} {} +KProcess::~KProcess() = default; + +Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, + bool is_hbl) { + // Create a resource limit for the process. + const auto physical_memory_size = + m_kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application); + auto* res_limit = + Kernel::CreateResourceLimitForProcess(m_kernel.System(), physical_memory_size); + + // Ensure we maintain a clean state on exit. + SCOPE_EXIT({ res_limit->Close(); }); + + // Declare flags and code address. + Svc::CreateProcessFlag flag{}; + u64 code_address{}; + + // We are an application. + flag |= Svc::CreateProcessFlag::IsApplication; + + // If we are 64-bit, create as such. + if (metadata.Is64BitProgram()) { + flag |= Svc::CreateProcessFlag::Is64Bit; + } + + // Set the address space type and code address. + switch (metadata.GetAddressSpaceType()) { + case FileSys::ProgramAddressSpaceType::Is39Bit: + flag |= Svc::CreateProcessFlag::AddressSpace64Bit; + + // For 39-bit processes, the ASLR region starts at 0x800'0000 and is ~512GiB large. + // However, some (buggy) programs/libraries like skyline incorrectly depend on the + // existence of ASLR pages before the entry point, so we will adjust the load address + // to point to about 2GiB into the ASLR region. + code_address = 0x8000'0000; + break; + case FileSys::ProgramAddressSpaceType::Is36Bit: + flag |= Svc::CreateProcessFlag::AddressSpace64BitDeprecated; + code_address = 0x800'0000; + break; + case FileSys::ProgramAddressSpaceType::Is32Bit: + flag |= Svc::CreateProcessFlag::AddressSpace32Bit; + code_address = 0x20'0000; + break; + case FileSys::ProgramAddressSpaceType::Is32BitNoMap: + flag |= Svc::CreateProcessFlag::AddressSpace32BitWithoutAlias; + code_address = 0x20'0000; + break; + } + + Svc::CreateProcessParameter params{ + .name = {}, + .version = {}, + .program_id = metadata.GetTitleID(), + .code_address = code_address, + .code_num_pages = static_cast<s32>(code_size / PageSize), + .flags = flag, + .reslimit = Svc::InvalidHandle, + .system_resource_num_pages = static_cast<s32>(metadata.GetSystemResourceSize() / PageSize), + }; + + // Set the process name. + const auto& name = metadata.GetName(); + static_assert(sizeof(params.name) <= sizeof(name)); + std::memcpy(params.name.data(), name.data(), sizeof(params.name)); + + // Initialize for application process. + R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit, + KMemoryManager::Pool::Application)); + + // Assign remaining properties. + m_is_hbl = is_hbl; + m_ideal_core_id = metadata.GetMainThreadCore(); + + // We succeeded. + R_SUCCEED(); +} + +void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) { + const auto ReprotectSegment = [&](const CodeSet::Segment& segment, + Svc::MemoryPermission permission) { + m_page_table.SetProcessMemoryPermission(segment.addr + base_addr, segment.size, permission); + }; + + this->GetMemory().WriteBlock(base_addr, code_set.memory.data(), code_set.memory.size()); + + ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute); + ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read); + ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); +} + +bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) { + const auto watch{std::find_if(m_watchpoints.begin(), m_watchpoints.end(), [&](const auto& wp) { + return wp.type == DebugWatchpointType::None; + })}; + + if (watch == m_watchpoints.end()) { + return false; + } + + watch->start_address = addr; + watch->end_address = addr + size; + watch->type = type; + + for (KProcessAddress page = Common::AlignDown(GetInteger(addr), PageSize); page < addr + size; + page += PageSize) { + m_debug_page_refcounts[page]++; + this->GetMemory().MarkRegionDebug(page, PageSize, true); + } + + return true; +} + +bool KProcess::RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) { + const auto watch{std::find_if(m_watchpoints.begin(), m_watchpoints.end(), [&](const auto& wp) { + return wp.start_address == addr && wp.end_address == addr + size && wp.type == type; + })}; + + if (watch == m_watchpoints.end()) { + return false; + } + + watch->start_address = 0; + watch->end_address = 0; + watch->type = DebugWatchpointType::None; + + for (KProcessAddress page = Common::AlignDown(GetInteger(addr), PageSize); page < addr + size; + page += PageSize) { + m_debug_page_refcounts[page]--; + if (!m_debug_page_refcounts[page]) { + this->GetMemory().MarkRegionDebug(page, PageSize, false); + } + } + + return true; +} + Core::Memory::Memory& KProcess::GetMemory() const { // TODO: per-process memory return m_kernel.System().ApplicationMemory(); diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index 146e07a57..f9f755afa 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -1,59 +1,23 @@ -// SPDX-FileCopyrightText: 2015 Citra Emulator Project +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once -#include <array> -#include <cstddef> -#include <list> #include <map> -#include <string> + +#include "core/hle/kernel/code_set.h" #include "core/hle/kernel/k_address_arbiter.h" -#include "core/hle/kernel/k_auto_object.h" +#include "core/hle/kernel/k_capabilities.h" #include "core/hle/kernel/k_condition_variable.h" #include "core/hle/kernel/k_handle_table.h" #include "core/hle/kernel/k_page_table.h" -#include "core/hle/kernel/k_synchronization_object.h" +#include "core/hle/kernel/k_page_table_manager.h" +#include "core/hle/kernel/k_system_resource.h" +#include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/k_thread_local_page.h" -#include "core/hle/kernel/k_typed_address.h" -#include "core/hle/kernel/k_worker_task.h" -#include "core/hle/kernel/process_capability.h" -#include "core/hle/kernel/slab_helpers.h" -#include "core/hle/result.h" - -namespace Core { -namespace Memory { -class Memory; -}; - -class System; -} // namespace Core - -namespace FileSys { -class ProgramMetadata; -} namespace Kernel { -class KernelCore; -class KResourceLimit; -class KThread; -class KSharedMemoryInfo; -class TLSPage; - -struct CodeSet; - -enum class MemoryRegion : u16 { - APPLICATION = 1, - SYSTEM = 2, - BASE = 3, -}; - -enum class ProcessActivity : u32 { - Runnable, - Paused, -}; - enum class DebugWatchpointType : u8 { None = 0, Read = 1 << 0, @@ -72,9 +36,6 @@ class KProcess final : public KAutoObjectWithSlabHeapAndContainer<KProcess, KWor KERNEL_AUTOOBJECT_TRAITS(KProcess, KSynchronizationObject); public: - explicit KProcess(KernelCore& kernel); - ~KProcess() override; - enum class State { Created = static_cast<u32>(Svc::ProcessState::Created), CreatedAttached = static_cast<u32>(Svc::ProcessState::CreatedAttached), @@ -86,470 +47,493 @@ public: DebugBreak = static_cast<u32>(Svc::ProcessState::DebugBreak), }; - enum : u64 { - /// Lowest allowed process ID for a kernel initial process. - InitialKIPIDMin = 1, - /// Highest allowed process ID for a kernel initial process. - InitialKIPIDMax = 80, - - /// Lowest allowed process ID for a userland process. - ProcessIDMin = 81, - /// Highest allowed process ID for a userland process. - ProcessIDMax = 0xFFFFFFFFFFFFFFFF, - }; + using ThreadList = Common::IntrusiveListMemberTraits<&KThread::m_process_list_node>::ListType; - // Used to determine how process IDs are assigned. - enum class ProcessType { - KernelInternal, - Userland, - }; + static constexpr size_t AslrAlignment = 2_MiB; - static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4; +public: + static constexpr u64 InitialProcessIdMin = 1; + static constexpr u64 InitialProcessIdMax = 0x50; - static Result Initialize(KProcess* process, Core::System& system, std::string process_name, - ProcessType type, KResourceLimit* res_limit); + static constexpr u64 ProcessIdMin = InitialProcessIdMax + 1; + static constexpr u64 ProcessIdMax = std::numeric_limits<u64>::max(); - /// Gets a reference to the process' page table. - KPageTable& GetPageTable() { - return m_page_table; - } +private: + using SharedMemoryInfoList = Common::IntrusiveListBaseTraits<KSharedMemoryInfo>::ListType; + using TLPTree = + Common::IntrusiveRedBlackTreeBaseTraits<KThreadLocalPage>::TreeType<KThreadLocalPage>; + using TLPIterator = TLPTree::iterator; - /// Gets const a reference to the process' page table. - const KPageTable& GetPageTable() const { - return m_page_table; - } +private: + KPageTable m_page_table; + std::atomic<size_t> m_used_kernel_memory_size{}; + TLPTree m_fully_used_tlp_tree{}; + TLPTree m_partially_used_tlp_tree{}; + s32 m_ideal_core_id{}; + KResourceLimit* m_resource_limit{}; + KSystemResource* m_system_resource{}; + size_t m_memory_release_hint{}; + State m_state{}; + KLightLock m_state_lock; + KLightLock m_list_lock; + KConditionVariable m_cond_var; + KAddressArbiter m_address_arbiter; + std::array<u64, 4> m_entropy{}; + bool m_is_signaled{}; + bool m_is_initialized{}; + bool m_is_application{}; + bool m_is_default_application_system_resource{}; + bool m_is_hbl{}; + std::array<char, 13> m_name{}; + std::atomic<u16> m_num_running_threads{}; + Svc::CreateProcessFlag m_flags{}; + KMemoryManager::Pool m_memory_pool{}; + s64 m_schedule_count{}; + KCapabilities m_capabilities{}; + u64 m_program_id{}; + u64 m_process_id{}; + KProcessAddress m_code_address{}; + size_t m_code_size{}; + size_t m_main_thread_stack_size{}; + size_t m_max_process_memory{}; + u32 m_version{}; + KHandleTable m_handle_table; + KProcessAddress m_plr_address{}; + KThread* m_exception_thread{}; + ThreadList m_thread_list{}; + SharedMemoryInfoList m_shared_memory_list{}; + bool m_is_suspended{}; + bool m_is_immortal{}; + bool m_is_handle_table_initialized{}; + std::array<KThread*, Core::Hardware::NUM_CPU_CORES> m_running_threads{}; + std::array<u64, Core::Hardware::NUM_CPU_CORES> m_running_thread_idle_counts{}; + std::array<u64, Core::Hardware::NUM_CPU_CORES> m_running_thread_switch_counts{}; + std::array<KThread*, Core::Hardware::NUM_CPU_CORES> m_pinned_threads{}; + std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS> m_watchpoints{}; + std::map<KProcessAddress, u64> m_debug_page_refcounts{}; + std::atomic<s64> m_cpu_time{}; + std::atomic<s64> m_num_process_switches{}; + std::atomic<s64> m_num_thread_switches{}; + std::atomic<s64> m_num_fpu_switches{}; + std::atomic<s64> m_num_supervisor_calls{}; + std::atomic<s64> m_num_ipc_messages{}; + std::atomic<s64> m_num_ipc_replies{}; + std::atomic<s64> m_num_ipc_receives{}; - /// Gets a reference to the process' handle table. - KHandleTable& GetHandleTable() { - return m_handle_table; - } +private: + Result StartTermination(); + void FinishTermination(); - /// Gets a const reference to the process' handle table. - const KHandleTable& GetHandleTable() const { - return m_handle_table; + void PinThread(s32 core_id, KThread* thread) { + ASSERT(0 <= core_id && core_id < static_cast<s32>(Core::Hardware::NUM_CPU_CORES)); + ASSERT(thread != nullptr); + ASSERT(m_pinned_threads[core_id] == nullptr); + m_pinned_threads[core_id] = thread; } - /// Gets a reference to process's memory. - Core::Memory::Memory& GetMemory() const; - - Result SignalToAddress(KProcessAddress address) { - return m_condition_var.SignalToAddress(address); + void UnpinThread(s32 core_id, KThread* thread) { + ASSERT(0 <= core_id && core_id < static_cast<s32>(Core::Hardware::NUM_CPU_CORES)); + ASSERT(thread != nullptr); + ASSERT(m_pinned_threads[core_id] == thread); + m_pinned_threads[core_id] = nullptr; } - Result WaitForAddress(Handle handle, KProcessAddress address, u32 tag) { - return m_condition_var.WaitForAddress(handle, address, tag); - } +public: + explicit KProcess(KernelCore& kernel); + ~KProcess() override; - void SignalConditionVariable(u64 cv_key, int32_t count) { - return m_condition_var.Signal(cv_key, count); - } + Result Initialize(const Svc::CreateProcessParameter& params, KResourceLimit* res_limit, + bool is_real); - Result WaitConditionVariable(KProcessAddress address, u64 cv_key, u32 tag, s64 ns) { - R_RETURN(m_condition_var.Wait(address, cv_key, tag, ns)); - } + Result Initialize(const Svc::CreateProcessParameter& params, const KPageGroup& pg, + std::span<const u32> caps, KResourceLimit* res_limit, + KMemoryManager::Pool pool, bool immortal); + Result Initialize(const Svc::CreateProcessParameter& params, std::span<const u32> user_caps, + KResourceLimit* res_limit, KMemoryManager::Pool pool); + void Exit(); - Result SignalAddressArbiter(uint64_t address, Svc::SignalType signal_type, s32 value, - s32 count) { - R_RETURN(m_address_arbiter.SignalToAddress(address, signal_type, value, count)); + const char* GetName() const { + return m_name.data(); } - Result WaitAddressArbiter(uint64_t address, Svc::ArbitrationType arb_type, s32 value, - s64 timeout) { - R_RETURN(m_address_arbiter.WaitForAddress(address, arb_type, value, timeout)); + u64 GetProgramId() const { + return m_program_id; } - KProcessAddress GetProcessLocalRegionAddress() const { - return m_plr_address; + u64 GetProcessId() const { + return m_process_id; } - /// Gets the current status of the process State GetState() const { return m_state; } - /// Gets the unique ID that identifies this particular process. - u64 GetProcessId() const { - return m_process_id; + u64 GetCoreMask() const { + return m_capabilities.GetCoreMask(); + } + u64 GetPhysicalCoreMask() const { + return m_capabilities.GetPhysicalCoreMask(); + } + u64 GetPriorityMask() const { + return m_capabilities.GetPriorityMask(); } - /// Gets the program ID corresponding to this process. - u64 GetProgramId() const { - return m_program_id; + s32 GetIdealCoreId() const { + return m_ideal_core_id; + } + void SetIdealCoreId(s32 core_id) { + m_ideal_core_id = core_id; } - KProcessAddress GetEntryPoint() const { - return m_code_address; + bool CheckThreadPriority(s32 prio) const { + return ((1ULL << prio) & this->GetPriorityMask()) != 0; } - /// Gets the resource limit descriptor for this process - KResourceLimit* GetResourceLimit() const; + u32 GetCreateProcessFlags() const { + return static_cast<u32>(m_flags); + } - /// Gets the ideal CPU core ID for this process - u8 GetIdealCoreId() const { - return m_ideal_core; + bool Is64Bit() const { + return True(m_flags & Svc::CreateProcessFlag::Is64Bit); } - /// Checks if the specified thread priority is valid. - bool CheckThreadPriority(s32 prio) const { - return ((1ULL << prio) & GetPriorityMask()) != 0; + KProcessAddress GetEntryPoint() const { + return m_code_address; } - /// Gets the bitmask of allowed cores that this process' threads can run on. - u64 GetCoreMask() const { - return m_capabilities.GetCoreMask(); + size_t GetMainStackSize() const { + return m_main_thread_stack_size; } - /// Gets the bitmask of allowed thread priorities. - u64 GetPriorityMask() const { - return m_capabilities.GetPriorityMask(); + KMemoryManager::Pool GetMemoryPool() const { + return m_memory_pool; } - /// Gets the amount of secure memory to allocate for memory management. - u32 GetSystemResourceSize() const { - return m_system_resource_size; + u64 GetRandomEntropy(size_t i) const { + return m_entropy[i]; } - /// Gets the amount of secure memory currently in use for memory management. - u32 GetSystemResourceUsage() const { - // On hardware, this returns the amount of system resource memory that has - // been used by the kernel. This is problematic for Yuzu to emulate, because - // system resource memory is used for page tables -- and yuzu doesn't really - // have a way to calculate how much memory is required for page tables for - // the current process at any given time. - // TODO: Is this even worth implementing? Games may retrieve this value via - // an SDK function that gets used + available system resource size for debug - // or diagnostic purposes. However, it seems unlikely that a game would make - // decisions based on how much system memory is dedicated to its page tables. - // Is returning a value other than zero wise? - return 0; + bool IsApplication() const { + return m_is_application; } - /// Whether this process is an AArch64 or AArch32 process. - bool Is64BitProcess() const { - return m_is_64bit_process; + bool IsDefaultApplicationSystemResource() const { + return m_is_default_application_system_resource; } bool IsSuspended() const { return m_is_suspended; } - void SetSuspended(bool suspended) { m_is_suspended = suspended; } - /// Gets the total running time of the process instance in ticks. - u64 GetCPUTimeTicks() const { - return m_total_process_running_time_ticks; + Result Terminate(); + + bool IsTerminated() const { + return m_state == State::Terminated; } - /// Updates the total running time, adding the given ticks to it. - void UpdateCPUTimeTicks(u64 ticks) { - m_total_process_running_time_ticks += ticks; + bool IsPermittedSvc(u32 svc_id) const { + return m_capabilities.IsPermittedSvc(svc_id); } - /// Gets the process schedule count, used for thread yielding - s64 GetScheduledCount() const { - return m_schedule_count; + bool IsPermittedInterrupt(s32 interrupt_id) const { + return m_capabilities.IsPermittedInterrupt(interrupt_id); } - /// Increments the process schedule count, used for thread yielding. - void IncrementScheduledCount() { - ++m_schedule_count; + bool IsPermittedDebug() const { + return m_capabilities.IsPermittedDebug(); } - void IncrementRunningThreadCount(); - void DecrementRunningThreadCount(); + bool CanForceDebug() const { + return m_capabilities.CanForceDebug(); + } - void SetRunningThread(s32 core, KThread* thread, u64 idle_count) { - m_running_threads[core] = thread; - m_running_thread_idle_counts[core] = idle_count; + bool IsHbl() const { + return m_is_hbl; } - void ClearRunningThread(KThread* thread) { - for (size_t i = 0; i < m_running_threads.size(); ++i) { - if (m_running_threads[i] == thread) { - m_running_threads[i] = nullptr; - } - } + Kernel::KMemoryManager::Direction GetAllocateOption() const { + // TODO: property of the KPageTableBase + return KMemoryManager::Direction::FromFront; } - [[nodiscard]] KThread* GetRunningThread(s32 core) const { - return m_running_threads[core]; + ThreadList& GetThreadList() { + return m_thread_list; + } + const ThreadList& GetThreadList() const { + return m_thread_list; } + bool EnterUserException(); + bool LeaveUserException(); bool ReleaseUserException(KThread* thread); - [[nodiscard]] KThread* GetPinnedThread(s32 core_id) const { + KThread* GetPinnedThread(s32 core_id) const { ASSERT(0 <= core_id && core_id < static_cast<s32>(Core::Hardware::NUM_CPU_CORES)); return m_pinned_threads[core_id]; } - /// Gets 8 bytes of random data for svcGetInfo RandomEntropy - u64 GetRandomEntropy(std::size_t index) const { - return m_random_entropy.at(index); + const Svc::SvcAccessFlagSet& GetSvcPermissions() const { + return m_capabilities.GetSvcPermissions(); } - /// Retrieves the total physical memory available to this process in bytes. - u64 GetTotalPhysicalMemoryAvailable(); - - /// Retrieves the total physical memory available to this process in bytes, - /// without the size of the personal system resource heap added to it. - u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource(); - - /// Retrieves the total physical memory used by this process in bytes. - u64 GetTotalPhysicalMemoryUsed(); - - /// Retrieves the total physical memory used by this process in bytes, - /// without the size of the personal system resource heap added to it. - u64 GetTotalPhysicalMemoryUsedWithoutSystemResource(); - - /// Gets the list of all threads created with this process as their owner. - std::list<KThread*>& GetThreadList() { - return m_thread_list; + KResourceLimit* GetResourceLimit() const { + return m_resource_limit; } - /// Registers a thread as being created under this process, - /// adding it to this process' thread list. - void RegisterThread(KThread* thread); + bool ReserveResource(Svc::LimitableResource which, s64 value); + bool ReserveResource(Svc::LimitableResource which, s64 value, s64 timeout); + void ReleaseResource(Svc::LimitableResource which, s64 value); + void ReleaseResource(Svc::LimitableResource which, s64 value, s64 hint); - /// Unregisters a thread from this process, removing it - /// from this process' thread list. - void UnregisterThread(KThread* thread); + KLightLock& GetStateLock() { + return m_state_lock; + } + KLightLock& GetListLock() { + return m_list_lock; + } - /// Retrieves the number of available threads for this process. - u64 GetFreeThreadCount() const; - - /// Clears the signaled state of the process if and only if it's signaled. - /// - /// @pre The process must not be already terminated. If this is called on a - /// terminated process, then ResultInvalidState will be returned. - /// - /// @pre The process must be in a signaled state. If this is called on a - /// process instance that is not signaled, ResultInvalidState will be - /// returned. - Result Reset(); + KPageTable& GetPageTable() { + return m_page_table; + } + const KPageTable& GetPageTable() const { + return m_page_table; + } - /** - * Loads process-specifics configuration info with metadata provided - * by an executable. - * - * @param metadata The provided metadata to load process specific info from. - * - * @returns ResultSuccess if all relevant metadata was able to be - * loaded and parsed. Otherwise, an error code is returned. - */ - Result LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, - bool is_hbl); + KHandleTable& GetHandleTable() { + return m_handle_table; + } + const KHandleTable& GetHandleTable() const { + return m_handle_table; + } - /** - * Starts the main application thread for this process. - * - * @param main_thread_priority The priority for the main thread. - * @param stack_size The stack size for the main thread in bytes. - */ - void Run(s32 main_thread_priority, u64 stack_size); + size_t GetUsedUserPhysicalMemorySize() const; + size_t GetTotalUserPhysicalMemorySize() const; + size_t GetUsedNonSystemUserPhysicalMemorySize() const; + size_t GetTotalNonSystemUserPhysicalMemorySize() const; - /** - * Prepares a process for termination by stopping all of its threads - * and clearing any other resources. - */ - void PrepareForTermination(); + Result AddSharedMemory(KSharedMemory* shmem, KProcessAddress address, size_t size); + void RemoveSharedMemory(KSharedMemory* shmem, KProcessAddress address, size_t size); - void LoadModule(CodeSet code_set, KProcessAddress base_addr); + Result CreateThreadLocalRegion(KProcessAddress* out); + Result DeleteThreadLocalRegion(KProcessAddress addr); - bool IsInitialized() const override { - return m_is_initialized; + KProcessAddress GetProcessLocalRegionAddress() const { + return m_plr_address; } - static void PostDestroy(uintptr_t arg) {} - - void Finalize() override; - - u64 GetId() const override { - return GetProcessId(); + KThread* GetExceptionThread() const { + return m_exception_thread; } - bool IsHbl() const { - return m_is_hbl; + void AddCpuTime(s64 diff) { + m_cpu_time += diff; + } + s64 GetCpuTime() { + return m_cpu_time.load(); } - bool IsSignaled() const override; - - void DoWorkerTaskImpl(); + s64 GetScheduledCount() const { + return m_schedule_count; + } + void IncrementScheduledCount() { + ++m_schedule_count; + } - Result SetActivity(ProcessActivity activity); + void IncrementRunningThreadCount(); + void DecrementRunningThreadCount(); - void PinCurrentThread(s32 core_id); - void UnpinCurrentThread(s32 core_id); - void UnpinThread(KThread* thread); + size_t GetRequiredSecureMemorySizeNonDefault() const { + if (!this->IsDefaultApplicationSystemResource() && m_system_resource->IsSecureResource()) { + auto* secure_system_resource = static_cast<KSecureSystemResource*>(m_system_resource); + return secure_system_resource->CalculateRequiredSecureMemorySize(); + } - KLightLock& GetStateLock() { - return m_state_lock; + return 0; } - Result AddSharedMemory(KSharedMemory* shmem, KProcessAddress address, size_t size); - void RemoveSharedMemory(KSharedMemory* shmem, KProcessAddress address, size_t size); - - /////////////////////////////////////////////////////////////////////////////////////////////// - // Thread-local storage management - - // Marks the next available region as used and returns the address of the slot. - [[nodiscard]] Result CreateThreadLocalRegion(KProcessAddress* out); + size_t GetRequiredSecureMemorySize() const { + if (m_system_resource->IsSecureResource()) { + auto* secure_system_resource = static_cast<KSecureSystemResource*>(m_system_resource); + return secure_system_resource->CalculateRequiredSecureMemorySize(); + } - // Frees a used TLS slot identified by the given address - Result DeleteThreadLocalRegion(KProcessAddress addr); + return 0; + } - /////////////////////////////////////////////////////////////////////////////////////////////// - // Debug watchpoint management + size_t GetTotalSystemResourceSize() const { + if (!this->IsDefaultApplicationSystemResource() && m_system_resource->IsSecureResource()) { + auto* secure_system_resource = static_cast<KSecureSystemResource*>(m_system_resource); + return secure_system_resource->GetSize(); + } - // Attempts to insert a watchpoint into a free slot. Returns false if none are available. - bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); + return 0; + } - // Attempts to remove the watchpoint specified by the given parameters. - bool RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); + size_t GetUsedSystemResourceSize() const { + if (!this->IsDefaultApplicationSystemResource() && m_system_resource->IsSecureResource()) { + auto* secure_system_resource = static_cast<KSecureSystemResource*>(m_system_resource); + return secure_system_resource->GetUsedSize(); + } - const std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS>& GetWatchpoints() const { - return m_watchpoints; + return 0; } - const std::string& GetName() { - return name; + void SetRunningThread(s32 core, KThread* thread, u64 idle_count, u64 switch_count) { + m_running_threads[core] = thread; + m_running_thread_idle_counts[core] = idle_count; + m_running_thread_switch_counts[core] = switch_count; } -private: - void PinThread(s32 core_id, KThread* thread) { - ASSERT(0 <= core_id && core_id < static_cast<s32>(Core::Hardware::NUM_CPU_CORES)); - ASSERT(thread != nullptr); - ASSERT(m_pinned_threads[core_id] == nullptr); - m_pinned_threads[core_id] = thread; + void ClearRunningThread(KThread* thread) { + for (size_t i = 0; i < m_running_threads.size(); ++i) { + if (m_running_threads[i] == thread) { + m_running_threads[i] = nullptr; + } + } } - void UnpinThread(s32 core_id, KThread* thread) { - ASSERT(0 <= core_id && core_id < static_cast<s32>(Core::Hardware::NUM_CPU_CORES)); - ASSERT(thread != nullptr); - ASSERT(m_pinned_threads[core_id] == thread); - m_pinned_threads[core_id] = nullptr; + const KSystemResource& GetSystemResource() const { + return *m_system_resource; } - void FinalizeHandleTable() { - // Finalize the table. - m_handle_table.Finalize(); - - // Note that the table is finalized. - m_is_handle_table_initialized = false; + const KMemoryBlockSlabManager& GetMemoryBlockSlabManager() const { + return m_system_resource->GetMemoryBlockSlabManager(); + } + const KBlockInfoManager& GetBlockInfoManager() const { + return m_system_resource->GetBlockInfoManager(); + } + const KPageTableManager& GetPageTableManager() const { + return m_system_resource->GetPageTableManager(); } - void ChangeState(State new_state); - - /// Allocates the main thread stack for the process, given the stack size in bytes. - Result AllocateMainThreadStack(std::size_t stack_size); - - /// Memory manager for this process - KPageTable m_page_table; - - /// Current status of the process - State m_state{}; + KThread* GetRunningThread(s32 core) const { + return m_running_threads[core]; + } + u64 GetRunningThreadIdleCount(s32 core) const { + return m_running_thread_idle_counts[core]; + } + u64 GetRunningThreadSwitchCount(s32 core) const { + return m_running_thread_switch_counts[core]; + } - /// The ID of this process - u64 m_process_id = 0; + void RegisterThread(KThread* thread); + void UnregisterThread(KThread* thread); - /// Title ID corresponding to the process - u64 m_program_id = 0; + Result Run(s32 priority, size_t stack_size); - /// Specifies additional memory to be reserved for the process's memory management by the - /// system. When this is non-zero, secure memory is allocated and used for page table allocation - /// instead of using the normal global page tables/memory block management. - u32 m_system_resource_size = 0; + Result Reset(); - /// Resource limit descriptor for this process - KResourceLimit* m_resource_limit{}; + void SetDebugBreak() { + if (m_state == State::RunningAttached) { + this->ChangeState(State::DebugBreak); + } + } - KVirtualAddress m_system_resource_address{}; + void SetAttached() { + if (m_state == State::DebugBreak) { + this->ChangeState(State::RunningAttached); + } + } - /// The ideal CPU core for this process, threads are scheduled on this core by default. - u8 m_ideal_core = 0; + Result SetActivity(Svc::ProcessActivity activity); - /// Contains the parsed process capability descriptors. - ProcessCapabilities m_capabilities; + void PinCurrentThread(); + void UnpinCurrentThread(); + void UnpinThread(KThread* thread); - /// Whether or not this process is AArch64, or AArch32. - /// By default, we currently assume this is true, unless otherwise - /// specified by metadata provided to the process during loading. - bool m_is_64bit_process = true; + void SignalConditionVariable(uintptr_t cv_key, int32_t count) { + return m_cond_var.Signal(cv_key, count); + } - /// Total running time for the process in ticks. - std::atomic<u64> m_total_process_running_time_ticks = 0; + Result WaitConditionVariable(KProcessAddress address, uintptr_t cv_key, u32 tag, s64 ns) { + R_RETURN(m_cond_var.Wait(address, cv_key, tag, ns)); + } - /// Per-process handle table for storing created object handles in. - KHandleTable m_handle_table; + Result SignalAddressArbiter(uintptr_t address, Svc::SignalType signal_type, s32 value, + s32 count) { + R_RETURN(m_address_arbiter.SignalToAddress(address, signal_type, value, count)); + } - /// Per-process address arbiter. - KAddressArbiter m_address_arbiter; + Result WaitAddressArbiter(uintptr_t address, Svc::ArbitrationType arb_type, s32 value, + s64 timeout) { + R_RETURN(m_address_arbiter.WaitForAddress(address, arb_type, value, timeout)); + } - /// The per-process mutex lock instance used for handling various - /// forms of services, such as lock arbitration, and condition - /// variable related facilities. - KConditionVariable m_condition_var; + Result GetThreadList(s32* out_num_threads, KProcessAddress out_thread_ids, s32 max_out_count); - /// Address indicating the location of the process' dedicated TLS region. - KProcessAddress m_plr_address = 0; + static void Switch(KProcess* cur_process, KProcess* next_process); - /// Address indicating the location of the process's entry point. - KProcessAddress m_code_address = 0; +public: + // Attempts to insert a watchpoint into a free slot. Returns false if none are available. + bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); - /// Random values for svcGetInfo RandomEntropy - std::array<u64, RANDOM_ENTROPY_SIZE> m_random_entropy{}; + // Attempts to remove the watchpoint specified by the given parameters. + bool RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); - /// List of threads that are running with this process as their owner. - std::list<KThread*> m_thread_list; + const std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS>& GetWatchpoints() const { + return m_watchpoints; + } - /// List of shared memory that are running with this process as their owner. - std::list<KSharedMemoryInfo*> m_shared_memory_list; +public: + Result LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, + bool is_hbl); - /// Address of the top of the main thread's stack - KProcessAddress m_main_thread_stack_top{}; + void LoadModule(CodeSet code_set, KProcessAddress base_addr); - /// Size of the main thread's stack - std::size_t m_main_thread_stack_size{}; + Core::Memory::Memory& GetMemory() const; - /// Memory usage capacity for the process - std::size_t m_memory_usage_capacity{}; +public: + // Overridden parent functions. + bool IsInitialized() const override { + return m_is_initialized; + } - /// Process total image size - std::size_t m_image_size{}; + static void PostDestroy(uintptr_t arg) {} - /// Schedule count of this process - s64 m_schedule_count{}; + void Finalize() override; - size_t m_memory_release_hint{}; + u64 GetIdImpl() const { + return this->GetProcessId(); + } + u64 GetId() const override { + return this->GetIdImpl(); + } - std::string name{}; + virtual bool IsSignaled() const override { + ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel)); + return m_is_signaled; + } - bool m_is_signaled{}; - bool m_is_suspended{}; - bool m_is_immortal{}; - bool m_is_handle_table_initialized{}; - bool m_is_initialized{}; - bool m_is_hbl{}; + void DoWorkerTaskImpl(); - std::atomic<u16> m_num_running_threads{}; +private: + void ChangeState(State new_state) { + if (m_state != new_state) { + m_state = new_state; + m_is_signaled = true; + this->NotifyAvailable(); + } + } - std::array<KThread*, Core::Hardware::NUM_CPU_CORES> m_running_threads{}; - std::array<u64, Core::Hardware::NUM_CPU_CORES> m_running_thread_idle_counts{}; - std::array<KThread*, Core::Hardware::NUM_CPU_CORES> m_pinned_threads{}; - std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS> m_watchpoints{}; - std::map<KProcessAddress, u64> m_debug_page_refcounts; + Result InitializeHandleTable(s32 size) { + // Try to initialize the handle table. + R_TRY(m_handle_table.Initialize(size)); - KThread* m_exception_thread{}; + // We succeeded, so note that we did. + m_is_handle_table_initialized = true; + R_SUCCEED(); + } - KLightLock m_state_lock; - KLightLock m_list_lock; + void FinalizeHandleTable() { + // Finalize the table. + m_handle_table.Finalize(); - using TLPTree = - Common::IntrusiveRedBlackTreeBaseTraits<KThreadLocalPage>::TreeType<KThreadLocalPage>; - using TLPIterator = TLPTree::iterator; - TLPTree m_fully_used_tlp_tree; - TLPTree m_partially_used_tlp_tree; + // Note that the table is finalized. + m_is_handle_table_initialized = false; + } }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index d8143c650..1bce63a56 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -190,7 +190,7 @@ u64 KScheduler::UpdateHighestPriorityThread(KThread* highest_thread) { if (m_state.should_count_idle) { if (highest_thread != nullptr) [[likely]] { if (KProcess* process = highest_thread->GetOwnerProcess(); process != nullptr) { - process->SetRunningThread(m_core_id, highest_thread, m_state.idle_count); + process->SetRunningThread(m_core_id, highest_thread, m_state.idle_count, 0); } } else { m_state.idle_count++; @@ -356,7 +356,7 @@ void KScheduler::SwitchThread(KThread* next_thread) { const s64 tick_diff = cur_tick - prev_tick; cur_thread->AddCpuTime(m_core_id, tick_diff); if (cur_process != nullptr) { - cur_process->UpdateCPUTimeTicks(tick_diff); + cur_process->AddCpuTime(tick_diff); } m_last_context_switch_time = cur_tick; diff --git a/src/core/hle/kernel/k_system_resource.cpp b/src/core/hle/kernel/k_system_resource.cpp index e6c8d589a..07e92aa80 100644 --- a/src/core/hle/kernel/k_system_resource.cpp +++ b/src/core/hle/kernel/k_system_resource.cpp @@ -1,25 +1,100 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "core/core.h" +#include "core/hle/kernel/k_scoped_resource_reservation.h" #include "core/hle/kernel/k_system_resource.h" namespace Kernel { Result KSecureSystemResource::Initialize(size_t size, KResourceLimit* resource_limit, KMemoryManager::Pool pool) { - // Unimplemented - UNREACHABLE(); + // Set members. + m_resource_limit = resource_limit; + m_resource_size = size; + m_resource_pool = pool; + + // Determine required size for our secure resource. + const size_t secure_size = this->CalculateRequiredSecureMemorySize(); + + // Reserve memory for our secure resource. + KScopedResourceReservation memory_reservation( + m_resource_limit, Svc::LimitableResource::PhysicalMemoryMax, secure_size); + R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); + + // Allocate secure memory. + R_TRY(KSystemControl::AllocateSecureMemory(m_kernel, std::addressof(m_resource_address), + m_resource_size, static_cast<u32>(m_resource_pool))); + ASSERT(m_resource_address != 0); + + // Ensure we clean up the secure memory, if we fail past this point. + ON_RESULT_FAILURE { + KSystemControl::FreeSecureMemory(m_kernel, m_resource_address, m_resource_size, + static_cast<u32>(m_resource_pool)); + }; + + // Check that our allocation is bigger than the reference counts needed for it. + const size_t rc_size = + Common::AlignUp(KPageTableSlabHeap::CalculateReferenceCountSize(m_resource_size), PageSize); + R_UNLESS(m_resource_size > rc_size, ResultOutOfMemory); + + // Get resource pointer. + KPhysicalAddress resource_paddr = + KPageTable::GetHeapPhysicalAddress(m_kernel.MemoryLayout(), m_resource_address); + auto* resource = + m_kernel.System().DeviceMemory().GetPointer<KPageTableManager::RefCount>(resource_paddr); + + // Initialize slab heaps. + m_dynamic_page_manager.Initialize(m_resource_address + rc_size, m_resource_size - rc_size, + PageSize); + m_page_table_heap.Initialize(std::addressof(m_dynamic_page_manager), 0, resource); + m_memory_block_heap.Initialize(std::addressof(m_dynamic_page_manager), 0); + m_block_info_heap.Initialize(std::addressof(m_dynamic_page_manager), 0); + + // Initialize managers. + m_page_table_manager.Initialize(std::addressof(m_dynamic_page_manager), + std::addressof(m_page_table_heap)); + m_memory_block_slab_manager.Initialize(std::addressof(m_dynamic_page_manager), + std::addressof(m_memory_block_heap)); + m_block_info_manager.Initialize(std::addressof(m_dynamic_page_manager), + std::addressof(m_block_info_heap)); + + // Set our managers. + this->SetManagers(m_memory_block_slab_manager, m_block_info_manager, m_page_table_manager); + + // Commit the memory reservation. + memory_reservation.Commit(); + + // Open reference to our resource limit. + m_resource_limit->Open(); + + // Set ourselves as initialized. + m_is_initialized = true; + + R_SUCCEED(); } void KSecureSystemResource::Finalize() { - // Unimplemented - UNREACHABLE(); + // Check that we have no outstanding allocations. + ASSERT(m_memory_block_slab_manager.GetUsed() == 0); + ASSERT(m_block_info_manager.GetUsed() == 0); + ASSERT(m_page_table_manager.GetUsed() == 0); + + // Free our secure memory. + KSystemControl::FreeSecureMemory(m_kernel, m_resource_address, m_resource_size, + static_cast<u32>(m_resource_pool)); + + // Release the memory reservation. + m_resource_limit->Release(Svc::LimitableResource::PhysicalMemoryMax, + this->CalculateRequiredSecureMemorySize()); + + // Close reference to our resource limit. + m_resource_limit->Close(); } size_t KSecureSystemResource::CalculateRequiredSecureMemorySize(size_t size, KMemoryManager::Pool pool) { - // Unimplemented - UNREACHABLE(); + return KSystemControl::CalculateRequiredSecureMemorySize(size, static_cast<u32>(pool)); } } // namespace Kernel diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 7df8fd7f7..a6deb50ec 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -122,16 +122,15 @@ Result KThread::Initialize(KThreadFunction func, uintptr_t arg, KProcessAddress case ThreadType::Main: ASSERT(arg == 0); [[fallthrough]]; - case ThreadType::HighPriority: - [[fallthrough]]; - case ThreadType::Dummy: - [[fallthrough]]; case ThreadType::User: ASSERT(((owner == nullptr) || (owner->GetCoreMask() | (1ULL << virt_core)) == owner->GetCoreMask())); ASSERT(((owner == nullptr) || (prio > Svc::LowestThreadPriority) || (owner->GetPriorityMask() | (1ULL << prio)) == owner->GetPriorityMask())); break; + case ThreadType::HighPriority: + case ThreadType::Dummy: + break; case ThreadType::Kernel: UNIMPLEMENTED(); break; @@ -216,6 +215,7 @@ Result KThread::Initialize(KThreadFunction func, uintptr_t arg, KProcessAddress // Setup the TLS, if needed. if (type == ThreadType::User) { R_TRY(owner->CreateThreadLocalRegion(std::addressof(m_tls_address))); + owner->GetMemory().ZeroBlock(m_tls_address, Svc::ThreadLocalRegionSize); } m_parent = owner; @@ -403,7 +403,7 @@ void KThread::StartTermination() { if (m_parent != nullptr) { m_parent->ReleaseUserException(this); if (m_parent->GetPinnedThread(GetCurrentCoreId(m_kernel)) == this) { - m_parent->UnpinCurrentThread(m_core_id); + m_parent->UnpinCurrentThread(); } } @@ -415,10 +415,6 @@ void KThread::StartTermination() { m_parent->ClearRunningThread(this); } - // Signal. - m_signaled = true; - KSynchronizationObject::NotifyAvailable(); - // Clear previous thread in KScheduler. KScheduler::ClearPreviousThread(m_kernel, this); @@ -437,6 +433,13 @@ void KThread::FinishTermination() { } } + // Acquire the scheduler lock. + KScopedSchedulerLock sl{m_kernel}; + + // Signal. + m_signaled = true; + KSynchronizationObject::NotifyAvailable(); + // Close the thread. this->Close(); } @@ -820,7 +823,7 @@ void KThread::CloneFpuStatus() { ASSERT(this->GetOwnerProcess() != nullptr); ASSERT(this->GetOwnerProcess() == GetCurrentProcessPointer(m_kernel)); - if (this->GetOwnerProcess()->Is64BitProcess()) { + if (this->GetOwnerProcess()->Is64Bit()) { // Clone FPSR and FPCR. ThreadContext64 cur_ctx{}; m_kernel.System().CurrentArmInterface().SaveContext(cur_ctx); @@ -923,7 +926,7 @@ Result KThread::GetThreadContext3(Common::ScratchBuffer<u8>& out) { // If we're not terminating, get the thread's user context. if (!this->IsTerminationRequested()) { - if (m_parent->Is64BitProcess()) { + if (m_parent->Is64Bit()) { // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. auto context = GetContext64(); context.pstate &= 0xFF0FFE20; @@ -1174,6 +1177,9 @@ Result KThread::Run() { owner->IncrementRunningThreadCount(); } + // Open a reference, now that we're running. + this->Open(); + // Set our state and finish. this->SetState(ThreadState::Runnable); diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index d178c2453..e1f80b04f 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -721,6 +721,7 @@ private: // For core KThread implementation ThreadContext32 m_thread_context_32{}; ThreadContext64 m_thread_context_64{}; + Common::IntrusiveListNode m_process_list_node; Common::IntrusiveRedBlackTreeNode m_condvar_arbiter_tree_node{}; s32 m_priority{}; using ConditionVariableThreadTreeTraits = diff --git a/src/core/hle/kernel/k_transfer_memory.cpp b/src/core/hle/kernel/k_transfer_memory.cpp index 13d34125c..0e2e11743 100644 --- a/src/core/hle/kernel/k_transfer_memory.cpp +++ b/src/core/hle/kernel/k_transfer_memory.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/scope_exit.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_resource_limit.h" #include "core/hle/kernel/k_transfer_memory.h" @@ -9,28 +10,50 @@ namespace Kernel { KTransferMemory::KTransferMemory(KernelCore& kernel) - : KAutoObjectWithSlabHeapAndContainer{kernel} {} + : KAutoObjectWithSlabHeapAndContainer{kernel}, m_lock{kernel} {} KTransferMemory::~KTransferMemory() = default; -Result KTransferMemory::Initialize(KProcessAddress address, std::size_t size, - Svc::MemoryPermission owner_perm) { +Result KTransferMemory::Initialize(KProcessAddress addr, std::size_t size, + Svc::MemoryPermission own_perm) { // Set members. m_owner = GetCurrentProcessPointer(m_kernel); - // TODO(bunnei): Lock for transfer memory + // Get the owner page table. + auto& page_table = m_owner->GetPageTable(); + + // Construct the page group, guarding to make sure our state is valid on exit. + m_page_group.emplace(m_kernel, page_table.GetBlockInfoManager()); + auto pg_guard = SCOPE_GUARD({ m_page_group.reset(); }); + + // Lock the memory. + R_TRY(page_table.LockForTransferMemory(std::addressof(*m_page_group), addr, size, + ConvertToKMemoryPermission(own_perm))); // Set remaining tracking members. m_owner->Open(); - m_owner_perm = owner_perm; - m_address = address; - m_size = size; + m_owner_perm = own_perm; + m_address = addr; m_is_initialized = true; + m_is_mapped = false; + // We succeeded. + pg_guard.Cancel(); R_SUCCEED(); } -void KTransferMemory::Finalize() {} +void KTransferMemory::Finalize() { + // Unlock. + if (!m_is_mapped) { + const size_t size = m_page_group->GetNumPages() * PageSize; + ASSERT(R_SUCCEEDED( + m_owner->GetPageTable().UnlockForTransferMemory(m_address, size, *m_page_group))); + } + + // Close the page group. + m_page_group->Close(); + m_page_group->Finalize(); +} void KTransferMemory::PostDestroy(uintptr_t arg) { KProcess* owner = reinterpret_cast<KProcess*>(arg); @@ -38,4 +61,54 @@ void KTransferMemory::PostDestroy(uintptr_t arg) { owner->Close(); } +Result KTransferMemory::Map(KProcessAddress address, size_t size, Svc::MemoryPermission map_perm) { + // Validate the size. + R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); + + // Validate the permission. + R_UNLESS(m_owner_perm == map_perm, ResultInvalidState); + + // Lock ourselves. + KScopedLightLock lk(m_lock); + + // Ensure we're not already mapped. + R_UNLESS(!m_is_mapped, ResultInvalidState); + + // Map the memory. + const KMemoryState state = (m_owner_perm == Svc::MemoryPermission::None) + ? KMemoryState::Transfered + : KMemoryState::SharedTransfered; + R_TRY(GetCurrentProcess(m_kernel).GetPageTable().MapPageGroup( + address, *m_page_group, state, KMemoryPermission::UserReadWrite)); + + // Mark ourselves as mapped. + m_is_mapped = true; + + R_SUCCEED(); +} + +Result KTransferMemory::Unmap(KProcessAddress address, size_t size) { + // Validate the size. + R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); + + // Lock ourselves. + KScopedLightLock lk(m_lock); + + // Unmap the memory. + const KMemoryState state = (m_owner_perm == Svc::MemoryPermission::None) + ? KMemoryState::Transfered + : KMemoryState::SharedTransfered; + R_TRY(GetCurrentProcess(m_kernel).GetPageTable().UnmapPageGroup(address, *m_page_group, state)); + + // Mark ourselves as unmapped. + ASSERT(m_is_mapped); + m_is_mapped = false; + + R_SUCCEED(); +} + +size_t KTransferMemory::GetSize() const { + return m_is_initialized ? m_page_group->GetNumPages() * PageSize : 0; +} + } // namespace Kernel diff --git a/src/core/hle/kernel/k_transfer_memory.h b/src/core/hle/kernel/k_transfer_memory.h index 54f97ccb4..8a0b08761 100644 --- a/src/core/hle/kernel/k_transfer_memory.h +++ b/src/core/hle/kernel/k_transfer_memory.h @@ -3,6 +3,9 @@ #pragma once +#include <optional> + +#include "core/hle/kernel/k_page_group.h" #include "core/hle/kernel/slab_helpers.h" #include "core/hle/kernel/svc_types.h" #include "core/hle/result.h" @@ -48,16 +51,19 @@ public: return m_address; } - size_t GetSize() const { - return m_is_initialized ? m_size : 0; - } + size_t GetSize() const; + + Result Map(KProcessAddress address, size_t size, Svc::MemoryPermission map_perm); + Result Unmap(KProcessAddress address, size_t size); private: + std::optional<KPageGroup> m_page_group{}; KProcess* m_owner{}; KProcessAddress m_address{}; + KLightLock m_lock; Svc::MemoryPermission m_owner_perm{}; - size_t m_size{}; bool m_is_initialized{}; + bool m_is_mapped{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index a1134b7e2..4a1559291 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -101,35 +101,31 @@ struct KernelCore::Impl { void InitializeCores() { for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { - cores[core_id]->Initialize((*application_process).Is64BitProcess()); + cores[core_id]->Initialize((*application_process).Is64Bit()); system.ApplicationMemory().SetCurrentPageTable(*application_process, core_id); } } - void CloseApplicationProcess() { - KProcess* old_process = application_process.exchange(nullptr); - if (old_process == nullptr) { - return; - } - - // old_process->Close(); - // TODO: The process should be destroyed based on accurate ref counting after - // calling Close(). Adding a manual Destroy() call instead to avoid a memory leak. - old_process->Finalize(); - old_process->Destroy(); + void TerminateApplicationProcess() { + application_process.load()->Terminate(); } void Shutdown() { is_shutting_down.store(true, std::memory_order_relaxed); SCOPE_EXIT({ is_shutting_down.store(false, std::memory_order_relaxed); }); - process_list.clear(); - CloseServices(); + auto* old_process = application_process.exchange(nullptr); + if (old_process) { + old_process->Close(); + } + + process_list.clear(); + next_object_id = 0; - next_kernel_process_id = KProcess::InitialKIPIDMin; - next_user_process_id = KProcess::ProcessIDMin; + next_kernel_process_id = KProcess::InitialProcessIdMin; + next_user_process_id = KProcess::ProcessIdMin; next_thread_id = 1; global_handle_table->Finalize(); @@ -176,8 +172,6 @@ struct KernelCore::Impl { } } - CloseApplicationProcess(); - // Track kernel objects that were not freed on shutdown { std::scoped_lock lk{registered_objects_lock}; @@ -344,6 +338,8 @@ struct KernelCore::Impl { // Create the system page table managers. app_system_resource = std::make_unique<KSystemResource>(kernel); sys_system_resource = std::make_unique<KSystemResource>(kernel); + KAutoObject::Create(std::addressof(*app_system_resource)); + KAutoObject::Create(std::addressof(*sys_system_resource)); // Set the managers for the system resources. app_system_resource->SetManagers(*app_memory_block_manager, *app_block_info_manager, @@ -373,7 +369,7 @@ struct KernelCore::Impl { static inline thread_local u8 host_thread_id = UINT8_MAX; /// Sets the host thread ID for the caller. - u32 SetHostThreadId(std::size_t core_id) { + LTO_NOINLINE u32 SetHostThreadId(std::size_t core_id) { // This should only be called during core init. ASSERT(host_thread_id == UINT8_MAX); @@ -384,13 +380,13 @@ struct KernelCore::Impl { } /// Gets the host thread ID for the caller - u32 GetHostThreadId() const { + LTO_NOINLINE u32 GetHostThreadId() const { return host_thread_id; } // Gets the dummy KThread for the caller, allocating a new one if this is the first time - KThread* GetHostDummyThread(KThread* existing_thread) { - const auto initialize{[](KThread* thread) { + LTO_NOINLINE KThread* GetHostDummyThread(KThread* existing_thread) { + const auto initialize{[](KThread* thread) LTO_NOINLINE { ASSERT(KThread::InitializeDummyThread(thread, nullptr).IsSuccess()); return thread; }}; @@ -424,11 +420,11 @@ struct KernelCore::Impl { static inline thread_local bool is_phantom_mode_for_singlecore{false}; - bool IsPhantomModeForSingleCore() const { + LTO_NOINLINE bool IsPhantomModeForSingleCore() const { return is_phantom_mode_for_singlecore; } - void SetIsPhantomModeForSingleCore(bool value) { + LTO_NOINLINE void SetIsPhantomModeForSingleCore(bool value) { ASSERT(!is_multicore); is_phantom_mode_for_singlecore = value; } @@ -439,14 +435,14 @@ struct KernelCore::Impl { static inline thread_local KThread* current_thread{nullptr}; - KThread* GetCurrentEmuThread() { + LTO_NOINLINE KThread* GetCurrentEmuThread() { if (!current_thread) { current_thread = GetHostDummyThread(nullptr); } return current_thread; } - void SetCurrentEmuThread(KThread* thread) { + LTO_NOINLINE void SetCurrentEmuThread(KThread* thread) { current_thread = thread; } @@ -623,14 +619,33 @@ struct KernelCore::Impl { ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert( GetInteger(slab_start_phys_addr), slab_region_size, KMemoryRegionType_DramKernelSlab)); + // Insert a physical region for the secure applet memory. + const auto secure_applet_end_phys_addr = + slab_end_phys_addr + KSystemControl::SecureAppletMemorySize; + if constexpr (KSystemControl::SecureAppletMemorySize > 0) { + ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert( + GetInteger(slab_end_phys_addr), KSystemControl::SecureAppletMemorySize, + KMemoryRegionType_DramKernelSecureAppletMemory)); + } + + // Insert a physical region for the unknown debug2 region. + constexpr size_t SecureUnknownRegionSize = 0; + const size_t secure_unknown_size = SecureUnknownRegionSize; + const auto secure_unknown_end_phys_addr = secure_applet_end_phys_addr + secure_unknown_size; + if constexpr (SecureUnknownRegionSize > 0) { + ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert( + GetInteger(secure_applet_end_phys_addr), secure_unknown_size, + KMemoryRegionType_DramKernelSecureUnknown)); + } + // Determine size available for kernel page table heaps, requiring > 8 MB. const KPhysicalAddress resource_end_phys_addr = slab_start_phys_addr + resource_region_size; - const size_t page_table_heap_size = resource_end_phys_addr - slab_end_phys_addr; + const size_t page_table_heap_size = resource_end_phys_addr - secure_unknown_end_phys_addr; ASSERT(page_table_heap_size / 4_MiB > 2); // Insert a physical region for the kernel page table heap region ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert( - GetInteger(slab_end_phys_addr), page_table_heap_size, + GetInteger(secure_unknown_end_phys_addr), page_table_heap_size, KMemoryRegionType_DramKernelPtHeap)); // All DRAM regions that we haven't tagged by this point will be mapped under the linear @@ -773,8 +788,8 @@ struct KernelCore::Impl { std::mutex registered_in_use_objects_lock; std::atomic<u32> next_object_id{0}; - std::atomic<u64> next_kernel_process_id{KProcess::InitialKIPIDMin}; - std::atomic<u64> next_user_process_id{KProcess::ProcessIDMin}; + std::atomic<u64> next_kernel_process_id{KProcess::InitialProcessIdMin}; + std::atomic<u64> next_user_process_id{KProcess::ProcessIdMin}; std::atomic<u64> next_thread_id{1}; // Lists all processes that exist in the current session. @@ -905,10 +920,6 @@ const KProcess* KernelCore::ApplicationProcess() const { return impl->application_process; } -void KernelCore::CloseApplicationProcess() { - impl->CloseApplicationProcess(); -} - const std::vector<KProcess*>& KernelCore::GetProcessList() const { return impl->process_list; } @@ -1109,8 +1120,8 @@ std::jthread KernelCore::RunOnHostCoreProcess(std::string&& process_name, std::function<void()> func) { // Make a new process. KProcess* process = KProcess::Create(*this); - ASSERT(R_SUCCEEDED(KProcess::Initialize(process, System(), "", KProcess::ProcessType::Userland, - GetSystemResourceLimit()))); + ASSERT(R_SUCCEEDED( + process->Initialize(Svc::CreateProcessParameter{}, GetSystemResourceLimit(), false))); // Ensure that we don't hold onto any extra references. SCOPE_EXIT({ process->Close(); }); @@ -1137,8 +1148,8 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function // Make a new process. KProcess* process = KProcess::Create(*this); - ASSERT(R_SUCCEEDED(KProcess::Initialize(process, System(), "", KProcess::ProcessType::Userland, - GetSystemResourceLimit()))); + ASSERT(R_SUCCEEDED( + process->Initialize(Svc::CreateProcessParameter{}, GetSystemResourceLimit(), false))); // Ensure that we don't hold onto any extra references. SCOPE_EXIT({ process->Close(); }); @@ -1247,7 +1258,8 @@ const Kernel::KSharedMemory& KernelCore::GetHidBusSharedMem() const { void KernelCore::SuspendApplication(bool suspended) { const bool should_suspend{exception_exited || suspended}; - const auto activity = should_suspend ? ProcessActivity::Paused : ProcessActivity::Runnable; + const auto activity = + should_suspend ? Svc::ProcessActivity::Paused : Svc::ProcessActivity::Runnable; // Get the application process. KScopedAutoObject<KProcess> process = ApplicationProcess(); @@ -1281,6 +1293,8 @@ void KernelCore::SuspendApplication(bool suspended) { } void KernelCore::ShutdownCores() { + impl->TerminateApplicationProcess(); + KScopedSchedulerLock lk{*this}; for (auto* thread : impl->shutdown_threads) { diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index d5b08eeb5..d8086c0ea 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -134,9 +134,6 @@ public: /// Retrieves a const pointer to the application process. const KProcess* ApplicationProcess() const; - /// Closes the application process. - void CloseApplicationProcess(); - /// Retrieves the list of processes. const std::vector<KProcess*>& GetProcessList() const; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 871d541d4..b76683969 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -4426,7 +4426,7 @@ void Call(Core::System& system, u32 imm) { auto& kernel = system.Kernel(); kernel.EnterSVCProfile(); - if (GetCurrentProcess(system.Kernel()).Is64BitProcess()) { + if (GetCurrentProcess(system.Kernel()).Is64Bit()) { Call64(system, imm); } else { Call32(system, imm); diff --git a/src/core/hle/kernel/svc/svc_info.cpp b/src/core/hle/kernel/svc/svc_info.cpp index f99964028..ada998772 100644 --- a/src/core/hle/kernel/svc/svc_info.cpp +++ b/src/core/hle/kernel/svc/svc_info.cpp @@ -86,20 +86,19 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle R_SUCCEED(); case InfoType::TotalMemorySize: - *result = process->GetTotalPhysicalMemoryAvailable(); + *result = process->GetTotalUserPhysicalMemorySize(); R_SUCCEED(); case InfoType::UsedMemorySize: - *result = process->GetTotalPhysicalMemoryUsed(); + *result = process->GetUsedUserPhysicalMemorySize(); R_SUCCEED(); case InfoType::SystemResourceSizeTotal: - *result = process->GetSystemResourceSize(); + *result = process->GetTotalSystemResourceSize(); R_SUCCEED(); case InfoType::SystemResourceSizeUsed: - LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage"); - *result = process->GetSystemResourceUsage(); + *result = process->GetUsedSystemResourceSize(); R_SUCCEED(); case InfoType::ProgramId: @@ -111,20 +110,29 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle R_SUCCEED(); case InfoType::TotalNonSystemMemorySize: - *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource(); + *result = process->GetTotalNonSystemUserPhysicalMemorySize(); R_SUCCEED(); case InfoType::UsedNonSystemMemorySize: - *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource(); + *result = process->GetUsedNonSystemUserPhysicalMemorySize(); R_SUCCEED(); case InfoType::IsApplication: LOG_WARNING(Kernel_SVC, "(STUBBED) Assuming process is application"); - *result = true; + *result = process->IsApplication(); R_SUCCEED(); case InfoType::FreeThreadCount: - *result = process->GetFreeThreadCount(); + if (KResourceLimit* resource_limit = process->GetResourceLimit(); + resource_limit != nullptr) { + const auto current_value = + resource_limit->GetCurrentValue(Svc::LimitableResource::ThreadCountMax); + const auto limit_value = + resource_limit->GetLimitValue(Svc::LimitableResource::ThreadCountMax); + *result = limit_value - current_value; + } else { + *result = 0; + } R_SUCCEED(); default: @@ -161,7 +169,7 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle case InfoType::RandomEntropy: R_UNLESS(handle == 0, ResultInvalidHandle); - R_UNLESS(info_sub_id < KProcess::RANDOM_ENTROPY_SIZE, ResultInvalidCombination); + R_UNLESS(info_sub_id < 4, ResultInvalidCombination); *result = GetCurrentProcess(system.Kernel()).GetRandomEntropy(info_sub_id); R_SUCCEED(); diff --git a/src/core/hle/kernel/svc/svc_lock.cpp b/src/core/hle/kernel/svc/svc_lock.cpp index 1d7bc4246..5f0833fcb 100644 --- a/src/core/hle/kernel/svc/svc_lock.cpp +++ b/src/core/hle/kernel/svc/svc_lock.cpp @@ -17,7 +17,7 @@ Result ArbitrateLock(Core::System& system, Handle thread_handle, u64 address, u3 R_UNLESS(!IsKernelAddress(address), ResultInvalidCurrentMemory); R_UNLESS(Common::IsAligned(address, sizeof(u32)), ResultInvalidAddress); - R_RETURN(GetCurrentProcess(system.Kernel()).WaitForAddress(thread_handle, address, tag)); + R_RETURN(KConditionVariable::WaitForAddress(system.Kernel(), thread_handle, address, tag)); } /// Unlock a mutex @@ -28,7 +28,7 @@ Result ArbitrateUnlock(Core::System& system, u64 address) { R_UNLESS(!IsKernelAddress(address), ResultInvalidCurrentMemory); R_UNLESS(Common::IsAligned(address, sizeof(u32)), ResultInvalidAddress); - R_RETURN(GetCurrentProcess(system.Kernel()).SignalToAddress(address)); + R_RETURN(KConditionVariable::SignalToAddress(system.Kernel(), address)); } Result ArbitrateLock64(Core::System& system, Handle thread_handle, uint64_t address, uint32_t tag) { diff --git a/src/core/hle/kernel/svc/svc_memory.cpp b/src/core/hle/kernel/svc/svc_memory.cpp index 2cab74127..97f1210de 100644 --- a/src/core/hle/kernel/svc/svc_memory.cpp +++ b/src/core/hle/kernel/svc/svc_memory.cpp @@ -76,7 +76,7 @@ Result MapUnmapMemorySanityChecks(const KPageTable& manager, u64 dst_addr, u64 s } // namespace Result SetMemoryPermission(Core::System& system, u64 address, u64 size, MemoryPermission perm) { - LOG_DEBUG(Kernel_SVC, "called, address=0x{:016X}, size=0x{:X}, perm=0x{:08X", address, size, + LOG_DEBUG(Kernel_SVC, "called, address=0x{:016X}, size=0x{:X}, perm=0x{:08X}", address, size, perm); // Validate address / size. @@ -108,10 +108,16 @@ Result SetMemoryAttribute(Core::System& system, u64 address, u64 size, u32 mask, R_UNLESS((address < address + size), ResultInvalidCurrentMemory); // Validate the attribute and mask. - constexpr u32 SupportedMask = static_cast<u32>(MemoryAttribute::Uncached); + constexpr u32 SupportedMask = + static_cast<u32>(MemoryAttribute::Uncached | MemoryAttribute::PermissionLocked); R_UNLESS((mask | attr) == mask, ResultInvalidCombination); R_UNLESS((mask | attr | SupportedMask) == SupportedMask, ResultInvalidCombination); + // Check that permission locked is either being set or not masked. + R_UNLESS((static_cast<Svc::MemoryAttribute>(mask) & Svc::MemoryAttribute::PermissionLocked) == + (static_cast<Svc::MemoryAttribute>(attr) & Svc::MemoryAttribute::PermissionLocked), + ResultInvalidCombination); + // Validate that the region is in range for the current process. auto& page_table{GetCurrentProcess(system.Kernel()).GetPageTable()}; R_UNLESS(page_table.Contains(address, size), ResultInvalidCurrentMemory); diff --git a/src/core/hle/kernel/svc/svc_physical_memory.cpp b/src/core/hle/kernel/svc/svc_physical_memory.cpp index d3545f232..99330d02a 100644 --- a/src/core/hle/kernel/svc/svc_physical_memory.cpp +++ b/src/core/hle/kernel/svc/svc_physical_memory.cpp @@ -46,7 +46,7 @@ Result MapPhysicalMemory(Core::System& system, u64 addr, u64 size) { KProcess* const current_process{GetCurrentProcessPointer(system.Kernel())}; auto& page_table{current_process->GetPageTable()}; - if (current_process->GetSystemResourceSize() == 0) { + if (current_process->GetTotalSystemResourceSize() == 0) { LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); R_THROW(ResultInvalidState); } @@ -95,7 +95,7 @@ Result UnmapPhysicalMemory(Core::System& system, u64 addr, u64 size) { KProcess* const current_process{GetCurrentProcessPointer(system.Kernel())}; auto& page_table{current_process->GetPageTable()}; - if (current_process->GetSystemResourceSize() == 0) { + if (current_process->GetTotalSystemResourceSize() == 0) { LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); R_THROW(ResultInvalidState); } diff --git a/src/core/hle/kernel/svc/svc_synchronization.cpp b/src/core/hle/kernel/svc/svc_synchronization.cpp index 8ebc1bd1c..6c79cfd8d 100644 --- a/src/core/hle/kernel/svc/svc_synchronization.cpp +++ b/src/core/hle/kernel/svc/svc_synchronization.cpp @@ -132,7 +132,7 @@ void SynchronizePreemptionState(Core::System& system) { GetCurrentThread(kernel).ClearInterruptFlag(); // Unpin the current thread. - cur_process->UnpinCurrentThread(core_id); + cur_process->UnpinCurrentThread(); } } diff --git a/src/core/hle/kernel/svc/svc_thread.cpp b/src/core/hle/kernel/svc/svc_thread.cpp index 933b82e30..755fd62b5 100644 --- a/src/core/hle/kernel/svc/svc_thread.cpp +++ b/src/core/hle/kernel/svc/svc_thread.cpp @@ -85,10 +85,6 @@ Result StartThread(Core::System& system, Handle thread_handle) { // Try to start the thread. R_TRY(thread->Run()); - // If we succeeded, persist a reference to the thread. - thread->Open(); - system.Kernel().RegisterInUseObject(thread.GetPointerUnsafe()); - R_SUCCEED(); } @@ -99,7 +95,6 @@ void ExitThread(Core::System& system) { auto* const current_thread = GetCurrentThreadPointer(system.Kernel()); system.GlobalSchedulerContext().RemoveThread(current_thread); current_thread->Exit(); - system.Kernel().UnregisterInUseObject(current_thread); } /// Sleep the current thread @@ -260,7 +255,7 @@ Result GetThreadList(Core::System& system, s32* out_num_threads, u64 out_thread_ auto list_iter = thread_list.cbegin(); for (std::size_t i = 0; i < copy_amount; ++i, ++list_iter) { - memory.Write64(out_thread_ids, (*list_iter)->GetThreadId()); + memory.Write64(out_thread_ids, list_iter->GetThreadId()); out_thread_ids += sizeof(u64); } diff --git a/src/core/hle/kernel/svc/svc_transfer_memory.cpp b/src/core/hle/kernel/svc/svc_transfer_memory.cpp index 7d94e7f09..1f97121b3 100644 --- a/src/core/hle/kernel/svc/svc_transfer_memory.cpp +++ b/src/core/hle/kernel/svc/svc_transfer_memory.cpp @@ -71,15 +71,59 @@ Result CreateTransferMemory(Core::System& system, Handle* out, u64 address, u64 } Result MapTransferMemory(Core::System& system, Handle trmem_handle, uint64_t address, uint64_t size, - MemoryPermission owner_perm) { - UNIMPLEMENTED(); - R_THROW(ResultNotImplemented); + MemoryPermission map_perm) { + // Validate the address/size. + R_UNLESS(Common::IsAligned(address, PageSize), ResultInvalidAddress); + R_UNLESS(Common::IsAligned(size, PageSize), ResultInvalidSize); + R_UNLESS(size > 0, ResultInvalidSize); + R_UNLESS((address < address + size), ResultInvalidCurrentMemory); + + // Validate the permission. + R_UNLESS(IsValidTransferMemoryPermission(map_perm), ResultInvalidState); + + // Get the transfer memory. + KScopedAutoObject trmem = GetCurrentProcess(system.Kernel()) + .GetHandleTable() + .GetObject<KTransferMemory>(trmem_handle); + R_UNLESS(trmem.IsNotNull(), ResultInvalidHandle); + + // Verify that the mapping is in range. + R_UNLESS(GetCurrentProcess(system.Kernel()) + .GetPageTable() + .CanContain(address, size, KMemoryState::Transfered), + ResultInvalidMemoryRegion); + + // Map the transfer memory. + R_TRY(trmem->Map(address, size, map_perm)); + + // We succeeded. + R_SUCCEED(); } Result UnmapTransferMemory(Core::System& system, Handle trmem_handle, uint64_t address, uint64_t size) { - UNIMPLEMENTED(); - R_THROW(ResultNotImplemented); + // Validate the address/size. + R_UNLESS(Common::IsAligned(address, PageSize), ResultInvalidAddress); + R_UNLESS(Common::IsAligned(size, PageSize), ResultInvalidSize); + R_UNLESS(size > 0, ResultInvalidSize); + R_UNLESS((address < address + size), ResultInvalidCurrentMemory); + + // Get the transfer memory. + KScopedAutoObject trmem = GetCurrentProcess(system.Kernel()) + .GetHandleTable() + .GetObject<KTransferMemory>(trmem_handle); + R_UNLESS(trmem.IsNotNull(), ResultInvalidHandle); + + // Verify that the mapping is in range. + R_UNLESS(GetCurrentProcess(system.Kernel()) + .GetPageTable() + .CanContain(address, size, KMemoryState::Transfered), + ResultInvalidMemoryRegion); + + // Unmap the transfer memory. + R_TRY(trmem->Unmap(address, size)); + + R_SUCCEED(); } Result MapTransferMemory64(Core::System& system, Handle trmem_handle, uint64_t address, diff --git a/src/core/hle/kernel/svc_generator.py b/src/core/hle/kernel/svc_generator.py index 7fcbb1ba1..5531faac6 100644 --- a/src/core/hle/kernel/svc_generator.py +++ b/src/core/hle/kernel/svc_generator.py @@ -592,7 +592,7 @@ void Call(Core::System& system, u32 imm) { auto& kernel = system.Kernel(); kernel.EnterSVCProfile(); - if (GetCurrentProcess(system.Kernel()).Is64BitProcess()) { + if (GetCurrentProcess(system.Kernel()).Is64Bit()) { Call64(system, imm); } else { Call32(system, imm); diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h index 7f380ca4f..50de02e36 100644 --- a/src/core/hle/kernel/svc_types.h +++ b/src/core/hle/kernel/svc_types.h @@ -46,6 +46,7 @@ enum class MemoryAttribute : u32 { IpcLocked = (1 << 1), DeviceShared = (1 << 2), Uncached = (1 << 3), + PermissionLocked = (1 << 4), }; DECLARE_ENUM_FLAG_OPERATORS(MemoryAttribute); @@ -603,13 +604,57 @@ enum class ProcessActivity : u32 { Paused, }; +enum class CreateProcessFlag : u32 { + // Is 64 bit? + Is64Bit = (1 << 0), + + // What kind of address space? + AddressSpaceShift = 1, + AddressSpaceMask = (7 << AddressSpaceShift), + AddressSpace32Bit = (0 << AddressSpaceShift), + AddressSpace64BitDeprecated = (1 << AddressSpaceShift), + AddressSpace32BitWithoutAlias = (2 << AddressSpaceShift), + AddressSpace64Bit = (3 << AddressSpaceShift), + + // Should JIT debug be done on crash? + EnableDebug = (1 << 4), + + // Should ASLR be enabled for the process? + EnableAslr = (1 << 5), + + // Is the process an application? + IsApplication = (1 << 6), + + // 4.x deprecated: Should use secure memory? + DeprecatedUseSecureMemory = (1 << 7), + + // 5.x+ Pool partition type. + PoolPartitionShift = 7, + PoolPartitionMask = (0xF << PoolPartitionShift), + PoolPartitionApplication = (0 << PoolPartitionShift), + PoolPartitionApplet = (1 << PoolPartitionShift), + PoolPartitionSystem = (2 << PoolPartitionShift), + PoolPartitionSystemNonSecure = (3 << PoolPartitionShift), + + // 7.x+ Should memory allocation be optimized? This requires IsApplication. + OptimizeMemoryAllocation = (1 << 11), + + // 11.x+ DisableDeviceAddressSpaceMerge. + DisableDeviceAddressSpaceMerge = (1 << 12), + + // Mask of all flags. + All = Is64Bit | AddressSpaceMask | EnableDebug | EnableAslr | IsApplication | + PoolPartitionMask | OptimizeMemoryAllocation | DisableDeviceAddressSpaceMerge, +}; +DECLARE_ENUM_FLAG_OPERATORS(CreateProcessFlag); + struct CreateProcessParameter { std::array<char, 12> name; u32 version; u64 program_id; u64 code_address; s32 code_num_pages; - u32 flags; + CreateProcessFlag flags; Handle reslimit; s32 system_resource_num_pages; }; diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index b971401e6..1b1c8190e 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -49,7 +49,7 @@ public: : ServiceFramework{system_, "IManagerForSystemService"} { // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, "CheckAvailability"}, + {0, &IManagerForSystemService::CheckAvailability, "CheckAvailability"}, {1, nullptr, "GetAccountId"}, {2, nullptr, "EnsureIdTokenCacheAsync"}, {3, nullptr, "LoadIdTokenCache"}, @@ -78,6 +78,13 @@ public: RegisterHandlers(functions); } + +private: + void CheckAvailability(HLERequestContext& ctx) { + LOG_WARNING(Service_ACC, "(STUBBED) called"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); + } }; // 3.0.0+ @@ -400,13 +407,13 @@ protected: IPC::RequestParser rp{ctx}; const auto base = rp.PopRaw<ProfileBase>(); - const auto user_data = ctx.ReadBuffer(); - const auto image_data = ctx.ReadBuffer(1); + const auto image_data = ctx.ReadBufferA(0); + const auto user_data = ctx.ReadBufferX(0); - LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid=0x{}", - Common::StringFromFixedZeroTerminatedBuffer( - reinterpret_cast<const char*>(base.username.data()), base.username.size()), - base.timestamp, base.user_uuid.RawString()); + LOG_INFO(Service_ACC, "called, username='{}', timestamp={:016X}, uuid=0x{}", + Common::StringFromFixedZeroTerminatedBuffer( + reinterpret_cast<const char*>(base.username.data()), base.username.size()), + base.timestamp, base.user_uuid.RawString()); if (user_data.size() < sizeof(UserData)) { LOG_ERROR(Service_ACC, "UserData buffer too small!"); @@ -837,6 +844,29 @@ void Module::Interface::InitializeApplicationInfoV2(HLERequestContext& ctx) { rb.Push(ResultSuccess); } +void Module::Interface::BeginUserRegistration(HLERequestContext& ctx) { + const auto user_id = Common::UUID::MakeRandom(); + profile_manager->CreateNewUser(user_id, "yuzu"); + + LOG_INFO(Service_ACC, "called, uuid={}", user_id.FormattedString()); + + IPC::ResponseBuilder rb{ctx, 6}; + rb.Push(ResultSuccess); + rb.PushRaw(user_id); +} + +void Module::Interface::CompleteUserRegistration(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + Common::UUID user_id = rp.PopRaw<Common::UUID>(); + + LOG_INFO(Service_ACC, "called, uuid={}", user_id.FormattedString()); + + profile_manager->WriteUserSaveFile(); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + void Module::Interface::GetProfileEditor(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; Common::UUID user_id = rp.PopRaw<Common::UUID>(); @@ -880,6 +910,17 @@ void Module::Interface::StoreSaveDataThumbnailApplication(HLERequestContext& ctx StoreSaveDataThumbnail(ctx, uuid, tid); } +void Module::Interface::GetBaasAccountManagerForSystemService(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto uuid = rp.PopRaw<Common::UUID>(); + + LOG_INFO(Service_ACC, "called, uuid=0x{}", uuid.RawString()); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(ResultSuccess); + rb.PushIpcInterface<IManagerForSystemService>(system, uuid); +} + void Module::Interface::StoreSaveDataThumbnailSystem(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto uuid = rp.PopRaw<Common::UUID>(); diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h index 6b4735c2f..0395229b4 100644 --- a/src/core/hle/service/acc/acc.h +++ b/src/core/hle/service/acc/acc.h @@ -33,10 +33,13 @@ public: void TrySelectUserWithoutInteraction(HLERequestContext& ctx); void IsUserAccountSwitchLocked(HLERequestContext& ctx); void InitializeApplicationInfoV2(HLERequestContext& ctx); + void BeginUserRegistration(HLERequestContext& ctx); + void CompleteUserRegistration(HLERequestContext& ctx); void GetProfileEditor(HLERequestContext& ctx); void ListQualifiedUsers(HLERequestContext& ctx); void ListOpenContextStoredUsers(HLERequestContext& ctx); void StoreSaveDataThumbnailApplication(HLERequestContext& ctx); + void GetBaasAccountManagerForSystemService(HLERequestContext& ctx); void StoreSaveDataThumbnailSystem(HLERequestContext& ctx); private: diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp index d9882ecd3..770d13ec5 100644 --- a/src/core/hle/service/acc/acc_su.cpp +++ b/src/core/hle/service/acc/acc_su.cpp @@ -23,7 +23,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module_, std::shared_ptr<ProfileManager> {99, nullptr, "DebugActivateOpenContextRetention"}, {100, nullptr, "GetUserRegistrationNotifier"}, {101, nullptr, "GetUserStateChangeNotifier"}, - {102, nullptr, "GetBaasAccountManagerForSystemService"}, + {102, &ACC_SU::GetBaasAccountManagerForSystemService, "GetBaasAccountManagerForSystemService"}, {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"}, {104, nullptr, "GetProfileUpdateNotifier"}, {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, @@ -40,8 +40,8 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module_, std::shared_ptr<ProfileManager> {152, nullptr, "LoadSignedDeviceIdentifierCacheForNintendoAccount"}, {190, nullptr, "GetUserLastOpenedApplication"}, {191, nullptr, "ActivateOpenContextHolder"}, - {200, nullptr, "BeginUserRegistration"}, - {201, nullptr, "CompleteUserRegistration"}, + {200, &ACC_SU::BeginUserRegistration, "BeginUserRegistration"}, + {201, &ACC_SU::CompleteUserRegistration, "CompleteUserRegistration"}, {202, nullptr, "CancelUserRegistration"}, {203, nullptr, "DeleteUser"}, {204, nullptr, "SetUserPosition"}, diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h index 993a5a57a..900e32200 100644 --- a/src/core/hle/service/acc/profile_manager.h +++ b/src/core/hle/service/acc/profile_manager.h @@ -96,9 +96,10 @@ public: bool SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new, const UserData& data_new); + void WriteUserSaveFile(); + private: void ParseUserSaveFile(); - void WriteUserSaveFile(); std::optional<std::size_t> AddToProfiles(const ProfileInfo& profile); bool RemoveProfileAtIndex(std::size_t index); diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 819dea6a7..ff067c8d9 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -31,7 +31,7 @@ #include "core/hle/service/apm/apm_controller.h" #include "core/hle/service/apm/apm_interface.h" #include "core/hle/service/bcat/backend/backend.h" -#include "core/hle/service/caps/caps.h" +#include "core/hle/service/caps/caps_types.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/ipc_helpers.h" #include "core/hle/service/ns/ns.h" @@ -210,8 +210,8 @@ IDisplayController::IDisplayController(Core::System& system_) {21, nullptr, "ClearAppletTransitionBuffer"}, {22, nullptr, "AcquireLastApplicationCaptureSharedBuffer"}, {23, nullptr, "ReleaseLastApplicationCaptureSharedBuffer"}, - {24, nullptr, "AcquireLastForegroundCaptureSharedBuffer"}, - {25, nullptr, "ReleaseLastForegroundCaptureSharedBuffer"}, + {24, &IDisplayController::AcquireLastForegroundCaptureSharedBuffer, "AcquireLastForegroundCaptureSharedBuffer"}, + {25, &IDisplayController::ReleaseLastForegroundCaptureSharedBuffer, "ReleaseLastForegroundCaptureSharedBuffer"}, {26, &IDisplayController::AcquireCallerAppletCaptureSharedBuffer, "AcquireCallerAppletCaptureSharedBuffer"}, {27, &IDisplayController::ReleaseCallerAppletCaptureSharedBuffer, "ReleaseCallerAppletCaptureSharedBuffer"}, {28, nullptr, "TakeScreenShotOfOwnLayerEx"}, @@ -239,6 +239,22 @@ void IDisplayController::TakeScreenShotOfOwnLayer(HLERequestContext& ctx) { rb.Push(ResultSuccess); } +void IDisplayController::AcquireLastForegroundCaptureSharedBuffer(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(ResultSuccess); + rb.Push(1U); + rb.Push(0); +} + +void IDisplayController::ReleaseLastForegroundCaptureSharedBuffer(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + void IDisplayController::AcquireCallerAppletCaptureSharedBuffer(HLERequestContext& ctx) { LOG_WARNING(Service_AM, "(STUBBED) called"); @@ -764,6 +780,68 @@ void AppletMessageQueue::OperationModeChanged() { on_operation_mode_changed->Signal(); } +ILockAccessor::ILockAccessor(Core::System& system_) + : ServiceFramework{system_, "ILockAccessor"}, service_context{system_, "ILockAccessor"} { + // clang-format off + static const FunctionInfo functions[] = { + {1, &ILockAccessor::TryLock, "TryLock"}, + {2, &ILockAccessor::Unlock, "Unlock"}, + {3, &ILockAccessor::GetEvent, "GetEvent"}, + {4,&ILockAccessor::IsLocked, "IsLocked"}, + }; + // clang-format on + + RegisterHandlers(functions); + + lock_event = service_context.CreateEvent("ILockAccessor::LockEvent"); +} + +ILockAccessor::~ILockAccessor() { + service_context.CloseEvent(lock_event); +}; + +void ILockAccessor::TryLock(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto return_handle = rp.Pop<bool>(); + + LOG_WARNING(Service_AM, "(STUBBED) called, return_handle={}", return_handle); + + // TODO: When return_handle is true this function should return the lock handle + + is_locked = true; + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(ResultSuccess); + rb.Push<u8>(is_locked); +} + +void ILockAccessor::Unlock(HLERequestContext& ctx) { + LOG_INFO(Service_AM, "called"); + + is_locked = false; + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + +void ILockAccessor::GetEvent(HLERequestContext& ctx) { + LOG_INFO(Service_AM, "called"); + + lock_event->Signal(); + + IPC::ResponseBuilder rb{ctx, 2, 1}; + rb.Push(ResultSuccess); + rb.PushCopyObjects(lock_event->GetReadableEvent()); +} + +void ILockAccessor::IsLocked(HLERequestContext& ctx) { + LOG_INFO(Service_AM, "called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); + rb.Push<u8>(is_locked); +} + ICommonStateGetter::ICommonStateGetter(Core::System& system_, std::shared_ptr<AppletMessageQueue> msg_queue_) : ServiceFramework{system_, "ICommonStateGetter"}, msg_queue{std::move(msg_queue_)}, @@ -787,7 +865,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system_, {14, nullptr, "GetWakeupCount"}, {20, nullptr, "PushToGeneralChannel"}, {30, nullptr, "GetHomeButtonReaderLockAccessor"}, - {31, nullptr, "GetReaderLockAccessorEx"}, + {31, &ICommonStateGetter::GetReaderLockAccessorEx, "GetReaderLockAccessorEx"}, {32, nullptr, "GetWriterLockAccessorEx"}, {40, nullptr, "GetCradleFwVersion"}, {50, &ICommonStateGetter::IsVrModeEnabled, "IsVrModeEnabled"}, @@ -805,7 +883,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system_, {65, nullptr, "GetApplicationIdByContentActionName"}, {66, &ICommonStateGetter::SetCpuBoostMode, "SetCpuBoostMode"}, {67, nullptr, "CancelCpuBoostMode"}, - {68, nullptr, "GetBuiltInDisplayType"}, + {68, &ICommonStateGetter::GetBuiltInDisplayType, "GetBuiltInDisplayType"}, {80, &ICommonStateGetter::PerformSystemButtonPressingIfInFocus, "PerformSystemButtonPressingIfInFocus"}, {90, nullptr, "SetPerformanceConfigurationChangedNotification"}, {91, nullptr, "GetCurrentPerformanceConfiguration"}, @@ -833,7 +911,9 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system_, msg_queue->PushMessage(AppletMessageQueue::AppletMessage::ChangeIntoForeground); } -ICommonStateGetter::~ICommonStateGetter() = default; +ICommonStateGetter::~ICommonStateGetter() { + service_context.CloseEvent(sleep_lock_event); +}; void ICommonStateGetter::GetBootMode(HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); @@ -886,6 +966,18 @@ void ICommonStateGetter::RequestToAcquireSleepLock(HLERequestContext& ctx) { rb.Push(ResultSuccess); } +void ICommonStateGetter::GetReaderLockAccessorEx(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto unknown = rp.Pop<u32>(); + + LOG_INFO(Service_AM, "called, unknown={}", unknown); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + + rb.Push(ResultSuccess); + rb.PushIpcInterface<ILockAccessor>(system); +} + void ICommonStateGetter::GetAcquiredSleepLockEvent(HLERequestContext& ctx) { LOG_WARNING(Service_AM, "called"); @@ -970,6 +1062,14 @@ void ICommonStateGetter::SetCpuBoostMode(HLERequestContext& ctx) { apm_sys->SetCpuBoostMode(ctx); } +void ICommonStateGetter::GetBuiltInDisplayType(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(ResultSuccess); + rb.Push(0); +} + void ICommonStateGetter::PerformSystemButtonPressingIfInFocus(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto system_button{rp.PopEnum<SystemButtonType>()}; @@ -1477,7 +1577,7 @@ ILibraryAppletSelfAccessor::ILibraryAppletSelfAccessor(Core::System& system_) {100, nullptr, "CreateGameMovieTrimmer"}, {101, nullptr, "ReserveResourceForMovieOperation"}, {102, nullptr, "UnreserveResourceForMovieOperation"}, - {110, nullptr, "GetMainAppletAvailableUsers"}, + {110, &ILibraryAppletSelfAccessor::GetMainAppletAvailableUsers, "GetMainAppletAvailableUsers"}, {120, nullptr, "GetLaunchStorageInfoForDebug"}, {130, nullptr, "GetGpuErrorDetectedSystemEvent"}, {140, nullptr, "SetApplicationMemoryReservation"}, @@ -1493,6 +1593,9 @@ ILibraryAppletSelfAccessor::ILibraryAppletSelfAccessor(Core::System& system_) case Applets::AppletId::MiiEdit: PushInShowMiiEditData(); break; + case Applets::AppletId::PhotoViewer: + PushInShowAlbum(); + break; default: break; } @@ -1569,6 +1672,42 @@ void ILibraryAppletSelfAccessor::GetCallerAppletIdentityInfo(HLERequestContext& rb.PushRaw(applet_info); } +void ILibraryAppletSelfAccessor::GetMainAppletAvailableUsers(HLERequestContext& ctx) { + const Service::Account::ProfileManager manager{}; + bool is_empty{true}; + s32 user_count{-1}; + + LOG_INFO(Service_AM, "called"); + + if (manager.GetUserCount() > 0) { + is_empty = false; + user_count = static_cast<s32>(manager.GetUserCount()); + ctx.WriteBuffer(manager.GetAllUsers()); + } + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(ResultSuccess); + rb.Push<u8>(is_empty); + rb.Push(user_count); +} + +void ILibraryAppletSelfAccessor::PushInShowAlbum() { + const Applets::CommonArguments arguments{ + .arguments_version = Applets::CommonArgumentVersion::Version3, + .size = Applets::CommonArgumentSize::Version3, + .library_version = 1, + .theme_color = Applets::ThemeColor::BasicBlack, + .play_startup_sound = true, + .system_tick = system.CoreTiming().GetClockTicks(), + }; + + std::vector<u8> argument_data(sizeof(arguments)); + std::vector<u8> settings_data{2}; + std::memcpy(argument_data.data(), &arguments, sizeof(arguments)); + queue_data.emplace_back(std::move(argument_data)); + queue_data.emplace_back(std::move(settings_data)); +} + void ILibraryAppletSelfAccessor::PushInShowCabinetData() { const Applets::CommonArguments arguments{ .arguments_version = Applets::CommonArgumentVersion::Version3, diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 349482dcc..64b3f3fe2 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -124,6 +124,8 @@ public: private: void GetCallerAppletCaptureImageEx(HLERequestContext& ctx); void TakeScreenShotOfOwnLayer(HLERequestContext& ctx); + void AcquireLastForegroundCaptureSharedBuffer(HLERequestContext& ctx); + void ReleaseLastForegroundCaptureSharedBuffer(HLERequestContext& ctx); void AcquireCallerAppletCaptureSharedBuffer(HLERequestContext& ctx); void ReleaseCallerAppletCaptureSharedBuffer(HLERequestContext& ctx); }; @@ -195,6 +197,23 @@ private: ScreenshotPermission screenshot_permission = ScreenshotPermission::Inherit; }; +class ILockAccessor final : public ServiceFramework<ILockAccessor> { +public: + explicit ILockAccessor(Core::System& system_); + ~ILockAccessor() override; + +private: + void TryLock(HLERequestContext& ctx); + void Unlock(HLERequestContext& ctx); + void GetEvent(HLERequestContext& ctx); + void IsLocked(HLERequestContext& ctx); + + bool is_locked{}; + + Kernel::KEvent* lock_event; + KernelHelpers::ServiceContext service_context; +}; + class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { public: explicit ICommonStateGetter(Core::System& system_, @@ -237,6 +256,7 @@ private: void GetCurrentFocusState(HLERequestContext& ctx); void RequestToAcquireSleepLock(HLERequestContext& ctx); void GetAcquiredSleepLockEvent(HLERequestContext& ctx); + void GetReaderLockAccessorEx(HLERequestContext& ctx); void GetDefaultDisplayResolutionChangeEvent(HLERequestContext& ctx); void GetOperationMode(HLERequestContext& ctx); void GetPerformanceMode(HLERequestContext& ctx); @@ -248,6 +268,7 @@ private: void EndVrModeEx(HLERequestContext& ctx); void GetDefaultDisplayResolution(HLERequestContext& ctx); void SetCpuBoostMode(HLERequestContext& ctx); + void GetBuiltInDisplayType(HLERequestContext& ctx); void PerformSystemButtonPressingIfInFocus(HLERequestContext& ctx); void GetSettingsPlatformRegion(HLERequestContext& ctx); void SetRequestExitToLibraryAppletAtExecuteNextProgramEnabled(HLERequestContext& ctx); @@ -326,7 +347,9 @@ private: void GetLibraryAppletInfo(HLERequestContext& ctx); void ExitProcessAndReturn(HLERequestContext& ctx); void GetCallerAppletIdentityInfo(HLERequestContext& ctx); + void GetMainAppletAvailableUsers(HLERequestContext& ctx); + void PushInShowAlbum(); void PushInShowCabinetData(); void PushInShowMiiEditData(); diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp index eb12312cc..e30e6478a 100644 --- a/src/core/hle/service/am/applet_ae.cpp +++ b/src/core/hle/service/am/applet_ae.cpp @@ -28,8 +28,8 @@ public: {11, &ILibraryAppletProxy::GetLibraryAppletCreator, "GetLibraryAppletCreator"}, {20, &ILibraryAppletProxy::OpenLibraryAppletSelfAccessor, "OpenLibraryAppletSelfAccessor"}, {21, &ILibraryAppletProxy::GetAppletCommonFunctions, "GetAppletCommonFunctions"}, - {22, nullptr, "GetHomeMenuFunctions"}, - {23, nullptr, "GetGlobalStateController"}, + {22, &ILibraryAppletProxy::GetHomeMenuFunctions, "GetHomeMenuFunctions"}, + {23, &ILibraryAppletProxy::GetGlobalStateController, "GetGlobalStateController"}, {1000, &ILibraryAppletProxy::GetDebugFunctions, "GetDebugFunctions"}, }; // clang-format on @@ -110,6 +110,22 @@ private: rb.PushIpcInterface<IAppletCommonFunctions>(system); } + void GetHomeMenuFunctions(HLERequestContext& ctx) { + LOG_DEBUG(Service_AM, "called"); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(ResultSuccess); + rb.PushIpcInterface<IHomeMenuFunctions>(system); + } + + void GetGlobalStateController(HLERequestContext& ctx) { + LOG_DEBUG(Service_AM, "called"); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(ResultSuccess); + rb.PushIpcInterface<IGlobalStateController>(system); + } + void GetDebugFunctions(HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); diff --git a/src/core/hle/service/am/applets/applet_cabinet.cpp b/src/core/hle/service/am/applets/applet_cabinet.cpp index 19ed184e8..b379dadeb 100644 --- a/src/core/hle/service/am/applets/applet_cabinet.cpp +++ b/src/core/hle/service/am/applets/applet_cabinet.cpp @@ -25,7 +25,9 @@ Cabinet::Cabinet(Core::System& system_, LibraryAppletMode applet_mode_, service_context.CreateEvent("CabinetApplet:AvailabilityChangeEvent"); } -Cabinet::~Cabinet() = default; +Cabinet::~Cabinet() { + service_context.CloseEvent(availability_change_event); +}; void Cabinet::Initialize() { Applet::Initialize(); diff --git a/src/core/hle/service/am/applets/applet_error.cpp b/src/core/hle/service/am/applets/applet_error.cpp index b46ea840c..5d17c353f 100644 --- a/src/core/hle/service/am/applets/applet_error.cpp +++ b/src/core/hle/service/am/applets/applet_error.cpp @@ -138,6 +138,10 @@ void Error::Initialize() { CopyArgumentData(data, args->application_error); error_code = Result(args->application_error.error_code); break; + case ErrorAppletMode::ShowErrorPctl: + CopyArgumentData(data, args->error_record); + error_code = Decode64BitError(args->error_record.error_code_64); + break; case ErrorAppletMode::ShowErrorRecord: CopyArgumentData(data, args->error_record); error_code = Decode64BitError(args->error_record.error_code_64); @@ -191,6 +195,7 @@ void Error::Execute() { frontend.ShowCustomErrorText(error_code, main_text_string, detail_text_string, callback); break; } + case ErrorAppletMode::ShowErrorPctl: case ErrorAppletMode::ShowErrorRecord: reporter.SaveErrorReport(title_id, error_code, fmt::format("{:016X}", args->error_record.posix_time)); diff --git a/src/core/hle/service/caps/caps.cpp b/src/core/hle/service/caps/caps.cpp index 610fe9940..cd1dfe993 100644 --- a/src/core/hle/service/caps/caps.cpp +++ b/src/core/hle/service/caps/caps.cpp @@ -4,6 +4,7 @@ #include "core/hle/service/caps/caps.h" #include "core/hle/service/caps/caps_a.h" #include "core/hle/service/caps/caps_c.h" +#include "core/hle/service/caps/caps_manager.h" #include "core/hle/service/caps/caps_sc.h" #include "core/hle/service/caps/caps_ss.h" #include "core/hle/service/caps/caps_su.h" @@ -15,13 +16,22 @@ namespace Service::Capture { void LoopProcess(Core::System& system) { auto server_manager = std::make_unique<ServerManager>(system); + auto album_manager = std::make_shared<AlbumManager>(system); + + server_manager->RegisterNamedService( + "caps:a", std::make_shared<IAlbumAccessorService>(system, album_manager)); + server_manager->RegisterNamedService( + "caps:c", std::make_shared<IAlbumControlService>(system, album_manager)); + server_manager->RegisterNamedService( + "caps:u", std::make_shared<IAlbumApplicationService>(system, album_manager)); + + server_manager->RegisterNamedService( + "caps:ss", std::make_shared<IScreenShotService>(system, album_manager)); + server_manager->RegisterNamedService("caps:sc", + std::make_shared<IScreenShotControlService>(system)); + server_manager->RegisterNamedService( + "caps:su", std::make_shared<IScreenShotApplicationService>(system, album_manager)); - server_manager->RegisterNamedService("caps:a", std::make_shared<CAPS_A>(system)); - server_manager->RegisterNamedService("caps:c", std::make_shared<CAPS_C>(system)); - server_manager->RegisterNamedService("caps:u", std::make_shared<CAPS_U>(system)); - server_manager->RegisterNamedService("caps:sc", std::make_shared<CAPS_SC>(system)); - server_manager->RegisterNamedService("caps:ss", std::make_shared<CAPS_SS>(system)); - server_manager->RegisterNamedService("caps:su", std::make_shared<CAPS_SU>(system)); ServerManager::RunServer(std::move(server_manager)); } diff --git a/src/core/hle/service/caps/caps.h b/src/core/hle/service/caps/caps.h index 15f0ecfaa..58e9725b8 100644 --- a/src/core/hle/service/caps/caps.h +++ b/src/core/hle/service/caps/caps.h @@ -3,93 +3,12 @@ #pragma once -#include "common/common_funcs.h" -#include "common/common_types.h" - namespace Core { class System; } -namespace Service::SM { -class ServiceManager; -} - namespace Service::Capture { -enum class AlbumImageOrientation { - Orientation0 = 0, - Orientation1 = 1, - Orientation2 = 2, - Orientation3 = 3, -}; - -enum class AlbumReportOption : s32 { - Disable = 0, - Enable = 1, -}; - -enum class ContentType : u8 { - Screenshot = 0, - Movie = 1, - ExtraMovie = 3, -}; - -enum class AlbumStorage : u8 { - NAND = 0, - SD = 1, -}; - -struct AlbumFileDateTime { - s16 year{}; - s8 month{}; - s8 day{}; - s8 hour{}; - s8 minute{}; - s8 second{}; - s8 uid{}; -}; -static_assert(sizeof(AlbumFileDateTime) == 0x8, "AlbumFileDateTime has incorrect size."); - -struct AlbumEntry { - u64 size{}; - u64 application_id{}; - AlbumFileDateTime datetime{}; - AlbumStorage storage{}; - ContentType content{}; - INSERT_PADDING_BYTES(6); -}; -static_assert(sizeof(AlbumEntry) == 0x20, "AlbumEntry has incorrect size."); - -struct AlbumFileEntry { - u64 size{}; // Size of the entry - u64 hash{}; // AES256 with hardcoded key over AlbumEntry - AlbumFileDateTime datetime{}; - AlbumStorage storage{}; - ContentType content{}; - INSERT_PADDING_BYTES(5); - u8 unknown{1}; // Set to 1 on official SW -}; -static_assert(sizeof(AlbumFileEntry) == 0x20, "AlbumFileEntry has incorrect size."); - -struct ApplicationAlbumEntry { - u64 size{}; // Size of the entry - u64 hash{}; // AES256 with hardcoded key over AlbumEntry - AlbumFileDateTime datetime{}; - AlbumStorage storage{}; - ContentType content{}; - INSERT_PADDING_BYTES(5); - u8 unknown{1}; // Set to 1 on official SW -}; -static_assert(sizeof(ApplicationAlbumEntry) == 0x20, "ApplicationAlbumEntry has incorrect size."); - -struct ApplicationAlbumFileEntry { - ApplicationAlbumEntry entry{}; - AlbumFileDateTime datetime{}; - u64 unknown{}; -}; -static_assert(sizeof(ApplicationAlbumFileEntry) == 0x30, - "ApplicationAlbumFileEntry has incorrect size."); - void LoopProcess(Core::System& system); } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_a.cpp b/src/core/hle/service/caps/caps_a.cpp index 44267b284..9925720a3 100644 --- a/src/core/hle/service/caps/caps_a.cpp +++ b/src/core/hle/service/caps/caps_a.cpp @@ -1,40 +1,26 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/logging/log.h" #include "core/hle/service/caps/caps_a.h" +#include "core/hle/service/caps/caps_manager.h" +#include "core/hle/service/caps/caps_result.h" +#include "core/hle/service/caps/caps_types.h" +#include "core/hle/service/ipc_helpers.h" namespace Service::Capture { -class IAlbumAccessorSession final : public ServiceFramework<IAlbumAccessorSession> { -public: - explicit IAlbumAccessorSession(Core::System& system_) - : ServiceFramework{system_, "IAlbumAccessorSession"} { - // clang-format off - static const FunctionInfo functions[] = { - {2001, nullptr, "OpenAlbumMovieReadStream"}, - {2002, nullptr, "CloseAlbumMovieReadStream"}, - {2003, nullptr, "GetAlbumMovieReadStreamMovieDataSize"}, - {2004, nullptr, "ReadMovieDataFromAlbumMovieReadStream"}, - {2005, nullptr, "GetAlbumMovieReadStreamBrokenReason"}, - {2006, nullptr, "GetAlbumMovieReadStreamImageDataSize"}, - {2007, nullptr, "ReadImageDataFromAlbumMovieReadStream"}, - {2008, nullptr, "ReadFileAttributeFromAlbumMovieReadStream"}, - }; - // clang-format on - - RegisterHandlers(functions); - } -}; - -CAPS_A::CAPS_A(Core::System& system_) : ServiceFramework{system_, "caps:a"} { +IAlbumAccessorService::IAlbumAccessorService(Core::System& system_, + std::shared_ptr<AlbumManager> album_manager) + : ServiceFramework{system_, "caps:a"}, manager{album_manager} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "GetAlbumFileCount"}, {1, nullptr, "GetAlbumFileList"}, {2, nullptr, "LoadAlbumFile"}, - {3, nullptr, "DeleteAlbumFile"}, + {3, &IAlbumAccessorService::DeleteAlbumFile, "DeleteAlbumFile"}, {4, nullptr, "StorageCopyAlbumFile"}, - {5, nullptr, "IsAlbumMounted"}, + {5, &IAlbumAccessorService::IsAlbumMounted, "IsAlbumMounted"}, {6, nullptr, "GetAlbumUsage"}, {7, nullptr, "GetAlbumFileSize"}, {8, nullptr, "LoadAlbumFileThumbnail"}, @@ -47,18 +33,18 @@ CAPS_A::CAPS_A(Core::System& system_) : ServiceFramework{system_, "caps:a"} { {15, nullptr, "GetAlbumUsage3"}, {16, nullptr, "GetAlbumMountResult"}, {17, nullptr, "GetAlbumUsage16"}, - {18, nullptr, "Unknown18"}, + {18, &IAlbumAccessorService::Unknown18, "Unknown18"}, {19, nullptr, "Unknown19"}, {100, nullptr, "GetAlbumFileCountEx0"}, - {101, nullptr, "GetAlbumFileListEx0"}, + {101, &IAlbumAccessorService::GetAlbumFileListEx0, "GetAlbumFileListEx0"}, {202, nullptr, "SaveEditedScreenShot"}, {301, nullptr, "GetLastThumbnail"}, {302, nullptr, "GetLastOverlayMovieThumbnail"}, - {401, nullptr, "GetAutoSavingStorage"}, + {401, &IAlbumAccessorService::GetAutoSavingStorage, "GetAutoSavingStorage"}, {501, nullptr, "GetRequiredStorageSpaceSizeToCopyAll"}, {1001, nullptr, "LoadAlbumScreenShotThumbnailImageEx0"}, - {1002, nullptr, "LoadAlbumScreenShotImageEx1"}, - {1003, nullptr, "LoadAlbumScreenShotThumbnailImageEx1"}, + {1002, &IAlbumAccessorService::LoadAlbumScreenShotImageEx1, "LoadAlbumScreenShotImageEx1"}, + {1003, &IAlbumAccessorService::LoadAlbumScreenShotThumbnailImageEx1, "LoadAlbumScreenShotThumbnailImageEx1"}, {8001, nullptr, "ForceAlbumUnmounted"}, {8002, nullptr, "ResetAlbumMountStatus"}, {8011, nullptr, "RefreshAlbumCache"}, @@ -74,6 +60,199 @@ CAPS_A::CAPS_A(Core::System& system_) : ServiceFramework{system_, "caps:a"} { RegisterHandlers(functions); } -CAPS_A::~CAPS_A() = default; +IAlbumAccessorService::~IAlbumAccessorService() = default; + +void IAlbumAccessorService::DeleteAlbumFile(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto file_id{rp.PopRaw<AlbumFileId>()}; + + LOG_INFO(Service_Capture, "called, application_id=0x{:0x}, storage={}, type={}", + file_id.application_id, file_id.storage, file_id.type); + + Result result = manager->DeleteAlbumFile(file_id); + result = TranslateResult(result); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(result); +} + +void IAlbumAccessorService::IsAlbumMounted(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto storage{rp.PopEnum<AlbumStorage>()}; + + LOG_INFO(Service_Capture, "called, storage={}", storage); + + Result result = manager->IsAlbumMounted(storage); + const bool is_mounted = result.IsSuccess(); + result = TranslateResult(result); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(result); + rb.Push<u8>(is_mounted); +} + +void IAlbumAccessorService::Unknown18(HLERequestContext& ctx) { + struct UnknownBuffer { + INSERT_PADDING_BYTES(0x10); + }; + static_assert(sizeof(UnknownBuffer) == 0x10, "UnknownBuffer is an invalid size"); + + LOG_WARNING(Service_Capture, "(STUBBED) called"); + + std::vector<UnknownBuffer> buffer{}; + + if (!buffer.empty()) { + ctx.WriteBuffer(buffer); + } + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(ResultSuccess); + rb.Push(static_cast<u32>(buffer.size())); +} + +void IAlbumAccessorService::GetAlbumFileListEx0(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto storage{rp.PopEnum<AlbumStorage>()}; + const auto flags{rp.Pop<u8>()}; + const auto album_entry_size{ctx.GetWriteBufferNumElements<AlbumEntry>()}; + + LOG_INFO(Service_Capture, "called, storage={}, flags={}", storage, flags); + + std::vector<AlbumEntry> entries; + Result result = manager->GetAlbumFileList(entries, storage, flags); + result = TranslateResult(result); + + entries.resize(std::min(album_entry_size, entries.size())); + + if (!entries.empty()) { + ctx.WriteBuffer(entries); + } + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(result); + rb.Push<u64>(entries.size()); +} + +void IAlbumAccessorService::GetAutoSavingStorage(HLERequestContext& ctx) { + LOG_WARNING(Service_Capture, "(STUBBED) called"); + + bool is_autosaving{}; + Result result = manager->GetAutoSavingStorage(is_autosaving); + result = TranslateResult(result); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(result); + rb.Push<u8>(is_autosaving); +} + +void IAlbumAccessorService::LoadAlbumScreenShotImageEx1(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto file_id{rp.PopRaw<AlbumFileId>()}; + const auto decoder_options{rp.PopRaw<ScreenShotDecodeOption>()}; + const auto image_buffer_size{ctx.GetWriteBufferSize(1)}; + + LOG_INFO(Service_Capture, "called, application_id=0x{:0x}, storage={}, type={}, flags={}", + file_id.application_id, file_id.storage, file_id.type, decoder_options.flags); + + std::vector<u8> image; + LoadAlbumScreenShotImageOutput image_output; + Result result = + manager->LoadAlbumScreenShotImage(image_output, image, file_id, decoder_options); + result = TranslateResult(result); + + if (image.size() > image_buffer_size) { + result = ResultWorkMemoryError; + } + + if (result.IsSuccess()) { + ctx.WriteBuffer(image_output, 0); + ctx.WriteBuffer(image, 1); + } + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(result); +} + +void IAlbumAccessorService::LoadAlbumScreenShotThumbnailImageEx1(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto file_id{rp.PopRaw<AlbumFileId>()}; + const auto decoder_options{rp.PopRaw<ScreenShotDecodeOption>()}; + + LOG_INFO(Service_Capture, "called, application_id=0x{:0x}, storage={}, type={}, flags={}", + file_id.application_id, file_id.storage, file_id.type, decoder_options.flags); + + std::vector<u8> image(ctx.GetWriteBufferSize(1)); + LoadAlbumScreenShotImageOutput image_output; + Result result = + manager->LoadAlbumScreenShotThumbnail(image_output, image, file_id, decoder_options); + result = TranslateResult(result); + + if (result.IsSuccess()) { + ctx.WriteBuffer(image_output, 0); + ctx.WriteBuffer(image, 1); + } + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(result); +} + +Result IAlbumAccessorService::TranslateResult(Result in_result) { + if (in_result.IsSuccess()) { + return in_result; + } + + if ((in_result.raw & 0x3801ff) == ResultUnknown1024.raw) { + if (in_result.description - 0x514 < 100) { + return ResultInvalidFileData; + } + if (in_result.description - 0x5dc < 100) { + return ResultInvalidFileData; + } + + if (in_result.description - 0x578 < 100) { + if (in_result == ResultFileCountLimit) { + return ResultUnknown22; + } + return ResultUnknown25; + } + + if (in_result.raw < ResultUnknown1801.raw) { + if (in_result == ResultUnknown1202) { + return ResultUnknown810; + } + if (in_result == ResultUnknown1203) { + return ResultUnknown810; + } + if (in_result == ResultUnknown1701) { + return ResultUnknown5; + } + } else if (in_result.raw < ResultUnknown1803.raw) { + if (in_result == ResultUnknown1801) { + return ResultUnknown5; + } + if (in_result == ResultUnknown1802) { + return ResultUnknown6; + } + } else { + if (in_result == ResultUnknown1803) { + return ResultUnknown7; + } + if (in_result == ResultUnknown1804) { + return ResultOutOfRange; + } + } + return ResultUnknown1024; + } + + if (in_result.module == ErrorModule::FS) { + if ((in_result.description >> 0xc < 0x7d) || (in_result.description - 1000 < 2000) || + (((in_result.description - 3000) >> 3) < 0x271)) { + // TODO: Translate FS error + return in_result; + } + } + + return in_result; +} } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_a.h b/src/core/hle/service/caps/caps_a.h index 98a21a5ad..c90cff71e 100644 --- a/src/core/hle/service/caps/caps_a.h +++ b/src/core/hle/service/caps/caps_a.h @@ -10,11 +10,26 @@ class System; } namespace Service::Capture { +class AlbumManager; -class CAPS_A final : public ServiceFramework<CAPS_A> { +class IAlbumAccessorService final : public ServiceFramework<IAlbumAccessorService> { public: - explicit CAPS_A(Core::System& system_); - ~CAPS_A() override; + explicit IAlbumAccessorService(Core::System& system_, + std::shared_ptr<AlbumManager> album_manager); + ~IAlbumAccessorService() override; + +private: + void DeleteAlbumFile(HLERequestContext& ctx); + void IsAlbumMounted(HLERequestContext& ctx); + void Unknown18(HLERequestContext& ctx); + void GetAlbumFileListEx0(HLERequestContext& ctx); + void GetAutoSavingStorage(HLERequestContext& ctx); + void LoadAlbumScreenShotImageEx1(HLERequestContext& ctx); + void LoadAlbumScreenShotThumbnailImageEx1(HLERequestContext& ctx); + + Result TranslateResult(Result in_result); + + std::shared_ptr<AlbumManager> manager = nullptr; }; } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_c.cpp b/src/core/hle/service/caps/caps_c.cpp index fc77e35cd..1e7fe6474 100644 --- a/src/core/hle/service/caps/caps_c.cpp +++ b/src/core/hle/service/caps/caps_c.cpp @@ -3,53 +3,21 @@ #include "common/logging/log.h" #include "core/hle/service/caps/caps_c.h" +#include "core/hle/service/caps/caps_manager.h" +#include "core/hle/service/caps/caps_result.h" +#include "core/hle/service/caps/caps_types.h" #include "core/hle/service/ipc_helpers.h" namespace Service::Capture { -class IAlbumControlSession final : public ServiceFramework<IAlbumControlSession> { -public: - explicit IAlbumControlSession(Core::System& system_) - : ServiceFramework{system_, "IAlbumControlSession"} { - // clang-format off - static const FunctionInfo functions[] = { - {2001, nullptr, "OpenAlbumMovieReadStream"}, - {2002, nullptr, "CloseAlbumMovieReadStream"}, - {2003, nullptr, "GetAlbumMovieReadStreamMovieDataSize"}, - {2004, nullptr, "ReadMovieDataFromAlbumMovieReadStream"}, - {2005, nullptr, "GetAlbumMovieReadStreamBrokenReason"}, - {2006, nullptr, "GetAlbumMovieReadStreamImageDataSize"}, - {2007, nullptr, "ReadImageDataFromAlbumMovieReadStream"}, - {2008, nullptr, "ReadFileAttributeFromAlbumMovieReadStream"}, - {2401, nullptr, "OpenAlbumMovieWriteStream"}, - {2402, nullptr, "FinishAlbumMovieWriteStream"}, - {2403, nullptr, "CommitAlbumMovieWriteStream"}, - {2404, nullptr, "DiscardAlbumMovieWriteStream"}, - {2405, nullptr, "DiscardAlbumMovieWriteStreamNoDelete"}, - {2406, nullptr, "CommitAlbumMovieWriteStreamEx"}, - {2411, nullptr, "StartAlbumMovieWriteStreamDataSection"}, - {2412, nullptr, "EndAlbumMovieWriteStreamDataSection"}, - {2413, nullptr, "StartAlbumMovieWriteStreamMetaSection"}, - {2414, nullptr, "EndAlbumMovieWriteStreamMetaSection"}, - {2421, nullptr, "ReadDataFromAlbumMovieWriteStream"}, - {2422, nullptr, "WriteDataToAlbumMovieWriteStream"}, - {2424, nullptr, "WriteMetaToAlbumMovieWriteStream"}, - {2431, nullptr, "GetAlbumMovieWriteStreamBrokenReason"}, - {2433, nullptr, "GetAlbumMovieWriteStreamDataSize"}, - {2434, nullptr, "SetAlbumMovieWriteStreamDataSize"}, - }; - // clang-format on - - RegisterHandlers(functions); - } -}; - -CAPS_C::CAPS_C(Core::System& system_) : ServiceFramework{system_, "caps:c"} { +IAlbumControlService::IAlbumControlService(Core::System& system_, + std::shared_ptr<AlbumManager> album_manager) + : ServiceFramework{system_, "caps:c"}, manager{album_manager} { // clang-format off static const FunctionInfo functions[] = { {1, nullptr, "CaptureRawImage"}, {2, nullptr, "CaptureRawImageWithTimeout"}, - {33, &CAPS_C::SetShimLibraryVersion, "SetShimLibraryVersion"}, + {33, &IAlbumControlService::SetShimLibraryVersion, "SetShimLibraryVersion"}, {1001, nullptr, "RequestTakingScreenShot"}, {1002, nullptr, "RequestTakingScreenShotWithTimeout"}, {1011, nullptr, "NotifyTakingScreenShotRefused"}, @@ -72,9 +40,9 @@ CAPS_C::CAPS_C(Core::System& system_) : ServiceFramework{system_, "caps:c"} { RegisterHandlers(functions); } -CAPS_C::~CAPS_C() = default; +IAlbumControlService::~IAlbumControlService() = default; -void CAPS_C::SetShimLibraryVersion(HLERequestContext& ctx) { +void IAlbumControlService::SetShimLibraryVersion(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto library_version{rp.Pop<u64>()}; const auto applet_resource_user_id{rp.Pop<u64>()}; diff --git a/src/core/hle/service/caps/caps_c.h b/src/core/hle/service/caps/caps_c.h index 537b3a2e3..92ba242db 100644 --- a/src/core/hle/service/caps/caps_c.h +++ b/src/core/hle/service/caps/caps_c.h @@ -10,14 +10,18 @@ class System; } namespace Service::Capture { +class AlbumManager; -class CAPS_C final : public ServiceFramework<CAPS_C> { +class IAlbumControlService final : public ServiceFramework<IAlbumControlService> { public: - explicit CAPS_C(Core::System& system_); - ~CAPS_C() override; + explicit IAlbumControlService(Core::System& system_, + std::shared_ptr<AlbumManager> album_manager); + ~IAlbumControlService() override; private: void SetShimLibraryVersion(HLERequestContext& ctx); + + std::shared_ptr<AlbumManager> manager = nullptr; }; } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_manager.cpp b/src/core/hle/service/caps/caps_manager.cpp new file mode 100644 index 000000000..7d733eb54 --- /dev/null +++ b/src/core/hle/service/caps/caps_manager.cpp @@ -0,0 +1,469 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <sstream> + +#include "common/fs/file.h" +#include "common/fs/path_util.h" +#include "common/logging/log.h" +#include "common/stb.h" +#include "core/core.h" +#include "core/hle/service/caps/caps_manager.h" +#include "core/hle/service/caps/caps_result.h" +#include "core/hle/service/time/time_manager.h" +#include "core/hle/service/time/time_zone_content_manager.h" + +namespace Service::Capture { + +AlbumManager::AlbumManager(Core::System& system_) : system{system_} {} + +AlbumManager::~AlbumManager() = default; + +Result AlbumManager::DeleteAlbumFile(const AlbumFileId& file_id) { + if (file_id.storage > AlbumStorage::Sd) { + return ResultInvalidStorage; + } + + if (!is_mounted) { + return ResultIsNotMounted; + } + + std::filesystem::path path; + const auto result = GetFile(path, file_id); + + if (result.IsError()) { + return result; + } + + if (!Common::FS::RemoveFile(path)) { + return ResultFileNotFound; + } + + return ResultSuccess; +} + +Result AlbumManager::IsAlbumMounted(AlbumStorage storage) { + if (storage > AlbumStorage::Sd) { + return ResultInvalidStorage; + } + + is_mounted = true; + + if (storage == AlbumStorage::Sd) { + FindScreenshots(); + } + + return is_mounted ? ResultSuccess : ResultIsNotMounted; +} + +Result AlbumManager::GetAlbumFileList(std::vector<AlbumEntry>& out_entries, AlbumStorage storage, + u8 flags) const { + if (storage > AlbumStorage::Sd) { + return ResultInvalidStorage; + } + + if (!is_mounted) { + return ResultIsNotMounted; + } + + for (auto& [file_id, path] : album_files) { + if (file_id.storage != storage) { + continue; + } + if (out_entries.size() >= SdAlbumFileLimit) { + break; + } + + const auto entry_size = Common::FS::GetSize(path); + out_entries.push_back({ + .entry_size = entry_size, + .file_id = file_id, + }); + } + + return ResultSuccess; +} + +Result AlbumManager::GetAlbumFileList(std::vector<ApplicationAlbumFileEntry>& out_entries, + ContentType contex_type, s64 start_posix_time, + s64 end_posix_time, u64 aruid) const { + if (!is_mounted) { + return ResultIsNotMounted; + } + + std::vector<ApplicationAlbumEntry> album_entries; + const auto start_date = ConvertToAlbumDateTime(start_posix_time); + const auto end_date = ConvertToAlbumDateTime(end_posix_time); + const auto result = GetAlbumFileList(album_entries, contex_type, start_date, end_date, aruid); + + if (result.IsError()) { + return result; + } + + for (const auto& album_entry : album_entries) { + ApplicationAlbumFileEntry entry{ + .entry = album_entry, + .datetime = album_entry.datetime, + .unknown = {}, + }; + out_entries.push_back(entry); + } + + return ResultSuccess; +} + +Result AlbumManager::GetAlbumFileList(std::vector<ApplicationAlbumEntry>& out_entries, + ContentType contex_type, AlbumFileDateTime start_date, + AlbumFileDateTime end_date, u64 aruid) const { + if (!is_mounted) { + return ResultIsNotMounted; + } + + for (auto& [file_id, path] : album_files) { + if (file_id.type != contex_type) { + continue; + } + if (file_id.date > start_date) { + continue; + } + if (file_id.date < end_date) { + continue; + } + if (out_entries.size() >= SdAlbumFileLimit) { + break; + } + + const auto entry_size = Common::FS::GetSize(path); + ApplicationAlbumEntry entry{ + .size = entry_size, + .hash{}, + .datetime = file_id.date, + .storage = file_id.storage, + .content = contex_type, + .unknown = 1, + }; + out_entries.push_back(entry); + } + + return ResultSuccess; +} + +Result AlbumManager::GetAutoSavingStorage(bool& out_is_autosaving) const { + out_is_autosaving = false; + return ResultSuccess; +} + +Result AlbumManager::LoadAlbumScreenShotImage(LoadAlbumScreenShotImageOutput& out_image_output, + std::vector<u8>& out_image, + const AlbumFileId& file_id, + const ScreenShotDecodeOption& decoder_options) const { + if (file_id.storage > AlbumStorage::Sd) { + return ResultInvalidStorage; + } + + if (!is_mounted) { + return ResultIsNotMounted; + } + + out_image_output = { + .width = 1280, + .height = 720, + .attribute = + { + .unknown_0{}, + .orientation = AlbumImageOrientation::None, + .unknown_1{}, + .unknown_2{}, + }, + }; + + std::filesystem::path path; + const auto result = GetFile(path, file_id); + + if (result.IsError()) { + return result; + } + + out_image.resize(out_image_output.height * out_image_output.width * STBI_rgb_alpha); + + return LoadImage(out_image, path, static_cast<int>(out_image_output.width), + +static_cast<int>(out_image_output.height), decoder_options.flags); +} + +Result AlbumManager::LoadAlbumScreenShotThumbnail( + LoadAlbumScreenShotImageOutput& out_image_output, std::vector<u8>& out_image, + const AlbumFileId& file_id, const ScreenShotDecodeOption& decoder_options) const { + if (file_id.storage > AlbumStorage::Sd) { + return ResultInvalidStorage; + } + + if (!is_mounted) { + return ResultIsNotMounted; + } + + out_image_output = { + .width = 320, + .height = 180, + .attribute = + { + .unknown_0{}, + .orientation = AlbumImageOrientation::None, + .unknown_1{}, + .unknown_2{}, + }, + }; + + std::filesystem::path path; + const auto result = GetFile(path, file_id); + + if (result.IsError()) { + return result; + } + + out_image.resize(out_image_output.height * out_image_output.width * STBI_rgb_alpha); + + return LoadImage(out_image, path, static_cast<int>(out_image_output.width), + +static_cast<int>(out_image_output.height), decoder_options.flags); +} + +Result AlbumManager::SaveScreenShot(ApplicationAlbumEntry& out_entry, + const ScreenShotAttribute& attribute, + std::span<const u8> image_data, u64 aruid) { + return SaveScreenShot(out_entry, attribute, {}, image_data, aruid); +} + +Result AlbumManager::SaveScreenShot(ApplicationAlbumEntry& out_entry, + const ScreenShotAttribute& attribute, + const ApplicationData& app_data, std::span<const u8> image_data, + u64 aruid) { + const u64 title_id = system.GetApplicationProcessProgramID(); + const auto& user_clock = system.GetTimeManager().GetStandardUserSystemClockCore(); + + s64 posix_time{}; + Result result = user_clock.GetCurrentTime(system, posix_time); + + if (result.IsError()) { + return result; + } + + const auto date = ConvertToAlbumDateTime(posix_time); + + return SaveImage(out_entry, image_data, title_id, date); +} + +Result AlbumManager::SaveEditedScreenShot(ApplicationAlbumEntry& out_entry, + const ScreenShotAttribute& attribute, + const AlbumFileId& file_id, + std::span<const u8> image_data) { + const auto& user_clock = system.GetTimeManager().GetStandardUserSystemClockCore(); + + s64 posix_time{}; + Result result = user_clock.GetCurrentTime(system, posix_time); + + if (result.IsError()) { + return result; + } + + const auto date = ConvertToAlbumDateTime(posix_time); + + return SaveImage(out_entry, image_data, file_id.application_id, date); +} + +Result AlbumManager::GetFile(std::filesystem::path& out_path, const AlbumFileId& file_id) const { + const auto file = album_files.find(file_id); + + if (file == album_files.end()) { + return ResultFileNotFound; + } + + out_path = file->second; + return ResultSuccess; +} + +void AlbumManager::FindScreenshots() { + is_mounted = false; + album_files.clear(); + + // TODO: Swap this with a blocking operation. + const auto screenshots_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ScreenshotsDir); + Common::FS::IterateDirEntries( + screenshots_dir, + [this](const std::filesystem::path& full_path) { + AlbumEntry entry; + if (GetAlbumEntry(entry, full_path).IsError()) { + return true; + } + while (album_files.contains(entry.file_id)) { + if (++entry.file_id.date.unique_id == 0) { + break; + } + } + album_files[entry.file_id] = full_path; + return true; + }, + Common::FS::DirEntryFilter::File); + + is_mounted = true; +} + +Result AlbumManager::GetAlbumEntry(AlbumEntry& out_entry, const std::filesystem::path& path) const { + std::istringstream line_stream(path.filename().string()); + std::string date; + std::string application; + std::string time; + + // Parse filename to obtain entry properties + std::getline(line_stream, application, '_'); + std::getline(line_stream, date, '_'); + std::getline(line_stream, time, '_'); + + std::istringstream date_stream(date); + std::istringstream time_stream(time); + std::string year; + std::string month; + std::string day; + std::string hour; + std::string minute; + std::string second; + + std::getline(date_stream, year, '-'); + std::getline(date_stream, month, '-'); + std::getline(date_stream, day, '-'); + + std::getline(time_stream, hour, '-'); + std::getline(time_stream, minute, '-'); + std::getline(time_stream, second, '-'); + + try { + out_entry = { + .entry_size = 1, + .file_id{ + .application_id = static_cast<u64>(std::stoll(application, 0, 16)), + .date = + { + .year = static_cast<s16>(std::stoi(year)), + .month = static_cast<s8>(std::stoi(month)), + .day = static_cast<s8>(std::stoi(day)), + .hour = static_cast<s8>(std::stoi(hour)), + .minute = static_cast<s8>(std::stoi(minute)), + .second = static_cast<s8>(std::stoi(second)), + .unique_id = 0, + }, + .storage = AlbumStorage::Sd, + .type = ContentType::Screenshot, + .unknown = 1, + }, + }; + } catch (const std::invalid_argument&) { + return ResultUnknown; + } catch (const std::out_of_range&) { + return ResultUnknown; + } catch (const std::exception&) { + return ResultUnknown; + } + + return ResultSuccess; +} + +Result AlbumManager::LoadImage(std::span<u8> out_image, const std::filesystem::path& path, + int width, int height, ScreenShotDecoderFlag flag) const { + if (out_image.size() != static_cast<std::size_t>(width * height * STBI_rgb_alpha)) { + return ResultUnknown; + } + + const Common::FS::IOFile db_file{path, Common::FS::FileAccessMode::Read, + Common::FS::FileType::BinaryFile}; + + std::vector<u8> raw_file(db_file.GetSize()); + if (db_file.Read(raw_file) != raw_file.size()) { + return ResultUnknown; + } + + int filter_flag = STBIR_FILTER_DEFAULT; + int original_width, original_height, color_channels; + const auto dbi_image = + stbi_load_from_memory(raw_file.data(), static_cast<int>(raw_file.size()), &original_width, + &original_height, &color_channels, STBI_rgb_alpha); + + if (dbi_image == nullptr) { + return ResultUnknown; + } + + switch (flag) { + case ScreenShotDecoderFlag::EnableFancyUpsampling: + filter_flag = STBIR_FILTER_TRIANGLE; + break; + case ScreenShotDecoderFlag::EnableBlockSmoothing: + filter_flag = STBIR_FILTER_BOX; + break; + default: + filter_flag = STBIR_FILTER_DEFAULT; + break; + } + + stbir_resize_uint8_srgb(dbi_image, original_width, original_height, 0, out_image.data(), width, + height, 0, STBI_rgb_alpha, 3, filter_flag); + + return ResultSuccess; +} + +static void PNGToMemory(void* context, void* png, int len) { + std::vector<u8>* png_image = static_cast<std::vector<u8>*>(context); + png_image->reserve(len); + std::memcpy(png_image->data(), png, len); +} + +Result AlbumManager::SaveImage(ApplicationAlbumEntry& out_entry, std::span<const u8> image, + u64 title_id, const AlbumFileDateTime& date) const { + const auto screenshot_path = + Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir); + const std::string formatted_date = + fmt::format("{:04}-{:02}-{:02}_{:02}-{:02}-{:02}-{:03}", date.year, date.month, date.day, + date.hour, date.minute, date.second, 0); + const std::string file_path = + fmt::format("{}/{:016x}_{}.png", screenshot_path, title_id, formatted_date); + + const Common::FS::IOFile db_file{file_path, Common::FS::FileAccessMode::Write, + Common::FS::FileType::BinaryFile}; + + std::vector<u8> png_image; + if (!stbi_write_png_to_func(PNGToMemory, &png_image, 1280, 720, STBI_rgb_alpha, image.data(), + 0)) { + return ResultFileCountLimit; + } + + if (db_file.Write(png_image) != png_image.size()) { + return ResultFileCountLimit; + } + + out_entry = { + .size = png_image.size(), + .hash = {}, + .datetime = date, + .storage = AlbumStorage::Sd, + .content = ContentType::Screenshot, + .unknown = 1, + }; + + return ResultSuccess; +} + +AlbumFileDateTime AlbumManager::ConvertToAlbumDateTime(u64 posix_time) const { + Time::TimeZone::CalendarInfo calendar_date{}; + const auto& time_zone_manager = + system.GetTimeManager().GetTimeZoneContentManager().GetTimeZoneManager(); + + time_zone_manager.ToCalendarTimeWithMyRules(posix_time, calendar_date); + + return { + .year = calendar_date.time.year, + .month = calendar_date.time.month, + .day = calendar_date.time.day, + .hour = calendar_date.time.hour, + .minute = calendar_date.time.minute, + .second = calendar_date.time.second, + .unique_id = 0, + }; +} + +} // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_manager.h b/src/core/hle/service/caps/caps_manager.h new file mode 100644 index 000000000..44d85117f --- /dev/null +++ b/src/core/hle/service/caps/caps_manager.h @@ -0,0 +1,90 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <unordered_map> + +#include "common/fs/fs.h" +#include "core/hle/result.h" +#include "core/hle/service/caps/caps_types.h" + +namespace Core { +class System; +} + +namespace std { +// Hash used to create lists from AlbumFileId data +template <> +struct hash<Service::Capture::AlbumFileId> { + size_t operator()(const Service::Capture::AlbumFileId& pad_id) const noexcept { + u64 hash_value = (static_cast<u64>(pad_id.date.year) << 8); + hash_value ^= (static_cast<u64>(pad_id.date.month) << 7); + hash_value ^= (static_cast<u64>(pad_id.date.day) << 6); + hash_value ^= (static_cast<u64>(pad_id.date.hour) << 5); + hash_value ^= (static_cast<u64>(pad_id.date.minute) << 4); + hash_value ^= (static_cast<u64>(pad_id.date.second) << 3); + hash_value ^= (static_cast<u64>(pad_id.date.unique_id) << 2); + hash_value ^= (static_cast<u64>(pad_id.storage) << 1); + hash_value ^= static_cast<u64>(pad_id.type); + return static_cast<size_t>(hash_value); + } +}; + +} // namespace std + +namespace Service::Capture { + +class AlbumManager { +public: + explicit AlbumManager(Core::System& system_); + ~AlbumManager(); + + Result DeleteAlbumFile(const AlbumFileId& file_id); + Result IsAlbumMounted(AlbumStorage storage); + Result GetAlbumFileList(std::vector<AlbumEntry>& out_entries, AlbumStorage storage, + u8 flags) const; + Result GetAlbumFileList(std::vector<ApplicationAlbumFileEntry>& out_entries, + ContentType contex_type, s64 start_posix_time, s64 end_posix_time, + u64 aruid) const; + Result GetAlbumFileList(std::vector<ApplicationAlbumEntry>& out_entries, + ContentType contex_type, AlbumFileDateTime start_date, + AlbumFileDateTime end_date, u64 aruid) const; + Result GetAutoSavingStorage(bool& out_is_autosaving) const; + Result LoadAlbumScreenShotImage(LoadAlbumScreenShotImageOutput& out_image_output, + std::vector<u8>& out_image, const AlbumFileId& file_id, + const ScreenShotDecodeOption& decoder_options) const; + Result LoadAlbumScreenShotThumbnail(LoadAlbumScreenShotImageOutput& out_image_output, + std::vector<u8>& out_image, const AlbumFileId& file_id, + const ScreenShotDecodeOption& decoder_options) const; + + Result SaveScreenShot(ApplicationAlbumEntry& out_entry, const ScreenShotAttribute& attribute, + std::span<const u8> image_data, u64 aruid); + Result SaveScreenShot(ApplicationAlbumEntry& out_entry, const ScreenShotAttribute& attribute, + const ApplicationData& app_data, std::span<const u8> image_data, + u64 aruid); + Result SaveEditedScreenShot(ApplicationAlbumEntry& out_entry, + const ScreenShotAttribute& attribute, const AlbumFileId& file_id, + std::span<const u8> image_data); + +private: + static constexpr std::size_t NandAlbumFileLimit = 1000; + static constexpr std::size_t SdAlbumFileLimit = 10000; + + void FindScreenshots(); + Result GetFile(std::filesystem::path& out_path, const AlbumFileId& file_id) const; + Result GetAlbumEntry(AlbumEntry& out_entry, const std::filesystem::path& path) const; + Result LoadImage(std::span<u8> out_image, const std::filesystem::path& path, int width, + int height, ScreenShotDecoderFlag flag) const; + Result SaveImage(ApplicationAlbumEntry& out_entry, std::span<const u8> image, u64 title_id, + const AlbumFileDateTime& date) const; + + AlbumFileDateTime ConvertToAlbumDateTime(u64 posix_time) const; + + bool is_mounted{}; + std::unordered_map<AlbumFileId, std::filesystem::path> album_files; + + Core::System& system; +}; + +} // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_result.h b/src/core/hle/service/caps/caps_result.h new file mode 100644 index 000000000..c65e5fb9a --- /dev/null +++ b/src/core/hle/service/caps/caps_result.h @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "core/hle/result.h" + +namespace Service::Capture { + +constexpr Result ResultWorkMemoryError(ErrorModule::Capture, 3); +constexpr Result ResultUnknown5(ErrorModule::Capture, 5); +constexpr Result ResultUnknown6(ErrorModule::Capture, 6); +constexpr Result ResultUnknown7(ErrorModule::Capture, 7); +constexpr Result ResultOutOfRange(ErrorModule::Capture, 8); +constexpr Result ResulInvalidTimestamp(ErrorModule::Capture, 12); +constexpr Result ResultInvalidStorage(ErrorModule::Capture, 13); +constexpr Result ResultInvalidFileContents(ErrorModule::Capture, 14); +constexpr Result ResultIsNotMounted(ErrorModule::Capture, 21); +constexpr Result ResultUnknown22(ErrorModule::Capture, 22); +constexpr Result ResultFileNotFound(ErrorModule::Capture, 23); +constexpr Result ResultInvalidFileData(ErrorModule::Capture, 24); +constexpr Result ResultUnknown25(ErrorModule::Capture, 25); +constexpr Result ResultReadBufferShortage(ErrorModule::Capture, 30); +constexpr Result ResultUnknown810(ErrorModule::Capture, 810); +constexpr Result ResultUnknown1024(ErrorModule::Capture, 1024); +constexpr Result ResultUnknown1202(ErrorModule::Capture, 1202); +constexpr Result ResultUnknown1203(ErrorModule::Capture, 1203); +constexpr Result ResultFileCountLimit(ErrorModule::Capture, 1401); +constexpr Result ResultUnknown1701(ErrorModule::Capture, 1701); +constexpr Result ResultUnknown1801(ErrorModule::Capture, 1801); +constexpr Result ResultUnknown1802(ErrorModule::Capture, 1802); +constexpr Result ResultUnknown1803(ErrorModule::Capture, 1803); +constexpr Result ResultUnknown1804(ErrorModule::Capture, 1804); + +} // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_sc.cpp b/src/core/hle/service/caps/caps_sc.cpp index 395b13da7..6117cb7c6 100644 --- a/src/core/hle/service/caps/caps_sc.cpp +++ b/src/core/hle/service/caps/caps_sc.cpp @@ -5,7 +5,8 @@ namespace Service::Capture { -CAPS_SC::CAPS_SC(Core::System& system_) : ServiceFramework{system_, "caps:sc"} { +IScreenShotControlService::IScreenShotControlService(Core::System& system_) + : ServiceFramework{system_, "caps:sc"} { // clang-format off static const FunctionInfo functions[] = { {1, nullptr, "CaptureRawImage"}, @@ -34,6 +35,6 @@ CAPS_SC::CAPS_SC(Core::System& system_) : ServiceFramework{system_, "caps:sc"} { RegisterHandlers(functions); } -CAPS_SC::~CAPS_SC() = default; +IScreenShotControlService::~IScreenShotControlService() = default; } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_sc.h b/src/core/hle/service/caps/caps_sc.h index e5600f6d7..d555f4979 100644 --- a/src/core/hle/service/caps/caps_sc.h +++ b/src/core/hle/service/caps/caps_sc.h @@ -11,10 +11,10 @@ class System; namespace Service::Capture { -class CAPS_SC final : public ServiceFramework<CAPS_SC> { +class IScreenShotControlService final : public ServiceFramework<IScreenShotControlService> { public: - explicit CAPS_SC(Core::System& system_); - ~CAPS_SC() override; + explicit IScreenShotControlService(Core::System& system_); + ~IScreenShotControlService() override; }; } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_ss.cpp b/src/core/hle/service/caps/caps_ss.cpp index 62b9edd41..1ba2b7972 100644 --- a/src/core/hle/service/caps/caps_ss.cpp +++ b/src/core/hle/service/caps/caps_ss.cpp @@ -1,18 +1,25 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/logging/log.h" +#include "core/hle/service/caps/caps_manager.h" +#include "core/hle/service/caps/caps_types.h" +#include "core/hle/service/ipc_helpers.h" + #include "core/hle/service/caps/caps_ss.h" namespace Service::Capture { -CAPS_SS::CAPS_SS(Core::System& system_) : ServiceFramework{system_, "caps:ss"} { +IScreenShotService::IScreenShotService(Core::System& system_, + std::shared_ptr<AlbumManager> album_manager) + : ServiceFramework{system_, "caps:ss"}, manager{album_manager} { // clang-format off static const FunctionInfo functions[] = { {201, nullptr, "SaveScreenShot"}, {202, nullptr, "SaveEditedScreenShot"}, - {203, nullptr, "SaveScreenShotEx0"}, + {203, &IScreenShotService::SaveScreenShotEx0, "SaveScreenShotEx0"}, {204, nullptr, "SaveEditedScreenShotEx0"}, - {206, nullptr, "Unknown206"}, + {206, &IScreenShotService::SaveEditedScreenShotEx1, "SaveEditedScreenShotEx1"}, {208, nullptr, "SaveScreenShotOfMovieEx1"}, {1000, nullptr, "Unknown1000"}, }; @@ -21,6 +28,67 @@ CAPS_SS::CAPS_SS(Core::System& system_) : ServiceFramework{system_, "caps:ss"} { RegisterHandlers(functions); } -CAPS_SS::~CAPS_SS() = default; +IScreenShotService::~IScreenShotService() = default; + +void IScreenShotService::SaveScreenShotEx0(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + struct Parameters { + ScreenShotAttribute attribute{}; + u32 report_option{}; + INSERT_PADDING_BYTES(0x4); + u64 applet_resource_user_id{}; + }; + static_assert(sizeof(Parameters) == 0x50, "Parameters has incorrect size."); + + const auto parameters{rp.PopRaw<Parameters>()}; + const auto image_data_buffer = ctx.ReadBuffer(); + + LOG_INFO(Service_Capture, + "called, report_option={}, image_data_buffer_size={}, applet_resource_user_id={}", + parameters.report_option, image_data_buffer.size(), + parameters.applet_resource_user_id); + + ApplicationAlbumEntry entry{}; + const auto result = manager->SaveScreenShot(entry, parameters.attribute, image_data_buffer, + parameters.applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 10}; + rb.Push(result); + rb.PushRaw(entry); +} +void IScreenShotService::SaveEditedScreenShotEx1(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + struct Parameters { + ScreenShotAttribute attribute; + u64 width; + u64 height; + u64 thumbnail_width; + u64 thumbnail_height; + AlbumFileId file_id; + }; + static_assert(sizeof(Parameters) == 0x78, "Parameters has incorrect size."); + + const auto parameters{rp.PopRaw<Parameters>()}; + const auto application_data_buffer = ctx.ReadBuffer(0); + const auto image_data_buffer = ctx.ReadBuffer(1); + const auto thumbnail_image_data_buffer = ctx.ReadBuffer(2); + + LOG_INFO(Service_Capture, + "called, width={}, height={}, thumbnail_width={}, thumbnail_height={}, " + "application_id={:016x}, storage={}, type={}, app_data_buffer_size={}, " + "image_data_buffer_size={}, thumbnail_image_buffer_size={}", + parameters.width, parameters.height, parameters.thumbnail_width, + parameters.thumbnail_height, parameters.file_id.application_id, + parameters.file_id.storage, parameters.file_id.type, application_data_buffer.size(), + image_data_buffer.size(), thumbnail_image_data_buffer.size()); + + ApplicationAlbumEntry entry{}; + const auto result = manager->SaveEditedScreenShot(entry, parameters.attribute, + parameters.file_id, image_data_buffer); + + IPC::ResponseBuilder rb{ctx, 10}; + rb.Push(result); + rb.PushRaw(entry); +} } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_ss.h b/src/core/hle/service/caps/caps_ss.h index 718ade485..a7e9972ab 100644 --- a/src/core/hle/service/caps/caps_ss.h +++ b/src/core/hle/service/caps/caps_ss.h @@ -11,10 +11,16 @@ class System; namespace Service::Capture { -class CAPS_SS final : public ServiceFramework<CAPS_SS> { +class IScreenShotService final : public ServiceFramework<IScreenShotService> { public: - explicit CAPS_SS(Core::System& system_); - ~CAPS_SS() override; + explicit IScreenShotService(Core::System& system_, std::shared_ptr<AlbumManager> album_manager); + ~IScreenShotService() override; + +private: + void SaveScreenShotEx0(HLERequestContext& ctx); + void SaveEditedScreenShotEx1(HLERequestContext& ctx); + + std::shared_ptr<AlbumManager> manager; }; } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_su.cpp b/src/core/hle/service/caps/caps_su.cpp index 3b11cc95c..e85625ee4 100644 --- a/src/core/hle/service/caps/caps_su.cpp +++ b/src/core/hle/service/caps/caps_su.cpp @@ -2,18 +2,22 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/logging/log.h" +#include "core/hle/service/caps/caps_manager.h" #include "core/hle/service/caps/caps_su.h" +#include "core/hle/service/caps/caps_types.h" #include "core/hle/service/ipc_helpers.h" namespace Service::Capture { -CAPS_SU::CAPS_SU(Core::System& system_) : ServiceFramework{system_, "caps:su"} { +IScreenShotApplicationService::IScreenShotApplicationService( + Core::System& system_, std::shared_ptr<AlbumManager> album_manager) + : ServiceFramework{system_, "caps:su"}, manager{album_manager} { // clang-format off static const FunctionInfo functions[] = { - {32, &CAPS_SU::SetShimLibraryVersion, "SetShimLibraryVersion"}, + {32, &IScreenShotApplicationService::SetShimLibraryVersion, "SetShimLibraryVersion"}, {201, nullptr, "SaveScreenShot"}, - {203, nullptr, "SaveScreenShotEx0"}, - {205, nullptr, "SaveScreenShotEx1"}, + {203, &IScreenShotApplicationService::SaveScreenShotEx0, "SaveScreenShotEx0"}, + {205, &IScreenShotApplicationService::SaveScreenShotEx1, "SaveScreenShotEx1"}, {210, nullptr, "SaveScreenShotEx2"}, }; // clang-format on @@ -21,9 +25,9 @@ CAPS_SU::CAPS_SU(Core::System& system_) : ServiceFramework{system_, "caps:su"} { RegisterHandlers(functions); } -CAPS_SU::~CAPS_SU() = default; +IScreenShotApplicationService::~IScreenShotApplicationService() = default; -void CAPS_SU::SetShimLibraryVersion(HLERequestContext& ctx) { +void IScreenShotApplicationService::SetShimLibraryVersion(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto library_version{rp.Pop<u64>()}; const auto applet_resource_user_id{rp.Pop<u64>()}; @@ -35,4 +39,62 @@ void CAPS_SU::SetShimLibraryVersion(HLERequestContext& ctx) { rb.Push(ResultSuccess); } +void IScreenShotApplicationService::SaveScreenShotEx0(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + struct Parameters { + ScreenShotAttribute attribute{}; + AlbumReportOption report_option{}; + INSERT_PADDING_BYTES(0x4); + u64 applet_resource_user_id{}; + }; + static_assert(sizeof(Parameters) == 0x50, "Parameters has incorrect size."); + + const auto parameters{rp.PopRaw<Parameters>()}; + const auto image_data_buffer = ctx.ReadBuffer(); + + LOG_INFO(Service_Capture, + "called, report_option={}, image_data_buffer_size={}, applet_resource_user_id={}", + parameters.report_option, image_data_buffer.size(), + parameters.applet_resource_user_id); + + ApplicationAlbumEntry entry{}; + const auto result = manager->SaveScreenShot(entry, parameters.attribute, image_data_buffer, + parameters.applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 10}; + rb.Push(result); + rb.PushRaw(entry); +} + +void IScreenShotApplicationService::SaveScreenShotEx1(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + struct Parameters { + ScreenShotAttribute attribute{}; + AlbumReportOption report_option{}; + INSERT_PADDING_BYTES(0x4); + u64 applet_resource_user_id{}; + }; + static_assert(sizeof(Parameters) == 0x50, "Parameters has incorrect size."); + + const auto parameters{rp.PopRaw<Parameters>()}; + const auto app_data_buffer = ctx.ReadBuffer(0); + const auto image_data_buffer = ctx.ReadBuffer(1); + + LOG_INFO(Service_Capture, + "called, report_option={}, image_data_buffer_size={}, applet_resource_user_id={}", + parameters.report_option, image_data_buffer.size(), + parameters.applet_resource_user_id); + + ApplicationAlbumEntry entry{}; + ApplicationData app_data{}; + std::memcpy(&app_data, app_data_buffer.data(), sizeof(ApplicationData)); + const auto result = + manager->SaveScreenShot(entry, parameters.attribute, app_data, image_data_buffer, + parameters.applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 10}; + rb.Push(result); + rb.PushRaw(entry); +} + } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_su.h b/src/core/hle/service/caps/caps_su.h index c6398858d..89e71f506 100644 --- a/src/core/hle/service/caps/caps_su.h +++ b/src/core/hle/service/caps/caps_su.h @@ -10,14 +10,20 @@ class System; } namespace Service::Capture { +class AlbumManager; -class CAPS_SU final : public ServiceFramework<CAPS_SU> { +class IScreenShotApplicationService final : public ServiceFramework<IScreenShotApplicationService> { public: - explicit CAPS_SU(Core::System& system_); - ~CAPS_SU() override; + explicit IScreenShotApplicationService(Core::System& system_, + std::shared_ptr<AlbumManager> album_manager); + ~IScreenShotApplicationService() override; private: void SetShimLibraryVersion(HLERequestContext& ctx); + void SaveScreenShotEx0(HLERequestContext& ctx); + void SaveScreenShotEx1(HLERequestContext& ctx); + + std::shared_ptr<AlbumManager> manager; }; } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_types.h b/src/core/hle/service/caps/caps_types.h new file mode 100644 index 000000000..589ac28d3 --- /dev/null +++ b/src/core/hle/service/caps/caps_types.h @@ -0,0 +1,186 @@ +// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Service::Capture { + +// This is nn::album::ImageOrientation +enum class AlbumImageOrientation { + None, + Rotate90, + Rotate180, + Rotate270, +}; + +// This is nn::album::AlbumReportOption +enum class AlbumReportOption : s32 { + Disable, + Enable, + Unknown2, + Unknown3, +}; + +enum class ContentType : u8 { + Screenshot = 0, + Movie = 1, + ExtraMovie = 3, +}; + +enum class AlbumStorage : u8 { + Nand, + Sd, +}; + +enum class ScreenShotDecoderFlag : u64 { + None = 0, + EnableFancyUpsampling = 1 << 0, + EnableBlockSmoothing = 1 << 1, +}; + +// This is nn::capsrv::AlbumFileDateTime +struct AlbumFileDateTime { + s16 year{}; + s8 month{}; + s8 day{}; + s8 hour{}; + s8 minute{}; + s8 second{}; + s8 unique_id{}; + + friend constexpr bool operator==(const AlbumFileDateTime&, const AlbumFileDateTime&) = default; + friend constexpr bool operator>(const AlbumFileDateTime& a, const AlbumFileDateTime& b) { + if (a.year > b.year) { + return true; + } + if (a.month > b.month) { + return true; + } + if (a.day > b.day) { + return true; + } + if (a.hour > b.hour) { + return true; + } + if (a.minute > b.minute) { + return true; + } + return a.second > b.second; + }; + friend constexpr bool operator<(const AlbumFileDateTime& a, const AlbumFileDateTime& b) { + if (a.year < b.year) { + return true; + } + if (a.month < b.month) { + return true; + } + if (a.day < b.day) { + return true; + } + if (a.hour < b.hour) { + return true; + } + if (a.minute < b.minute) { + return true; + } + return a.second < b.second; + }; +}; +static_assert(sizeof(AlbumFileDateTime) == 0x8, "AlbumFileDateTime has incorrect size."); + +// This is nn::album::AlbumEntry +struct AlbumFileEntry { + u64 size{}; // Size of the entry + u64 hash{}; // AES256 with hardcoded key over AlbumEntry + AlbumFileDateTime datetime{}; + AlbumStorage storage{}; + ContentType content{}; + INSERT_PADDING_BYTES(5); + u8 unknown{}; // Set to 1 on official SW +}; +static_assert(sizeof(AlbumFileEntry) == 0x20, "AlbumFileEntry has incorrect size."); + +struct AlbumFileId { + u64 application_id{}; + AlbumFileDateTime date{}; + AlbumStorage storage{}; + ContentType type{}; + INSERT_PADDING_BYTES(0x5); + u8 unknown{}; + + friend constexpr bool operator==(const AlbumFileId&, const AlbumFileId&) = default; +}; +static_assert(sizeof(AlbumFileId) == 0x18, "AlbumFileId is an invalid size"); + +// This is nn::capsrv::AlbumEntry +struct AlbumEntry { + u64 entry_size{}; + AlbumFileId file_id{}; +}; +static_assert(sizeof(AlbumEntry) == 0x20, "AlbumEntry has incorrect size."); + +// This is nn::capsrv::ApplicationAlbumEntry +struct ApplicationAlbumEntry { + u64 size{}; // Size of the entry + u64 hash{}; // AES256 with hardcoded key over AlbumEntry + AlbumFileDateTime datetime{}; + AlbumStorage storage{}; + ContentType content{}; + INSERT_PADDING_BYTES(5); + u8 unknown{1}; // Set to 1 on official SW +}; +static_assert(sizeof(ApplicationAlbumEntry) == 0x20, "ApplicationAlbumEntry has incorrect size."); + +// This is nn::capsrv::ApplicationAlbumFileEntry +struct ApplicationAlbumFileEntry { + ApplicationAlbumEntry entry{}; + AlbumFileDateTime datetime{}; + u64 unknown{}; +}; +static_assert(sizeof(ApplicationAlbumFileEntry) == 0x30, + "ApplicationAlbumFileEntry has incorrect size."); + +struct ApplicationData { + std::array<u8, 0x400> data{}; + u32 data_size{}; +}; +static_assert(sizeof(ApplicationData) == 0x404, "ApplicationData is an invalid size"); + +struct ScreenShotAttribute { + u32 unknown_0{}; + AlbumImageOrientation orientation{}; + u32 unknown_1{}; + u32 unknown_2{}; + INSERT_PADDING_BYTES(0x30); +}; +static_assert(sizeof(ScreenShotAttribute) == 0x40, "ScreenShotAttribute is an invalid size"); + +struct ScreenShotDecodeOption { + ScreenShotDecoderFlag flags{}; + INSERT_PADDING_BYTES(0x18); +}; +static_assert(sizeof(ScreenShotDecodeOption) == 0x20, "ScreenShotDecodeOption is an invalid size"); + +struct LoadAlbumScreenShotImageOutput { + s64 width{}; + s64 height{}; + ScreenShotAttribute attribute{}; + INSERT_PADDING_BYTES(0x400); +}; +static_assert(sizeof(LoadAlbumScreenShotImageOutput) == 0x450, + "LoadAlbumScreenShotImageOutput is an invalid size"); + +struct LoadAlbumScreenShotImageOutputForApplication { + s64 width{}; + s64 height{}; + ScreenShotAttribute attribute{}; + ApplicationData data{}; + INSERT_PADDING_BYTES(0xAC); +}; +static_assert(sizeof(LoadAlbumScreenShotImageOutputForApplication) == 0x500, + "LoadAlbumScreenShotImageOutput is an invalid size"); + +} // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_u.cpp b/src/core/hle/service/caps/caps_u.cpp index bffe0f8d0..b6b33fb2f 100644 --- a/src/core/hle/service/caps/caps_u.cpp +++ b/src/core/hle/service/caps/caps_u.cpp @@ -2,45 +2,29 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/logging/log.h" -#include "core/hle/service/caps/caps.h" +#include "core/hle/service/caps/caps_manager.h" +#include "core/hle/service/caps/caps_types.h" #include "core/hle/service/caps/caps_u.h" #include "core/hle/service/ipc_helpers.h" namespace Service::Capture { -class IAlbumAccessorApplicationSession final - : public ServiceFramework<IAlbumAccessorApplicationSession> { -public: - explicit IAlbumAccessorApplicationSession(Core::System& system_) - : ServiceFramework{system_, "IAlbumAccessorApplicationSession"} { - // clang-format off - static const FunctionInfo functions[] = { - {2001, nullptr, "OpenAlbumMovieReadStream"}, - {2002, nullptr, "CloseAlbumMovieReadStream"}, - {2003, nullptr, "GetAlbumMovieReadStreamMovieDataSize"}, - {2004, nullptr, "ReadMovieDataFromAlbumMovieReadStream"}, - {2005, nullptr, "GetAlbumMovieReadStreamBrokenReason"}, - }; - // clang-format on - - RegisterHandlers(functions); - } -}; - -CAPS_U::CAPS_U(Core::System& system_) : ServiceFramework{system_, "caps:u"} { +IAlbumApplicationService::IAlbumApplicationService(Core::System& system_, + std::shared_ptr<AlbumManager> album_manager) + : ServiceFramework{system_, "caps:u"}, manager{album_manager} { // clang-format off static const FunctionInfo functions[] = { - {32, &CAPS_U::SetShimLibraryVersion, "SetShimLibraryVersion"}, - {102, &CAPS_U::GetAlbumContentsFileListForApplication, "GetAlbumContentsFileListForApplication"}, - {103, nullptr, "DeleteAlbumContentsFileForApplication"}, - {104, nullptr, "GetAlbumContentsFileSizeForApplication"}, + {32, &IAlbumApplicationService::SetShimLibraryVersion, "SetShimLibraryVersion"}, + {102, &IAlbumApplicationService::GetAlbumFileList0AafeAruidDeprecated, "GetAlbumFileList0AafeAruidDeprecated"}, + {103, nullptr, "DeleteAlbumFileByAruid"}, + {104, nullptr, "GetAlbumFileSizeByAruid"}, {105, nullptr, "DeleteAlbumFileByAruidForDebug"}, - {110, nullptr, "LoadAlbumContentsFileScreenShotImageForApplication"}, - {120, nullptr, "LoadAlbumContentsFileThumbnailImageForApplication"}, - {130, nullptr, "PrecheckToCreateContentsForApplication"}, + {110, nullptr, "LoadAlbumScreenShotImageByAruid"}, + {120, nullptr, "LoadAlbumScreenShotThumbnailImageByAruid"}, + {130, nullptr, "PrecheckToCreateContentsByAruid"}, {140, nullptr, "GetAlbumFileList1AafeAruidDeprecated"}, {141, nullptr, "GetAlbumFileList2AafeUidAruidDeprecated"}, - {142, &CAPS_U::GetAlbumFileList3AaeAruid, "GetAlbumFileList3AaeAruid"}, + {142, &IAlbumApplicationService::GetAlbumFileList3AaeAruid, "GetAlbumFileList3AaeAruid"}, {143, nullptr, "GetAlbumFileList4AaeUidAruid"}, {144, nullptr, "GetAllAlbumFileList3AaeAruid"}, {60002, nullptr, "OpenAccessorSessionForApplication"}, @@ -50,9 +34,9 @@ CAPS_U::CAPS_U(Core::System& system_) : ServiceFramework{system_, "caps:u"} { RegisterHandlers(functions); } -CAPS_U::~CAPS_U() = default; +IAlbumApplicationService::~IAlbumApplicationService() = default; -void CAPS_U::SetShimLibraryVersion(HLERequestContext& ctx) { +void IAlbumApplicationService::SetShimLibraryVersion(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto library_version{rp.Pop<u64>()}; const auto applet_resource_user_id{rp.Pop<u64>()}; @@ -64,37 +48,89 @@ void CAPS_U::SetShimLibraryVersion(HLERequestContext& ctx) { rb.Push(ResultSuccess); } -void CAPS_U::GetAlbumContentsFileListForApplication(HLERequestContext& ctx) { - // Takes a type-0x6 output buffer containing an array of ApplicationAlbumFileEntry, a PID, an - // u8 ContentType, two s64s, and an u64 AppletResourceUserId. Returns an output u64 for total - // output entries (which is copied to a s32 by official SW). +void IAlbumApplicationService::GetAlbumFileList0AafeAruidDeprecated(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - const auto pid{rp.Pop<s32>()}; - const auto content_type{rp.PopEnum<ContentType>()}; - const auto start_posix_time{rp.Pop<s64>()}; - const auto end_posix_time{rp.Pop<s64>()}; - const auto applet_resource_user_id{rp.Pop<u64>()}; + struct Parameters { + ContentType content_type; + INSERT_PADDING_BYTES(7); + s64 start_posix_time; + s64 end_posix_time; + u64 applet_resource_user_id; + }; + static_assert(sizeof(Parameters) == 0x20, "Parameters has incorrect size."); - // TODO: Update this when we implement the album. - // Currently we do not have a method of accessing album entries, set this to 0 for now. - constexpr u32 total_entries_1{}; - constexpr u32 total_entries_2{}; + const auto parameters{rp.PopRaw<Parameters>()}; - LOG_WARNING( - Service_Capture, - "(STUBBED) called. pid={}, content_type={}, start_posix_time={}, " - "end_posix_time={}, applet_resource_user_id={}, total_entries_1={}, total_entries_2={}", - pid, content_type, start_posix_time, end_posix_time, applet_resource_user_id, - total_entries_1, total_entries_2); + LOG_WARNING(Service_Capture, + "(STUBBED) called. content_type={}, start_posix_time={}, end_posix_time={}, " + "applet_resource_user_id={}", + parameters.content_type, parameters.start_posix_time, parameters.end_posix_time, + parameters.applet_resource_user_id); + + Result result = ResultSuccess; + + if (result.IsSuccess()) { + result = manager->IsAlbumMounted(AlbumStorage::Sd); + } + + std::vector<ApplicationAlbumFileEntry> entries; + if (result.IsSuccess()) { + result = manager->GetAlbumFileList(entries, parameters.content_type, + parameters.start_posix_time, parameters.end_posix_time, + parameters.applet_resource_user_id); + } + + if (!entries.empty()) { + ctx.WriteBuffer(entries); + } IPC::ResponseBuilder rb{ctx, 4}; - rb.Push(ResultSuccess); - rb.Push(total_entries_1); - rb.Push(total_entries_2); + rb.Push(result); + rb.Push<u64>(entries.size()); } -void CAPS_U::GetAlbumFileList3AaeAruid(HLERequestContext& ctx) { - GetAlbumContentsFileListForApplication(ctx); +void IAlbumApplicationService::GetAlbumFileList3AaeAruid(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + struct Parameters { + ContentType content_type; + INSERT_PADDING_BYTES(1); + AlbumFileDateTime start_date_time; + AlbumFileDateTime end_date_time; + INSERT_PADDING_BYTES(6); + u64 applet_resource_user_id; + }; + static_assert(sizeof(Parameters) == 0x20, "Parameters has incorrect size."); + + const auto parameters{rp.PopRaw<Parameters>()}; + + LOG_WARNING(Service_Capture, + "(STUBBED) called. content_type={}, start_date={}/{}/{}, " + "end_date={}/{}/{}, applet_resource_user_id={}", + parameters.content_type, parameters.start_date_time.year, + parameters.start_date_time.month, parameters.start_date_time.day, + parameters.end_date_time.year, parameters.end_date_time.month, + parameters.end_date_time.day, parameters.applet_resource_user_id); + + Result result = ResultSuccess; + + if (result.IsSuccess()) { + result = manager->IsAlbumMounted(AlbumStorage::Sd); + } + + std::vector<ApplicationAlbumEntry> entries; + if (result.IsSuccess()) { + result = + manager->GetAlbumFileList(entries, parameters.content_type, parameters.start_date_time, + parameters.end_date_time, parameters.applet_resource_user_id); + } + + if (!entries.empty()) { + ctx.WriteBuffer(entries); + } + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(result); + rb.Push<u64>(entries.size()); } } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_u.h b/src/core/hle/service/caps/caps_u.h index e8dd037d7..9458c128e 100644 --- a/src/core/hle/service/caps/caps_u.h +++ b/src/core/hle/service/caps/caps_u.h @@ -10,16 +10,20 @@ class System; } namespace Service::Capture { +class AlbumManager; -class CAPS_U final : public ServiceFramework<CAPS_U> { +class IAlbumApplicationService final : public ServiceFramework<IAlbumApplicationService> { public: - explicit CAPS_U(Core::System& system_); - ~CAPS_U() override; + explicit IAlbumApplicationService(Core::System& system_, + std::shared_ptr<AlbumManager> album_manager); + ~IAlbumApplicationService() override; private: void SetShimLibraryVersion(HLERequestContext& ctx); - void GetAlbumContentsFileListForApplication(HLERequestContext& ctx); + void GetAlbumFileList0AafeAruidDeprecated(HLERequestContext& ctx); void GetAlbumFileList3AaeAruid(HLERequestContext& ctx); + + std::shared_ptr<AlbumManager> manager = nullptr; }; } // namespace Service::Capture diff --git a/src/core/hle/service/hid/controllers/palma.cpp b/src/core/hle/service/hid/controllers/palma.cpp index 14c67e454..73a2a2b91 100644 --- a/src/core/hle/service/hid/controllers/palma.cpp +++ b/src/core/hle/service/hid/controllers/palma.cpp @@ -19,7 +19,9 @@ Controller_Palma::Controller_Palma(Core::HID::HIDCore& hid_core_, u8* raw_shared operation_complete_event = service_context.CreateEvent("hid:PalmaOperationCompleteEvent"); } -Controller_Palma::~Controller_Palma() = default; +Controller_Palma::~Controller_Palma() { + service_context.CloseEvent(operation_complete_event); +}; void Controller_Palma::OnInit() {} diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 4d70006c1..929dd5f03 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -2757,6 +2757,10 @@ public: joy_detach_event = service_context.CreateEvent("HidSys::JoyDetachEvent"); } + ~HidSys() { + service_context.CloseEvent(joy_detach_event); + }; + private: void ApplyNpadSystemCommonPolicy(HLERequestContext& ctx) { LOG_WARNING(Service_HID, "called"); diff --git a/src/core/hle/service/hid/hidbus/hidbus_base.cpp b/src/core/hle/service/hid/hidbus/hidbus_base.cpp index ee522c36e..8c44f93e8 100644 --- a/src/core/hle/service/hid/hidbus/hidbus_base.cpp +++ b/src/core/hle/service/hid/hidbus/hidbus_base.cpp @@ -13,7 +13,10 @@ HidbusBase::HidbusBase(Core::System& system_, KernelHelpers::ServiceContext& ser : system(system_), service_context(service_context_) { send_command_async_event = service_context.CreateEvent("hidbus:SendCommandAsyncEvent"); } -HidbusBase::~HidbusBase() = default; + +HidbusBase::~HidbusBase() { + service_context.CloseEvent(send_command_async_event); +}; void HidbusBase::ActivateDevice() { if (is_activated) { diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp index f6a1e54f2..ff374ae39 100644 --- a/src/core/hle/service/hle_ipc.cpp +++ b/src/core/hle/service/hle_ipc.cpp @@ -23,6 +23,19 @@ #include "core/hle/service/ipc_helpers.h" #include "core/memory.h" +namespace { +static thread_local std::array read_buffer_data_a{ + Common::ScratchBuffer<u8>(), + Common::ScratchBuffer<u8>(), + Common::ScratchBuffer<u8>(), +}; +static thread_local std::array read_buffer_data_x{ + Common::ScratchBuffer<u8>(), + Common::ScratchBuffer<u8>(), + Common::ScratchBuffer<u8>(), +}; +} // Anonymous namespace + namespace Service { SessionRequestHandler::SessionRequestHandler(Kernel::KernelCore& kernel_, const char* service_name_) @@ -328,26 +341,61 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons } } -std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { +std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { static thread_local std::array read_buffer_a{ Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), }; - static thread_local std::array read_buffer_data_a{ - Common::ScratchBuffer<u8>(), - Common::ScratchBuffer<u8>(), - }; + + ASSERT_OR_EXECUTE_MSG( + BufferDescriptorA().size() > buffer_index, { return {}; }, + "BufferDescriptorA invalid buffer_index {}", buffer_index); + auto& read_buffer = read_buffer_a[buffer_index]; + return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), + BufferDescriptorA()[buffer_index].Size(), + &read_buffer_data_a[buffer_index]); +} + +std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { static thread_local std::array read_buffer_x{ Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), + }; + + ASSERT_OR_EXECUTE_MSG( + BufferDescriptorX().size() > buffer_index, { return {}; }, + "BufferDescriptorX invalid buffer_index {}", buffer_index); + auto& read_buffer = read_buffer_x[buffer_index]; + return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), + BufferDescriptorX()[buffer_index].Size(), + &read_buffer_data_x[buffer_index]); +} + +std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { + static thread_local std::array read_buffer_a{ + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), }; - static thread_local std::array read_buffer_data_x{ - Common::ScratchBuffer<u8>(), - Common::ScratchBuffer<u8>(), + static thread_local std::array read_buffer_x{ + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), + Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), }; const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && BufferDescriptorA()[buffer_index].Size()}; + const bool is_buffer_x{BufferDescriptorX().size() > buffer_index && + BufferDescriptorX()[buffer_index].Size()}; + + if (is_buffer_a && is_buffer_x) { + LOG_WARNING(Input, "Both buffer descriptors are available a.size={}, x.size={}", + BufferDescriptorA()[buffer_index].Size(), + BufferDescriptorX()[buffer_index].Size()); + } + if (is_buffer_a) { ASSERT_OR_EXECUTE_MSG( BufferDescriptorA().size() > buffer_index, { return {}; }, diff --git a/src/core/hle/service/hle_ipc.h b/src/core/hle/service/hle_ipc.h index 4bd24c899..ad5259a5c 100644 --- a/src/core/hle/service/hle_ipc.h +++ b/src/core/hle/service/hle_ipc.h @@ -253,6 +253,12 @@ public: return domain_message_header.has_value(); } + /// Helper function to get a span of a buffer using the buffer descriptor A + [[nodiscard]] std::span<const u8> ReadBufferA(std::size_t buffer_index = 0) const; + + /// Helper function to get a span of a buffer using the buffer descriptor X + [[nodiscard]] std::span<const u8> ReadBufferX(std::size_t buffer_index = 0) const; + /// Helper function to get a span of a buffer using the appropriate buffer descriptor [[nodiscard]] std::span<const u8> ReadBuffer(std::size_t buffer_index = 0) const; diff --git a/src/core/hle/service/jit/jit_context.cpp b/src/core/hle/service/jit/jit_context.cpp index 4ed3f02e2..0090e8568 100644 --- a/src/core/hle/service/jit/jit_context.cpp +++ b/src/core/hle/service/jit/jit_context.cpp @@ -156,6 +156,8 @@ public: bool LoadNRO(std::span<const u8> data) { local_memory.clear(); + + relocbase = local_memory.size(); local_memory.insert(local_memory.end(), data.begin(), data.end()); if (FixupRelocations()) { @@ -181,8 +183,8 @@ public: // https://refspecs.linuxbase.org/elf/gabi4+/ch5.dynamic.html // https://refspecs.linuxbase.org/elf/gabi4+/ch4.reloc.html VAddr dynamic_offset{mod_offset + callbacks->MemoryRead32(mod_offset + 4)}; - VAddr rela_dyn = 0; - size_t num_rela = 0; + VAddr rela_dyn = 0, relr_dyn = 0; + size_t num_rela = 0, num_relr = 0; while (true) { const auto dyn{callbacks->ReadMemory<Elf64_Dyn>(dynamic_offset)}; dynamic_offset += sizeof(Elf64_Dyn); @@ -196,6 +198,12 @@ public: if (dyn.d_tag == ElfDtRelasz) { num_rela = dyn.d_un.d_val / sizeof(Elf64_Rela); } + if (dyn.d_tag == ElfDtRelr) { + relr_dyn = dyn.d_un.d_ptr; + } + if (dyn.d_tag == ElfDtRelrsz) { + num_relr = dyn.d_un.d_val / sizeof(Elf64_Relr); + } } for (size_t i = 0; i < num_rela; i++) { @@ -207,6 +215,29 @@ public: callbacks->MemoryWrite64(rela.r_offset, contents + rela.r_addend); } + VAddr relr_where = 0; + for (size_t i = 0; i < num_relr; i++) { + const auto relr{callbacks->ReadMemory<Elf64_Relr>(relr_dyn + i * sizeof(Elf64_Relr))}; + const auto incr{[&](VAddr where) { + callbacks->MemoryWrite64(where, callbacks->MemoryRead64(where) + relocbase); + }}; + + if ((relr & 1) == 0) { + // where pointer + relr_where = relocbase + relr; + incr(relr_where); + relr_where += sizeof(Elf64_Addr); + } else { + // bitmap + for (int bit = 1; bit < 64; bit++) { + if ((relr & (1ULL << bit)) != 0) { + incr(relr_where + i * sizeof(Elf64_Addr)); + } + } + relr_where += 63 * sizeof(Elf64_Addr); + } + } + return true; } @@ -313,6 +344,7 @@ public: Core::Memory::Memory& memory; VAddr top_of_stack; VAddr heap_pointer; + VAddr relocbase; }; void DynarmicCallbacks64::CallSVC(u32 swi) { diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp index 6a313a03b..f51e63564 100644 --- a/src/core/hle/service/kernel_helpers.cpp +++ b/src/core/hle/service/kernel_helpers.cpp @@ -21,10 +21,8 @@ ServiceContext::ServiceContext(Core::System& system_, std::string name_) // Create the process. process = Kernel::KProcess::Create(kernel); - ASSERT(Kernel::KProcess::Initialize(process, system_, std::move(name_), - Kernel::KProcess::ProcessType::KernelInternal, - kernel.GetSystemResourceLimit()) - .IsSuccess()); + ASSERT(R_SUCCEEDED(process->Initialize(Kernel::Svc::CreateProcessParameter{}, + kernel.GetSystemResourceLimit(), false))); // Register the process. Kernel::KProcess::Register(kernel, process); diff --git a/src/core/hle/service/mii/types/core_data.cpp b/src/core/hle/service/mii/types/core_data.cpp index 970c748ca..ba1da76ba 100644 --- a/src/core/hle/service/mii/types/core_data.cpp +++ b/src/core/hle/service/mii/types/core_data.cpp @@ -41,6 +41,7 @@ void CoreData::BuildRandom(Age age, Gender gender, Race race) { } } + SetDefault(); SetGender(gender); SetFavoriteColor(MiiUtil::GetRandomValue(FavoriteColor::Max)); SetRegionMove(0); diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 21b06d10b..22dc55a6d 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -545,6 +545,16 @@ void IGeneralService::IsAnyInternetRequestAccepted(HLERequestContext& ctx) { } } +void IGeneralService::IsAnyForegroundRequestAccepted(HLERequestContext& ctx) { + const bool is_accepted{}; + + LOG_WARNING(Service_NIFM, "(STUBBED) called, is_accepted={}", is_accepted); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(ResultSuccess); + rb.Push<u8>(is_accepted); +} + IGeneralService::IGeneralService(Core::System& system_) : ServiceFramework{system_, "IGeneralService"}, network{system_.GetRoomNetwork()} { // clang-format off @@ -569,7 +579,7 @@ IGeneralService::IGeneralService(Core::System& system_) {19, nullptr, "SetEthernetCommunicationEnabled"}, {20, &IGeneralService::IsEthernetCommunicationEnabled, "IsEthernetCommunicationEnabled"}, {21, &IGeneralService::IsAnyInternetRequestAccepted, "IsAnyInternetRequestAccepted"}, - {22, nullptr, "IsAnyForegroundRequestAccepted"}, + {22, &IGeneralService::IsAnyForegroundRequestAccepted, "IsAnyForegroundRequestAccepted"}, {23, nullptr, "PutToSleep"}, {24, nullptr, "WakeUp"}, {25, nullptr, "GetSsidListVersion"}, diff --git a/src/core/hle/service/nifm/nifm.h b/src/core/hle/service/nifm/nifm.h index ae99c4695..b74b66438 100644 --- a/src/core/hle/service/nifm/nifm.h +++ b/src/core/hle/service/nifm/nifm.h @@ -35,6 +35,7 @@ private: void GetInternetConnectionStatus(HLERequestContext& ctx); void IsEthernetCommunicationEnabled(HLERequestContext& ctx); void IsAnyInternetRequestAccepted(HLERequestContext& ctx); + void IsAnyForegroundRequestAccepted(HLERequestContext& ctx); Network::RoomNetwork& network; }; diff --git a/src/core/hle/service/ns/ns.cpp b/src/core/hle/service/ns/ns.cpp index 6e0baf0be..f9e0e272d 100644 --- a/src/core/hle/service/ns/ns.cpp +++ b/src/core/hle/service/ns/ns.cpp @@ -7,6 +7,7 @@ #include "core/file_sys/control_metadata.h" #include "core/file_sys/patch_manager.h" #include "core/file_sys/vfs.h" +#include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/glue/glue_manager.h" #include "core/hle/service/ipc_helpers.h" #include "core/hle/service/ns/errors.h" @@ -502,8 +503,8 @@ IContentManagementInterface::IContentManagementInterface(Core::System& system_) static const FunctionInfo functions[] = { {11, nullptr, "CalculateApplicationOccupiedSize"}, {43, nullptr, "CheckSdCardMountStatus"}, - {47, nullptr, "GetTotalSpaceSize"}, - {48, nullptr, "GetFreeSpaceSize"}, + {47, &IContentManagementInterface::GetTotalSpaceSize, "GetTotalSpaceSize"}, + {48, &IContentManagementInterface::GetFreeSpaceSize, "GetFreeSpaceSize"}, {600, nullptr, "CountApplicationContentMeta"}, {601, nullptr, "ListApplicationContentMetaStatus"}, {605, nullptr, "ListApplicationContentMetaStatusWithRightsCheck"}, @@ -516,6 +517,28 @@ IContentManagementInterface::IContentManagementInterface(Core::System& system_) IContentManagementInterface::~IContentManagementInterface() = default; +void IContentManagementInterface::GetTotalSpaceSize(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto storage{rp.PopEnum<FileSys::StorageId>()}; + + LOG_INFO(Service_Capture, "called, storage={}", storage); + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(ResultSuccess); + rb.Push<u64>(system.GetFileSystemController().GetTotalSpaceSize(storage)); +} + +void IContentManagementInterface::GetFreeSpaceSize(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto storage{rp.PopEnum<FileSys::StorageId>()}; + + LOG_INFO(Service_Capture, "called, storage={}", storage); + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(ResultSuccess); + rb.Push<u64>(system.GetFileSystemController().GetFreeSpaceSize(storage)); +} + IDocumentInterface::IDocumentInterface(Core::System& system_) : ServiceFramework{system_, "IDocumentInterface"} { // clang-format off diff --git a/src/core/hle/service/ns/ns.h b/src/core/hle/service/ns/ns.h index 175dad780..34d2a45dc 100644 --- a/src/core/hle/service/ns/ns.h +++ b/src/core/hle/service/ns/ns.h @@ -48,6 +48,10 @@ class IContentManagementInterface final : public ServiceFramework<IContentManage public: explicit IContentManagementInterface(Core::System& system_); ~IContentManagementInterface() override; + +private: + void GetTotalSpaceSize(HLERequestContext& ctx); + void GetFreeSpaceSize(HLERequestContext& ctx); }; class IDocumentInterface final : public ServiceFramework<IDocumentInterface> { diff --git a/src/core/hle/service/nvnflinger/buffer_queue_core.cpp b/src/core/hle/service/nvnflinger/buffer_queue_core.cpp index 2dbe29616..ed66f6f5b 100644 --- a/src/core/hle/service/nvnflinger/buffer_queue_core.cpp +++ b/src/core/hle/service/nvnflinger/buffer_queue_core.cpp @@ -41,7 +41,7 @@ bool BufferQueueCore::WaitForDequeueCondition(std::unique_lock<std::mutex>& lk) s32 BufferQueueCore::GetMinUndequeuedBufferCountLocked(bool async) const { // If DequeueBuffer is allowed to error out, we don't have to add an extra buffer. if (!use_async_buffer) { - return max_acquired_buffer_count; + return 0; } if (dequeue_buffer_cannot_block || async) { @@ -52,7 +52,7 @@ s32 BufferQueueCore::GetMinUndequeuedBufferCountLocked(bool async) const { } s32 BufferQueueCore::GetMinMaxBufferCountLocked(bool async) const { - return GetMinUndequeuedBufferCountLocked(async) + 1; + return GetMinUndequeuedBufferCountLocked(async); } s32 BufferQueueCore::GetMaxBufferCountLocked(bool async) const { @@ -61,7 +61,7 @@ s32 BufferQueueCore::GetMaxBufferCountLocked(bool async) const { if (override_max_buffer_count != 0) { ASSERT(override_max_buffer_count >= min_buffer_count); - max_buffer_count = override_max_buffer_count; + return override_max_buffer_count; } // Any buffers that are dequeued by the producer or sitting in the queue waiting to be consumed diff --git a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp index dc6917d5d..6e7a49658 100644 --- a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp +++ b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp @@ -134,7 +134,7 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found, St const s32 max_buffer_count = core->GetMaxBufferCountLocked(async); if (async && core->override_max_buffer_count) { if (core->override_max_buffer_count < max_buffer_count) { - LOG_ERROR(Service_Nvnflinger, "async mode is invalid with buffer count override"); + *found = BufferQueueCore::INVALID_BUFFER_SLOT; return Status::BadValue; } } @@ -142,7 +142,8 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found, St // Free up any buffers that are in slots beyond the max buffer count for (s32 s = max_buffer_count; s < BufferQueueDefs::NUM_BUFFER_SLOTS; ++s) { ASSERT(slots[s].buffer_state == BufferState::Free); - if (slots[s].graphic_buffer != nullptr) { + if (slots[s].graphic_buffer != nullptr && slots[s].buffer_state == BufferState::Free && + !slots[s].is_preallocated) { core->FreeBufferLocked(s); *return_flags |= Status::ReleaseAllBuffers; } diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp index 469a53244..2e29bc848 100644 --- a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp +++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp @@ -46,7 +46,7 @@ Result AllocateIoForProcessAddressSpace(Common::ProcessAddress* out_map_address, // Get bounds of where mapping is possible. const VAddr alias_code_begin = GetInteger(page_table.GetAliasCodeRegionStart()); const VAddr alias_code_size = page_table.GetAliasCodeRegionSize() / YUZU_PAGESIZE; - const auto state = Kernel::KMemoryState::Io; + const auto state = Kernel::KMemoryState::IoMemory; const auto perm = Kernel::KMemoryPermission::UserReadWrite; std::mt19937_64 rng{process->GetRandomEntropy(0)}; diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index a07c621d9..bebb45eae 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -66,7 +66,6 @@ Nvnflinger::Nvnflinger(Core::System& system_, HosBinderDriverServer& hos_binder_ "ScreenComposition", [this](std::uintptr_t, s64 time, std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> { - { const auto lock_guard = Lock(); } vsync_signal.Set(); return std::chrono::nanoseconds(GetNextTicks()); }); @@ -99,6 +98,7 @@ Nvnflinger::~Nvnflinger() { } ShutdownLayers(); + vsync_thread = {}; if (nvdrv) { nvdrv->Close(disp_fd); @@ -106,6 +106,7 @@ Nvnflinger::~Nvnflinger() { } void Nvnflinger::ShutdownLayers() { + const auto lock_guard = Lock(); for (auto& display : displays) { for (size_t layer = 0; layer < display.GetNumLayers(); ++layer) { display.GetLayer(layer).Core().NotifyShutdown(); @@ -229,16 +230,6 @@ VI::Layer* Nvnflinger::FindLayer(u64 display_id, u64 layer_id) { return display->FindLayer(layer_id); } -const VI::Layer* Nvnflinger::FindLayer(u64 display_id, u64 layer_id) const { - const auto* const display = FindDisplay(display_id); - - if (display == nullptr) { - return nullptr; - } - - return display->FindLayer(layer_id); -} - VI::Layer* Nvnflinger::FindOrCreateLayer(u64 display_id, u64 layer_id) { auto* const display = FindDisplay(display_id); @@ -288,7 +279,6 @@ void Nvnflinger::Compose() { auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd); ASSERT(nvdisp); - guard->unlock(); Common::Rectangle<int> crop_rect{ static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()), static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())}; @@ -299,7 +289,6 @@ void Nvnflinger::Compose() { buffer.fence.fences, buffer.fence.num_fences); MicroProfileFlip(); - guard->lock(); swap_interval = buffer.swap_interval; diff --git a/src/core/hle/service/nvnflinger/nvnflinger.h b/src/core/hle/service/nvnflinger/nvnflinger.h index 14c783582..959d8b46b 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.h +++ b/src/core/hle/service/nvnflinger/nvnflinger.h @@ -117,9 +117,6 @@ private: /// Finds the layer identified by the specified ID in the desired display. [[nodiscard]] VI::Layer* FindLayer(u64 display_id, u64 layer_id); - /// Finds the layer identified by the specified ID in the desired display. - [[nodiscard]] const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const; - /// Finds the layer identified by the specified ID in the desired display, /// or creates the layer if it is not found. /// To be used when the system expects the specified ID to already exist. diff --git a/src/core/hle/service/pctl/pctl_module.cpp b/src/core/hle/service/pctl/pctl_module.cpp index 5db1703d1..6a7fd72bc 100644 --- a/src/core/hle/service/pctl/pctl_module.cpp +++ b/src/core/hle/service/pctl/pctl_module.cpp @@ -33,7 +33,7 @@ public: {1001, &IParentalControlService::CheckFreeCommunicationPermission, "CheckFreeCommunicationPermission"}, {1002, nullptr, "ConfirmLaunchApplicationPermission"}, {1003, nullptr, "ConfirmResumeApplicationPermission"}, - {1004, nullptr, "ConfirmSnsPostPermission"}, + {1004, &IParentalControlService::ConfirmSnsPostPermission, "ConfirmSnsPostPermission"}, {1005, nullptr, "ConfirmSystemSettingsPermission"}, {1006, &IParentalControlService::IsRestrictionTemporaryUnlocked, "IsRestrictionTemporaryUnlocked"}, {1007, nullptr, "RevertRestrictionTemporaryUnlocked"}, @@ -141,6 +141,12 @@ public: service_context.CreateEvent("IParentalControlService::RequestSuspensionEvent"); } + ~IParentalControlService() { + service_context.CloseEvent(synchronization_event); + service_context.CloseEvent(unlinked_event); + service_context.CloseEvent(request_suspension_event); + }; + private: bool CheckFreeCommunicationPermissionImpl() const { if (states.temporary_unlocked) { @@ -236,6 +242,13 @@ private: states.free_communication = true; } + void ConfirmSnsPostPermission(HLERequestContext& ctx) { + LOG_WARNING(Service_PCTL, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(Error::ResultNoFreeCommunication); + } + void IsRestrictionTemporaryUnlocked(HLERequestContext& ctx) { const bool is_temporary_unlocked = false; diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp index f9cf2dda3..d92499f05 100644 --- a/src/core/hle/service/pm/pm.cpp +++ b/src/core/hle/service/pm/pm.cpp @@ -37,7 +37,7 @@ std::optional<Kernel::KProcess*> SearchProcessList( void GetApplicationPidGeneric(HLERequestContext& ctx, const std::vector<Kernel::KProcess*>& process_list) { const auto process = SearchProcessList(process_list, [](const auto& proc) { - return proc->GetProcessId() == Kernel::KProcess::ProcessIDMin; + return proc->GetProcessId() == Kernel::KProcess::ProcessIdMin; }); IPC::ResponseBuilder rb{ctx, 4}; diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp index ec4a84989..14e8df63a 100644 --- a/src/core/hle/service/prepo/prepo.cpp +++ b/src/core/hle/service/prepo/prepo.cpp @@ -58,14 +58,8 @@ private: IPC::RequestParser rp{ctx}; const auto process_id = rp.PopRaw<u64>(); - const auto data1 = ctx.ReadBuffer(0); - const auto data2 = [&ctx] { - if (ctx.CanReadBuffer(1)) { - return ctx.ReadBuffer(1); - } - - return std::span<const u8>{}; - }(); + const auto data1 = ctx.ReadBufferA(0); + const auto data2 = ctx.ReadBufferX(0); LOG_DEBUG(Service_PREPO, "called, type={:02X}, process_id={:016X}, data1_size={:016X}, data2_size={:016X}", @@ -85,14 +79,8 @@ private: const auto user_id = rp.PopRaw<u128>(); const auto process_id = rp.PopRaw<u64>(); - const auto data1 = ctx.ReadBuffer(0); - const auto data2 = [&ctx] { - if (ctx.CanReadBuffer(1)) { - return ctx.ReadBuffer(1); - } - - return std::span<const u8>{}; - }(); + const auto data1 = ctx.ReadBufferA(0); + const auto data2 = ctx.ReadBufferX(0); LOG_DEBUG(Service_PREPO, "called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, " @@ -137,14 +125,8 @@ private: IPC::RequestParser rp{ctx}; const auto title_id = rp.PopRaw<u64>(); - const auto data1 = ctx.ReadBuffer(0); - const auto data2 = [&ctx] { - if (ctx.CanReadBuffer(1)) { - return ctx.ReadBuffer(1); - } - - return std::span<const u8>{}; - }(); + const auto data1 = ctx.ReadBufferA(0); + const auto data2 = ctx.ReadBufferX(0); LOG_DEBUG(Service_PREPO, "called, title_id={:016X}, data1_size={:016X}, data2_size={:016X}", title_id, data1.size(), data2.size()); @@ -161,14 +143,8 @@ private: const auto user_id = rp.PopRaw<u128>(); const auto title_id = rp.PopRaw<u64>(); - const auto data1 = ctx.ReadBuffer(0); - const auto data2 = [&ctx] { - if (ctx.CanReadBuffer(1)) { - return ctx.ReadBuffer(1); - } - - return std::span<const u8>{}; - }(); + const auto data1 = ctx.ReadBufferA(0); + const auto data2 = ctx.ReadBufferX(0); LOG_DEBUG(Service_PREPO, "called, user_id={:016X}{:016X}, title_id={:016X}, data1_size={:016X}, " diff --git a/src/core/hle/service/ptm/ts.cpp b/src/core/hle/service/ptm/ts.cpp index ca064dd90..652f38b97 100644 --- a/src/core/hle/service/ptm/ts.cpp +++ b/src/core/hle/service/ptm/ts.cpp @@ -9,6 +9,35 @@ namespace Service::PTM { +enum class Location : u8 { + Internal, + External, +}; + +class ISession : public ServiceFramework<ISession> { +public: + explicit ISession(Core::System& system_) : ServiceFramework{system_, "ISession"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "GetTemperatureRange"}, + {2, nullptr, "SetMeasurementMode"}, + {4, &ISession::GetTemperature, "GetTemperature"}, + }; + // clang-format on + + RegisterHandlers(functions); + } + +private: + void GetTemperature(HLERequestContext& ctx) { + constexpr f32 temperature = 35; + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(ResultSuccess); + rb.Push(temperature); + } +}; + TS::TS(Core::System& system_) : ServiceFramework{system_, "ts"} { // clang-format off static const FunctionInfo functions[] = { @@ -16,7 +45,7 @@ TS::TS(Core::System& system_) : ServiceFramework{system_, "ts"} { {1, &TS::GetTemperature, "GetTemperature"}, {2, nullptr, "SetMeasurementMode"}, {3, &TS::GetTemperatureMilliC, "GetTemperatureMilliC"}, - {4, nullptr, "OpenSession"}, + {4, &TS::OpenSession, "OpenSession"}, }; // clang-format on @@ -47,4 +76,13 @@ void TS::GetTemperatureMilliC(HLERequestContext& ctx) { rb.Push(temperature); } +void TS::OpenSession(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + [[maybe_unused]] const u32 device_code = rp.Pop<u32>(); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(ResultSuccess); + rb.PushIpcInterface<ISession>(system); +} + } // namespace Service::PTM diff --git a/src/core/hle/service/ptm/ts.h b/src/core/hle/service/ptm/ts.h index c3f43d5a3..a10a91a64 100644 --- a/src/core/hle/service/ptm/ts.h +++ b/src/core/hle/service/ptm/ts.h @@ -14,13 +14,9 @@ public: ~TS() override; private: - enum class Location : u8 { - Internal, - External, - }; - void GetTemperature(HLERequestContext& ctx); void GetTemperatureMilliC(HLERequestContext& ctx); + void OpenSession(HLERequestContext& ctx); }; } // namespace Service::PTM diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp index 165b97dad..ec3af80af 100644 --- a/src/core/hle/service/set/set_sys.cpp +++ b/src/core/hle/service/set/set_sys.cpp @@ -5,8 +5,13 @@ #include "common/logging/log.h" #include "common/settings.h" #include "common/string_util.h" +#include "core/core.h" +#include "core/file_sys/content_archive.h" #include "core/file_sys/errors.h" -#include "core/file_sys/system_archive/system_version.h" +#include "core/file_sys/nca_metadata.h" +#include "core/file_sys/registered_cache.h" +#include "core/file_sys/romfs.h" +#include "core/file_sys/system_archive/system_archive.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/ipc_helpers.h" #include "core/hle/service/set/set.h" @@ -22,18 +27,30 @@ enum class GetFirmwareVersionType { Version2, }; -void GetFirmwareVersionImpl(HLERequestContext& ctx, GetFirmwareVersionType type) { - LOG_WARNING(Service_SET, "called - Using hardcoded firmware version '{}'", - FileSys::SystemArchive::GetLongDisplayVersion()); - +void GetFirmwareVersionImpl(Core::System& system, HLERequestContext& ctx, + GetFirmwareVersionType type) { ASSERT_MSG(ctx.GetWriteBufferSize() == 0x100, "FirmwareVersion output buffer must be 0x100 bytes in size!"); - // Instead of using the normal procedure of checking for the real system archive and if it - // doesn't exist, synthesizing one, I feel that that would lead to strange bugs because a - // used is using a really old or really new SystemVersion title. The synthesized one ensures - // consistence (currently reports as 5.1.0-0.0) - const auto archive = FileSys::SystemArchive::SystemVersion(); + constexpr u64 FirmwareVersionSystemDataId = 0x0100000000000809; + auto& fsc = system.GetFileSystemController(); + + // Attempt to load version data from disk + const FileSys::RegisteredCache* bis_system{}; + std::unique_ptr<FileSys::NCA> nca{}; + FileSys::VirtualDir romfs{}; + + bis_system = fsc.GetSystemNANDContents(); + if (bis_system) { + nca = bis_system->GetEntry(FirmwareVersionSystemDataId, FileSys::ContentRecordType::Data); + } + if (nca) { + romfs = FileSys::ExtractRomFS(nca->GetRomFS()); + } + if (!romfs) { + romfs = FileSys::ExtractRomFS( + FileSys::SystemArchive::SynthesizeSystemArchive(FirmwareVersionSystemDataId)); + } const auto early_exit_failure = [&ctx](std::string_view desc, Result code) { LOG_ERROR(Service_SET, "General failure while attempting to resolve firmware version ({}).", @@ -42,13 +59,7 @@ void GetFirmwareVersionImpl(HLERequestContext& ctx, GetFirmwareVersionType type) rb.Push(code); }; - if (archive == nullptr) { - early_exit_failure("The system version archive couldn't be synthesized.", - FileSys::ERROR_FAILED_MOUNT_ARCHIVE); - return; - } - - const auto ver_file = archive->GetFile("file"); + const auto ver_file = romfs->GetFile("file"); if (ver_file == nullptr) { early_exit_failure("The system version archive didn't contain the file 'file'.", FileSys::ERROR_INVALID_ARGUMENT); @@ -87,12 +98,12 @@ void SET_SYS::SetLanguageCode(HLERequestContext& ctx) { void SET_SYS::GetFirmwareVersion(HLERequestContext& ctx) { LOG_DEBUG(Service_SET, "called"); - GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version1); + GetFirmwareVersionImpl(system, ctx, GetFirmwareVersionType::Version1); } void SET_SYS::GetFirmwareVersion2(HLERequestContext& ctx) { LOG_DEBUG(Service_SET, "called"); - GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version2); + GetFirmwareVersionImpl(system, ctx, GetFirmwareVersionType::Version2); } void SET_SYS::GetAccountSettings(HLERequestContext& ctx) { diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 5a42dea48..5c36b71e5 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -118,7 +118,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect return {ResultStatus::ErrorMissingNPDM, {}}; } - const ResultStatus result2 = metadata.Load(npdm); + const ResultStatus result2 = metadata.Reload(npdm); if (result2 != ResultStatus::Success) { return {result2, {}}; } diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index a06e99166..53a89cc8f 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -19,16 +19,23 @@ namespace Core::Memory { namespace { constexpr auto CHEAT_ENGINE_NS = std::chrono::nanoseconds{1000000000 / 12}; -std::string_view ExtractName(std::string_view data, std::size_t start_index, char match) { +std::string_view ExtractName(std::size_t& out_name_size, std::string_view data, + std::size_t start_index, char match) { auto end_index = start_index; while (data[end_index] != match) { ++end_index; - if (end_index > data.size() || - (end_index - start_index - 1) > sizeof(CheatDefinition::readable_name)) { + if (end_index > data.size()) { return {}; } } + out_name_size = end_index - start_index; + + // Clamp name if it's too big + if (out_name_size > sizeof(CheatDefinition::readable_name)) { + end_index = start_index + sizeof(CheatDefinition::readable_name); + } + return data.substr(start_index, end_index - start_index); } } // Anonymous namespace @@ -113,7 +120,8 @@ std::vector<CheatEntry> TextCheatParser::Parse(std::string_view data) const { return {}; } - const auto name = ExtractName(data, i + 1, '}'); + std::size_t name_size{}; + const auto name = ExtractName(name_size, data, i + 1, '}'); if (name.empty()) { return {}; } @@ -125,12 +133,13 @@ std::vector<CheatEntry> TextCheatParser::Parse(std::string_view data) const { .definition.readable_name[out[*current_entry].definition.readable_name.size() - 1] = '\0'; - i += name.length() + 1; + i += name_size + 1; } else if (data[i] == '[') { current_entry = out.size(); out.emplace_back(); - const auto name = ExtractName(data, i + 1, ']'); + std::size_t name_size{}; + const auto name = ExtractName(name_size, data, i + 1, ']'); if (name.empty()) { return {}; } @@ -142,7 +151,7 @@ std::vector<CheatEntry> TextCheatParser::Parse(std::string_view data) const { .definition.readable_name[out[*current_entry].definition.readable_name.size() - 1] = '\0'; - i += name.length() + 1; + i += name_size + 1; } else if (::isxdigit(data[i])) { if (!current_entry || out[*current_entry].definition.num_opcodes >= out[*current_entry].definition.opcodes.size()) { diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index ed875d444..5d168cbc1 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -116,7 +116,7 @@ json GetProcessorStateDataAuto(Core::System& system) { Core::ARM_Interface::ThreadContext64 context{}; arm.SaveContext(context); - return GetProcessorStateData(process->Is64BitProcess() ? "AArch64" : "AArch32", + return GetProcessorStateData(process->Is64Bit() ? "AArch64" : "AArch32", GetInteger(process->GetEntryPoint()), context.sp, context.pc, context.pstate, context.cpu_registers); } diff --git a/src/input_common/drivers/udp_client.cpp b/src/input_common/drivers/udp_client.cpp index 808b21069..77db60e92 100644 --- a/src/input_common/drivers/udp_client.cpp +++ b/src/input_common/drivers/udp_client.cpp @@ -338,6 +338,7 @@ void UDPClient::StartCommunication(std::size_t client, const std::string& host, for (std::size_t index = 0; index < PADS_PER_CLIENT; ++index) { const PadIdentifier identifier = GetPadIdentifier(client * PADS_PER_CLIENT + index); PreSetController(identifier); + PreSetMotion(identifier, 0); } } diff --git a/src/input_common/helpers/joycon_driver.cpp b/src/input_common/helpers/joycon_driver.cpp index cf51f3481..c9f903213 100644 --- a/src/input_common/helpers/joycon_driver.cpp +++ b/src/input_common/helpers/joycon_driver.cpp @@ -139,7 +139,7 @@ void JoyconDriver::InputThread(std::stop_token stop_token) { input_thread_running = true; // Max update rate is 5ms, ensure we are always able to read a bit faster - constexpr int ThreadDelay = 2; + constexpr int ThreadDelay = 3; std::vector<u8> buffer(MaxBufferSize); while (!stop_token.stop_requested()) { @@ -163,6 +163,17 @@ void JoyconDriver::InputThread(std::stop_token stop_token) { OnNewData(buffer); } + if (!vibration_queue.Empty()) { + VibrationValue vibration_value; + vibration_queue.Pop(vibration_value); + last_vibration_result = rumble_protocol->SendVibration(vibration_value); + } + + // We can't keep up with vibrations. Start skipping. + while (vibration_queue.Size() > 6) { + vibration_queue.Pop(); + } + std::this_thread::yield(); } @@ -402,7 +413,8 @@ Common::Input::DriverResult JoyconDriver::SetVibration(const VibrationValue& vib if (disable_input_thread) { return Common::Input::DriverResult::HandleInUse; } - return rumble_protocol->SendVibration(vibration); + vibration_queue.Push(vibration); + return last_vibration_result; } Common::Input::DriverResult JoyconDriver::SetLedConfig(u8 led_pattern) { diff --git a/src/input_common/helpers/joycon_driver.h b/src/input_common/helpers/joycon_driver.h index 335e12cc3..5355780fb 100644 --- a/src/input_common/helpers/joycon_driver.h +++ b/src/input_common/helpers/joycon_driver.h @@ -9,6 +9,7 @@ #include <span> #include <thread> +#include "common/threadsafe_queue.h" #include "input_common/helpers/joycon_protocol/joycon_types.h" namespace Common::Input { @@ -152,6 +153,10 @@ private: SerialNumber handle_serial_number{}; // Serial number type reported by hidapi SupportedFeatures supported_features{}; + /// Queue of vibration request to controllers + Common::Input::DriverResult last_vibration_result{Common::Input::DriverResult::Success}; + Common::SPSCQueue<VibrationValue> vibration_queue; + // Thread related mutable std::mutex mutex; std::jthread input_thread; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index d91e04446..66ecfc9f7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -242,6 +242,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR } if (program.info.uses_subgroup_shuffles) { ctx.header += "bool shfl_in_bounds;"; + ctx.header += "uint shfl_result;"; } ctx.code.insert(0, ctx.header); ctx.code += '}'; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 1245c9429..f9be5de1c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -141,7 +141,8 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); + ctx.Add("shfl_result=readInvocationARB({},{});", value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?shfl_result:{};", inst, value); } void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, @@ -158,7 +159,8 @@ void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)}; ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); + ctx.Add("shfl_result=readInvocationARB({},{});", value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?shfl_result:{};", inst, value); } void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, @@ -175,7 +177,8 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)}; ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); + ctx.Add("shfl_result=readInvocationARB({},{});", value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?shfl_result:{};", inst, value); } void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, @@ -193,7 +196,8 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)}; ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); + ctx.Add("shfl_result=readInvocationARB({},{});", value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?shfl_result:{};", inst, value); } void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 2868fc57d..1d77426e0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -111,16 +111,33 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, } else if (element_size > 1) { const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; const Id shift{ctx.Const(log2_element_size)}; - buffer_offset = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); + buffer_offset = ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), shift); } else { buffer_offset = ctx.Def(offset); } if (!binding.IsImmediate()) { return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset); } + const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)}; - return ctx.OpLoad(result_type, access_chain); + const Id val = ctx.OpLoad(result_type, access_chain); + + if (offset.IsImmediate() || !ctx.profile.has_broken_robust) { + return val; + } + + const auto is_float = UniformDefinitions::IsFloat(member_ptr); + const auto num_elements = UniformDefinitions::NumElements(member_ptr); + const std::array zero_vec{ + is_float ? ctx.Const(0.0f) : ctx.Const(0u), + is_float ? ctx.Const(0.0f) : ctx.Const(0u), + is_float ? ctx.Const(0.0f) : ctx.Const(0u), + is_float ? ctx.Const(0.0f) : ctx.Const(0u), + }; + const Id cond = ctx.OpULessThanEqual(ctx.TypeBool(), buffer_offset, ctx.Const(0xFFFFu)); + const Id zero = ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements)); + return ctx.OpSelect(result_type, cond, val, zero); } Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { @@ -138,7 +155,7 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde const u32 element{(offset.U32() / 4) % 4 + index_offset}; return ctx.OpCompositeExtract(ctx.U32[1], vector, element); } - const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; + const Id shift{ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; if (index_offset > 0) { element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 7c49fd504..1aa79863d 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -64,6 +64,42 @@ struct UniformDefinitions { Id F32{}; Id U32x2{}; Id U32x4{}; + + constexpr static size_t NumElements(Id UniformDefinitions::*member_ptr) { + if (member_ptr == &UniformDefinitions::U8) { + return 1; + } + if (member_ptr == &UniformDefinitions::S8) { + return 1; + } + if (member_ptr == &UniformDefinitions::U16) { + return 1; + } + if (member_ptr == &UniformDefinitions::S16) { + return 1; + } + if (member_ptr == &UniformDefinitions::U32) { + return 1; + } + if (member_ptr == &UniformDefinitions::F32) { + return 1; + } + if (member_ptr == &UniformDefinitions::U32x2) { + return 2; + } + if (member_ptr == &UniformDefinitions::U32x4) { + return 4; + } + ASSERT(false); + return 1; + } + + constexpr static bool IsFloat(Id UniformDefinitions::*member_ptr) { + if (member_ptr == &UniformDefinitions::F32) { + return true; + } + return false; + } }; struct StorageTypeDefinition { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 9ca97f6a4..38d820db2 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -9,7 +9,6 @@ namespace Shader { struct Profile { u32 supported_spirv{0x00010000}; - bool unified_descriptor_binding{}; bool support_descriptor_aliasing{}; bool support_int8{}; @@ -82,6 +81,9 @@ struct Profile { bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; u32 gl_max_compute_smem_size{}; + + /// Maxwell and earlier nVidia architectures have broken robust support + bool has_broken_robust{}; }; } // namespace Shader diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp index f7a23e876..42e6ade09 100644 --- a/src/tests/common/unique_function.cpp +++ b/src/tests/common/unique_function.cpp @@ -46,8 +46,8 @@ TEST_CASE("UniqueFunction", "[common]") { Noisy noisy; REQUIRE(noisy.state == "Default constructed"); - Common::UniqueFunction<void> func = [noisy = std::move(noisy)] { - REQUIRE(noisy.state == "Move constructed"); + Common::UniqueFunction<void> func = [noisy_inner = std::move(noisy)] { + REQUIRE(noisy_inner.state == "Move constructed"); }; REQUIRE(noisy.state == "Moved away"); func(); @@ -101,7 +101,7 @@ TEST_CASE("UniqueFunction", "[common]") { }; Foo object{&num_destroyed}; { - Common::UniqueFunction<void> func = [object = std::move(object)] {}; + Common::UniqueFunction<void> func = [object_inner = std::move(object)] {}; REQUIRE(num_destroyed == 0); } REQUIRE(num_destroyed == 1); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9e90c587c..081a574e8 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -544,7 +544,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { it++; } - boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; + boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads; u64 total_size_bytes = 0; u64 largest_copy = 0; for (const IntervalSet& intervals : committed_ranges) { @@ -914,6 +914,11 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; + + if (is_written) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); + } + if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written); ++binding_index; @@ -931,6 +936,11 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); + const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0; + if (is_written) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); + } + const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { @@ -962,6 +972,8 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); + const u32 offset = buffer.Offset(binding.cpu_addr); host_bindings.buffers.push_back(&buffer); host_bindings.offsets.push_back(offset); @@ -1011,6 +1023,11 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const bool is_written = ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; + + if (is_written) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); + } + if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written); ++binding_index; @@ -1028,6 +1045,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); + const bool is_written = + ((channel_state->written_compute_texture_buffers >> index) & 1) != 0; + if (is_written) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); + } + const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { @@ -1044,8 +1067,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { template <class P> void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { - do { - channel_state->has_deleted_buffers = false; + BufferOperations([&]() { if (is_indexed) { UpdateIndexBuffer(); } @@ -1059,14 +1081,16 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { if (current_draw_indirect) { UpdateDrawIndirect(); } - } while (channel_state->has_deleted_buffers); + }); } template <class P> void BufferCache<P>::DoUpdateComputeBuffers() { - UpdateComputeUniformBuffers(); - UpdateComputeStorageBuffers(); - UpdateComputeTextureBuffers(); + BufferOperations([&]() { + UpdateComputeUniformBuffers(); + UpdateComputeStorageBuffers(); + UpdateComputeTextureBuffers(); + }); } template <class P> @@ -1201,16 +1225,11 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) { template <class P> void BufferCache<P>::UpdateStorageBuffers(size_t stage) { - const u32 written_mask = channel_state->written_storage_buffers[stage]; ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { // Resolve buffer Binding& binding = channel_state->storage_buffers[stage][index]; const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); binding.buffer_id = buffer_id; - // Mark buffer as written if needed - if (((written_mask >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); - } }); } @@ -1219,10 +1238,6 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) { ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { Binding& binding = channel_state->texture_buffers[stage][index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); - // Mark buffer as written if needed - if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) { - MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); - } }); } @@ -1252,7 +1267,6 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { .size = size, .buffer_id = buffer_id, }; - MarkWrittenBuffer(buffer_id, *cpu_addr, size); } template <class P> @@ -1279,10 +1293,6 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { // Resolve buffer Binding& binding = channel_state->compute_storage_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); - // Mark as written if needed - if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); - } }); } @@ -1291,18 +1301,11 @@ void BufferCache<P>::UpdateComputeTextureBuffers() { ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { Binding& binding = channel_state->compute_texture_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); - // Mark as written if needed - if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); - } }); } template <class P> void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { - if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) { - SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size); - } memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); const IntervalType base_interval{cpu_addr, cpu_addr + size}; diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index c4f6e8d12..eed267361 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -62,7 +62,11 @@ using BufferId = SlotId; using VideoCore::Surface::PixelFormat; using namespace Common::Literals; +#ifdef __APPLE__ +constexpr u32 NUM_VERTEX_BUFFERS = 16; +#else constexpr u32 NUM_VERTEX_BUFFERS = 32; +#endif constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index f34090791..d77ff455b 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -48,8 +48,14 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) { SetInlineIndexBuffer(regs.inline_index_4x8.index3); break; case MAXWELL3D_REG_INDEX(vertex_array_instance_first): + DrawArrayInstanced(regs.vertex_array_instance_first.topology.Value(), + regs.vertex_array_instance_first.start.Value(), + regs.vertex_array_instance_first.count.Value(), false); + break; case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): { - LOG_WARNING(HW_GPU, "(STUBBED) called"); + DrawArrayInstanced(regs.vertex_array_instance_subsequent.topology.Value(), + regs.vertex_array_instance_subsequent.start.Value(), + regs.vertex_array_instance_subsequent.count.Value(), true); break; } case MAXWELL3D_REG_INDEX(draw_texture.src_y0): { @@ -84,6 +90,22 @@ void DrawManager::DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 ve ProcessDraw(false, num_instances); } +void DrawManager::DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, + bool subsequent) { + draw_state.topology = topology; + draw_state.vertex_buffer.first = vertex_first; + draw_state.vertex_buffer.count = vertex_count; + + if (!subsequent) { + draw_state.instance_count = 1; + } + + draw_state.base_instance = draw_state.instance_count - 1; + draw_state.draw_mode = DrawMode::Instance; + draw_state.instance_count++; + ProcessDraw(false, 1); +} + void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances) { const auto& regs{maxwell3d->regs}; diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 18d959143..cfc8127fc 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -66,6 +66,8 @@ public: void DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, u32 base_instance, u32 num_instances); + void DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, + bool subsequent); void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances); diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 805a89900..c0e6471fe 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -86,7 +86,10 @@ public: uncommitted_operations.emplace_back(std::move(func)); } pending_operations.emplace_back(std::move(uncommitted_operations)); - QueueFence(new_fence); + { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + QueueFence(new_fence); + } if (!delay_fence) { func(); } diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index 8d7da50fc..dbcf508e5 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -137,16 +137,6 @@ bool Codec::CreateGpuAvDevice() { break; } if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) { -#if defined(__unix__) - // Some linux decoding backends are reported to crash with this config method - // TODO(ameerj): Properly support this method - if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) { - // skip zero-copy decoders, we don't currently support them - LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.", - av_hwdevice_get_type_name(type), config->methods); - continue; - } -#endif LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); av_codec_ctx->pix_fmt = config->pix_fmt; return true; diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 6b912027f..cd2549232 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -19,6 +19,8 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag + convert_abgr8_to_d32f.frag + convert_d32f_to_abgr8.frag convert_d24s8_to_abgr8.frag convert_depth_to_float.frag convert_float_to_depth.frag diff --git a/src/video_core/host_shaders/convert_abgr8_to_d32f.frag b/src/video_core/host_shaders/convert_abgr8_to_d32f.frag new file mode 100644 index 000000000..095b910c2 --- /dev/null +++ b/src/video_core/host_shaders/convert_abgr8_to_d32f.frag @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + vec4 color = texelFetch(color_texture, coord, 0).abgr; + + float value = color.a * (color.r + color.g + color.b) / 3.0f; + + gl_FragDepth = value; +} diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag index d33131d7c..b81a54056 100644 --- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag +++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag @@ -3,16 +3,16 @@ #version 450 +precision mediump int; +precision highp float; + layout(binding = 0) uniform sampler2D depth_tex; -layout(binding = 1) uniform isampler2D stencil_tex; +layout(binding = 1) uniform usampler2D stencil_tex; layout(location = 0) out vec4 color; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); - uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - highp uint depth_val = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; diff --git a/src/video_core/host_shaders/convert_d32f_to_abgr8.frag b/src/video_core/host_shaders/convert_d32f_to_abgr8.frag new file mode 100644 index 000000000..4e5a9f955 --- /dev/null +++ b/src/video_core/host_shaders/convert_d32f_to_abgr8.frag @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout(binding = 0) uniform sampler2D depth_tex; + +layout(location = 0) out vec4 color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + float depth = texelFetch(depth_tex, coord, 0).r; + color = vec4(depth, depth, depth, 1.0); +} diff --git a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag index 31db7d426..6a457981d 100644 --- a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag +++ b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag @@ -3,16 +3,16 @@ #version 450 +precision mediump int; +precision highp float; + layout(binding = 0) uniform sampler2D depth_tex; -layout(binding = 1) uniform isampler2D stencil_tex; +layout(binding = 1) uniform usampler2D stencil_tex; layout(location = 0) out vec4 color; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); - uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - highp uint depth_val = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 3e12a8813..78ea5208b 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -89,9 +89,6 @@ public: void RequestScreenshot(void* data, std::function<void(bool)> callback, const Layout::FramebufferLayout& layout); - /// This is called to notify the rendering backend of a surface change - virtual void NotifySurfaceChanged() {} - protected: Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. std::unique_ptr<Core::Frontend::GraphicsContext> context; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9cafd2983..512eef575 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1048,6 +1048,10 @@ void Image::Scale(bool up_scale) { } bool Image::ScaleUp(bool ignore) { + const auto& resolution = runtime->resolution; + if (!resolution.active) { + return false; + } if (True(flags & ImageFlagBits::Rescaled)) { return false; } @@ -1060,9 +1064,6 @@ bool Image::ScaleUp(bool ignore) { return false; } flags |= ImageFlagBits::Rescaled; - if (!runtime->resolution.active) { - return false; - } has_scaled = true; if (ignore) { current_texture = upscaled_backup.handle; @@ -1073,13 +1074,14 @@ bool Image::ScaleUp(bool ignore) { } bool Image::ScaleDown(bool ignore) { - if (False(flags & ImageFlagBits::Rescaled)) { + const auto& resolution = runtime->resolution; + if (!resolution.active) { return false; } - flags &= ~ImageFlagBits::Rescaled; - if (!runtime->resolution.active) { + if (False(flags & ImageFlagBits::Rescaled)) { return false; } + flags &= ~ImageFlagBits::Rescaled; if (ignore) { current_texture = texture.handle; return true; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3676eaaa9..e71b87e99 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -118,6 +118,8 @@ public: void InsertUploadMemoryBarrier(); + void TransitionImageLayout(Image& image) {} + FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; bool HasNativeBgr() const noexcept { diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index c7dc7e0a1..5ea9e2378 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -116,6 +116,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT_24_8}, // X8_D24_UNORM {GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE}, // S8_UINT {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 1032c9d12..c3db09424 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -8,7 +8,9 @@ #include "common/settings.h" #include "video_core/host_shaders/blit_color_float_frag_spv.h" #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" +#include "video_core/host_shaders/convert_abgr8_to_d32f_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" +#include "video_core/host_shaders/convert_d32f_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" #include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h" @@ -433,6 +435,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), + convert_abgr8_to_d32f_frag(BuildShader(device, CONVERT_ABGR8_TO_D32F_FRAG_SPV)), + convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)), @@ -557,6 +561,20 @@ void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view); } +void BlitImageHelper::ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipelineDepthTargetEx(convert_abgr8_to_d32f_pipeline, dst_framebuffer->RenderPass(), + convert_abgr8_to_d32f_frag); + Convert(*convert_abgr8_to_d32f_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertD32FToABGR8(const Framebuffer* dst_framebuffer, + ImageView& src_image_view) { + ConvertPipelineColorTargetEx(convert_d32f_to_abgr8_pipeline, dst_framebuffer->RenderPass(), + convert_d32f_to_abgr8_frag); + ConvertDepthStencil(*convert_d32f_to_abgr8_pipeline, dst_framebuffer, src_image_view); +} + void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view) { ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), @@ -609,6 +627,8 @@ void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool const VkPipelineLayout layout = *clear_color_pipeline_layout; scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf) { + constexpr std::array blend_constants{0.0f, 0.0f, 0.0f, 0.0f}; + cmdbuf.SetBlendConstants(blend_constants.data()); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); BindBlitState(cmdbuf, dst_region); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth); @@ -865,7 +885,7 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline( .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .depthTestEnable = VK_FALSE, + .depthTestEnable = key.depth_clear, .depthWriteEnable = key.depth_clear, .depthCompareOp = VK_COMPARE_OP_ALWAYS, .depthBoundsTestEnable = VK_FALSE, diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index dcfe217aa..b2104a59e 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -67,6 +67,10 @@ public: void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ConvertD32FToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); + void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); @@ -128,6 +132,8 @@ private: vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; + vk::ShaderModule convert_abgr8_to_d32f_frag; + vk::ShaderModule convert_d32f_to_abgr8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; vk::ShaderModule convert_s8d24_to_abgr8_frag; vk::Sampler linear_sampler; @@ -146,6 +152,8 @@ private: vk::Pipeline convert_d16_to_r16_pipeline; vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; + vk::Pipeline convert_abgr8_to_d32f_pipeline; + vk::Pipeline convert_d32f_to_abgr8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; vk::Pipeline convert_s8d24_to_abgr8_pipeline; }; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 208e88533..a08f2f67f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -214,8 +214,9 @@ struct FormatTuple { {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT // Depth formats - {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT - {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM + {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT + {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM + {VK_FORMAT_X8_D24_UNORM_PACK32, Attachable}, // X8_D24_UNORM // Stencil formats {VK_FORMAT_S8_UINT, Attachable}, // S8_UINT diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index c4c30d807..7e7a80740 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -132,12 +132,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { const bool use_accelerated = rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); const bool is_srgb = use_accelerated && screen_info.is_srgb; - RenderScreenshot(*framebuffer, use_accelerated); - Frame* frame = present_manager.GetRenderFrame(); - blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); - scheduler.Flush(*frame->render_ready); - present_manager.Present(frame); + { + std::scoped_lock lock{rasterizer.LockCaches()}; + RenderScreenshot(*framebuffer, use_accelerated); + + Frame* frame = present_manager.GetRenderFrame(); + blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); + scheduler.Flush(*frame->render_ready); + present_manager.Present(frame); + } gpu.RendererFrameEndNotify(); rasterizer.TickFrame(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 590bc1c64..14e257cf7 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -56,10 +56,6 @@ public: return device.GetDriverName(); } - void NotifySurfaceChanged() override { - present_manager.NotifySurfaceChanged(); - } - private: void Report() const; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 31928bb94..52fc142d1 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -96,6 +96,7 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { switch (framebuffer.pixel_format) { case Service::android::PixelFormat::Rgba8888: + case Service::android::PixelFormat::Rgbx8888: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; case Service::android::PixelFormat::Rgb565: return VK_FORMAT_R5G6B5_UNORM_PACK16; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a1ec1a100..22bf8cc77 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -356,7 +356,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, .ignore_nan_fp_comparisons = false, .has_broken_spirv_subgroup_mask_vector_extract_dynamic = - driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; + driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, + .has_broken_robust = + device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, + }; + host_info = Shader::HostTranslateInfo{ .support_float64 = device.IsFloat64Supported(), .support_float16 = device.IsFloat16Supported(), diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp index d681bd22a..2ef36583b 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -103,8 +103,7 @@ PresentManager::PresentManager(const vk::Instance& instance_, surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())}, use_present_thread{Settings::values.async_presentation.GetValue()}, - image_count{swapchain.GetImageCount()}, last_render_surface{ - render_window_.GetWindowInfo().render_surface} { + image_count{swapchain.GetImageCount()} { auto& dld = device.GetLogical(); cmdpool = dld.CreateCommandPool({ @@ -289,44 +288,36 @@ void PresentManager::PresentThread(std::stop_token token) { } } -void PresentManager::NotifySurfaceChanged() { -#ifdef ANDROID - std::scoped_lock lock{recreate_surface_mutex}; - recreate_surface_cv.notify_one(); -#endif +void PresentManager::RecreateSwapchain(Frame* frame) { + swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb); + image_count = swapchain.GetImageCount(); } void PresentManager::CopyToSwapchain(Frame* frame) { - MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); - - const auto recreate_swapchain = [&] { - swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb); - image_count = swapchain.GetImageCount(); - }; - -#ifdef ANDROID - std::unique_lock lock{recreate_surface_mutex}; - - const auto needs_recreation = [&] { - if (last_render_surface != render_window.GetWindowInfo().render_surface) { - return true; - } - if (swapchain.NeedsRecreation(frame->is_srgb)) { - return true; + bool requires_recreation = false; + + while (true) { + try { + // Recreate surface and swapchain if needed. + if (requires_recreation) { + surface = CreateSurface(instance, render_window.GetWindowInfo()); + RecreateSwapchain(frame); + } + + // Draw to swapchain. + return CopyToSwapchainImpl(frame); + } catch (const vk::Exception& except) { + if (except.GetResult() != VK_ERROR_SURFACE_LOST_KHR) { + throw; + } + + requires_recreation = true; } - return false; - }; - - recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400), - [&]() { return !needs_recreation(); }); - - // If the frontend recreated the surface, recreate the renderer surface and swapchain. - if (last_render_surface != render_window.GetWindowInfo().render_surface) { - last_render_surface = render_window.GetWindowInfo().render_surface; - surface = CreateSurface(instance, render_window.GetWindowInfo()); - recreate_swapchain(); } -#endif +} + +void PresentManager::CopyToSwapchainImpl(Frame* frame) { + MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); // If the size or colorspace of the incoming frames has changed, recreate the swapchain // to account for that. @@ -334,11 +325,11 @@ void PresentManager::CopyToSwapchain(Frame* frame) { const bool size_changed = swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height; if (srgb_changed || size_changed) { - recreate_swapchain(); + RecreateSwapchain(frame); } while (swapchain.AcquireNextImage()) { - recreate_swapchain(); + RecreateSwapchain(frame); } const vk::CommandBuffer cmdbuf{frame->cmdbuf}; @@ -488,4 +479,4 @@ void PresentManager::CopyToSwapchain(Frame* frame) { swapchain.Present(render_semaphore); } -} // namespace Vulkan
\ No newline at end of file +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h index 83e859416..a3d825fe6 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.h +++ b/src/video_core/renderer_vulkan/vk_present_manager.h @@ -54,14 +54,15 @@ public: /// Waits for the present thread to finish presenting all queued frames. void WaitPresent(); - /// This is called to notify the rendering backend of a surface change - void NotifySurfaceChanged(); - private: void PresentThread(std::stop_token token); void CopyToSwapchain(Frame* frame); + void CopyToSwapchainImpl(Frame* frame); + + void RecreateSwapchain(Frame* frame); + private: const vk::Instance& instance; Core::Frontend::EmuWindow& render_window; @@ -76,16 +77,13 @@ private: std::queue<Frame*> free_queue; std::condition_variable_any frame_cv; std::condition_variable free_cv; - std::condition_variable recreate_surface_cv; std::mutex swapchain_mutex; - std::mutex recreate_surface_mutex; std::mutex queue_mutex; std::mutex free_mutex; std::jthread present_thread; bool blit_supported; bool use_present_thread; std::size_t image_count{}; - void* last_render_surface{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 2edaafa7e..66c03bf17 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -1436,6 +1436,7 @@ void QueryCacheRuntime::Barriers(bool is_prebarrier) { .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; + impl->scheduler.RequestOutsideRenderPassOperationContext(); if (is_prebarrier) { impl->scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1628d76d6..059b7cb40 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -13,6 +13,7 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/control/channel_state.h" #include "video_core/engines/draw_manager.h" #include "video_core/engines/kepler_compute.h" @@ -198,7 +199,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { if (!pipeline) { return; } - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + std::scoped_lock lock{LockCaches()}; // update engine as channel may be different. pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); @@ -285,6 +286,7 @@ void RasterizerVulkan::DrawTexture() { query_cache.NotifySegment(true); + std::scoped_lock l{texture_cache.mutex}; texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.UpdateRenderTargets(false); @@ -422,7 +424,8 @@ void RasterizerVulkan::Clear(u32 layer_count) { return; } - if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) { + if (use_stencil && framebuffer->HasAspectStencilBit() && regs.stencil_front_mask != 0xFF && + regs.stencil_front_mask != 0) { Region2D dst_region = { Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width), @@ -707,6 +710,7 @@ void RasterizerVulkan::TiledCacheBarrier() { } void RasterizerVulkan::FlushCommands() { + std::scoped_lock lock{LockCaches()}; if (draw_counter == 0) { return; } @@ -804,6 +808,7 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } + std::scoped_lock lock{LockCaches()}; if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); @@ -974,6 +979,19 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs if (!state_tracker.TouchScissors()) { return; } + if (!regs.viewport_scale_offset_enabled) { + const auto x = static_cast<float>(regs.surface_clip.x); + const auto y = static_cast<float>(regs.surface_clip.y); + const auto width = static_cast<float>(regs.surface_clip.width); + const auto height = static_cast<float>(regs.surface_clip.height); + VkRect2D scissor; + scissor.offset.x = static_cast<u32>(x); + scissor.offset.y = static_cast<u32>(y); + scissor.extent.width = static_cast<u32>(width != 0.0f ? width : 1.0f); + scissor.extent.height = static_cast<u32>(height != 0.0f ? height : 1.0f); + scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissor); }); + return; + } u32 up_scale = 1; u32 down_shift = 0; if (texture_cache.IsRescaling()) { @@ -1485,7 +1503,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) { CreateChannel(channel); { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + std::scoped_lock lock{LockCaches()}; texture_cache.CreateChannel(channel); buffer_cache.CreateChannel(channel); } @@ -1498,7 +1516,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { const s32 channel_id = channel.bind_id; BindToChannel(channel_id); { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + std::scoped_lock lock{LockCaches()}; texture_cache.BindToChannel(channel_id); buffer_cache.BindToChannel(channel_id); } @@ -1511,7 +1529,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { void RasterizerVulkan::ReleaseChannel(s32 channel_id) { EraseChannel(channel_id); { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + std::scoped_lock lock{LockCaches()}; texture_cache.EraseChannel(channel_id); buffer_cache.EraseChannel(channel_id); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ad069556c..ce3dfbaab 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -133,6 +133,10 @@ public: void ReleaseChannel(s32 channel_id) override; + std::scoped_lock<std::recursive_mutex, std::recursive_mutex> LockCaches() { + return std::scoped_lock{buffer_cache.mutex, texture_cache.mutex}; + } + private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index ae9f1de64..7746a88d3 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -19,7 +19,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat VkSampleCountFlagBits samples) { using MaxwellToVK::SurfaceFormat; return { - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .flags = {}, .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, .samples = samples, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index ce92f66ab..b278614e6 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -24,25 +24,38 @@ using namespace Common::Literals; // Maximum potential alignment of a Vulkan buffer constexpr VkDeviceSize MAX_ALIGNMENT = 256; -// Maximum size to put elements in the stream buffer -constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; // Stream buffer size in bytes -constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; -constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; +constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB; -size_t Region(size_t iterator) noexcept { - return iterator / REGION_SIZE; +size_t GetStreamBufferSize(const Device& device) { + VkDeviceSize size{0}; + if (device.HasDebuggingToolAttached()) { + ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) { + size = std::max(size, heap.size); + }); + // If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be + // loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue + // as the heap will be much larger. + if (size <= 256_MiB) { + size = size * 40 / 100; + } + } else { + size = MAX_STREAM_BUFFER_SIZE; + } + return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE); } } // Anonymous namespace StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_) - : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { + : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, + stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size / + StagingBufferPool::NUM_SYNCS} { VkBufferCreateInfo stream_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = STREAM_BUFFER_SIZE, + .size = stream_buffer_size, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, @@ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem StagingBufferPool::~StagingBufferPool() = default; StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { - if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { + if (!deferred && usage == MemoryUsage::Upload && size <= region_size) { return GetStreamBuffer(size); } return GetStagingBuffer(size, usage, deferred); @@ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { used_iterator = iterator; free_iterator = std::max(free_iterator, iterator + size); - if (iterator + size >= STREAM_BUFFER_SIZE) { + if (iterator + size >= stream_buffer_size) { std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, current_tick); used_iterator = 0; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 5f69f08b1..d3deb9072 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -90,6 +90,9 @@ private: void ReleaseCache(MemoryUsage usage); void ReleaseLevel(StagingBuffersCache& cache, size_t log2); + size_t Region(size_t iter) const noexcept { + return iter / region_size; + } const Device& device; MemoryAllocator& memory_allocator; @@ -97,6 +100,8 @@ private: vk::Buffer stream_buffer; std::span<u8> stream_pointer; + VkDeviceSize stream_buffer_size; + VkDeviceSize region_size; size_t iterator = 0; size_t used_iterator = 0; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index d56558a83..daaea2979 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -190,7 +190,7 @@ void SetupDirtySpecialOps(Tables& tables) { void SetupDirtyViewportSwizzles(Tables& tables) { static constexpr size_t swizzle_offset = 6; for (size_t index = 0; index < Regs::NumViewports; ++index) { - tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = + tables[1][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = ViewportSwizzles; } } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 71fdec809..de34f6d49 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -238,6 +238,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { return any_r ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; case PixelFormat::D16_UNORM: case PixelFormat::D32_FLOAT: + case PixelFormat::X8_D24_UNORM: return VK_IMAGE_ASPECT_DEPTH_BIT; case PixelFormat::S8_UINT: return VK_IMAGE_ASPECT_STENCIL_BIT; @@ -1193,6 +1194,11 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im return blit_image_helper.ConvertD16ToR16(dst, src_view); } break; + case PixelFormat::A8B8G8R8_SRGB: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } + break; case PixelFormat::A8B8G8R8_UNORM: if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); @@ -1200,6 +1206,19 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im if (src_view.format == PixelFormat::D24_UNORM_S8_UINT) { return blit_image_helper.ConvertS8D24ToABGR8(dst, src_view); } + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } + break; + case PixelFormat::B8G8R8A8_SRGB: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } + break; + case PixelFormat::B8G8R8A8_UNORM: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { @@ -1218,6 +1237,12 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; case PixelFormat::D32_FLOAT: + if (src_view.format == PixelFormat::A8B8G8R8_UNORM || + src_view.format == PixelFormat::B8G8R8A8_UNORM || + src_view.format == PixelFormat::A8B8G8R8_SRGB || + src_view.format == PixelFormat::B8G8R8A8_SRGB) { + return blit_image_helper.ConvertABGR8ToD32F(dst, src_view); + } if (src_view.format == PixelFormat::R32_FLOAT) { return blit_image_helper.ConvertR32ToD32(dst, src_view); } @@ -1526,15 +1551,15 @@ bool Image::IsRescaled() const noexcept { } bool Image::ScaleUp(bool ignore) { + const auto& resolution = runtime->resolution; + if (!resolution.active) { + return false; + } if (True(flags & ImageFlagBits::Rescaled)) { return false; } ASSERT(info.type != ImageType::Linear); flags |= ImageFlagBits::Rescaled; - const auto& resolution = runtime->resolution; - if (!resolution.active) { - return false; - } has_scaled = true; if (!scaled_image) { const bool is_2d = info.type == ImageType::e2D; @@ -1563,15 +1588,15 @@ bool Image::ScaleUp(bool ignore) { } bool Image::ScaleDown(bool ignore) { + const auto& resolution = runtime->resolution; + if (!resolution.active) { + return false; + } if (False(flags & ImageFlagBits::Rescaled)) { return false; } ASSERT(info.type != ImageType::Linear); flags &= ~ImageFlagBits::Rescaled; - const auto& resolution = runtime->resolution; - if (!resolution.active) { - return false; - } current_image = *original_image; if (ignore) { return true; @@ -2009,4 +2034,32 @@ void TextureCacheRuntime::AccelerateImageUpload( ASSERT(false); } +void TextureCacheRuntime::TransitionImageLayout(Image& image) { + if (!image.ExchangeInitialization()) { + VkImageMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_NONE, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image.Handle(), + .subresourceRange{ + .aspectMask = image.AspectMask(), + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([barrier](vk::CommandBuffer cmdbuf) { + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier); + }); + } +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d6c5a15cc..7a0807709 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -92,6 +92,8 @@ public: void InsertUploadMemoryBarrier() {} + void TransitionImageLayout(Image& image); + bool HasBrokenTextureViewFormats() const noexcept { // No known Vulkan driver has broken image views return false; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index e16cd5e73..5b3c7aa5a 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -85,6 +85,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { return PixelFormat::S8_UINT; case Tegra::DepthFormat::Z32_FLOAT_X24S8_UINT: return PixelFormat::D32_FLOAT_S8_UINT; + case Tegra::DepthFormat::X8Z24_UNORM: + return PixelFormat::X8_D24_UNORM; default: UNIMPLEMENTED_MSG("Unimplemented format={}", format); return PixelFormat::S8_UINT_D24_UNORM; @@ -202,6 +204,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) PixelFormat PixelFormatFromGPUPixelFormat(Service::android::PixelFormat format) { switch (format) { case Service::android::PixelFormat::Rgba8888: + case Service::android::PixelFormat::Rgbx8888: return PixelFormat::A8B8G8R8_UNORM; case Service::android::PixelFormat::Rgb565: return PixelFormat::R5G6B5_UNORM; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 9b9c4d9bc..a5e8e2f62 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -115,6 +115,7 @@ enum class PixelFormat { // Depth formats D32_FLOAT = MaxColorFormat, D16_UNORM, + X8_D24_UNORM, MaxDepthFormat, @@ -251,6 +252,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{ 1, // E5B9G9R9_FLOAT 1, // D32_FLOAT 1, // D16_UNORM + 1, // X8_D24_UNORM 1, // S8_UINT 1, // D24_UNORM_S8_UINT 1, // S8_UINT_D24_UNORM @@ -360,6 +362,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{ 1, // E5B9G9R9_FLOAT 1, // D32_FLOAT 1, // D16_UNORM + 1, // X8_D24_UNORM 1, // S8_UINT 1, // D24_UNORM_S8_UINT 1, // S8_UINT_D24_UNORM @@ -469,6 +472,7 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{ 32, // E5B9G9R9_FLOAT 32, // D32_FLOAT 16, // D16_UNORM + 32, // X8_D24_UNORM 8, // S8_UINT 32, // D24_UNORM_S8_UINT 32, // S8_UINT_D24_UNORM diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 56307d030..8c774f512 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -138,10 +138,16 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, return PixelFormat::E5B9G9R9_FLOAT; case Hash(TextureFormat::Z32, FLOAT): return PixelFormat::D32_FLOAT; + case Hash(TextureFormat::Z32, FLOAT, UINT, UINT, UINT, LINEAR): + return PixelFormat::D32_FLOAT; case Hash(TextureFormat::Z16, UNORM): return PixelFormat::D16_UNORM; case Hash(TextureFormat::Z16, UNORM, UINT, UINT, UINT, LINEAR): return PixelFormat::D16_UNORM; + case Hash(TextureFormat::X8Z24, UNORM): + return PixelFormat::X8_D24_UNORM; + case Hash(TextureFormat::X8Z24, UNORM, UINT, UINT, UINT, LINEAR): + return PixelFormat::X8_D24_UNORM; case Hash(TextureFormat::Z24S8, UINT, UNORM, UNORM, UNORM, LINEAR): return PixelFormat::S8_UINT_D24_UNORM; case Hash(TextureFormat::Z24S8, UINT, UNORM, UINT, UINT, LINEAR): diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index 6279d8e9e..2b7e0df72 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -10,19 +10,23 @@ #include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/render_targets.h" +#include "video_core/texture_cache/samples_helper.h" namespace VideoCommon { std::string Name(const ImageBase& image) { const GPUVAddr gpu_addr = image.gpu_addr; const ImageInfo& info = image.info; - const u32 width = info.size.width; - const u32 height = info.size.height; + u32 width = info.size.width; + u32 height = info.size.height; const u32 depth = info.size.depth; const u32 num_layers = image.info.resources.layers; const u32 num_levels = image.info.resources.levels; std::string resource; if (image.info.num_samples > 1) { + const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(image.info.num_samples); + width >>= samples_x; + height >>= samples_y; resource += fmt::format(":{}xMSAA", image.info.num_samples); } if (num_layers > 1) { diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index 9ee57a076..cabbfcb2d 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -211,6 +211,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str return "D32_FLOAT"; case PixelFormat::D16_UNORM: return "D16_UNORM"; + case PixelFormat::X8_D24_UNORM: + return "X8_D24_UNORM"; case PixelFormat::S8_UINT: return "S8_UINT"; case PixelFormat::D24_UNORM_S8_UINT: diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 0c5f4450d..18b9250f9 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -85,6 +85,7 @@ bool ImageViewBase::SupportsAnisotropy() const noexcept { // Depth formats case PixelFormat::D32_FLOAT: case PixelFormat::D16_UNORM: + case PixelFormat::X8_D24_UNORM: // Stencil formats case PixelFormat::S8_UINT: // DepthStencil formats diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h index 203ac1b11..2ee2f8312 100644 --- a/src/video_core/texture_cache/samples_helper.h +++ b/src/video_core/texture_cache/samples_helper.h @@ -24,7 +24,7 @@ namespace VideoCommon { return {2, 2}; } ASSERT_MSG(false, "Invalid number of samples={}", num_samples); - return {1, 1}; + return {0, 0}; } [[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1bdb0def5..d575c57ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1016,6 +1016,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); + runtime.TransitionImageLayout(image); return; } if (True(image.flags & ImageFlagBits::AsynchronousDecode)) { diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 0a86ce139..15596c925 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -68,6 +68,7 @@ struct LevelInfo { Extent2D tile_size; u32 bpp_log2; u32 tile_width_spacing; + u32 num_levels; }; [[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) { @@ -118,11 +119,11 @@ template <u32 GOB_EXTENT> } [[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, - u32 level) { + u32 level, u32 num_levels) { return { .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level), .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level), - .depth = level == 0 + .depth = level == 0 && num_levels == 1 ? block_size.depth : AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level), }; @@ -166,7 +167,7 @@ template <u32 GOB_EXTENT> } [[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { - if (level == 0) { + if (level == 0 && info.num_levels == 1) { return Extent3D{ .width = info.block.width, .height = info.block.height, @@ -257,7 +258,7 @@ template <u32 GOB_EXTENT> } [[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block, - u32 tile_width_spacing) { + u32 tile_width_spacing, u32 num_levels) { const u32 bytes_per_block = BytesPerBlock(format); return { .size = @@ -270,16 +271,18 @@ template <u32 GOB_EXTENT> .tile_size = DefaultBlockSize(format), .bpp_log2 = BytesPerBlockLog2(bytes_per_block), .tile_width_spacing = tile_width_spacing, + .num_levels = num_levels, }; } [[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) { - return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing); + return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing, + info.resources.levels); } [[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block, u32 tile_width_spacing, u32 level) { - const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing); + const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing, level); u32 offset = 0; for (u32 current_level = 0; current_level < level; ++current_level) { offset += CalculateLevelSize(info, current_level); @@ -466,7 +469,7 @@ template <u32 GOB_EXTENT> }; const u32 bpp_log2 = BytesPerBlockLog2(info.format); const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing); - const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0); + const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0, info.resources.levels); return Extent3D{ .width = Common::AlignUpLog2(num_tiles.width, alignment), .height = Common::AlignUpLog2(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height), @@ -533,7 +536,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr UNIMPLEMENTED_IF(copy.image_extent != level_size); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); - const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + const Extent3D block = + AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels); size_t host_offset = copy.buffer_offset; @@ -698,7 +702,7 @@ u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) { const Extent2D tile_size = DefaultBlockSize(info.format); const Extent3D level_size = AdjustMipSize(info.size, level); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); - const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level); + const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level, info.resources.levels); const u32 bpp_log2 = BytesPerBlockLog2(info.format); return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing); } @@ -887,7 +891,8 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory .image_extent = level_size, }; const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); - const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + const Extent3D block = + AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels); const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2); size_t guest_layer_offset = 0; @@ -1041,7 +1046,7 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) { const Extent2D tile_size = DefaultBlockSize(info.format); const Extent3D level_size = AdjustMipSize(info.size, level); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); - return AdjustMipBlockSize(num_tiles, level_info.block, level); + return AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels); } boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) { @@ -1063,7 +1068,8 @@ boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const I for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); - const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + const Extent3D block = + AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels); params[level] = SwizzleParameters{ .num_tiles = num_tiles, .block = block, @@ -1292,11 +1298,11 @@ u32 MapSizeBytes(const ImageBase& image) { } } -static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == +static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0, 1}, 0) == 0x7f8000); -static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x40000); +static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0, 1}, 0) == 0x40000); -static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0}, 0) == 0x40000); +static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0, 1}, 0) == 0x40000); static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) == 0x2afc00); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 3960b135a..e518756d2 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -83,12 +83,6 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{ } // namespace Alternatives -enum class NvidiaArchitecture { - AmpereOrNewer, - Turing, - VoltaOrOlder, -}; - template <typename T> void SetNext(void**& next, T& data) { *next = &data; @@ -200,6 +194,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica VK_FORMAT_BC7_UNORM_BLOCK, VK_FORMAT_D16_UNORM, VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_X8_D24_UNORM_PACK32, VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT_S8_UINT, @@ -321,13 +316,38 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, physical.GetProperties2(physical_properties); if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { // Only Ampere and newer support this feature - return NvidiaArchitecture::AmpereOrNewer; + // TODO: Find a way to differentiate Ampere and Ada + return NvidiaArchitecture::Arch_AmpereOrNewer; } + return NvidiaArchitecture::Arch_Turing; } - if (exts.contains(VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { - return NvidiaArchitecture::Turing; + + if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) { + VkPhysicalDeviceBlendOperationAdvancedPropertiesEXT advanced_blending_props{}; + advanced_blending_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_PROPERTIES_EXT; + VkPhysicalDeviceProperties2 physical_properties{}; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + physical_properties.pNext = &advanced_blending_props; + physical.GetProperties2(physical_properties); + if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) { + return NvidiaArchitecture::Arch_Maxwell; + } + + if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) { + VkPhysicalDeviceConservativeRasterizationPropertiesEXT conservative_raster_props{}; + conservative_raster_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT; + physical_properties.pNext = &conservative_raster_props; + physical.GetProperties2(physical_properties); + if (conservative_raster_props.degenerateLinesRasterized) { + return NvidiaArchitecture::Arch_Volta; + } + return NvidiaArchitecture::Arch_Pascal; + } } - return NvidiaArchitecture::VoltaOrOlder; + + return NvidiaArchitecture::Arch_KeplerOrOlder; } std::vector<const char*> ExtensionListForVulkan( @@ -407,6 +427,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); } + if (is_nvidia) { + nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions); + } + SetupFamilies(surface); const auto queue_cis = GetDeviceQueueCreateInfos(); @@ -503,20 +527,15 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR if (is_nvidia) { const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; - const auto arch = GetNvidiaArchitecture(physical, supported_extensions); - switch (arch) { - case NvidiaArchitecture::AmpereOrNewer: + const auto arch = GetNvidiaArch(); + if (arch >= NvidiaArchitecture::Arch_AmpereOrNewer) { LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); features.shader_float16_int8.shaderFloat16 = false; - break; - case NvidiaArchitecture::Turing: - break; - case NvidiaArchitecture::VoltaOrOlder: + } else if (arch <= NvidiaArchitecture::Arch_Volta) { if (nv_major_version < 527) { LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); } - break; } if (nv_major_version >= 510) { LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits"); @@ -661,7 +680,15 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR "ANV drivers 22.3.0 to 23.1.0 have broken VK_KHR_push_descriptor"); RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); } + } else if (extensions.push_descriptor && is_nvidia) { + const auto arch = GetNvidiaArch(); + if (arch <= NvidiaArchitecture::Arch_Pascal) { + LOG_WARNING(Render_Vulkan, + "Pascal and older architectures have broken VK_KHR_push_descriptor"); + RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); + } } + if (is_mvk) { LOG_WARNING(Render_Vulkan, "MVK driver breaks when using more than 16 vertex attributes/bindings"); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 282a2925d..b213ed7dd 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -177,6 +177,15 @@ enum class FormatType { Linear, Optimal, Buffer }; /// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup). const u32 GuestWarpSize = 32; +enum class NvidiaArchitecture { + Arch_KeplerOrOlder, + Arch_Maxwell, + Arch_Pascal, + Arch_Volta, + Arch_Turing, + Arch_AmpereOrNewer, +}; + /// Handles data specific to a physical device. class Device { public: @@ -670,6 +679,14 @@ public: return false; } + bool IsNvidia() const noexcept { + return properties.driver.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY; + } + + NvidiaArchitecture GetNvidiaArch() const noexcept { + return nvidia_arch; + } + private: /// Checks if the physical device is suitable and configures the object state /// with all necessary info about its properties. @@ -788,6 +805,7 @@ private: bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. u64 device_access_memory{}; ///< Total size of device local memory in bytes. u32 sets_per_pool{}; ///< Sets per Description Pool + NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer}; // Telemetry parameters std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 3ef381a38..8dd1667f3 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -9,6 +9,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" +#include "common/literals.h" #include "common/logging/log.h" #include "common/polyfill_ranges.h" #include "video_core/vulkan_common/vma.h" @@ -65,12 +66,12 @@ struct Range { switch (usage) { case MemoryUsage::Upload: case MemoryUsage::Stream: - return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + return VMA_ALLOCATION_CREATE_MAPPED_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; case MemoryUsage::Download: - return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; case MemoryUsage::DeviceLocal: - return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | - VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT; + return {}; } return {}; } @@ -212,7 +213,20 @@ MemoryAllocator::MemoryAllocator(const Device& device_) : device{device_}, allocator{device.GetAllocator()}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, buffer_image_granularity{ - device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} + device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { + // GPUs not supporting rebar may only have a region with less than 256MB host visible/device + // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to + // the heap running out of memory. With RenderDoc attached and only a small host/device region, + // only allow the stream buffer in this memory heap. + if (device.HasDebuggingToolAttached()) { + using namespace Common::Literals; + ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) { + if (heap.size <= 256_MiB) { + valid_memory_types &= ~(1u << index); + } + }); + } +} MemoryAllocator::~MemoryAllocator() = default; @@ -239,12 +253,11 @@ vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const { vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const { const VmaAllocationCreateInfo alloc_ci = { - .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT | - MemoryUsageVmaFlags(usage), + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), .usage = MemoryUsageVma(usage), .requiredFlags = 0, .preferredFlags = MemoryUsagePreferedVmaFlags(usage), - .memoryTypeBits = 0, + .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, .pool = VK_NULL_HANDLE, .pUserData = nullptr, .priority = 0.f, diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index f449bc8d0..38a182bcb 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -7,6 +7,7 @@ #include <span> #include <vector> #include "common/common_types.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" VK_DEFINE_HANDLE(VmaAllocator) @@ -26,6 +27,18 @@ enum class MemoryUsage { Stream, ///< Requests device local host visible buffer, falling back host memory. }; +template <typename F> +void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) { + auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties; + for (size_t i = 0; i < memory_props.memoryTypeCount; i++) { + auto& memory_type = memory_props.memoryTypes[i]; + if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && + (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { + f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]); + } + } +} + /// Ownership handle of a memory commitment. /// Points to a subregion of a memory allocation. class MemoryCommit { @@ -124,6 +137,7 @@ private: std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers // and optimal images + u32 valid_memory_types{~0u}; }; } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2f3254a97..70cf14afa 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -522,7 +522,7 @@ Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char .applicationVersion = VK_MAKE_VERSION(0, 1, 0), .pEngineName = "yuzu Emulator", .engineVersion = VK_MAKE_VERSION(0, 1, 0), - .apiVersion = version, + .apiVersion = VK_API_VERSION_1_3, }; const VkInstanceCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 1e3c0fa64..0487cd3b6 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -117,6 +117,9 @@ public: virtual ~Exception() = default; const char* what() const noexcept override; + VkResult GetResult() const noexcept { + return result; + } private: VkResult result; diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index d23c1e9d0..33e1fb663 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -195,6 +195,8 @@ add_executable(yuzu multiplayer/state.cpp multiplayer/state.h multiplayer/validation.h + play_time_manager.cpp + play_time_manager.h precompiled_headers.h qt_common.cpp qt_common.h @@ -382,7 +384,7 @@ if (USE_DISCORD_PRESENCE) discord_impl.cpp discord_impl.h ) - target_link_libraries(yuzu PRIVATE DiscordRPC::discord-rpc httplib::httplib) + target_link_libraries(yuzu PRIVATE DiscordRPC::discord-rpc httplib::httplib Qt${QT_MAJOR_VERSION}::Network) target_compile_definitions(yuzu PRIVATE -DUSE_DISCORD_PRESENCE) endif() diff --git a/src/yuzu/applets/qt_controller.cpp b/src/yuzu/applets/qt_controller.cpp index d15559518..515cb7ce6 100644 --- a/src/yuzu/applets/qt_controller.cpp +++ b/src/yuzu/applets/qt_controller.cpp @@ -23,6 +23,7 @@ #include "yuzu/configuration/configure_vibration.h" #include "yuzu/configuration/input_profiles.h" #include "yuzu/main.h" +#include "yuzu/util/controller_navigation.h" namespace { @@ -132,6 +133,8 @@ QtControllerSelectorDialog::QtControllerSelectorDialog( ui->checkboxPlayer7Connected, ui->checkboxPlayer8Connected, }; + ui->labelError->setVisible(false); + // Setup/load everything prior to setting up connections. // This avoids unintentionally changing the states of elements while loading them in. SetSupportedControllers(); @@ -143,6 +146,8 @@ QtControllerSelectorDialog::QtControllerSelectorDialog( LoadConfiguration(); + controller_navigation = new ControllerNavigation(system.HIDCore(), this); + for (std::size_t i = 0; i < NUM_PLAYERS; ++i) { SetExplainText(i); UpdateControllerIcon(i); @@ -150,16 +155,27 @@ QtControllerSelectorDialog::QtControllerSelectorDialog( UpdateBorderColor(i); connect(player_groupboxes[i], &QGroupBox::toggled, [this, i](bool checked) { - if (checked) { - for (std::size_t index = 0; index <= i; ++index) { - connected_controller_checkboxes[index]->setChecked(checked); - } - } else { - for (std::size_t index = i; index < NUM_PLAYERS; ++index) { - connected_controller_checkboxes[index]->setChecked(checked); - } + // Reconnect current controller if it was the last one checked + // (player number was reduced by more than one) + const bool reconnect_first = !checked && i < player_groupboxes.size() - 1 && + player_groupboxes[i + 1]->isChecked(); + + // Ensures that connecting a controller changes the number of players + if (connected_controller_checkboxes[i]->isChecked() != checked) { + // Ensures that the players are always connected in sequential order + PropagatePlayerNumberChanged(i, checked, reconnect_first); } }); + connect(connected_controller_checkboxes[i], &QCheckBox::clicked, [this, i](bool checked) { + // Reconnect current controller if it was the last one checked + // (player number was reduced by more than one) + const bool reconnect_first = !checked && + i < connected_controller_checkboxes.size() - 1 && + connected_controller_checkboxes[i + 1]->isChecked(); + + // Ensures that the players are always connected in sequential order + PropagatePlayerNumberChanged(i, checked, reconnect_first); + }); connect(emulated_controllers[i], qOverload<int>(&QComboBox::currentIndexChanged), [this, i](int) { @@ -199,6 +215,12 @@ QtControllerSelectorDialog::QtControllerSelectorDialog( connect(ui->buttonBox, &QDialogButtonBox::accepted, this, &QtControllerSelectorDialog::ApplyConfiguration); + connect(controller_navigation, &ControllerNavigation::TriggerKeyboardEvent, + [this](Qt::Key key) { + QKeyEvent* event = new QKeyEvent(QEvent::KeyPress, key, Qt::NoModifier); + QCoreApplication::postEvent(this, event); + }); + // Enhancement: Check if the parameters have already been met before disconnecting controllers. // If all the parameters are met AND only allows a single player, // stop the constructor here as we do not need to continue. @@ -217,6 +239,7 @@ QtControllerSelectorDialog::QtControllerSelectorDialog( } QtControllerSelectorDialog::~QtControllerSelectorDialog() { + controller_navigation->UnloadController(); system.HIDCore().DisableAllControllerConfiguration(); } @@ -291,6 +314,31 @@ void QtControllerSelectorDialog::CallConfigureInputProfileDialog() { dialog.exec(); } +void QtControllerSelectorDialog::keyPressEvent(QKeyEvent* evt) { + const auto num_connected_players = static_cast<int>( + std::count_if(player_groupboxes.begin(), player_groupboxes.end(), + [](const QGroupBox* player) { return player->isChecked(); })); + + const auto min_supported_players = parameters.enable_single_mode ? 1 : parameters.min_players; + const auto max_supported_players = parameters.enable_single_mode ? 1 : parameters.max_players; + + if ((evt->key() == Qt::Key_Enter || evt->key() == Qt::Key_Return) && !parameters_met) { + // Display error message when trying to validate using "Enter" and "OK" button is disabled + ui->labelError->setVisible(true); + return; + } else if (evt->key() == Qt::Key_Left && num_connected_players > min_supported_players) { + // Remove a player if possible + connected_controller_checkboxes[num_connected_players - 1]->setChecked(false); + return; + } else if (evt->key() == Qt::Key_Right && num_connected_players < max_supported_players) { + // Add a player, if possible + ui->labelError->setVisible(false); + connected_controller_checkboxes[num_connected_players]->setChecked(true); + return; + } + QDialog::keyPressEvent(evt); +} + bool QtControllerSelectorDialog::CheckIfParametersMet() { // Here, we check and validate the current configuration against all applicable parameters. const auto num_connected_players = static_cast<int>( @@ -629,6 +677,29 @@ void QtControllerSelectorDialog::UpdateDockedState(bool is_handheld) { } } +void QtControllerSelectorDialog::PropagatePlayerNumberChanged(size_t player_index, bool checked, + bool reconnect_current) { + connected_controller_checkboxes[player_index]->setChecked(checked); + // Hide eventual error message about number of controllers + ui->labelError->setVisible(false); + + if (checked) { + // Check all previous buttons when checked + if (player_index > 0) { + PropagatePlayerNumberChanged(player_index - 1, checked); + } + } else { + // Unchecked all following buttons when unchecked + if (player_index < connected_controller_checkboxes.size() - 1) { + PropagatePlayerNumberChanged(player_index + 1, checked); + } + } + + if (reconnect_current) { + connected_controller_checkboxes[player_index]->setCheckState(Qt::Checked); + } +} + void QtControllerSelectorDialog::DisableUnsupportedPlayers() { const auto max_supported_players = parameters.enable_single_mode ? 1 : parameters.max_players; diff --git a/src/yuzu/applets/qt_controller.h b/src/yuzu/applets/qt_controller.h index 2fdc35857..e5372495d 100644 --- a/src/yuzu/applets/qt_controller.h +++ b/src/yuzu/applets/qt_controller.h @@ -34,6 +34,8 @@ class HIDCore; enum class NpadStyleIndex : u8; } // namespace Core::HID +class ControllerNavigation; + class QtControllerSelectorDialog final : public QDialog { Q_OBJECT @@ -46,6 +48,8 @@ public: int exec() override; + void keyPressEvent(QKeyEvent* evt) override; + private: // Applies the current configuration. void ApplyConfiguration(); @@ -96,6 +100,10 @@ private: // Updates the console mode. void UpdateDockedState(bool is_handheld); + // Enable preceding controllers or disable following ones + void PropagatePlayerNumberChanged(size_t player_index, bool checked, + bool reconnect_current = false); + // Disables and disconnects unsupported players based on the given parameters. void DisableUnsupportedPlayers(); @@ -110,6 +118,8 @@ private: Core::System& system; + ControllerNavigation* controller_navigation = nullptr; + // This is true if and only if all parameters are met. Otherwise, this is false. // This determines whether the "OK" button can be clicked to exit the applet. bool parameters_met{false}; diff --git a/src/yuzu/applets/qt_controller.ui b/src/yuzu/applets/qt_controller.ui index 729e921ee..6f7cb3c13 100644 --- a/src/yuzu/applets/qt_controller.ui +++ b/src/yuzu/applets/qt_controller.ui @@ -2624,13 +2624,53 @@ </spacer> </item> <item alignment="Qt::AlignBottom"> - <widget class="QDialogButtonBox" name="buttonBox"> - <property name="enabled"> - <bool>true</bool> - </property> - <property name="standardButtons"> - <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> - </property> + <widget class="QWidget" name="closeButtons" native="true"> + <layout class="QVBoxLayout" name="verticalLayout_46"> + <property name="spacing"> + <number>7</number> + </property> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="labelError"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="styleSheet"> + <string notr="true">QLabel { color : red; }</string> + </property> + <property name="text"> + <string>Not enough controllers</string> + </property> + <property name="alignment"> + <set>Qt::AlignCenter</set> + </property> + <property name="margin"> + <number>0</number> + </property> + </widget> + </item> + <item> + <widget class="QDialogButtonBox" name="buttonBox"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="standardButtons"> + <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> + </property> + </widget> + </item> + </layout> </widget> </item> </layout> diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 1de093447..baa3e55f3 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -114,7 +114,7 @@ const std::map<Settings::ShaderBackend, QString> Config::shader_backend_texts_ma // This must be in alphabetical order according to action name as it must have the same order as // UISetting::values.shortcuts, which is alphabetically ordered. // clang-format off -const std::array<UISettings::Shortcut, 22> Config::default_hotkeys{{ +const std::array<UISettings::Shortcut, 23> Config::default_hotkeys{{ {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Mute/Unmute")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+M"), QStringLiteral("Home+Dpad_Right"), Qt::WindowShortcut, false}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Volume Down")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("-"), QStringLiteral("Home+Dpad_Down"), Qt::ApplicationShortcut, true}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Volume Up")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("="), QStringLiteral("Home+Dpad_Up"), Qt::ApplicationShortcut, true}}, @@ -128,14 +128,15 @@ const std::array<UISettings::Shortcut, 22> Config::default_hotkeys{{ {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Fullscreen")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F11"), QStringLiteral("Home+B"), Qt::WindowShortcut, false}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Load File")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+O"), QStringLiteral(""), Qt::WidgetWithChildrenShortcut, false}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Load/Remove Amiibo")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F2"), QStringLiteral("Home+A"), Qt::WidgetWithChildrenShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Restart Emulation")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F6"), QStringLiteral(""), Qt::WindowShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Stop Emulation")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F5"), QStringLiteral(""), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Restart Emulation")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F6"), QStringLiteral("R+Plus+Minus"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Stop Emulation")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F5"), QStringLiteral("L+Plus+Minus"), Qt::WindowShortcut, false}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Record")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F7"), QStringLiteral(""), Qt::ApplicationShortcut, false}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Reset")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F6"), QStringLiteral(""), Qt::ApplicationShortcut, false}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Start/Stop")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F5"), QStringLiteral(""), Qt::ApplicationShortcut, false}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Filter Bar")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F"), QStringLiteral(""), Qt::WindowShortcut, false}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Framerate Limit")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+U"), QStringLiteral("Home+Y"), Qt::ApplicationShortcut, false}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Mouse Panning")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F9"), QStringLiteral(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Renderdoc Capture")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral(""), QStringLiteral(""), Qt::ApplicationShortcut, false}}, {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Status Bar")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+S"), QStringLiteral(""), Qt::WindowShortcut, false}}, }}; // clang-format on diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 727feebfb..74ec4f771 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -48,7 +48,7 @@ public: default_mouse_buttons; static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys; static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods; - static const std::array<UISettings::Shortcut, 22> default_hotkeys; + static const std::array<UISettings::Shortcut, 23> default_hotkeys; static const std::map<Settings::AntiAliasing, QString> anti_aliasing_texts_map; static const std::map<Settings::ScalingFilter, QString> scaling_filter_texts_map; diff --git a/src/yuzu/configuration/configure_audio.cpp b/src/yuzu/configuration/configure_audio.cpp index 9ccfb2435..81dd51ad3 100644 --- a/src/yuzu/configuration/configure_audio.cpp +++ b/src/yuzu/configuration/configure_audio.cpp @@ -42,6 +42,9 @@ void ConfigureAudio::Setup(const ConfigurationShared::Builder& builder) { for (auto* setting : Settings::values.linkage.by_category[category]) { settings.push_back(setting); } + for (auto* setting : UISettings::values.linkage.by_category[category]) { + settings.push_back(setting); + } }; push(Settings::Category::Audio); diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp index 0b2a965f8..68e21cd84 100644 --- a/src/yuzu/configuration/configure_hotkeys.cpp +++ b/src/yuzu/configuration/configure_hotkeys.cpp @@ -319,6 +319,13 @@ void ConfigureHotkeys::ApplyConfiguration(HotkeyRegistry& registry) { void ConfigureHotkeys::RestoreDefaults() { for (int r = 0; r < model->rowCount(); ++r) { const QStandardItem* parent = model->item(r, 0); + const int hotkey_size = static_cast<int>(Config::default_hotkeys.size()); + + if (hotkey_size != parent->rowCount()) { + QMessageBox::warning(this, tr("Invalid hotkey settings"), + tr("An error occurred. Please report this issue on github.")); + return; + } for (int r2 = 0; r2 < parent->rowCount(); ++r2) { model->item(r, 0) diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp index e8f9ebfd8..3dcad2701 100644 --- a/src/yuzu/configuration/configure_input.cpp +++ b/src/yuzu/configuration/configure_input.cpp @@ -101,13 +101,13 @@ void ConfigureInput::Initialize(InputCommon::InputSubsystem* input_subsystem, ui->tabPlayer5, ui->tabPlayer6, ui->tabPlayer7, ui->tabPlayer8, }; - player_connected = { + connected_controller_checkboxes = { ui->checkboxPlayer1Connected, ui->checkboxPlayer2Connected, ui->checkboxPlayer3Connected, ui->checkboxPlayer4Connected, ui->checkboxPlayer5Connected, ui->checkboxPlayer6Connected, ui->checkboxPlayer7Connected, ui->checkboxPlayer8Connected, }; - std::array<QLabel*, 8> player_connected_labels = { + std::array<QLabel*, 8> connected_controller_labels = { ui->label, ui->label_3, ui->label_4, ui->label_5, ui->label_6, ui->label_7, ui->label_8, ui->label_9, }; @@ -115,31 +115,37 @@ void ConfigureInput::Initialize(InputCommon::InputSubsystem* input_subsystem, for (std::size_t i = 0; i < player_tabs.size(); ++i) { player_tabs[i]->setLayout(new QHBoxLayout(player_tabs[i])); player_tabs[i]->layout()->addWidget(player_controllers[i]); - connect(player_controllers[i], &ConfigureInputPlayer::Connected, [&, i](bool is_connected) { - // Ensures that the controllers are always connected in sequential order - if (is_connected) { - for (std::size_t index = 0; index <= i; ++index) { - player_connected[index]->setChecked(is_connected); - } - } else { - for (std::size_t index = i; index < player_tabs.size(); ++index) { - player_connected[index]->setChecked(is_connected); - } + connect(player_controllers[i], &ConfigureInputPlayer::Connected, [this, i](bool checked) { + // Ensures that connecting a controller changes the number of players + if (connected_controller_checkboxes[i]->isChecked() != checked) { + // Ensures that the players are always connected in sequential order + PropagatePlayerNumberChanged(i, checked); } }); + connect(connected_controller_checkboxes[i], &QCheckBox::clicked, [this, i](bool checked) { + // Reconnect current controller if it was the last one checked + // (player number was reduced by more than one) + const bool reconnect_first = !checked && + i < connected_controller_checkboxes.size() - 1 && + connected_controller_checkboxes[i + 1]->isChecked(); + + // Ensures that the players are always connected in sequential order + PropagatePlayerNumberChanged(i, checked, reconnect_first); + }); connect(player_controllers[i], &ConfigureInputPlayer::RefreshInputDevices, this, &ConfigureInput::UpdateAllInputDevices); connect(player_controllers[i], &ConfigureInputPlayer::RefreshInputProfiles, this, &ConfigureInput::UpdateAllInputProfiles, Qt::QueuedConnection); - connect(player_connected[i], &QCheckBox::stateChanged, [this, i](int state) { + connect(connected_controller_checkboxes[i], &QCheckBox::stateChanged, [this, i](int state) { + // Keep activated controllers synced with the "Connected Controllers" checkboxes player_controllers[i]->ConnectPlayer(state == Qt::Checked); }); // Remove/hide all the elements that exceed max_players, if applicable. if (i >= max_players) { ui->tabWidget->removeTab(static_cast<int>(max_players)); - player_connected[i]->hide(); - player_connected_labels[i]->hide(); + connected_controller_checkboxes[i]->hide(); + connected_controller_labels[i]->hide(); } } // Only the first player can choose handheld mode so connect the signal just to player 1 @@ -183,6 +189,27 @@ void ConfigureInput::Initialize(InputCommon::InputSubsystem* input_subsystem, LoadConfiguration(); } +void ConfigureInput::PropagatePlayerNumberChanged(size_t player_index, bool checked, + bool reconnect_current) { + connected_controller_checkboxes[player_index]->setChecked(checked); + + if (checked) { + // Check all previous buttons when checked + if (player_index > 0) { + PropagatePlayerNumberChanged(player_index - 1, checked); + } + } else { + // Unchecked all following buttons when unchecked + if (player_index < connected_controller_checkboxes.size() - 1) { + PropagatePlayerNumberChanged(player_index + 1, checked); + } + } + + if (reconnect_current) { + connected_controller_checkboxes[player_index]->setCheckState(Qt::Checked); + } +} + QList<QWidget*> ConfigureInput::GetSubTabs() const { return { ui->tabPlayer1, ui->tabPlayer2, ui->tabPlayer3, ui->tabPlayer4, ui->tabPlayer5, @@ -233,17 +260,17 @@ void ConfigureInput::LoadConfiguration() { } void ConfigureInput::LoadPlayerControllerIndices() { - for (std::size_t i = 0; i < player_connected.size(); ++i) { + for (std::size_t i = 0; i < connected_controller_checkboxes.size(); ++i) { if (i == 0) { auto* handheld = system.HIDCore().GetEmulatedController(Core::HID::NpadIdType::Handheld); if (handheld->IsConnected()) { - player_connected[i]->setChecked(true); + connected_controller_checkboxes[i]->setChecked(true); continue; } } const auto* controller = system.HIDCore().GetEmulatedControllerByIndex(i); - player_connected[i]->setChecked(controller->IsConnected()); + connected_controller_checkboxes[i]->setChecked(controller->IsConnected()); } } diff --git a/src/yuzu/configuration/configure_input.h b/src/yuzu/configuration/configure_input.h index c89189c36..136cd3a0a 100644 --- a/src/yuzu/configuration/configure_input.h +++ b/src/yuzu/configuration/configure_input.h @@ -56,6 +56,9 @@ private: void UpdateDockedState(bool is_handheld); void UpdateAllInputDevices(); void UpdateAllInputProfiles(std::size_t player_index); + // Enable preceding controllers or disable following ones + void PropagatePlayerNumberChanged(size_t player_index, bool checked, + bool reconnect_current = false); /// Load configuration settings. void LoadConfiguration(); @@ -70,7 +73,8 @@ private: std::array<ConfigureInputPlayer*, 8> player_controllers; std::array<QWidget*, 8> player_tabs; - std::array<QCheckBox*, 8> player_connected; + // Checkboxes representing the "Connected Controllers". + std::array<QCheckBox*, 8> connected_controller_checkboxes; ConfigureInputAdvanced* advanced; Core::System& system; diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h index d4df43d73..d3255d2b4 100644 --- a/src/yuzu/configuration/configure_input_player.h +++ b/src/yuzu/configuration/configure_input_player.h @@ -75,7 +75,7 @@ public: void ClearAll(); signals: - /// Emitted when this controller is connected by the user. + /// Emitted when this controller is (dis)connected by the user. void Connected(bool connected); /// Emitted when the Handheld mode is selected (undocked with dual joycons attached). void HandheldStateChanged(bool is_handheld); @@ -183,9 +183,6 @@ private: /// Stores a pair of "Connected Controllers" combobox index and Controller Type enum. std::vector<std::pair<int, Core::HID::NpadStyleIndex>> index_controller_type_pairs; - static constexpr int PLAYER_COUNT = 8; - std::array<QCheckBox*, PLAYER_COUNT> player_connected_checkbox; - /// This will be the the setting function when an input is awaiting configuration. std::optional<std::function<void(const Common::ParamPackage&)>> input_setter; diff --git a/src/yuzu/configuration/configure_ui.cpp b/src/yuzu/configuration/configure_ui.cpp index a9fde9f4f..82f3b6e78 100644 --- a/src/yuzu/configuration/configure_ui.cpp +++ b/src/yuzu/configuration/configure_ui.cpp @@ -123,6 +123,8 @@ ConfigureUi::ConfigureUi(Core::System& system_, QWidget* parent) connect(ui->show_compat, &QCheckBox::stateChanged, this, &ConfigureUi::RequestGameListUpdate); connect(ui->show_size, &QCheckBox::stateChanged, this, &ConfigureUi::RequestGameListUpdate); connect(ui->show_types, &QCheckBox::stateChanged, this, &ConfigureUi::RequestGameListUpdate); + connect(ui->show_play_time, &QCheckBox::stateChanged, this, + &ConfigureUi::RequestGameListUpdate); connect(ui->game_icon_size_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, &ConfigureUi::RequestGameListUpdate); connect(ui->folder_icon_size_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), @@ -167,6 +169,7 @@ void ConfigureUi::ApplyConfiguration() { UISettings::values.show_compat = ui->show_compat->isChecked(); UISettings::values.show_size = ui->show_size->isChecked(); UISettings::values.show_types = ui->show_types->isChecked(); + UISettings::values.show_play_time = ui->show_play_time->isChecked(); UISettings::values.game_icon_size = ui->game_icon_size_combobox->currentData().toUInt(); UISettings::values.folder_icon_size = ui->folder_icon_size_combobox->currentData().toUInt(); UISettings::values.row_1_text_id = ui->row_1_text_combobox->currentData().toUInt(); @@ -179,6 +182,7 @@ void ConfigureUi::ApplyConfiguration() { const u32 height = ScreenshotDimensionToInt(ui->screenshot_height->currentText()); UISettings::values.screenshot_height.SetValue(height); + RequestGameListUpdate(); system.ApplySettings(); } @@ -194,6 +198,7 @@ void ConfigureUi::SetConfiguration() { ui->show_compat->setChecked(UISettings::values.show_compat.GetValue()); ui->show_size->setChecked(UISettings::values.show_size.GetValue()); ui->show_types->setChecked(UISettings::values.show_types.GetValue()); + ui->show_play_time->setChecked(UISettings::values.show_play_time.GetValue()); ui->game_icon_size_combobox->setCurrentIndex( ui->game_icon_size_combobox->findData(UISettings::values.game_icon_size.GetValue())); ui->folder_icon_size_combobox->setCurrentIndex( diff --git a/src/yuzu/configuration/configure_ui.ui b/src/yuzu/configuration/configure_ui.ui index cb66ef104..b8e648381 100644 --- a/src/yuzu/configuration/configure_ui.ui +++ b/src/yuzu/configuration/configure_ui.ui @@ -105,6 +105,13 @@ </widget> </item> <item> + <widget class="QCheckBox" name="show_play_time"> + <property name="text"> + <string>Show Play Time Column</string> + </property> + </widget> + </item> + <item> <layout class="QHBoxLayout" name="game_icon_size_qhbox_layout_2"> <item> <widget class="QLabel" name="game_icon_size_label"> diff --git a/src/yuzu/configuration/configure_vibration.cpp b/src/yuzu/configuration/configure_vibration.cpp index d765e808a..68c28b320 100644 --- a/src/yuzu/configuration/configure_vibration.cpp +++ b/src/yuzu/configuration/configure_vibration.cpp @@ -89,7 +89,7 @@ void ConfigureVibration::VibrateController(Core::HID::ControllerTriggerType type auto& player = Settings::values.players.GetValue()[player_index]; auto controller = hid_core.GetEmulatedControllerByIndex(player_index); - const int vibration_strenght = vibration_spinboxes[player_index]->value(); + const int vibration_strength = vibration_spinboxes[player_index]->value(); const auto& buttons = controller->GetButtonsValues(); bool button_is_pressed = false; @@ -105,10 +105,10 @@ void ConfigureVibration::VibrateController(Core::HID::ControllerTriggerType type return; } - const int old_vibration_enabled = player.vibration_enabled; - const bool old_vibration_strenght = player.vibration_strength; + const bool old_vibration_enabled = player.vibration_enabled; + const int old_vibration_strength = player.vibration_strength; player.vibration_enabled = true; - player.vibration_strength = vibration_strenght; + player.vibration_strength = vibration_strength; const Core::HID::VibrationValue vibration{ .low_amplitude = 1.0f, @@ -121,7 +121,7 @@ void ConfigureVibration::VibrateController(Core::HID::ControllerTriggerType type // Restore previous values player.vibration_enabled = old_vibration_enabled; - player.vibration_strength = old_vibration_strenght; + player.vibration_strength = old_vibration_strength; } void ConfigureVibration::StopVibrations() { diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp index 276bdbaba..3fe448f27 100644 --- a/src/yuzu/configuration/shared_translation.cpp +++ b/src/yuzu/configuration/shared_translation.cpp @@ -29,9 +29,10 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent) { INSERT(Settings, sink_id, "Output Engine:", ""); INSERT(Settings, audio_output_device_id, "Output Device:", ""); INSERT(Settings, audio_input_device_id, "Input Device:", ""); - INSERT(Settings, audio_muted, "Mute audio when in background", ""); + INSERT(Settings, audio_muted, "Mute audio", ""); INSERT(Settings, volume, "Volume:", ""); INSERT(Settings, dump_audio_commands, "", ""); + INSERT(UISettings, mute_when_in_background, "Mute audio when in background", ""); // Core INSERT(Settings, use_multi_core, "Multicore CPU Emulation", ""); @@ -156,6 +157,7 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent) { INSERT(UISettings, select_user_on_boot, "Prompt for user on game boot", ""); INSERT(UISettings, pause_when_in_background, "Pause emulation when in background", ""); INSERT(UISettings, confirm_before_closing, "Confirm exit while emulation is running", ""); + INSERT(UISettings, confirm_before_stopping, "Confirm before stopping emulation", ""); INSERT(UISettings, hide_mouse, "Hide mouse on inactivity", ""); INSERT(UISettings, controller_applet_disabled, "Disable controller applet", ""); @@ -382,6 +384,13 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) { translations->insert( {Settings::EnumMetadata<Settings::ConsoleMode>::Index(), {PAIR(ConsoleMode, Docked, "Docked"), PAIR(ConsoleMode, Handheld, "Handheld")}}); + translations->insert( + {Settings::EnumMetadata<Settings::ConfirmStop>::Index(), + { + PAIR(ConfirmStop, Ask_Always, "Always ask (Default)"), + PAIR(ConfirmStop, Ask_Based_On_Game, "Only if game specifies not to stop"), + PAIR(ConfirmStop, Ask_Never, "Never ask"), + }}); #undef PAIR #undef CTX_PAIR diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 0783a2430..7049c57b6 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -127,7 +127,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons return list; } - if (thread.GetOwnerProcess() == nullptr || !thread.GetOwnerProcess()->Is64BitProcess()) { + if (thread.GetOwnerProcess() == nullptr || !thread.GetOwnerProcess()->Is64Bit()) { return list; } diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index f254c1e1c..7e7d8e252 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -312,8 +312,10 @@ void GameList::OnFilterCloseClicked() { } GameList::GameList(FileSys::VirtualFilesystem vfs_, FileSys::ManualContentProvider* provider_, - Core::System& system_, GMainWindow* parent) - : QWidget{parent}, vfs{std::move(vfs_)}, provider{provider_}, system{system_} { + PlayTime::PlayTimeManager& play_time_manager_, Core::System& system_, + GMainWindow* parent) + : QWidget{parent}, vfs{std::move(vfs_)}, provider{provider_}, + play_time_manager{play_time_manager_}, system{system_} { watcher = new QFileSystemWatcher(this); connect(watcher, &QFileSystemWatcher::directoryChanged, this, &GameList::RefreshGameDirectory); @@ -340,6 +342,7 @@ GameList::GameList(FileSys::VirtualFilesystem vfs_, FileSys::ManualContentProvid tree_view->setColumnHidden(COLUMN_ADD_ONS, !UISettings::values.show_add_ons); tree_view->setColumnHidden(COLUMN_COMPATIBILITY, !UISettings::values.show_compat); + tree_view->setColumnHidden(COLUMN_PLAY_TIME, !UISettings::values.show_play_time); item_model->setSortRole(GameListItemPath::SortRole); connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::OnUpdateThemedIcons); @@ -377,7 +380,6 @@ void GameList::UnloadController() { GameList::~GameList() { UnloadController(); - emit ShouldCancelWorker(); } void GameList::SetFilterFocus() { @@ -394,6 +396,10 @@ void GameList::ClearFilter() { search_field->clear(); } +void GameList::WorkerEvent() { + current_worker->ProcessEvents(this); +} + void GameList::AddDirEntry(GameListDir* entry_items) { item_model->invisibleRootItem()->appendRow(entry_items); tree_view->setExpanded( @@ -548,6 +554,7 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); + QAction* remove_play_time_data = remove_menu->addAction(tr("Remove Play Time Data")); QAction* remove_cache_storage = remove_menu->addAction(tr("Remove Cache Storage")); QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache")); QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache")); @@ -560,9 +567,9 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QAction* verify_integrity = context_menu.addAction(tr("Verify Integrity")); QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard")); QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry")); -#ifndef WIN32 QMenu* shortcut_menu = context_menu.addMenu(tr("Create Shortcut")); QAction* create_desktop_shortcut = shortcut_menu->addAction(tr("Add to Desktop")); +#ifndef WIN32 QAction* create_applications_menu_shortcut = shortcut_menu->addAction(tr("Add to Applications Menu")); #endif @@ -622,6 +629,8 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); }); + connect(remove_play_time_data, &QAction::triggered, + [this, program_id]() { emit RemovePlayTimeRequested(program_id); }); connect(remove_cache_storage, &QAction::triggered, [this, program_id, path] { emit RemoveFileRequested(program_id, GameListRemoveTarget::CacheStorage, path); }); @@ -638,10 +647,10 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri connect(navigate_to_gamedb_entry, &QAction::triggered, [this, program_id]() { emit NavigateToGamedbEntryRequested(program_id, compatibility_list); }); -#ifndef WIN32 connect(create_desktop_shortcut, &QAction::triggered, [this, program_id, path]() { emit CreateShortcut(program_id, path, GameListShortcutTarget::Desktop); }); +#ifndef WIN32 connect(create_applications_menu_shortcut, &QAction::triggered, [this, program_id, path]() { emit CreateShortcut(program_id, path, GameListShortcutTarget::Applications); }); @@ -790,6 +799,7 @@ void GameList::RetranslateUI() { item_model->setHeaderData(COLUMN_ADD_ONS, Qt::Horizontal, tr("Add-ons")); item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, tr("File type")); item_model->setHeaderData(COLUMN_SIZE, Qt::Horizontal, tr("Size")); + item_model->setHeaderData(COLUMN_PLAY_TIME, Qt::Horizontal, tr("Play time")); } void GameListSearchField::changeEvent(QEvent* event) { @@ -817,28 +827,23 @@ void GameList::PopulateAsync(QVector<UISettings::GameDir>& game_dirs) { tree_view->setColumnHidden(COLUMN_COMPATIBILITY, !UISettings::values.show_compat); tree_view->setColumnHidden(COLUMN_FILE_TYPE, !UISettings::values.show_types); tree_view->setColumnHidden(COLUMN_SIZE, !UISettings::values.show_size); + tree_view->setColumnHidden(COLUMN_PLAY_TIME, !UISettings::values.show_play_time); + + // Cancel any existing worker. + current_worker.reset(); // Delete any rows that might already exist if we're repopulating item_model->removeRows(0, item_model->rowCount()); search_field->clear(); - emit ShouldCancelWorker(); + current_worker = std::make_unique<GameListWorker>(vfs, provider, game_dirs, compatibility_list, + play_time_manager, system); - GameListWorker* worker = - new GameListWorker(vfs, provider, game_dirs, compatibility_list, system); - - connect(worker, &GameListWorker::EntryReady, this, &GameList::AddEntry, Qt::QueuedConnection); - connect(worker, &GameListWorker::DirEntryReady, this, &GameList::AddDirEntry, - Qt::QueuedConnection); - connect(worker, &GameListWorker::Finished, this, &GameList::DonePopulating, + // Get events from the worker as data becomes available + connect(current_worker.get(), &GameListWorker::DataAvailable, this, &GameList::WorkerEvent, Qt::QueuedConnection); - // Use DirectConnection here because worker->Cancel() is thread-safe and we want it to - // cancel without delay. - connect(this, &GameList::ShouldCancelWorker, worker, &GameListWorker::Cancel, - Qt::DirectConnection); - QThreadPool::globalInstance()->start(worker); - current_worker = std::move(worker); + QThreadPool::globalInstance()->start(current_worker.get()); } void GameList::SaveInterfaceLayout() { diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index 1fcbbf0ba..563a3a35b 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -18,6 +18,7 @@ #include "core/core.h" #include "uisettings.h" #include "yuzu/compatibility_list.h" +#include "yuzu/play_time_manager.h" namespace Core { class System; @@ -75,11 +76,13 @@ public: COLUMN_ADD_ONS, COLUMN_FILE_TYPE, COLUMN_SIZE, + COLUMN_PLAY_TIME, COLUMN_COUNT, // Number of columns }; explicit GameList(std::shared_ptr<FileSys::VfsFilesystem> vfs_, - FileSys::ManualContentProvider* provider_, Core::System& system_, + FileSys::ManualContentProvider* provider_, + PlayTime::PlayTimeManager& play_time_manager_, Core::System& system_, GMainWindow* parent = nullptr); ~GameList() override; @@ -106,13 +109,13 @@ signals: void BootGame(const QString& game_path, u64 program_id, std::size_t program_index, StartGameType type, AmLaunchType launch_type); void GameChosen(const QString& game_path, const u64 title_id = 0); - void ShouldCancelWorker(); void OpenFolderRequested(u64 program_id, GameListOpenTarget target, const std::string& game_path); void OpenTransferableShaderCacheRequested(u64 program_id); void RemoveInstalledEntryRequested(u64 program_id, InstalledEntryType type); void RemoveFileRequested(u64 program_id, GameListRemoveTarget target, const std::string& game_path); + void RemovePlayTimeRequested(u64 program_id); void DumpRomFSRequested(u64 program_id, const std::string& game_path, DumpRomFSTarget target); void VerifyIntegrityRequested(const std::string& game_path); void CopyTIDRequested(u64 program_id); @@ -134,11 +137,16 @@ private slots: void OnUpdateThemedIcons(); private: + friend class GameListWorker; + void WorkerEvent(); + void AddDirEntry(GameListDir* entry_items); void AddEntry(const QList<QStandardItem*>& entry_items, GameListDir* parent); - void ValidateEntry(const QModelIndex& item); void DonePopulating(const QStringList& watch_list); +private: + void ValidateEntry(const QModelIndex& item); + void RefreshGameDirectory(); void ToggleFavorite(u64 program_id); @@ -161,13 +169,14 @@ private: QVBoxLayout* layout = nullptr; QTreeView* tree_view = nullptr; QStandardItemModel* item_model = nullptr; - GameListWorker* current_worker = nullptr; + std::unique_ptr<GameListWorker> current_worker; QFileSystemWatcher* watcher = nullptr; ControllerNavigation* controller_navigation = nullptr; CompatibilityList compatibility_list; friend class GameListSearchField; + const PlayTime::PlayTimeManager& play_time_manager; Core::System& system; }; diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h index 1800f090f..86a0c41d9 100644 --- a/src/yuzu/game_list_p.h +++ b/src/yuzu/game_list_p.h @@ -18,6 +18,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/string_util.h" +#include "yuzu/play_time_manager.h" #include "yuzu/uisettings.h" #include "yuzu/util/util.h" @@ -221,6 +222,31 @@ public: } }; +/** + * GameListItem for Play Time values. + * This object stores the play time of a game in seconds, and its readable + * representation in minutes/hours + */ +class GameListItemPlayTime : public GameListItem { +public: + static constexpr int PlayTimeRole = SortRole; + + GameListItemPlayTime() = default; + explicit GameListItemPlayTime(const qulonglong time_seconds) { + setData(time_seconds, PlayTimeRole); + } + + void setData(const QVariant& value, int role) override { + qulonglong time_seconds = value.toULongLong(); + GameListItem::setData(PlayTime::ReadablePlayTime(time_seconds), Qt::DisplayRole); + GameListItem::setData(value, PlayTimeRole); + } + + bool operator<(const QStandardItem& other) const override { + return data(PlayTimeRole).toULongLong() < other.data(PlayTimeRole).toULongLong(); + } +}; + class GameListDir : public GameListItem { public: static constexpr int GameDirRole = Qt::UserRole + 2; diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp index e7fb8a282..69be21027 100644 --- a/src/yuzu/game_list_worker.cpp +++ b/src/yuzu/game_list_worker.cpp @@ -194,6 +194,7 @@ QList<QStandardItem*> MakeGameListEntry(const std::string& path, const std::stri const std::size_t size, const std::vector<u8>& icon, Loader::AppLoader& loader, u64 program_id, const CompatibilityList& compatibility_list, + const PlayTime::PlayTimeManager& play_time_manager, const FileSys::PatchManager& patch) { const auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); @@ -212,6 +213,7 @@ QList<QStandardItem*> MakeGameListEntry(const std::string& path, const std::stri new GameListItemCompat(compatibility), new GameListItem(file_type_string), new GameListItemSize(size), + new GameListItemPlayTime(play_time_manager.GetPlayTime(program_id)), }; const auto patch_versions = GetGameListCachedObject( @@ -227,11 +229,57 @@ QList<QStandardItem*> MakeGameListEntry(const std::string& path, const std::stri GameListWorker::GameListWorker(FileSys::VirtualFilesystem vfs_, FileSys::ManualContentProvider* provider_, QVector<UISettings::GameDir>& game_dirs_, - const CompatibilityList& compatibility_list_, Core::System& system_) + const CompatibilityList& compatibility_list_, + const PlayTime::PlayTimeManager& play_time_manager_, + Core::System& system_) : vfs{std::move(vfs_)}, provider{provider_}, game_dirs{game_dirs_}, - compatibility_list{compatibility_list_}, system{system_} {} + compatibility_list{compatibility_list_}, play_time_manager{play_time_manager_}, system{ + system_} { + // We want the game list to manage our lifetime. + setAutoDelete(false); +} + +GameListWorker::~GameListWorker() { + this->disconnect(); + stop_requested.store(true); + processing_completed.Wait(); +} -GameListWorker::~GameListWorker() = default; +void GameListWorker::ProcessEvents(GameList* game_list) { + while (true) { + std::function<void(GameList*)> func; + { + // Lock queue to protect concurrent modification. + std::scoped_lock lk(lock); + + // If we can't pop a function, return. + if (queued_events.empty()) { + return; + } + + // Pop a function. + func = std::move(queued_events.back()); + queued_events.pop_back(); + } + + // Run the function. + func(game_list); + } +} + +template <typename F> +void GameListWorker::RecordEvent(F&& func) { + { + // Lock queue to protect concurrent modification. + std::scoped_lock lk(lock); + + // Add the function into the front of the queue. + queued_events.emplace_front(std::move(func)); + } + + // Data now available. + emit DataAvailable(); +} void GameListWorker::AddTitlesToGameList(GameListDir* parent_dir) { using namespace FileSys; @@ -279,16 +327,16 @@ void GameListWorker::AddTitlesToGameList(GameListDir* parent_dir) { GetMetadataFromControlNCA(patch, *control, icon, name); } - emit EntryReady(MakeGameListEntry(file->GetFullPath(), name, file->GetSize(), icon, *loader, - program_id, compatibility_list, patch), - parent_dir); + auto entry = MakeGameListEntry(file->GetFullPath(), name, file->GetSize(), icon, *loader, + program_id, compatibility_list, play_time_manager, patch); + RecordEvent([=](GameList* game_list) { game_list->AddEntry(entry, parent_dir); }); } } void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_path, bool deep_scan, GameListDir* parent_dir) { const auto callback = [this, target, parent_dir](const std::filesystem::path& path) -> bool { - if (stop_processing) { + if (stop_requested) { // Breaks the callback loop. return false; } @@ -355,10 +403,12 @@ void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_pa const FileSys::PatchManager patch{id, system.GetFileSystemController(), system.GetContentProvider()}; - emit EntryReady(MakeGameListEntry(physical_name, name, - Common::FS::GetSize(physical_name), icon, - *loader, id, compatibility_list, patch), - parent_dir); + auto entry = MakeGameListEntry( + physical_name, name, Common::FS::GetSize(physical_name), icon, *loader, + id, compatibility_list, play_time_manager, patch); + + RecordEvent( + [=](GameList* game_list) { game_list->AddEntry(entry, parent_dir); }); } } else { std::vector<u8> icon; @@ -370,10 +420,12 @@ void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_pa const FileSys::PatchManager patch{program_id, system.GetFileSystemController(), system.GetContentProvider()}; - emit EntryReady( - MakeGameListEntry(physical_name, name, Common::FS::GetSize(physical_name), - icon, *loader, program_id, compatibility_list, patch), - parent_dir); + auto entry = MakeGameListEntry( + physical_name, name, Common::FS::GetSize(physical_name), icon, *loader, + program_id, compatibility_list, play_time_manager, patch); + + RecordEvent( + [=](GameList* game_list) { game_list->AddEntry(entry, parent_dir); }); } } } else if (is_dir) { @@ -392,26 +444,34 @@ void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_pa } void GameListWorker::run() { - stop_processing = false; + watch_list.clear(); provider->ClearAllEntries(); + const auto DirEntryReady = [&](GameListDir* game_list_dir) { + RecordEvent([=](GameList* game_list) { game_list->AddDirEntry(game_list_dir); }); + }; + for (UISettings::GameDir& game_dir : game_dirs) { + if (stop_requested) { + break; + } + if (game_dir.path == QStringLiteral("SDMC")) { auto* const game_list_dir = new GameListDir(game_dir, GameListItemType::SdmcDir); - emit DirEntryReady(game_list_dir); + DirEntryReady(game_list_dir); AddTitlesToGameList(game_list_dir); } else if (game_dir.path == QStringLiteral("UserNAND")) { auto* const game_list_dir = new GameListDir(game_dir, GameListItemType::UserNandDir); - emit DirEntryReady(game_list_dir); + DirEntryReady(game_list_dir); AddTitlesToGameList(game_list_dir); } else if (game_dir.path == QStringLiteral("SysNAND")) { auto* const game_list_dir = new GameListDir(game_dir, GameListItemType::SysNandDir); - emit DirEntryReady(game_list_dir); + DirEntryReady(game_list_dir); AddTitlesToGameList(game_list_dir); } else { watch_list.append(game_dir.path); auto* const game_list_dir = new GameListDir(game_dir); - emit DirEntryReady(game_list_dir); + DirEntryReady(game_list_dir); ScanFileSystem(ScanTarget::FillManualContentProvider, game_dir.path.toStdString(), game_dir.deep_scan, game_list_dir); ScanFileSystem(ScanTarget::PopulateGameList, game_dir.path.toStdString(), @@ -419,10 +479,6 @@ void GameListWorker::run() { } } - emit Finished(watch_list); -} - -void GameListWorker::Cancel() { - this->disconnect(); - stop_processing = true; + RecordEvent([=](GameList* game_list) { game_list->DonePopulating(watch_list); }); + processing_completed.Set(); } diff --git a/src/yuzu/game_list_worker.h b/src/yuzu/game_list_worker.h index 24a4e92c3..d5990fcde 100644 --- a/src/yuzu/game_list_worker.h +++ b/src/yuzu/game_list_worker.h @@ -4,6 +4,7 @@ #pragma once #include <atomic> +#include <deque> #include <memory> #include <string> @@ -12,12 +13,15 @@ #include <QRunnable> #include <QString> +#include "common/thread.h" #include "yuzu/compatibility_list.h" +#include "yuzu/play_time_manager.h" namespace Core { class System; } +class GameList; class QStandardItem; namespace FileSys { @@ -36,30 +40,30 @@ public: explicit GameListWorker(std::shared_ptr<FileSys::VfsFilesystem> vfs_, FileSys::ManualContentProvider* provider_, QVector<UISettings::GameDir>& game_dirs_, - const CompatibilityList& compatibility_list_, Core::System& system_); + const CompatibilityList& compatibility_list_, + const PlayTime::PlayTimeManager& play_time_manager_, + Core::System& system_); ~GameListWorker() override; /// Starts the processing of directory tree information. void run() override; - /// Tells the worker that it should no longer continue processing. Thread-safe. - void Cancel(); - -signals: +public: /** - * The `EntryReady` signal is emitted once an entry has been prepared and is ready - * to be added to the game list. - * @param entry_items a list with `QStandardItem`s that make up the columns of the new - * entry. + * Synchronously processes any events queued by the worker. + * + * AddDirEntry is called on the game list for every discovered directory. + * AddEntry is called on the game list for every discovered program. + * DonePopulating is called on the game list when processing completes. */ - void DirEntryReady(GameListDir* entry_items); - void EntryReady(QList<QStandardItem*> entry_items, GameListDir* parent_dir); + void ProcessEvents(GameList* game_list); - /** - * After the worker has traversed the game directory looking for entries, this signal is - * emitted with a list of folders that should be watched for changes as well. - */ - void Finished(QStringList watch_list); +signals: + void DataAvailable(); + +private: + template <typename F> + void RecordEvent(F&& func); private: void AddTitlesToGameList(GameListDir* parent_dir); @@ -76,9 +80,15 @@ private: FileSys::ManualContentProvider* provider; QVector<UISettings::GameDir>& game_dirs; const CompatibilityList& compatibility_list; + const PlayTime::PlayTimeManager& play_time_manager; QStringList watch_list; - std::atomic_bool stop_processing; + + std::mutex lock; + std::condition_variable cv; + std::deque<std::function<void(GameList*)>> queued_events; + std::atomic_bool stop_requested = false; + Common::Event processing_completed; Core::System& system; }; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index eb69da4ba..7cc11ae3b 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -67,6 +67,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #define QT_NO_OPENGL #include <QClipboard> #include <QDesktopServices> +#include <QDir> #include <QFile> #include <QFileDialog> #include <QInputDialog> @@ -76,6 +77,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include <QPushButton> #include <QScreen> #include <QShortcut> +#include <QStandardPaths> #include <QStatusBar> #include <QString> #include <QSysInfo> @@ -98,6 +100,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "common/scm_rev.h" #include "common/scope_exit.h" #ifdef _WIN32 +#include <shlobj.h> #include "common/windows/timer_resolution.h" #endif #ifdef ARCHITECTURE_x86_64 @@ -150,6 +153,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "yuzu/install_dialog.h" #include "yuzu/loading_screen.h" #include "yuzu/main.h" +#include "yuzu/play_time_manager.h" #include "yuzu/startup_checks.h" #include "yuzu/uisettings.h" #include "yuzu/util/clickable_label.h" @@ -207,7 +211,7 @@ void GMainWindow::ShowTelemetryCallout() { tr("<a href='https://yuzu-emu.org/help/feature/telemetry/'>Anonymous " "data is collected</a> to help improve yuzu. " "<br/><br/>Would you like to share your usage data with us?"); - if (QMessageBox::question(this, tr("Telemetry"), telemetry_message) != QMessageBox::Yes) { + if (!question(this, tr("Telemetry"), telemetry_message)) { Settings::values.enable_telemetry = false; system->ApplySettings(); } @@ -338,6 +342,8 @@ GMainWindow::GMainWindow(std::unique_ptr<Config> config_, bool has_broken_vulkan SetDiscordEnabled(UISettings::values.enable_discord_presence.GetValue()); discord_rpc->Update(); + play_time_manager = std::make_unique<PlayTime::PlayTimeManager>(); + system->GetRoomNetwork().Init(); RegisterMetaTypes(); @@ -986,7 +992,7 @@ void GMainWindow::InitializeWidgets() { render_window = new GRenderWindow(this, emu_thread.get(), input_subsystem, *system); render_window->hide(); - game_list = new GameList(vfs, provider.get(), *system, this); + game_list = new GameList(vfs, provider.get(), *play_time_manager, *system, this); ui->horizontalLayout->addWidget(game_list); game_list_placeholder = new GameListPlaceholder(this); @@ -1447,6 +1453,7 @@ void GMainWindow::OnAppFocusStateChanged(Qt::ApplicationState state) { Settings::values.audio_muted = false; auto_muted = false; } + UpdateVolumeUI(); } } @@ -1460,6 +1467,8 @@ void GMainWindow::ConnectWidgetEvents() { connect(game_list, &GameList::RemoveInstalledEntryRequested, this, &GMainWindow::OnGameListRemoveInstalledEntry); connect(game_list, &GameList::RemoveFileRequested, this, &GMainWindow::OnGameListRemoveFile); + connect(game_list, &GameList::RemovePlayTimeRequested, this, + &GMainWindow::OnGameListRemovePlayTimeData); connect(game_list, &GameList::DumpRomFSRequested, this, &GMainWindow::OnGameListDumpRomFS); connect(game_list, &GameList::VerifyIntegrityRequested, this, &GMainWindow::OnGameListVerifyIntegrity); @@ -1553,6 +1562,7 @@ void GMainWindow::ConnectMenuEvents() { // Tools connect_menu(ui->action_Rederive, std::bind(&GMainWindow::OnReinitializeKeys, this, ReinitializeKeyBehavior::Warning)); + connect_menu(ui->action_Load_Album, &GMainWindow::OnAlbum); connect_menu(ui->action_Load_Cabinet_Nickname_Owner, [this]() { OnCabinet(Service::NFP::CabinetMode::StartNicknameAndOwnerSettings); }); connect_menu(ui->action_Load_Cabinet_Eraser, @@ -1590,6 +1600,7 @@ void GMainWindow::UpdateMenuState() { }; const std::array applet_actions{ + ui->action_Load_Album, ui->action_Load_Cabinet_Nickname_Owner, ui->action_Load_Cabinet_Eraser, ui->action_Load_Cabinet_Restorer, @@ -2008,7 +2019,7 @@ void GMainWindow::BootGame(const QString& filename, u64 program_id, std::size_t std::filesystem::path{Common::U16StringFromBuffer(filename.utf16(), filename.size())} .filename()); } - const bool is_64bit = system->Kernel().ApplicationProcess()->Is64BitProcess(); + const bool is_64bit = system->Kernel().ApplicationProcess()->Is64Bit(); const auto instruction_set_suffix = is_64bit ? tr("(64-bit)") : tr("(32-bit)"); title_name = tr("%1 %2", "%1 is the title name. %2 indicates if the title is 64-bit or 32-bit") .arg(QString::fromStdString(title_name), instruction_set_suffix) @@ -2409,9 +2420,8 @@ void GMainWindow::OnGameListRemoveInstalledEntry(u64 program_id, InstalledEntryT } }(); - if (QMessageBox::question(this, tr("Remove Entry"), entry_question, - QMessageBox::Yes | QMessageBox::No, - QMessageBox::No) != QMessageBox::Yes) { + if (!question(this, tr("Remove Entry"), entry_question, QMessageBox::Yes | QMessageBox::No, + QMessageBox::No)) { return; } @@ -2510,8 +2520,8 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ } }(); - if (QMessageBox::question(this, tr("Remove File"), question, QMessageBox::Yes | QMessageBox::No, - QMessageBox::No) != QMessageBox::Yes) { + if (!GMainWindow::question(this, tr("Remove File"), question, + QMessageBox::Yes | QMessageBox::No, QMessageBox::No)) { return; } @@ -2534,6 +2544,17 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ } } +void GMainWindow::OnGameListRemovePlayTimeData(u64 program_id) { + if (QMessageBox::question(this, tr("Remove Play Time Data"), tr("Reset play time?"), + QMessageBox::Yes | QMessageBox::No, + QMessageBox::No) != QMessageBox::Yes) { + return; + } + + play_time_manager->ResetProgramPlayTime(program_id); + game_list->PopulateAsync(UISettings::values.game_dirs); +} + void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target) { const auto target_file_name = [target] { switch (target) { @@ -2825,7 +2846,6 @@ void GMainWindow::OnGameListCreateShortcut(u64 program_id, const std::string& ga const QStringList args = QApplication::arguments(); std::filesystem::path yuzu_command = args[0].toStdString(); -#if defined(__linux__) || defined(__FreeBSD__) // If relative path, make it an absolute path if (yuzu_command.c_str()[0] == '.') { yuzu_command = Common::FS::GetCurrentDir() / yuzu_command; @@ -2848,44 +2868,52 @@ void GMainWindow::OnGameListCreateShortcut(u64 program_id, const std::string& ga UISettings::values.shortcut_already_warned = true; } #endif // __linux__ -#endif // __linux__ || __FreeBSD__ std::filesystem::path target_directory{}; - // Determine target directory for shortcut -#if defined(__linux__) || defined(__FreeBSD__) - const char* home = std::getenv("HOME"); - const std::filesystem::path home_path = (home == nullptr ? "~" : home); - const char* xdg_data_home = std::getenv("XDG_DATA_HOME"); - if (target == GameListShortcutTarget::Desktop) { - target_directory = home_path / "Desktop"; - if (!Common::FS::IsDir(target_directory)) { - QMessageBox::critical( - this, tr("Create Shortcut"), - tr("Cannot create shortcut on desktop. Path \"%1\" does not exist.") - .arg(QString::fromStdString(target_directory)), - QMessageBox::StandardButton::Ok); - return; - } - } else if (target == GameListShortcutTarget::Applications) { - target_directory = (xdg_data_home == nullptr ? home_path / ".local/share" : xdg_data_home) / - "applications"; - if (!Common::FS::CreateDirs(target_directory)) { - QMessageBox::critical(this, tr("Create Shortcut"), - tr("Cannot create shortcut in applications menu. Path \"%1\" " - "does not exist and cannot be created.") - .arg(QString::fromStdString(target_directory)), - QMessageBox::StandardButton::Ok); - return; + switch (target) { + case GameListShortcutTarget::Desktop: { + const QString desktop_path = + QStandardPaths::writableLocation(QStandardPaths::DesktopLocation); + target_directory = desktop_path.toUtf8().toStdString(); + break; + } + case GameListShortcutTarget::Applications: { + const QString applications_path = + QStandardPaths::writableLocation(QStandardPaths::ApplicationsLocation); + if (applications_path.isEmpty()) { + const char* home = std::getenv("HOME"); + if (home != nullptr) { + target_directory = std::filesystem::path(home) / ".local/share/applications"; + } + } else { + target_directory = applications_path.toUtf8().toStdString(); } + break; + } + default: + return; + } + + const QDir dir(QString::fromStdString(target_directory.generic_string())); + if (!dir.exists()) { + QMessageBox::critical(this, tr("Create Shortcut"), + tr("Cannot create shortcut. Path \"%1\" does not exist.") + .arg(QString::fromStdString(target_directory.generic_string())), + QMessageBox::StandardButton::Ok); + return; } -#endif const std::string game_file_name = std::filesystem::path(game_path).filename().string(); // Determine full paths for icon and shortcut #if defined(__linux__) || defined(__FreeBSD__) + const char* home = std::getenv("HOME"); + const std::filesystem::path home_path = (home == nullptr ? "~" : home); + const char* xdg_data_home = std::getenv("XDG_DATA_HOME"); + std::filesystem::path system_icons_path = - (xdg_data_home == nullptr ? home_path / ".local/share/" : xdg_data_home) / + (xdg_data_home == nullptr ? home_path / ".local/share/" + : std::filesystem::path(xdg_data_home)) / "icons/hicolor/256x256"; if (!Common::FS::CreateDirs(system_icons_path)) { QMessageBox::critical( @@ -2901,9 +2929,14 @@ void GMainWindow::OnGameListCreateShortcut(u64 program_id, const std::string& ga const std::filesystem::path shortcut_path = target_directory / (program_id == 0 ? fmt::format("yuzu-{}.desktop", game_file_name) : fmt::format("yuzu-{:016X}.desktop", program_id)); +#elif defined(WIN32) + std::filesystem::path icons_path = + Common::FS::GetYuzuPathString(Common::FS::YuzuPath::IconsDir); + std::filesystem::path icon_path = + icons_path / ((program_id == 0 ? fmt::format("yuzu-{}.ico", game_file_name) + : fmt::format("yuzu-{:016X}.ico", program_id))); #else - const std::filesystem::path icon_path{}; - const std::filesystem::path shortcut_path{}; + std::string icon_extension; #endif // Get title from game file @@ -2928,29 +2961,37 @@ void GMainWindow::OnGameListCreateShortcut(u64 program_id, const std::string& ga LOG_WARNING(Frontend, "Could not read icon from {:s}", game_path); } - QImage icon_jpeg = + QImage icon_data = QImage::fromData(icon_image_file.data(), static_cast<int>(icon_image_file.size())); #if defined(__linux__) || defined(__FreeBSD__) // Convert and write the icon as a PNG - if (!icon_jpeg.save(QString::fromStdString(icon_path.string()))) { + if (!icon_data.save(QString::fromStdString(icon_path.string()))) { LOG_ERROR(Frontend, "Could not write icon as PNG to file"); } else { LOG_INFO(Frontend, "Wrote an icon to {}", icon_path.string()); } +#elif defined(WIN32) + if (!SaveIconToFile(icon_path.string(), icon_data)) { + LOG_ERROR(Frontend, "Could not write icon to file"); + return; + } #endif // __linux__ -#if defined(__linux__) || defined(__FreeBSD__) +#ifdef _WIN32 + // Replace characters that are illegal in Windows filenames by a dash + const std::string illegal_chars = "<>:\"/\\|?*"; + for (char c : illegal_chars) { + std::replace(title.begin(), title.end(), c, '_'); + } + const std::filesystem::path shortcut_path = target_directory / (title + ".lnk").c_str(); +#endif + const std::string comment = tr("Start %1 with the yuzu Emulator").arg(QString::fromStdString(title)).toStdString(); const std::string arguments = fmt::format("-g \"{:s}\"", game_path); const std::string categories = "Game;Emulator;Qt;"; const std::string keywords = "Switch;Nintendo;"; -#else - const std::string comment{}; - const std::string arguments{}; - const std::string categories{}; - const std::string keywords{}; -#endif + if (!CreateShortcut(shortcut_path.string(), title, comment, icon_path.string(), yuzu_command.string(), arguments, categories, keywords)) { QMessageBox::critical(this, tr("Create Shortcut"), @@ -3357,6 +3398,9 @@ void GMainWindow::OnStartGame() { UpdateMenuState(); OnTasStateChanged(); + play_time_manager->SetProgramId(system->GetApplicationProcessProgramID()); + play_time_manager->Start(); + discord_rpc->Update(); } @@ -3364,14 +3408,18 @@ void GMainWindow::OnRestartGame() { if (!system->IsPoweredOn()) { return; } - // Make a copy since ShutdownGame edits game_path - const auto current_game = QString(current_game_path); - ShutdownGame(); - BootGame(current_game); + + if (ConfirmShutdownGame()) { + // Make a copy since ShutdownGame edits game_path + const auto current_game = QString(current_game_path); + ShutdownGame(); + BootGame(current_game); + } } void GMainWindow::OnPauseGame() { emu_thread->SetRunning(false); + play_time_manager->Stop(); UpdateMenuState(); AllowOSSleep(); } @@ -3388,15 +3436,39 @@ void GMainWindow::OnPauseContinueGame() { } void GMainWindow::OnStopGame() { - if (system->GetExitLocked() && !ConfirmForceLockedExit()) { - return; + if (ConfirmShutdownGame()) { + play_time_manager->Stop(); + // Update game list to show new play time + game_list->PopulateAsync(UISettings::values.game_dirs); + if (OnShutdownBegin()) { + OnShutdownBeginDialog(); + } else { + OnEmulationStopped(); + } } +} - if (OnShutdownBegin()) { - OnShutdownBeginDialog(); +bool GMainWindow::ConfirmShutdownGame() { + if (UISettings::values.confirm_before_stopping.GetValue() == ConfirmStop::Ask_Always) { + if (system->GetExitLocked()) { + if (!ConfirmForceLockedExit()) { + return false; + } + } else { + if (!ConfirmChangeGame()) { + return false; + } + } } else { - OnEmulationStopped(); + if (UISettings::values.confirm_before_stopping.GetValue() == + ConfirmStop::Ask_Based_On_Game && + system->GetExitLocked()) { + if (!ConfirmForceLockedExit()) { + return false; + } + } } + return true; } void GMainWindow::OnLoadComplete() { @@ -3776,22 +3848,11 @@ void GMainWindow::OnTasRecord() { const bool is_recording = input_subsystem->GetTas()->Record(); if (!is_recording) { is_tas_recording_dialog_active = true; - ControllerNavigation* controller_navigation = - new ControllerNavigation(system->HIDCore(), this); - // Use QMessageBox instead of question so we can link controller navigation - QMessageBox* box_dialog = new QMessageBox(); - box_dialog->setWindowTitle(tr("TAS Recording")); - box_dialog->setText(tr("Overwrite file of player 1?")); - box_dialog->setStandardButtons(QMessageBox::Yes | QMessageBox::No); - box_dialog->setDefaultButton(QMessageBox::Yes); - connect(controller_navigation, &ControllerNavigation::TriggerKeyboardEvent, - [box_dialog](Qt::Key key) { - QKeyEvent* event = new QKeyEvent(QEvent::KeyPress, key, Qt::NoModifier); - QCoreApplication::postEvent(box_dialog, event); - }); - int res = box_dialog->exec(); - controller_navigation->UnloadController(); - input_subsystem->GetTas()->SaveRecording(res == QMessageBox::Yes); + + bool answer = question(this, tr("TAS Recording"), tr("Overwrite file of player 1?"), + QMessageBox::Yes | QMessageBox::No, QMessageBox::Yes); + + input_subsystem->GetTas()->SaveRecording(answer); is_tas_recording_dialog_active = false; } OnTasStateChanged(); @@ -3965,6 +4026,34 @@ bool GMainWindow::CreateShortcut(const std::string& shortcut_path, const std::st shortcut_stream.close(); return true; +#elif defined(WIN32) + IShellLinkW* shell_link; + auto hres = CoCreateInstance(CLSID_ShellLink, NULL, CLSCTX_INPROC_SERVER, IID_IShellLinkW, + (void**)&shell_link); + if (FAILED(hres)) { + return false; + } + shell_link->SetPath( + Common::UTF8ToUTF16W(command).data()); // Path to the object we are referring to + shell_link->SetArguments(Common::UTF8ToUTF16W(arguments).data()); + shell_link->SetDescription(Common::UTF8ToUTF16W(comment).data()); + shell_link->SetIconLocation(Common::UTF8ToUTF16W(icon_path).data(), 0); + + IPersistFile* persist_file; + hres = shell_link->QueryInterface(IID_IPersistFile, (void**)&persist_file); + if (FAILED(hres)) { + return false; + } + + hres = persist_file->Save(Common::UTF8ToUTF16W(shortcut_path).data(), TRUE); + if (FAILED(hres)) { + return false; + } + + persist_file->Release(); + shell_link->Release(); + + return true; #endif return false; } @@ -4004,6 +4093,29 @@ void GMainWindow::OnLoadAmiibo() { LoadAmiibo(filename); } +bool GMainWindow::question(QWidget* parent, const QString& title, const QString& text, + QMessageBox::StandardButtons buttons, + QMessageBox::StandardButton defaultButton) { + + QMessageBox* box_dialog = new QMessageBox(parent); + box_dialog->setWindowTitle(title); + box_dialog->setText(text); + box_dialog->setStandardButtons(buttons); + box_dialog->setDefaultButton(defaultButton); + + ControllerNavigation* controller_navigation = + new ControllerNavigation(system->HIDCore(), box_dialog); + connect(controller_navigation, &ControllerNavigation::TriggerKeyboardEvent, + [box_dialog](Qt::Key key) { + QKeyEvent* event = new QKeyEvent(QEvent::KeyPress, key, Qt::NoModifier); + QCoreApplication::postEvent(box_dialog, event); + }); + int res = box_dialog->exec(); + + controller_navigation->UnloadController(); + return res == QMessageBox::Yes; +} + void GMainWindow::LoadAmiibo(const QString& filename) { auto* virtual_amiibo = input_subsystem->GetVirtualAmiibo(); const QString title = tr("Error loading Amiibo data"); @@ -4157,6 +4269,29 @@ void GMainWindow::OnToggleStatusBar() { statusBar()->setVisible(ui->action_Show_Status_Bar->isChecked()); } +void GMainWindow::OnAlbum() { + constexpr u64 AlbumId = 0x010000000000100Dull; + auto bis_system = system->GetFileSystemController().GetSystemNANDContents(); + if (!bis_system) { + QMessageBox::warning(this, tr("No firmware available"), + tr("Please install the firmware to use the Album applet.")); + return; + } + + auto album_nca = bis_system->GetEntry(AlbumId, FileSys::ContentRecordType::Program); + if (!album_nca) { + QMessageBox::warning(this, tr("Album Applet"), + tr("Album applet is not available. Please reinstall firmware.")); + return; + } + + system->GetAppletManager().SetCurrentAppletId(Service::AM::Applets::AppletId::PhotoViewer); + + const auto filename = QString::fromStdString(album_nca->GetFullPath()); + UISettings::values.roms_path = QFileInfo(filename).path(); + BootGame(filename); +} + void GMainWindow::OnCabinet(Service::NFP::CabinetMode mode) { constexpr u64 CabinetId = 0x0100000000001002ull; auto bis_system = system->GetFileSystemController().GetSystemNANDContents(); @@ -4714,8 +4849,7 @@ bool GMainWindow::ConfirmClose() { return true; } const auto text = tr("Are you sure you want to close yuzu?"); - const auto answer = QMessageBox::question(this, tr("yuzu"), text); - return answer != QMessageBox::No; + return question(this, tr("yuzu"), text); } void GMainWindow::closeEvent(QCloseEvent* event) { @@ -4808,11 +4942,11 @@ bool GMainWindow::ConfirmChangeGame() { if (emu_thread == nullptr) return true; - const auto answer = QMessageBox::question( + // Use custom question to link controller navigation + return question( this, tr("yuzu"), tr("Are you sure you want to stop the emulation? Any unsaved progress will be lost."), - QMessageBox::Yes | QMessageBox::No, QMessageBox::No); - return answer != QMessageBox::No; + QMessageBox::Yes | QMessageBox::No, QMessageBox::Yes); } bool GMainWindow::ConfirmForceLockedExit() { @@ -4822,8 +4956,7 @@ bool GMainWindow::ConfirmForceLockedExit() { const auto text = tr("The currently running application has requested yuzu to not exit.\n\n" "Would you like to bypass this and exit anyway?"); - const auto answer = QMessageBox::question(this, tr("yuzu"), text); - return answer != QMessageBox::No; + return question(this, tr("yuzu"), text); } void GMainWindow::RequestGameExit() { diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 52028234c..270a40c5f 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -7,6 +7,7 @@ #include <optional> #include <QMainWindow> +#include <QMessageBox> #include <QTimer> #include <QTranslator> @@ -15,6 +16,7 @@ #include "input_common/drivers/tas_input.h" #include "yuzu/compatibility_list.h" #include "yuzu/hotkeys.h" +#include "yuzu/util/controller_navigation.h" #ifdef __unix__ #include <QVariant> @@ -81,6 +83,10 @@ namespace DiscordRPC { class DiscordInterface; } +namespace PlayTime { +class PlayTimeManager; +} + namespace FileSys { class ContentProvider; class ManualContentProvider; @@ -323,6 +329,7 @@ private slots: void OnGameListRemoveInstalledEntry(u64 program_id, InstalledEntryType type); void OnGameListRemoveFile(u64 program_id, GameListRemoveTarget target, const std::string& game_path); + void OnGameListRemovePlayTimeData(u64 program_id); void OnGameListDumpRomFS(u64 program_id, const std::string& game_path, DumpRomFSTarget target); void OnGameListVerifyIntegrity(const std::string& game_path); void OnGameListCopyTID(u64 program_id); @@ -369,6 +376,7 @@ private slots: void ResetWindowSize720(); void ResetWindowSize900(); void ResetWindowSize1080(); + void OnAlbum(); void OnCabinet(Service::NFP::CabinetMode mode); void OnMiiEdit(); void OnCaptureScreenshot(); @@ -389,6 +397,7 @@ private: void RemoveVulkanDriverPipelineCache(u64 program_id); void RemoveAllTransferableShaderCaches(u64 program_id); void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); + void RemovePlayTimeData(u64 program_id); void RemoveCacheStorage(u64 program_id); bool SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id, u64* selected_title_id, u8* selected_content_record_type); @@ -417,6 +426,11 @@ private: bool CheckSystemArchiveDecryption(); bool CheckFirmwarePresence(); void ConfigureFilesystemProvider(const std::string& filepath); + /** + * Open (or not) the right confirm dialog based on current setting and game exit lock + * @returns true if the player confirmed or the settings do no require it + */ + bool ConfirmShutdownGame(); QString GetTasStateDescription() const; bool CreateShortcut(const std::string& shortcut_path, const std::string& title, @@ -424,10 +438,22 @@ private: const std::string& command, const std::string& arguments, const std::string& categories, const std::string& keywords); + /** + * Mimic the behavior of QMessageBox::question but link controller navigation to the dialog + * The only difference is that it returns a boolean. + * + * @returns true if buttons contains QMessageBox::Yes and the user clicks on the "Yes" button. + */ + bool question(QWidget* parent, const QString& title, const QString& text, + QMessageBox::StandardButtons buttons = + QMessageBox::StandardButtons(QMessageBox::Yes | QMessageBox::No), + QMessageBox::StandardButton defaultButton = QMessageBox::NoButton); + std::unique_ptr<Ui::MainWindow> ui; std::unique_ptr<Core::System> system; std::unique_ptr<DiscordRPC::DiscordInterface> discord_rpc; + std::unique_ptr<PlayTime::PlayTimeManager> play_time_manager; std::shared_ptr<InputCommon::InputSubsystem> input_subsystem; MultiplayerState* multiplayer_state = nullptr; diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index 31c3de9ef..88684ffb5 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -160,6 +160,7 @@ <addaction name="action_Verify_installed_contents"/> <addaction name="separator"/> <addaction name="menu_cabinet_applet"/> + <addaction name="action_Load_Album"/> <addaction name="action_Load_Mii_Edit"/> <addaction name="separator"/> <addaction name="action_Capture_Screenshot"/> @@ -380,6 +381,11 @@ <string>&Capture Screenshot</string> </property> </action> + <action name="action_Load_Album"> + <property name="text"> + <string>Open &Album</string> + </property> + </action> <action name="action_Load_Cabinet_Nickname_Owner"> <property name="text"> <string>&Set Nickname and Owner</string> diff --git a/src/yuzu/play_time_manager.cpp b/src/yuzu/play_time_manager.cpp new file mode 100644 index 000000000..155c36b7d --- /dev/null +++ b/src/yuzu/play_time_manager.cpp @@ -0,0 +1,179 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/alignment.h" +#include "common/fs/file.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" +#include "common/logging/log.h" +#include "common/settings.h" +#include "common/thread.h" +#include "core/hle/service/acc/profile_manager.h" +#include "yuzu/play_time_manager.h" + +namespace PlayTime { + +namespace { + +struct PlayTimeElement { + ProgramId program_id; + PlayTime play_time; +}; + +std::optional<std::filesystem::path> GetCurrentUserPlayTimePath() { + const Service::Account::ProfileManager manager; + const auto uuid = manager.GetUser(static_cast<s32>(Settings::values.current_user)); + if (!uuid.has_value()) { + return std::nullopt; + } + return Common::FS::GetYuzuPath(Common::FS::YuzuPath::PlayTimeDir) / + uuid->RawString().append(".bin"); +} + +[[nodiscard]] bool ReadPlayTimeFile(PlayTimeDatabase& out_play_time_db) { + const auto filename = GetCurrentUserPlayTimePath(); + + if (!filename.has_value()) { + LOG_ERROR(Frontend, "Failed to get current user path"); + return false; + } + + out_play_time_db.clear(); + + if (Common::FS::Exists(filename.value())) { + Common::FS::IOFile file{filename.value(), Common::FS::FileAccessMode::Read, + Common::FS::FileType::BinaryFile}; + if (!file.IsOpen()) { + LOG_ERROR(Frontend, "Failed to open play time file: {}", + Common::FS::PathToUTF8String(filename.value())); + return false; + } + + const size_t num_elements = file.GetSize() / sizeof(PlayTimeElement); + std::vector<PlayTimeElement> elements(num_elements); + + if (file.ReadSpan<PlayTimeElement>(elements) != num_elements) { + return false; + } + + for (const auto& [program_id, play_time] : elements) { + if (program_id != 0) { + out_play_time_db[program_id] = play_time; + } + } + } + + return true; +} + +[[nodiscard]] bool WritePlayTimeFile(const PlayTimeDatabase& play_time_db) { + const auto filename = GetCurrentUserPlayTimePath(); + + if (!filename.has_value()) { + LOG_ERROR(Frontend, "Failed to get current user path"); + return false; + } + + Common::FS::IOFile file{filename.value(), Common::FS::FileAccessMode::Write, + Common::FS::FileType::BinaryFile}; + if (!file.IsOpen()) { + LOG_ERROR(Frontend, "Failed to open play time file: {}", + Common::FS::PathToUTF8String(filename.value())); + return false; + } + + std::vector<PlayTimeElement> elements; + elements.reserve(play_time_db.size()); + + for (auto& [program_id, play_time] : play_time_db) { + if (program_id != 0) { + elements.push_back(PlayTimeElement{program_id, play_time}); + } + } + + return file.WriteSpan<PlayTimeElement>(elements) == elements.size(); +} + +} // namespace + +PlayTimeManager::PlayTimeManager() { + if (!ReadPlayTimeFile(database)) { + LOG_ERROR(Frontend, "Failed to read play time database! Resetting to default."); + } +} + +PlayTimeManager::~PlayTimeManager() { + Save(); +} + +void PlayTimeManager::SetProgramId(u64 program_id) { + running_program_id = program_id; +} + +void PlayTimeManager::Start() { + play_time_thread = std::jthread([&](std::stop_token stop_token) { AutoTimestamp(stop_token); }); +} + +void PlayTimeManager::Stop() { + play_time_thread = {}; +} + +void PlayTimeManager::AutoTimestamp(std::stop_token stop_token) { + Common::SetCurrentThreadName("PlayTimeReport"); + + using namespace std::literals::chrono_literals; + using std::chrono::seconds; + using std::chrono::steady_clock; + + auto timestamp = steady_clock::now(); + + const auto GetDuration = [&]() -> u64 { + const auto last_timestamp = std::exchange(timestamp, steady_clock::now()); + const auto duration = std::chrono::duration_cast<seconds>(timestamp - last_timestamp); + return static_cast<u64>(duration.count()); + }; + + while (!stop_token.stop_requested()) { + Common::StoppableTimedWait(stop_token, 30s); + + database[running_program_id] += GetDuration(); + Save(); + } +} + +void PlayTimeManager::Save() { + if (!WritePlayTimeFile(database)) { + LOG_ERROR(Frontend, "Failed to update play time database!"); + } +} + +u64 PlayTimeManager::GetPlayTime(u64 program_id) const { + auto it = database.find(program_id); + if (it != database.end()) { + return it->second; + } else { + return 0; + } +} + +void PlayTimeManager::ResetProgramPlayTime(u64 program_id) { + database.erase(program_id); + Save(); +} + +QString ReadablePlayTime(qulonglong time_seconds) { + if (time_seconds == 0) { + return {}; + } + const auto time_minutes = std::max(static_cast<double>(time_seconds) / 60, 1.0); + const auto time_hours = static_cast<double>(time_seconds) / 3600; + const bool is_minutes = time_minutes < 60; + const char* unit = is_minutes ? "m" : "h"; + const auto value = is_minutes ? time_minutes : time_hours; + + return QStringLiteral("%L1 %2") + .arg(value, 0, 'f', !is_minutes && time_seconds % 60 != 0) + .arg(QString::fromUtf8(unit)); +} + +} // namespace PlayTime diff --git a/src/yuzu/play_time_manager.h b/src/yuzu/play_time_manager.h new file mode 100644 index 000000000..5f96f3447 --- /dev/null +++ b/src/yuzu/play_time_manager.h @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <QString> + +#include <map> + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/polyfill_thread.h" + +namespace PlayTime { + +using ProgramId = u64; +using PlayTime = u64; +using PlayTimeDatabase = std::map<ProgramId, PlayTime>; + +class PlayTimeManager { +public: + explicit PlayTimeManager(); + ~PlayTimeManager(); + + YUZU_NON_COPYABLE(PlayTimeManager); + YUZU_NON_MOVEABLE(PlayTimeManager); + + u64 GetPlayTime(u64 program_id) const; + void ResetProgramPlayTime(u64 program_id); + void SetProgramId(u64 program_id); + void Start(); + void Stop(); + +private: + PlayTimeDatabase database; + u64 running_program_id; + std::jthread play_time_thread; + void AutoTimestamp(std::stop_token stop_token); + void Save(); +}; + +QString ReadablePlayTime(qulonglong time_seconds); + +} // namespace PlayTime diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h index 8efd63f31..b62ff620c 100644 --- a/src/yuzu/uisettings.h +++ b/src/yuzu/uisettings.h @@ -16,7 +16,9 @@ #include "common/settings_enums.h" using Settings::Category; +using Settings::ConfirmStop; using Settings::Setting; +using Settings::SwitchableSetting; #ifndef CANNOT_EXPLICITLY_INSTANTIATE namespace Settings { @@ -94,6 +96,15 @@ struct Values { Setting<bool> confirm_before_closing{ linkage, true, "confirmClose", Category::UiGeneral, Settings::Specialization::Default, true, true}; + + SwitchableSetting<ConfirmStop> confirm_before_stopping{linkage, + ConfirmStop::Ask_Always, + "confirmStop", + Category::UiGeneral, + Settings::Specialization::Default, + true, + true}; + Setting<bool> first_start{linkage, true, "firstStart", Category::Ui}; Setting<bool> pause_when_in_background{linkage, false, @@ -103,7 +114,7 @@ struct Values { true, true}; Setting<bool> mute_when_in_background{ - linkage, false, "muteWhenInBackground", Category::Ui, Settings::Specialization::Default, + linkage, false, "muteWhenInBackground", Category::Audio, Settings::Specialization::Default, true, true}; Setting<bool> hide_mouse{ linkage, true, "hideInactiveMouse", Category::UiGeneral, Settings::Specialization::Default, @@ -183,6 +194,9 @@ struct Values { Setting<bool> show_size{linkage, true, "show_size", Category::UiGameList}; Setting<bool> show_types{linkage, true, "show_types", Category::UiGameList}; + // Play time + Setting<bool> show_play_time{linkage, true, "show_play_time", Category::UiGameList}; + bool configuration_applied; bool reset_to_defaults; bool shortcut_already_warned{false}; diff --git a/src/yuzu/util/util.cpp b/src/yuzu/util/util.cpp index 5c3e4589e..f2854c8ec 100644 --- a/src/yuzu/util/util.cpp +++ b/src/yuzu/util/util.cpp @@ -5,6 +5,10 @@ #include <cmath> #include <QPainter> #include "yuzu/util/util.h" +#ifdef _WIN32 +#include <windows.h> +#include "common/fs/file.h" +#endif QFont GetMonospaceFont() { QFont font(QStringLiteral("monospace")); @@ -37,3 +41,101 @@ QPixmap CreateCirclePixmapFromColor(const QColor& color) { painter.drawEllipse({circle_pixmap.width() / 2.0, circle_pixmap.height() / 2.0}, 7.0, 7.0); return circle_pixmap; } + +bool SaveIconToFile(const std::string_view path, const QImage& image) { +#if defined(WIN32) +#pragma pack(push, 2) + struct IconDir { + WORD id_reserved; + WORD id_type; + WORD id_count; + }; + + struct IconDirEntry { + BYTE width; + BYTE height; + BYTE color_count; + BYTE reserved; + WORD planes; + WORD bit_count; + DWORD bytes_in_res; + DWORD image_offset; + }; +#pragma pack(pop) + + const QImage source_image = image.convertToFormat(QImage::Format_RGB32); + constexpr std::array<int, 7> scale_sizes{256, 128, 64, 48, 32, 24, 16}; + constexpr int bytes_per_pixel = 4; + + const IconDir icon_dir{ + .id_reserved = 0, + .id_type = 1, + .id_count = static_cast<WORD>(scale_sizes.size()), + }; + + Common::FS::IOFile icon_file(path, Common::FS::FileAccessMode::Write, + Common::FS::FileType::BinaryFile); + if (!icon_file.IsOpen()) { + return false; + } + + if (!icon_file.Write(icon_dir)) { + return false; + } + + std::size_t image_offset = sizeof(IconDir) + (sizeof(IconDirEntry) * scale_sizes.size()); + for (std::size_t i = 0; i < scale_sizes.size(); i++) { + const int image_size = scale_sizes[i] * scale_sizes[i] * bytes_per_pixel; + const IconDirEntry icon_entry{ + .width = static_cast<BYTE>(scale_sizes[i]), + .height = static_cast<BYTE>(scale_sizes[i]), + .color_count = 0, + .reserved = 0, + .planes = 1, + .bit_count = bytes_per_pixel * 8, + .bytes_in_res = static_cast<DWORD>(sizeof(BITMAPINFOHEADER) + image_size), + .image_offset = static_cast<DWORD>(image_offset), + }; + image_offset += icon_entry.bytes_in_res; + if (!icon_file.Write(icon_entry)) { + return false; + } + } + + for (std::size_t i = 0; i < scale_sizes.size(); i++) { + const QImage scaled_image = source_image.scaled( + scale_sizes[i], scale_sizes[i], Qt::IgnoreAspectRatio, Qt::SmoothTransformation); + const BITMAPINFOHEADER info_header{ + .biSize = sizeof(BITMAPINFOHEADER), + .biWidth = scaled_image.width(), + .biHeight = scaled_image.height() * 2, + .biPlanes = 1, + .biBitCount = bytes_per_pixel * 8, + .biCompression = BI_RGB, + .biSizeImage{}, + .biXPelsPerMeter{}, + .biYPelsPerMeter{}, + .biClrUsed{}, + .biClrImportant{}, + }; + + if (!icon_file.Write(info_header)) { + return false; + } + + for (int y = 0; y < scaled_image.height(); y++) { + const auto* line = scaled_image.scanLine(scaled_image.height() - 1 - y); + std::vector<u8> line_data(scaled_image.width() * bytes_per_pixel); + std::memcpy(line_data.data(), line, line_data.size()); + if (!icon_file.Write(line_data)) { + return false; + } + } + } + icon_file.Close(); + + return true; +#else + return false; +#endif +} diff --git a/src/yuzu/util/util.h b/src/yuzu/util/util.h index 39dd2d895..09c14ce3f 100644 --- a/src/yuzu/util/util.h +++ b/src/yuzu/util/util.h @@ -7,14 +7,22 @@ #include <QString> /// Returns a QFont object appropriate to use as a monospace font for debugging widgets, etc. -QFont GetMonospaceFont(); +[[nodiscard]] QFont GetMonospaceFont(); /// Convert a size in bytes into a readable format (KiB, MiB, etc.) -QString ReadableByteSize(qulonglong size); +[[nodiscard]] QString ReadableByteSize(qulonglong size); /** * Creates a circle pixmap from a specified color * @param color The color the pixmap shall have * @return QPixmap circle pixmap */ -QPixmap CreateCirclePixmapFromColor(const QColor& color); +[[nodiscard]] QPixmap CreateCirclePixmapFromColor(const QColor& color); + +/** + * Saves a windows icon to a file + * @param path The icons path + * @param image The image to save + * @return bool If the operation succeeded + */ +[[nodiscard]] bool SaveIconToFile(const std::string_view path, const QImage& image); |