diff options
Diffstat (limited to '')
71 files changed, 1812 insertions, 645 deletions
diff --git a/.travis-build.sh b/.travis-build.sh index 64f5aed94..bb4e6fc47 100755 --- a/.travis-build.sh +++ b/.travis-build.sh @@ -52,7 +52,7 @@ elif [ "$TRAVIS_OS_NAME" = "osx" ]; then export Qt5_DIR=$(brew --prefix)/opt/qt5 mkdir build && cd build - cmake .. -GXcode + cmake .. -DUSE_SYSTEM_CURL=ON -GXcode xcodebuild -configuration Release ctest -VV -C Release diff --git a/CMakeLists.txt b/CMakeLists.txt index ddba04ef9..d9c2f78a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.6) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules") +include(DownloadExternals) project(citra) @@ -12,6 +13,15 @@ option(ENABLE_QT "Enable the Qt frontend" ON) option(CITRA_USE_BUNDLED_QT "Download bundled Qt binaries" OFF) option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON) +option(CITRA_USE_BUNDLED_CURL "FOR MINGW ONLY: Download curl configured against winssl instead of openssl" OFF) +if (ENABLE_WEB_SERVICE AND CITRA_USE_BUNDLED_CURL AND WINDOWS AND MSVC) + message("Turning off use bundled curl as msvc can compile curl on cpr") + SET(CITRA_USE_BUNDLED_CURL OFF CACHE BOOL "" FORCE) +endif() +if (ENABLE_WEB_SERVICE AND NOT CITRA_USE_BUNDLED_CURL AND MINGW) + message(AUTHOR_WARNING "Turning on CITRA_USE_BUNDLED_CURL. Override it only if you know what you are doing.") + SET(CITRA_USE_BUNDLED_CURL ON CACHE BOOL "" FORCE) +endif() if(NOT EXISTS ${CMAKE_SOURCE_DIR}/.git/hooks/pre-commit) message(STATUS "Copying pre-commit hook") @@ -129,8 +139,8 @@ else() set(CMAKE_C_FLAGS_RELEASE "/O2 /GS- /MD" CACHE STRING "" FORCE) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}" CACHE STRING "" FORCE) - set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG" CACHE STRING "" FORCE) - set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) + set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE) + set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) endif() # Set file offset size to 64 bits. @@ -151,24 +161,6 @@ set_property(DIRECTORY APPEND PROPERTY # System imported libraries # ====================== -# This function downloads a binary library package from our external repo. -# Params: -# remote_path: path to the file to download, relative to the remote repository root -# prefix_var: name of a variable which will be set with the path to the extracted contents -function(download_bundled_external remote_path lib_name prefix_var) - set(prefix "${CMAKE_BINARY_DIR}/externals/${lib_name}") - if (NOT EXISTS "${prefix}") - message(STATUS "Downloading binaries for ${lib_name}...") - file(DOWNLOAD - https://github.com/citra-emu/ext-windows-bin/raw/master/${remote_path}${lib_name}.7z - "${CMAKE_BINARY_DIR}/externals/${lib_name}.7z" SHOW_PROGRESS) - execute_process(COMMAND ${CMAKE_COMMAND} -E tar xf "${CMAKE_BINARY_DIR}/externals/${lib_name}.7z" - WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/externals") - endif() - message(STATUS "Using bundled binaries at ${prefix}") - set(${prefix_var} "${prefix}" PARENT_SCOPE) -endfunction() - find_package(PNG QUIET) if (NOT PNG_FOUND) message(STATUS "libpng not found. Some debugging features have been disabled.") diff --git a/CMakeModules/DownloadExternals.cmake b/CMakeModules/DownloadExternals.cmake new file mode 100644 index 000000000..138a15d5a --- /dev/null +++ b/CMakeModules/DownloadExternals.cmake @@ -0,0 +1,18 @@ + +# This function downloads a binary library package from our external repo. +# Params: +# remote_path: path to the file to download, relative to the remote repository root +# prefix_var: name of a variable which will be set with the path to the extracted contents +function(download_bundled_external remote_path lib_name prefix_var) +set(prefix "${CMAKE_BINARY_DIR}/externals/${lib_name}") +if (NOT EXISTS "${prefix}") + message(STATUS "Downloading binaries for ${lib_name}...") + file(DOWNLOAD + https://github.com/citra-emu/ext-windows-bin/raw/master/${remote_path}${lib_name}.7z + "${CMAKE_BINARY_DIR}/externals/${lib_name}.7z" SHOW_PROGRESS) + execute_process(COMMAND ${CMAKE_COMMAND} -E tar xf "${CMAKE_BINARY_DIR}/externals/${lib_name}.7z" + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/externals") +endif() +message(STATUS "Using bundled binaries at ${prefix}") +set(${prefix_var} "${prefix}" PARENT_SCOPE) +endfunction()
\ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml index 94e9969f5..5524eb576 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -7,6 +7,15 @@ cache: os: Visual Studio 2017 +environment: + # Tell msys2 to add mingw64 to the path + MSYSTEM: MINGW64 + # Tell msys2 to inherit the current directory when starting the shell + CHERE_INVOKING: 1 + matrix: + - BUILD_TYPE: mingw + - BUILD_TYPE: msvc + platform: - x64 @@ -15,72 +24,149 @@ configuration: install: - git submodule update --init --recursive + - ps: | + if ($env:BUILD_TYPE -eq 'mingw') { + $dependencies = "mingw64/mingw-w64-x86_64-cmake", + "mingw64/mingw-w64-x86_64-qt5", + "mingw64/mingw-w64-x86_64-curl", + "mingw64/mingw-w64-x86_64-SDL2" + # redirect err to null to prevent warnings from becoming errors + # workaround to prevent pacman from failing due to cyclical dependencies + C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S mingw64/mingw-w64-x86_64-freetype mingw64/mingw-w64-x86_64-fontconfig" 2> $null + C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S $dependencies" 2> $null + } before_build: - - mkdir build - - cd build - - cmake -G "Visual Studio 15 2017 Win64" -DCITRA_USE_BUNDLED_QT=1 -DCITRA_USE_BUNDLED_SDL2=1 -DCMAKE_USE_OPENSSL=0 .. + - mkdir %BUILD_TYPE%_build + - cd %BUILD_TYPE%_build + - ps: | + if ($env:BUILD_TYPE -eq 'msvc') { + # redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning + cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DCITRA_USE_BUNDLED_QT=1 -DCITRA_USE_BUNDLED_SDL2=1 -DCMAKE_USE_OPENSSL=0 .. 2>&1 && exit 0' + } else { + C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DUSE_SYSTEM_CURL=1 -DCITRA_USE_BUNDLED_CURL=1 -DCMAKE_BUILD_TYPE=Release .. 2>&1" + } - cd .. -build: - project: build/citra.sln - parallel: true +build_script: + - ps: | + if ($env:BUILD_TYPE -eq 'msvc') { + # https://www.appveyor.com/docs/build-phase + msbuild msvc_build/citra.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" + } else { + C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1' + } after_build: - ps: | $GITDATE = $(git show -s --date=short --format='%ad') -replace "-","" $GITREV = $(git show -s --format='%h') - $GIT_LONG_HASH = $(git rev-parse HEAD) - # Where are these spaces coming from? Regardless, let's remove them - $MSVC_BUILD_NAME = "citra-windows-msvc-$GITDATE-$GITREV.zip" -replace " ", "" - $MSVC_BUILD_PDB = "citra-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace " ", "" - $MSVC_SEVENZIP = "citra-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", "" - $BINTRAY_VERSION = "nightly-$GIT_LONG_HASH" -replace " ", "" - - # set the build names as env vars so the artifacts can upload them - $env:MSVC_BUILD_NAME = $MSVC_BUILD_NAME - $env:MSVC_BUILD_PDB = $MSVC_BUILD_PDB - $env:MSVC_SEVENZIP = $MSVC_SEVENZIP - $env:GITREV = $GITREV - - 7z a -tzip $MSVC_BUILD_PDB .\build\bin\release\*.pdb - rm .\build\bin\release\*.pdb # Find out which kind of release we are producing by tag name if ($env:APPVEYOR_REPO_TAG_NAME) { - $RELEASE_DIST, $RELEASE_VERSION = $env:APPVEYOR_REPO_TAG_NAME.split('-') + $RELEASE_DIST, $RELEASE_VERSION = $env:APPVEYOR_REPO_TAG_NAME.split('-') } else { - # There is no repo tag - make assumptions - $RELEASE_DIST = "head" + # There is no repo tag - make assumptions + $RELEASE_DIST = "head" } - mkdir $RELEASE_DIST - Copy-Item .\build\bin\release\* -Destination $RELEASE_DIST -Recurse - Copy-Item .\license.txt -Destination $RELEASE_DIST - Copy-Item .\README.md -Destination $RELEASE_DIST - - 7z a -tzip $MSVC_BUILD_NAME $RELEASE_DIST\* - 7z a $MSVC_SEVENZIP $RELEASE_DIST + if ($env:BUILD_TYPE -eq 'msvc') { + # Where are these spaces coming from? Regardless, let's remove them + $MSVC_BUILD_ZIP = "citra-windows-msvc-$GITDATE-$GITREV.zip" -replace " ", "" + $MSVC_BUILD_PDB = "citra-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace " ", "" + $MSVC_SEVENZIP = "citra-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", "" + + # set the build names as env vars so the artifacts can upload them + $env:BUILD_ZIP = $MSVC_BUILD_ZIP + $env:BUILD_SYMBOLS = $MSVC_BUILD_PDB + $env:BUILD_UPDATE = $MSVC_SEVENZIP + + 7z a -tzip $MSVC_BUILD_PDB .\msvc_build\bin\release\*.pdb + rm .\msvc_build\bin\release\*.pdb + + mkdir $RELEASE_DIST + Copy-Item .\msvc_build\bin\release\* -Destination $RELEASE_DIST -Recurse + Copy-Item .\license.txt -Destination $RELEASE_DIST + Copy-Item .\README.md -Destination $RELEASE_DIST + 7z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\* + 7z a $MSVC_SEVENZIP $RELEASE_DIST + } else { + $MINGW_BUILD_ZIP = "citra-windows-mingw-$GITDATE-$GITREV.zip" -replace " ", "" + $MINGW_SEVENZIP = "citra-windows-mingw-$GITDATE-$GITREV.7z" -replace " ", "" + # not going to bother adding separate debug symbols for mingw, so just upload a README for it + # if someone wants to add them, change mingw to compile with -g and use objdump and strip to separate the symbols from the binary + $MINGW_NO_DEBUG_SYMBOLS = "README_No_Debug_Symbols.txt" + Set-Content -Path $MINGW_NO_DEBUG_SYMBOLS -Value "This is a workaround for Appveyor since msvc has debug symbols but mingw doesnt" -Force + + # store the build information in env vars so we can use them as artifacts + $env:BUILD_ZIP = $MINGW_BUILD_ZIP + $env:BUILD_SYMBOLS = $MINGW_NO_DEBUG_SYMBOLS + $env:BUILD_UPDATE = $MINGW_SEVENZIP + + $CMAKE_SOURCE_DIR = "$env:APPVEYOR_BUILD_FOLDER" + $CMAKE_BINARY_DIR = "$CMAKE_SOURCE_DIR/mingw_build" + $RELEASE_DIST = $RELEASE_DIST + "-mingw" + + mkdir $RELEASE_DIST + mkdir $RELEASE_DIST/platforms + + # copy the compiled binaries and other release files to the release folder + Get-ChildItem "$CMAKE_BINARY_DIR" -Recurse -Filter "citra*.exe" | Copy-Item -destination $RELEASE_DIST + # copy the libcurl dll + Get-ChildItem "$CMAKE_BINARY_DIR" -Recurse -Filter "libcurl.dll" | Copy-Item -destination $RELEASE_DIST + Copy-Item -path "$CMAKE_SOURCE_DIR/license.txt" -destination $RELEASE_DIST + Copy-Item -path "$CMAKE_SOURCE_DIR/README.md" -destination $RELEASE_DIST + # copy all the dll dependencies to the release folder + # hardcoded list because we don't build static and determining the list of dlls from the binary is a pain. + $MingwDLLs = "Qt5Core.dll","Qt5Widgets.dll","Qt5Gui.dll","Qt5OpenGL.dll", + # QT dll dependencies + "libbz2-*.dll","libicudt*.dll","libicuin*.dll","libicuuc*.dll","libffi-*.dll", + "libfreetype-*.dll","libglib-*.dll","libgobject-*.dll","libgraphite2.dll","libiconv-*.dll", + "libharfbuzz-*.dll","libintl-*.dll","libpcre-*.dll","libpcre16-*.dll","libpng16-*.dll", + # Runtime/Other dependencies + "libgcc_s_seh-*.dll","libstdc++-*.dll","libwinpthread-*.dll","SDL2.dll","zlib1.dll" + foreach ($file in $MingwDLLs) { + Copy-Item -path "C:/msys64/mingw64/bin/$file" -force -destination "$RELEASE_DIST" + } + # the above list copies a few extra debug dlls that aren't needed (thanks globbing patterns!) + # so we can remove them by hardcoding another list of extra dlls to remove + $DebugDLLs = "libicudtd*.dll","libicuind*.dll","libicuucd*.dll" + foreach ($file in $DebugDLLs) { + Remove-Item -path "$RELEASE_DIST/$file" + } + + # copy the qt windows plugin dll to platforms + Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms" + + 7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\* + 7z a $MINGW_SEVENZIP $RELEASE_DIST + } test_script: - - cd build && ctest -VV -C Release && cd .. + - cd %BUILD_TYPE%_build + - ps: | + if ($env:BUILD_TYPE -eq 'msvc') { + ctest -VV -C Release + } else { + C:\msys64\usr\bin\bash.exe -lc "ctest -VV -C Release" + } + - cd .. artifacts: - - path: $(MSVC_BUILD_NAME) - name: msvcbuild - type: zip - - path: $(MSVC_BUILD_PDB) - name: msvcdebug + - path: $(BUILD_ZIP) + name: build type: zip - - path: $(MSVC_SEVENZIP) - name: msvcupdate + - path: $(BUILD_SYMBOLS) + name: debugsymbols + - path: $(BUILD_UPDATE) + name: update deploy: provider: GitHub release: $(appveyor_repo_tag_name) auth_token: secure: "dbpsMC/MgPKWFNJCXpQl4cR8FYhepkPLjgNp/pRMktZ8oLKTqPYErfreaIxb/4P1" - artifact: msvcupdate,msvcbuild + artifact: update,build draft: false prerelease: false on: diff --git a/dist/citra.manifest b/dist/citra.manifest new file mode 100644 index 000000000..fd30b656f --- /dev/null +++ b/dist/citra.manifest @@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0"> + <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3"> + <security> + <requestedPrivileges> + <requestedExecutionLevel level="asInvoker" uiAccess="false"/> + </requestedPrivileges> + </security> + </trustInfo> + <application xmlns="urn:schemas-microsoft-com:asm.v3"> + <windowsSettings> + <dpiAware xmlns="http://schemas.microsoft.com/SMI/2005/WindowsSettings">True/PM</dpiAware> + <longPathAware xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">true</longPathAware> + </windowsSettings> + </application> + <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1"> + <application> + <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/> + <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/> + <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/> + <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/> + </application> + </compatibility> +</assembly>
\ No newline at end of file diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 8e4bcf21f..4a4ba1101 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -1,5 +1,8 @@ # Definitions for all external bundled libraries +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules) +include(DownloadExternals) + # Catch add_library(catch-single-include INTERFACE) target_include_directories(catch-single-include INTERFACE catch/single_include) @@ -54,9 +57,21 @@ add_subdirectory(enet) target_include_directories(enet INTERFACE ./enet/include) if (ENABLE_WEB_SERVICE) + # msys installed curl is configured to use openssl, but that isn't portable + # since it relies on having the bundled certs install in the home folder for SSL + # by default on mingw, download the precompiled curl thats linked against windows native ssl + if (MINGW AND CITRA_USE_BUNDLED_CURL) + download_bundled_external("curl/" "curl-7_55_1" CURL_PREFIX) + set(CURL_PREFIX "${CMAKE_BINARY_DIR}/externals/curl-7_55_1") + set(CURL_FOUND YES) + set(CURL_INCLUDE_DIR "${CURL_PREFIX}/include" CACHE PATH "Path to curl headers") + set(CURL_LIBRARY "${CURL_PREFIX}/lib/libcurldll.a" CACHE PATH "Path to curl library") + set(CURL_DLL_DIR "${CURL_PREFIX}/lib/" CACHE PATH "Path to curl.dll") + set(USE_SYSTEM_CURL ON CACHE BOOL "") + endif() # CPR - option(BUILD_TESTING OFF) - option(BUILD_CPR_TESTS OFF) + set(BUILD_TESTING OFF CACHE BOOL "") + set(BUILD_CPR_TESTS OFF CACHE BOOL "") add_subdirectory(cpr) target_include_directories(cpr INTERFACE ./cpr/include) diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp index 92484c526..de4e88cae 100644 --- a/src/audio_core/hle/source.cpp +++ b/src/audio_core/hle/source.cpp @@ -244,17 +244,27 @@ void Source::GenerateFrame() { break; } - const size_t size_to_copy = - std::min(state.current_buffer.size(), current_frame.size() - frame_position); - - std::copy(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy, - current_frame.begin() + frame_position); - state.current_buffer.erase(state.current_buffer.begin(), - state.current_buffer.begin() + size_to_copy); - - frame_position += size_to_copy; - state.next_sample_number += static_cast<u32>(size_to_copy); + switch (state.interpolation_mode) { + case InterpolationMode::None: + AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier, + current_frame, frame_position); + break; + case InterpolationMode::Linear: + AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier, + current_frame, frame_position); + break; + case InterpolationMode::Polyphase: + // TODO(merry): Implement polyphase interpolation + LOG_DEBUG(Audio_DSP, "Polyphase interpolation unimplemented; falling back to linear"); + AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier, + current_frame, frame_position); + break; + default: + UNIMPLEMENTED(); + break; + } } + state.next_sample_number += frame_position; state.filters.ProcessFrame(current_frame); } @@ -305,25 +315,6 @@ bool Source::DequeueBuffer() { return true; } - switch (state.interpolation_mode) { - case InterpolationMode::None: - state.current_buffer = - AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier); - break; - case InterpolationMode::Linear: - state.current_buffer = - AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier); - break; - case InterpolationMode::Polyphase: - // TODO(merry): Implement polyphase interpolation - state.current_buffer = - AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier); - break; - default: - UNIMPLEMENTED(); - break; - } - // the first playthrough starts at play_position, loops start at the beginning of the buffer state.current_sample_number = (!buf.has_played) ? buf.play_position : 0; state.next_sample_number = state.current_sample_number; diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp index 8a5d4181a..16e68bc5c 100644 --- a/src/audio_core/interpolate.cpp +++ b/src/audio_core/interpolate.cpp @@ -13,74 +13,64 @@ namespace AudioInterp { constexpr u64 scale_factor = 1 << 24; constexpr u64 scale_mask = scale_factor - 1; -/// Here we step over the input in steps of rate_multiplier, until we consume all of the input. +/// Here we step over the input in steps of rate, until we consume all of the input. /// Three adjacent samples are passed to fn each step. template <typename Function> -static StereoBuffer16 StepOverSamples(State& state, const StereoBuffer16& input, - float rate_multiplier, Function fn) { - ASSERT(rate_multiplier > 0); +static void StepOverSamples(State& state, StereoBuffer16& input, float rate, + DSP::HLE::StereoFrame16& output, size_t& outputi, Function fn) { + ASSERT(rate > 0); - if (input.size() < 2) - return {}; + if (input.empty()) + return; - StereoBuffer16 output; - output.reserve(static_cast<size_t>(input.size() / rate_multiplier)); + input.insert(input.begin(), {state.xn2, state.xn1}); - u64 step_size = static_cast<u64>(rate_multiplier * scale_factor); + const u64 step_size = static_cast<u64>(rate * scale_factor); + u64 fposition = state.fposition; + size_t inputi = 0; - u64 fposition = 0; - const u64 max_fposition = input.size() * scale_factor; + while (outputi < output.size()) { + inputi = static_cast<size_t>(fposition / scale_factor); - while (fposition < 1 * scale_factor) { - u64 fraction = fposition & scale_mask; - - output.push_back(fn(fraction, state.xn2, state.xn1, input[0])); - - fposition += step_size; - } - - while (fposition < 2 * scale_factor) { - u64 fraction = fposition & scale_mask; - - output.push_back(fn(fraction, state.xn1, input[0], input[1])); - - fposition += step_size; - } + if (inputi + 2 >= input.size()) { + inputi = input.size() - 2; + break; + } - while (fposition < max_fposition) { u64 fraction = fposition & scale_mask; - - size_t index = static_cast<size_t>(fposition / scale_factor); - output.push_back(fn(fraction, input[index - 2], input[index - 1], input[index])); + output[outputi++] = fn(fraction, input[inputi], input[inputi + 1], input[inputi + 2]); fposition += step_size; } - state.xn2 = input[input.size() - 2]; - state.xn1 = input[input.size() - 1]; + state.xn2 = input[inputi]; + state.xn1 = input[inputi + 1]; + state.fposition = fposition - inputi * scale_factor; - return output; + input.erase(input.begin(), input.begin() + inputi + 2); } -StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier) { - return StepOverSamples( - state, input, rate_multiplier, +void None(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output, + size_t& outputi) { + StepOverSamples( + state, input, rate, output, outputi, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { return x0; }); } -StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier) { +void Linear(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output, + size_t& outputi) { // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware. - return StepOverSamples(state, input, rate_multiplier, - [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { - // This is a saturated subtraction. (Verified by black-box fuzzing.) - s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767); - s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767); - - return std::array<s16, 2>{ - static_cast<s16>(x0[0] + fraction * delta0 / scale_factor), - static_cast<s16>(x0[1] + fraction * delta1 / scale_factor), - }; - }); + StepOverSamples(state, input, rate, output, outputi, + [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { + // This is a saturated subtraction. (Verified by black-box fuzzing.) + s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767); + s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767); + + return std::array<s16, 2>{ + static_cast<s16>(x0[0] + fraction * delta0 / scale_factor), + static_cast<s16>(x0[1] + fraction * delta1 / scale_factor), + }; + }); } } // namespace AudioInterp diff --git a/src/audio_core/interpolate.h b/src/audio_core/interpolate.h index 19a7b66cb..59f59bc14 100644 --- a/src/audio_core/interpolate.h +++ b/src/audio_core/interpolate.h @@ -6,6 +6,7 @@ #include <array> #include <vector> +#include "audio_core/hle/common.h" #include "common/common_types.h" namespace AudioInterp { @@ -14,31 +15,35 @@ namespace AudioInterp { using StereoBuffer16 = std::vector<std::array<s16, 2>>; struct State { - // Two historical samples. + /// Two historical samples. std::array<s16, 2> xn1 = {}; ///< x[n-1] std::array<s16, 2> xn2 = {}; ///< x[n-2] + /// Current fractional position. + u64 fposition = 0; }; /** * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay. * @param state Interpolation state. * @param input Input buffer. - * @param rate_multiplier Stretch factor. Must be a positive non-zero value. - * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0 - * performs upsampling. - * @return The resampled audio buffer. + * @param rate Stretch factor. Must be a positive non-zero value. + * rate > 1.0 performs decimation and rate < 1.0 performs upsampling. + * @param output The resampled audio buffer. + * @param outputi The index of output to start writing to. */ -StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier); +void None(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output, + size_t& outputi); /** * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay. * @param state Interpolation state. * @param input Input buffer. - * @param rate_multiplier Stretch factor. Must be a positive non-zero value. - * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0 - * performs upsampling. - * @return The resampled audio buffer. + * @param rate Stretch factor. Must be a positive non-zero value. + * rate > 1.0 performs decimation and rate < 1.0 performs upsampling. + * @param output The resampled audio buffer. + * @param outputi The index of output to start writing to. */ -StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier); +void Linear(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output, + size_t& outputi); } // namespace AudioInterp diff --git a/src/citra/citra.rc b/src/citra/citra.rc index fea603004..c490ef302 100644 --- a/src/citra/citra.rc +++ b/src/citra/citra.rc @@ -1,3 +1,4 @@ +#include "winresrc.h" ///////////////////////////////////////////////////////////////////////////// // // Icon @@ -7,3 +8,10 @@ // remains consistent on all systems. CITRA_ICON ICON "../../dist/citra.ico" + +///////////////////////////////////////////////////////////////////////////// +// +// RT_MANIFEST +// + +1 RT_MANIFEST "../../dist/citra.manifest" diff --git a/src/citra/config.cpp b/src/citra/config.cpp index 3869b6b5d..a48ef08c7 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -78,6 +78,8 @@ void Config::ReadValues() { Settings::values.motion_device = sdl2_config->Get( "Controls", "motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01"); + Settings::values.touch_device = + sdl2_config->Get("Controls", "touch_device", "engine:emu_window"); // Core Settings::values.use_cpu_jit = sdl2_config->GetBoolean("Core", "use_cpu_jit", true); diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index ea02a788d..4b13a2e1b 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h @@ -62,6 +62,10 @@ c_stick= # - "sensitivity": the coefficient converting mouse movement to tilting angle (default to 0.01) motion_device= +# for touch input, the following devices are available: +# - "emu_window" (default) for emulating touch input from mouse input to the emulation window. No parameters required +touch_device= + [Core] # Whether to use the Just-In-Time (JIT) compiler for CPU emulation # 0: Interpreter (slow), 1 (default): JIT (fast) diff --git a/src/citra_qt/citra-qt.rc b/src/citra_qt/citra-qt.rc index fea603004..a48a9440d 100644 --- a/src/citra_qt/citra-qt.rc +++ b/src/citra_qt/citra-qt.rc @@ -1,3 +1,4 @@ +#include "winresrc.h" ///////////////////////////////////////////////////////////////////////////// // // Icon @@ -5,5 +6,14 @@ // Icon with lowest ID value placed first to ensure application icon // remains consistent on all systems. -CITRA_ICON ICON "../../dist/citra.ico" +// QT requires that the default application icon is named IDI_ICON1 +IDI_ICON1 ICON "../../dist/citra.ico" + + +///////////////////////////////////////////////////////////////////////////// +// +// RT_MANIFEST +// + +1 RT_MANIFEST "../../dist/citra.manifest" diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index e2dceaa4c..ef114aad3 100644 --- a/src/citra_qt/configuration/config.cpp +++ b/src/citra_qt/configuration/config.cpp @@ -61,6 +61,8 @@ void Config::ReadValues() { qt_config->value("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01") .toString() .toStdString(); + Settings::values.touch_device = + qt_config->value("touch_device", "engine:emu_window").toString().toStdString(); qt_config->endGroup(); @@ -213,6 +215,7 @@ void Config::SaveValues() { QString::fromStdString(Settings::values.analogs[i])); } qt_config->setValue("motion_device", QString::fromStdString(Settings::values.motion_device)); + qt_config->setValue("touch_device", QString::fromStdString(Settings::values.touch_device)); qt_config->endGroup(); qt_config->beginGroup("Core"); diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 89578024f..cd1a8de2d 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -146,6 +146,7 @@ set(SRCS hle/service/nwm/nwm_tst.cpp hle/service/nwm/nwm_uds.cpp hle/service/nwm/uds_beacon.cpp + hle/service/nwm/uds_connection.cpp hle/service/nwm/uds_data.cpp hle/service/pm_app.cpp hle/service/ptm/ptm.cpp @@ -346,6 +347,7 @@ set(HEADERS hle/service/nwm/nwm_tst.h hle/service/nwm/nwm_uds.h hle/service/nwm/uds_beacon.h + hle/service/nwm/uds_connection.h hle/service/nwm/uds_data.h hle/service/pm_app.h hle/service/ptm/ptm.h diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 0a0b91590..34c5aa381 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -56,7 +56,9 @@ static Dynarmic::UserCallbacks GetUserCallbacks( user_callbacks.memory.Write16 = &Memory::Write16; user_callbacks.memory.Write32 = &Memory::Write32; user_callbacks.memory.Write64 = &Memory::Write64; - user_callbacks.page_table = Memory::GetCurrentPageTablePointers(); + // TODO(Subv): Re-add the page table pointers once dynarmic supports switching page tables at + // runtime. + user_callbacks.page_table = nullptr; user_callbacks.coprocessors[15] = std::make_shared<DynarmicCP15>(interpeter_state); return user_callbacks; } diff --git a/src/core/core.cpp b/src/core/core.cpp index 5332318cf..59b8768e7 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -137,7 +137,6 @@ void System::Reschedule() { } System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) { - Memory::InitMemoryMap(); LOG_DEBUG(HW_Memory, "initialized OK"); if (Settings::values.use_cpu_jit) { diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp index 54fa5c7fa..e67394177 100644 --- a/src/core/frontend/emu_window.cpp +++ b/src/core/frontend/emu_window.cpp @@ -2,14 +2,55 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <algorithm> #include <cmath> -#include "common/assert.h" -#include "core/3ds.h" -#include "core/core.h" +#include <mutex> #include "core/frontend/emu_window.h" +#include "core/frontend/input.h" #include "core/settings.h" +class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>, + public std::enable_shared_from_this<TouchState> { +public: + std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage&) override { + return std::make_unique<Device>(shared_from_this()); + } + + std::mutex mutex; + + bool touch_pressed = false; ///< True if touchpad area is currently pressed, otherwise false + + float touch_x = 0.0f; ///< Touchpad X-position + float touch_y = 0.0f; ///< Touchpad Y-position + +private: + class Device : public Input::TouchDevice { + public: + explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {} + std::tuple<float, float, bool> GetStatus() const override { + if (auto state = touch_state.lock()) { + std::lock_guard<std::mutex> guard(state->mutex); + return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed); + } + return std::make_tuple(0.0f, 0.0f, false); + } + + private: + std::weak_ptr<TouchState> touch_state; + }; +}; + +EmuWindow::EmuWindow() { + // TODO: Find a better place to set this. + config.min_client_area_size = std::make_pair(400u, 480u); + active_config = config; + touch_state = std::make_shared<TouchState>(); + Input::RegisterFactory<Input::TouchDevice>("emu_window", touch_state); +} + +EmuWindow::~EmuWindow() { + Input::UnregisterFactory<Input::TouchDevice>("emu_window"); +} + /** * Check if the given x/y coordinates are within the touchpad specified by the framebuffer layout * @param layout FramebufferLayout object describing the framebuffer size and screen positions @@ -38,22 +79,26 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) { if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y)) return; - touch_x = Core::kScreenBottomWidth * (framebuffer_x - framebuffer_layout.bottom_screen.left) / - (framebuffer_layout.bottom_screen.right - framebuffer_layout.bottom_screen.left); - touch_y = Core::kScreenBottomHeight * (framebuffer_y - framebuffer_layout.bottom_screen.top) / - (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top); + std::lock_guard<std::mutex> guard(touch_state->mutex); + touch_state->touch_x = + static_cast<float>(framebuffer_x - framebuffer_layout.bottom_screen.left) / + (framebuffer_layout.bottom_screen.right - framebuffer_layout.bottom_screen.left); + touch_state->touch_y = + static_cast<float>(framebuffer_y - framebuffer_layout.bottom_screen.top) / + (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top); - touch_pressed = true; + touch_state->touch_pressed = true; } void EmuWindow::TouchReleased() { - touch_pressed = false; - touch_x = 0; - touch_y = 0; + std::lock_guard<std::mutex> guard(touch_state->mutex); + touch_state->touch_pressed = false; + touch_state->touch_x = 0; + touch_state->touch_y = 0; } void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) { - if (!touch_pressed) + if (!touch_state->touch_pressed) return; if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y)) diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 7bdee251c..c10dee51b 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -4,11 +4,10 @@ #pragma once -#include <mutex> +#include <memory> #include <tuple> #include <utility> #include "common/common_types.h" -#include "common/math_util.h" #include "core/frontend/framebuffer_layout.h" /** @@ -69,17 +68,6 @@ public: void TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y); /** - * Gets the current touch screen state (touch X/Y coordinates and whether or not it is pressed). - * @note This should be called by the core emu thread to get a state set by the window thread. - * @todo Fix this function to be thread-safe. - * @return std::tuple of (x, y, pressed) where `x` and `y` are the touch coordinates and - * `pressed` is true if the touch screen is currently being pressed - */ - std::tuple<u16, u16, bool> GetTouchState() const { - return std::make_tuple(touch_x, touch_y, touch_pressed); - } - - /** * Returns currently active configuration. * @note Accesses to the returned object need not be consistent because it may be modified in * another thread @@ -113,15 +101,8 @@ public: void UpdateCurrentFramebufferLayout(unsigned width, unsigned height); protected: - EmuWindow() { - // TODO: Find a better place to set this. - config.min_client_area_size = std::make_pair(400u, 480u); - active_config = config; - touch_x = 0; - touch_y = 0; - touch_pressed = false; - } - virtual ~EmuWindow() {} + EmuWindow(); + virtual ~EmuWindow(); /** * Processes any pending configuration changes from the last SetConfig call. @@ -177,10 +158,8 @@ private: /// ProcessConfigurationChanges) WindowConfig active_config; ///< Internal active configuration - bool touch_pressed; ///< True if touchpad area is currently pressed, otherwise false - - u16 touch_x; ///< Touchpad X-position in native 3DS pixel coordinates (0-320) - u16 touch_y; ///< Touchpad Y-position in native 3DS pixel coordinates (0-240) + class TouchState; + std::shared_ptr<TouchState> touch_state; /** * Clip the provided coordinates to be inside the touchscreen area. diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h index 5916a901d..8c256beb5 100644 --- a/src/core/frontend/input.h +++ b/src/core/frontend/input.h @@ -126,4 +126,10 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>; */ using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>; +/** + * A touch device is an input device that returns a tuple of two floats and a bool. The floats are + * x and y coordinates in the range 0.0 - 1.0, and the bool indicates whether it is pressed. + */ +using TouchDevice = InputDevice<std::tuple<float, float, bool>>; + } // namespace Input diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp index 705859f1e..f225c23a5 100644 --- a/src/core/hle/applets/mii_selector.cpp +++ b/src/core/hle/applets/mii_selector.cpp @@ -66,7 +66,7 @@ ResultCode MiiSelector::StartImpl(const Service::APT::AppletStartupParameter& pa // continue. MiiResult result; memset(&result, 0, sizeof(result)); - result.result_code = 0; + result.return_code = 0; // Let the application know that we're closing Service::APT::MessageParameter message; @@ -82,5 +82,5 @@ ResultCode MiiSelector::StartImpl(const Service::APT::AppletStartupParameter& pa } void MiiSelector::Update() {} -} -} // namespace +} // namespace Applets +} // namespace HLE diff --git a/src/core/hle/applets/mii_selector.h b/src/core/hle/applets/mii_selector.h index ec00e29d2..136ce8948 100644 --- a/src/core/hle/applets/mii_selector.h +++ b/src/core/hle/applets/mii_selector.h @@ -16,51 +16,46 @@ namespace HLE { namespace Applets { struct MiiConfig { - u8 unk_000; - u8 unk_001; - u8 unk_002; - u8 unk_003; - u8 unk_004; + u8 enable_cancel_button; + u8 enable_guest_mii; + u8 show_on_top_screen; + INSERT_PADDING_BYTES(5); + u16 title[0x40]; + INSERT_PADDING_BYTES(4); + u8 show_guest_miis; INSERT_PADDING_BYTES(3); - u16 unk_008; - INSERT_PADDING_BYTES(0x82); - u8 unk_08C; - INSERT_PADDING_BYTES(3); - u16 unk_090; + u32 initially_selected_mii_index; + u8 guest_mii_whitelist[6]; + u8 user_mii_whitelist[0x64]; INSERT_PADDING_BYTES(2); - u32 unk_094; - u16 unk_098; - u8 unk_09A[0x64]; - u8 unk_0FE; - u8 unk_0FF; - u32 unk_100; + u32 magic_value; }; - static_assert(sizeof(MiiConfig) == 0x104, "MiiConfig structure has incorrect size"); #define ASSERT_REG_POSITION(field_name, position) \ static_assert(offsetof(MiiConfig, field_name) == position, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(unk_008, 0x08); -ASSERT_REG_POSITION(unk_08C, 0x8C); -ASSERT_REG_POSITION(unk_090, 0x90); -ASSERT_REG_POSITION(unk_094, 0x94); -ASSERT_REG_POSITION(unk_0FE, 0xFE); +ASSERT_REG_POSITION(title, 0x08); +ASSERT_REG_POSITION(show_guest_miis, 0x8C); +ASSERT_REG_POSITION(initially_selected_mii_index, 0x90); +ASSERT_REG_POSITION(guest_mii_whitelist, 0x94); #undef ASSERT_REG_POSITION struct MiiResult { - u32 result_code; - u8 unk_04; - INSERT_PADDING_BYTES(7); - u8 unk_0C[0x60]; - u8 unk_6C[0x16]; + u32 return_code; + u32 is_guest_mii_selected; + u32 selected_guest_mii_index; + // TODO(mailwl): expand to Mii Format structure: https://www.3dbrew.org/wiki/Mii + u8 selected_mii_data[0x5C]; INSERT_PADDING_BYTES(2); + u16 mii_data_checksum; + u16 guest_mii_name[0xC]; }; static_assert(sizeof(MiiResult) == 0x84, "MiiResult structure has incorrect size"); #define ASSERT_REG_POSITION(field_name, position) \ static_assert(offsetof(MiiResult, field_name) == position, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(unk_0C, 0x0C); -ASSERT_REG_POSITION(unk_6C, 0x6C); +ASSERT_REG_POSITION(selected_mii_data, 0x0C); +ASSERT_REG_POSITION(guest_mii_name, 0x6C); #undef ASSERT_REG_POSITION class MiiSelector final : public Applet { @@ -79,5 +74,5 @@ private: MiiConfig config; }; -} -} // namespace +} // namespace Applets +} // namespace HLE diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp index 496d07cb5..7f27e9655 100644 --- a/src/core/hle/kernel/memory.cpp +++ b/src/core/hle/kernel/memory.cpp @@ -8,7 +8,6 @@ #include <memory> #include <utility> #include <vector> -#include "audio_core/audio_core.h" #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" @@ -24,7 +23,7 @@ namespace Kernel { -static MemoryRegionInfo memory_regions[3]; +MemoryRegionInfo memory_regions[3]; /// Size of the APPLICATION, SYSTEM and BASE memory regions (respectively) for each system /// memory configuration type. @@ -96,9 +95,6 @@ MemoryRegionInfo* GetMemoryRegion(MemoryRegion region) { } } -std::array<u8, Memory::VRAM_SIZE> vram; -std::array<u8, Memory::N3DS_EXTRA_RAM_SIZE> n3ds_extra_ram; - void HandleSpecialMapping(VMManager& address_space, const AddressMapping& mapping) { using namespace Memory; @@ -143,30 +139,14 @@ void HandleSpecialMapping(VMManager& address_space, const AddressMapping& mappin return; } - // TODO(yuriks): Use GetPhysicalPointer when that becomes independent of the virtual - // mappings. - u8* target_pointer = nullptr; - switch (area->paddr_base) { - case VRAM_PADDR: - target_pointer = vram.data(); - break; - case DSP_RAM_PADDR: - target_pointer = AudioCore::GetDspMemory().data(); - break; - case N3DS_EXTRA_RAM_PADDR: - target_pointer = n3ds_extra_ram.data(); - break; - default: - UNREACHABLE(); - } + u8* target_pointer = Memory::GetPhysicalPointer(area->paddr_base + offset_into_region); // TODO(yuriks): This flag seems to have some other effect, but it's unknown what MemoryState memory_state = mapping.unk_flag ? MemoryState::Static : MemoryState::IO; - auto vma = address_space - .MapBackingMemory(mapping.address, target_pointer + offset_into_region, - mapping.size, memory_state) - .Unwrap(); + auto vma = + address_space.MapBackingMemory(mapping.address, target_pointer, mapping.size, memory_state) + .Unwrap(); address_space.Reprotect(vma, mapping.read_only ? VMAPermission::Read : VMAPermission::ReadWrite); } diff --git a/src/core/hle/kernel/memory.h b/src/core/hle/kernel/memory.h index 08c1a9989..da6bb3563 100644 --- a/src/core/hle/kernel/memory.h +++ b/src/core/hle/kernel/memory.h @@ -26,4 +26,6 @@ MemoryRegionInfo* GetMemoryRegion(MemoryRegion region); void HandleSpecialMapping(VMManager& address_space, const AddressMapping& mapping); void MapSharedPages(VMManager& address_space); + +extern MemoryRegionInfo memory_regions[3]; } // namespace Kernel diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index b957c45dd..324415a36 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -171,6 +171,8 @@ static void SwitchContext(Thread* new_thread) { // Cancel any outstanding wakeup events for this thread CoreTiming::UnscheduleEvent(ThreadWakeupEventType, new_thread->callback_handle); + auto previous_process = Kernel::g_current_process; + current_thread = new_thread; ready_queue.remove(new_thread->current_priority, new_thread); @@ -178,8 +180,18 @@ static void SwitchContext(Thread* new_thread) { Core::CPU().LoadContext(new_thread->context); Core::CPU().SetCP15Register(CP15_THREAD_URO, new_thread->GetTLSAddress()); + + if (previous_process != current_thread->owner_process) { + Kernel::g_current_process = current_thread->owner_process; + Memory::current_page_table = &Kernel::g_current_process->vm_manager.page_table; + // We have switched processes and thus, page tables, clear the instruction cache so we + // don't keep stale data from the previous process. + Core::CPU().ClearInstructionCache(); + } } else { current_thread = nullptr; + // Note: We do not reset the current process and current page table when idling because + // technically we haven't changed processes, our threads are just paused. } } diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index cef1f7fa8..7a007c065 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -56,6 +56,10 @@ void VMManager::Reset() { initial_vma.size = MAX_ADDRESS; vma_map.emplace(initial_vma.base, initial_vma); + page_table.pointers.fill(nullptr); + page_table.attributes.fill(Memory::PageType::Unmapped); + page_table.cached_res_count.fill(0); + UpdatePageTableForVMA(initial_vma); } @@ -328,16 +332,17 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { switch (vma.type) { case VMAType::Free: - Memory::UnmapRegion(vma.base, vma.size); + Memory::UnmapRegion(page_table, vma.base, vma.size); break; case VMAType::AllocatedMemoryBlock: - Memory::MapMemoryRegion(vma.base, vma.size, vma.backing_block->data() + vma.offset); + Memory::MapMemoryRegion(page_table, vma.base, vma.size, + vma.backing_block->data() + vma.offset); break; case VMAType::BackingMemory: - Memory::MapMemoryRegion(vma.base, vma.size, vma.backing_memory); + Memory::MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory); break; case VMAType::MMIO: - Memory::MapIoRegion(vma.base, vma.size, vma.mmio_handler); + Memory::MapIoRegion(page_table, vma.base, vma.size, vma.mmio_handler); break; } } diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 38e0d74d0..1302527bb 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h @@ -9,6 +9,7 @@ #include <vector> #include "common/common_types.h" #include "core/hle/result.h" +#include "core/memory.h" #include "core/mmio.h" namespace Kernel { @@ -102,7 +103,6 @@ struct VirtualMemoryArea { * - http://duartes.org/gustavo/blog/post/page-cache-the-affair-between-memory-and-files/ */ class VMManager final { - // TODO(yuriks): Make page tables switchable to support multiple VMManagers public: /** * The maximum amount of address space managed by the kernel. Addresses above this are never @@ -184,6 +184,10 @@ public: /// Dumps the address space layout to the log, for debugging void LogLayout(Log::Level log_level) const; + /// Each VMManager has its own page table, which is set as the main one when the owning process + /// is scheduled. + Memory::PageTable page_table; + private: using VMAIter = decltype(vma_map)::iterator; diff --git a/src/core/hle/lock.cpp b/src/core/hle/lock.cpp index 082f689c8..1c24c7ce9 100644 --- a/src/core/hle/lock.cpp +++ b/src/core/hle/lock.cpp @@ -7,5 +7,5 @@ #include <core/hle/lock.h> namespace HLE { -std::mutex g_hle_lock; +std::recursive_mutex g_hle_lock; } diff --git a/src/core/hle/lock.h b/src/core/hle/lock.h index 8265621e1..5c99fe996 100644 --- a/src/core/hle/lock.h +++ b/src/core/hle/lock.h @@ -14,5 +14,5 @@ namespace HLE { * to the emulated memory is not protected by this mutex, and should be avoided in any threads other * than the CPU thread. */ -extern std::mutex g_hle_lock; +extern std::recursive_mutex g_hle_lock; } // namespace HLE diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp index 58d94768c..8c0ba73f2 100644 --- a/src/core/hle/service/apt/apt.cpp +++ b/src/core/hle/service/apt/apt.cpp @@ -19,6 +19,7 @@ #include "core/hle/service/apt/apt_s.h" #include "core/hle/service/apt/apt_u.h" #include "core/hle/service/apt/bcfnt/bcfnt.h" +#include "core/hle/service/cfg/cfg.h" #include "core/hle/service/fs/archive.h" #include "core/hle/service/ptm/ptm.h" #include "core/hle/service/service.h" @@ -198,6 +199,143 @@ void Initialize(Service::Interface* self) { Kernel::g_handle_table.Create(slot_data->parameter_event).Unwrap()); } +static u32 DecompressLZ11(const u8* in, u8* out) { + u32_le decompressed_size; + memcpy(&decompressed_size, in, sizeof(u32)); + in += 4; + + u8 type = decompressed_size & 0xFF; + ASSERT(type == 0x11); + decompressed_size >>= 8; + + u32 current_out_size = 0; + u8 flags = 0, mask = 1; + while (current_out_size < decompressed_size) { + if (mask == 1) { + flags = *(in++); + mask = 0x80; + } else { + mask >>= 1; + } + + if (flags & mask) { + u8 byte1 = *(in++); + u32 length = byte1 >> 4; + u32 offset; + if (length == 0) { + u8 byte2 = *(in++); + u8 byte3 = *(in++); + length = (((byte1 & 0x0F) << 4) | (byte2 >> 4)) + 0x11; + offset = (((byte2 & 0x0F) << 8) | byte3) + 0x1; + } else if (length == 1) { + u8 byte2 = *(in++); + u8 byte3 = *(in++); + u8 byte4 = *(in++); + length = (((byte1 & 0x0F) << 12) | (byte2 << 4) | (byte3 >> 4)) + 0x111; + offset = (((byte3 & 0x0F) << 8) | byte4) + 0x1; + } else { + u8 byte2 = *(in++); + length = (byte1 >> 4) + 0x1; + offset = (((byte1 & 0x0F) << 8) | byte2) + 0x1; + } + + for (u32 i = 0; i < length; i++) { + *out = *(out - offset); + ++out; + } + + current_out_size += length; + } else { + *(out++) = *(in++); + current_out_size++; + } + } + return decompressed_size; +} + +static bool LoadSharedFont() { + u8 font_region_code; + switch (CFG::GetRegionValue()) { + case 4: // CHN + font_region_code = 2; + break; + case 5: // KOR + font_region_code = 3; + break; + case 6: // TWN + font_region_code = 4; + break; + default: // JPN/EUR/USA + font_region_code = 1; + break; + } + + const u64_le shared_font_archive_id_low = 0x0004009b00014002 | ((font_region_code - 1) << 8); + const u64_le shared_font_archive_id_high = 0x00000001ffffff00; + std::vector<u8> shared_font_archive_id(16); + std::memcpy(&shared_font_archive_id[0], &shared_font_archive_id_low, sizeof(u64)); + std::memcpy(&shared_font_archive_id[8], &shared_font_archive_id_high, sizeof(u64)); + FileSys::Path archive_path(shared_font_archive_id); + auto archive_result = Service::FS::OpenArchive(Service::FS::ArchiveIdCode::NCCH, archive_path); + if (archive_result.Failed()) + return false; + + std::vector<u8> romfs_path(20, 0); // 20-byte all zero path for opening RomFS + FileSys::Path file_path(romfs_path); + FileSys::Mode open_mode = {}; + open_mode.read_flag.Assign(1); + auto file_result = Service::FS::OpenFileFromArchive(*archive_result, file_path, open_mode); + if (file_result.Failed()) + return false; + + auto romfs = std::move(file_result).Unwrap(); + std::vector<u8> romfs_buffer(romfs->backend->GetSize()); + romfs->backend->Read(0, romfs_buffer.size(), romfs_buffer.data()); + romfs->backend->Close(); + + const char16_t* file_name[4] = {u"cbf_std.bcfnt.lz", u"cbf_zh-Hans-CN.bcfnt.lz", + u"cbf_ko-Hang-KR.bcfnt.lz", u"cbf_zh-Hant-TW.bcfnt.lz"}; + const u8* font_file = + RomFS::GetFilePointer(romfs_buffer.data(), {file_name[font_region_code - 1]}); + if (font_file == nullptr) + return false; + + struct { + u32_le status; + u32_le region; + u32_le decompressed_size; + INSERT_PADDING_WORDS(0x1D); + } shared_font_header{}; + static_assert(sizeof(shared_font_header) == 0x80, "shared_font_header has incorrect size"); + + shared_font_header.status = 2; // successfully loaded + shared_font_header.region = font_region_code; + shared_font_header.decompressed_size = + DecompressLZ11(font_file, shared_font_mem->GetPointer(0x80)); + std::memcpy(shared_font_mem->GetPointer(), &shared_font_header, sizeof(shared_font_header)); + *shared_font_mem->GetPointer(0x83) = 'U'; // Change the magic from "CFNT" to "CFNU" + + return true; +} + +static bool LoadLegacySharedFont() { + // This is the legacy method to load shared font. + // The expected format is a decrypted, uncompressed BCFNT file with the 0x80 byte header + // generated by the APT:U service. The best way to get is by dumping it from RAM. We've provided + // a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file + // "shared_font.bin" in the Citra "sysdata" directory. + std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT; + + FileUtil::CreateFullPath(filepath); // Create path if not already created + FileUtil::IOFile file(filepath, "rb"); + if (file.IsOpen()) { + file.ReadBytes(shared_font_mem->GetPointer(), file.GetSize()); + return true; + } + + return false; +} + void GetSharedFont(Service::Interface* self) { IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x44, 0, 0); // 0x00440000 IPC::RequestBuilder rb = rp.MakeBuilder(2, 2); @@ -206,11 +344,20 @@ void GetSharedFont(Service::Interface* self) { Core::Telemetry().AddField(Telemetry::FieldType::Session, "RequiresSharedFont", true); if (!shared_font_loaded) { - LOG_ERROR(Service_APT, "shared font file missing - go dump it from your 3ds"); - rb.Push<u32>(-1); // TODO: Find the right error code - rb.Skip(1 + 2, true); - Core::System::GetInstance().SetStatus(Core::System::ResultStatus::ErrorSharedFont); - return; + // On real 3DS, font loading happens on booting. However, we load it on demand to coordinate + // with CFG region auto configuration, which happens later than APT initialization. + if (LoadSharedFont()) { + shared_font_loaded = true; + } else if (LoadLegacySharedFont()) { + LOG_WARNING(Service_APT, "Loaded shared font by legacy method"); + shared_font_loaded = true; + } else { + LOG_ERROR(Service_APT, "shared font file missing - go dump it from your 3ds"); + rb.Push<u32>(-1); // TODO: Find the right error code + rb.Skip(1 + 2, true); + Core::System::GetInstance().SetStatus(Core::System::ResultStatus::ErrorSharedFont); + return; + } } // The shared font has to be relocated to the new address before being passed to the @@ -863,125 +1010,6 @@ void CheckNew3DS(Service::Interface* self) { LOG_WARNING(Service_APT, "(STUBBED) called"); } -static u32 DecompressLZ11(const u8* in, u8* out) { - u32_le decompressed_size; - memcpy(&decompressed_size, in, sizeof(u32)); - in += 4; - - u8 type = decompressed_size & 0xFF; - ASSERT(type == 0x11); - decompressed_size >>= 8; - - u32 current_out_size = 0; - u8 flags = 0, mask = 1; - while (current_out_size < decompressed_size) { - if (mask == 1) { - flags = *(in++); - mask = 0x80; - } else { - mask >>= 1; - } - - if (flags & mask) { - u8 byte1 = *(in++); - u32 length = byte1 >> 4; - u32 offset; - if (length == 0) { - u8 byte2 = *(in++); - u8 byte3 = *(in++); - length = (((byte1 & 0x0F) << 4) | (byte2 >> 4)) + 0x11; - offset = (((byte2 & 0x0F) << 8) | byte3) + 0x1; - } else if (length == 1) { - u8 byte2 = *(in++); - u8 byte3 = *(in++); - u8 byte4 = *(in++); - length = (((byte1 & 0x0F) << 12) | (byte2 << 4) | (byte3 >> 4)) + 0x111; - offset = (((byte3 & 0x0F) << 8) | byte4) + 0x1; - } else { - u8 byte2 = *(in++); - length = (byte1 >> 4) + 0x1; - offset = (((byte1 & 0x0F) << 8) | byte2) + 0x1; - } - - for (u32 i = 0; i < length; i++) { - *out = *(out - offset); - ++out; - } - - current_out_size += length; - } else { - *(out++) = *(in++); - current_out_size++; - } - } - return decompressed_size; -} - -static bool LoadSharedFont() { - // TODO (wwylele): load different font archive for region CHN/KOR/TWN - const u64_le shared_font_archive_id_low = 0x0004009b00014002; - const u64_le shared_font_archive_id_high = 0x00000001ffffff00; - std::vector<u8> shared_font_archive_id(16); - std::memcpy(&shared_font_archive_id[0], &shared_font_archive_id_low, sizeof(u64)); - std::memcpy(&shared_font_archive_id[8], &shared_font_archive_id_high, sizeof(u64)); - FileSys::Path archive_path(shared_font_archive_id); - auto archive_result = Service::FS::OpenArchive(Service::FS::ArchiveIdCode::NCCH, archive_path); - if (archive_result.Failed()) - return false; - - std::vector<u8> romfs_path(20, 0); // 20-byte all zero path for opening RomFS - FileSys::Path file_path(romfs_path); - FileSys::Mode open_mode = {}; - open_mode.read_flag.Assign(1); - auto file_result = Service::FS::OpenFileFromArchive(*archive_result, file_path, open_mode); - if (file_result.Failed()) - return false; - - auto romfs = std::move(file_result).Unwrap(); - std::vector<u8> romfs_buffer(romfs->backend->GetSize()); - romfs->backend->Read(0, romfs_buffer.size(), romfs_buffer.data()); - romfs->backend->Close(); - - const u8* font_file = RomFS::GetFilePointer(romfs_buffer.data(), {u"cbf_std.bcfnt.lz"}); - if (font_file == nullptr) - return false; - - struct { - u32_le status; - u32_le region; - u32_le decompressed_size; - INSERT_PADDING_WORDS(0x1D); - } shared_font_header{}; - static_assert(sizeof(shared_font_header) == 0x80, "shared_font_header has incorrect size"); - - shared_font_header.status = 2; // successfully loaded - shared_font_header.region = 1; // region JPN/EUR/USA - shared_font_header.decompressed_size = - DecompressLZ11(font_file, shared_font_mem->GetPointer(0x80)); - std::memcpy(shared_font_mem->GetPointer(), &shared_font_header, sizeof(shared_font_header)); - *shared_font_mem->GetPointer(0x83) = 'U'; // Change the magic from "CFNT" to "CFNU" - - return true; -} - -static bool LoadLegacySharedFont() { - // This is the legacy method to load shared font. - // The expected format is a decrypted, uncompressed BCFNT file with the 0x80 byte header - // generated by the APT:U service. The best way to get is by dumping it from RAM. We've provided - // a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file - // "shared_font.bin" in the Citra "sysdata" directory. - std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT; - - FileUtil::CreateFullPath(filepath); // Create path if not already created - FileUtil::IOFile file(filepath, "rb"); - if (file.IsOpen()) { - file.ReadBytes(shared_font_mem->GetPointer(), file.GetSize()); - return true; - } - - return false; -} - void Init() { AddService(new APT_A_Interface); AddService(new APT_S_Interface); @@ -995,16 +1023,6 @@ void Init() { MemoryPermission::ReadWrite, MemoryPermission::Read, 0, Kernel::MemoryRegion::SYSTEM, "APT:SharedFont"); - if (LoadSharedFont()) { - shared_font_loaded = true; - } else if (LoadLegacySharedFont()) { - LOG_WARNING(Service_APT, "Loaded shared font by legacy method"); - shared_font_loaded = true; - } else { - LOG_WARNING(Service_APT, "Unable to load shared font"); - shared_font_loaded = false; - } - lock = Kernel::Mutex::Create(false, "APT_U:Lock"); cpu_percent = 0; diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp index 3dbeb27cc..f26a1f65f 100644 --- a/src/core/hle/service/cfg/cfg.cpp +++ b/src/core/hle/service/cfg/cfg.cpp @@ -168,7 +168,7 @@ void GetCountryCodeID(Service::Interface* self) { cmd_buff[2] = country_code_id; } -static u32 GetRegionValue() { +u32 GetRegionValue() { if (Settings::values.region_value == Settings::REGION_VALUE_AUTO_SELECT) return preferred_region_code; diff --git a/src/core/hle/service/cfg/cfg.h b/src/core/hle/service/cfg/cfg.h index 1659ebf32..282b6936b 100644 --- a/src/core/hle/service/cfg/cfg.h +++ b/src/core/hle/service/cfg/cfg.h @@ -101,6 +101,8 @@ void GetCountryCodeString(Service::Interface* self); */ void GetCountryCodeID(Service::Interface* self); +u32 GetRegionValue(); + /** * CFG::SecureInfoGetRegion service function * Inputs: diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 31f34a7ae..aa5d821f9 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -7,9 +7,9 @@ #include <cmath> #include <memory> #include "common/logging/log.h" +#include "core/3ds.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/frontend/emu_window.h" #include "core/frontend/input.h" #include "core/hle/ipc.h" #include "core/hle/kernel/event.h" @@ -19,7 +19,6 @@ #include "core/hle/service/hid/hid_spvr.h" #include "core/hle/service/hid/hid_user.h" #include "core/hle/service/service.h" -#include "video_core/video_core.h" namespace Service { namespace HID { @@ -59,6 +58,7 @@ static std::array<std::unique_ptr<Input::ButtonDevice>, Settings::NativeButton:: buttons; static std::unique_ptr<Input::AnalogDevice> circle_pad; static std::unique_ptr<Input::MotionDevice> motion_device; +static std::unique_ptr<Input::TouchDevice> touch_device; DirectionState GetStickDirectionState(s16 circle_pad_x, s16 circle_pad_y) { // 30 degree and 60 degree are angular thresholds for directions @@ -96,6 +96,7 @@ static void LoadInputDevices() { circle_pad = Input::CreateDevice<Input::AnalogDevice>( Settings::values.analogs[Settings::NativeAnalog::CirclePad]); motion_device = Input::CreateDevice<Input::MotionDevice>(Settings::values.motion_device); + touch_device = Input::CreateDevice<Input::TouchDevice>(Settings::values.touch_device); } static void UnloadInputDevices() { @@ -104,6 +105,7 @@ static void UnloadInputDevices() { } circle_pad.reset(); motion_device.reset(); + touch_device.reset(); } static void UpdatePadCallback(u64 userdata, int cycles_late) { @@ -172,8 +174,10 @@ static void UpdatePadCallback(u64 userdata, int cycles_late) { // Get the current touch entry TouchDataEntry& touch_entry = mem->touch.entries[mem->touch.index]; bool pressed = false; - - std::tie(touch_entry.x, touch_entry.y, pressed) = VideoCore::g_emu_window->GetTouchState(); + float x, y; + std::tie(x, y, pressed) = touch_device->GetStatus(); + touch_entry.x = static_cast<u16>(x * Core::kScreenBottomWidth); + touch_entry.y = static_cast<u16>(y * Core::kScreenBottomHeight); touch_entry.valid.Assign(pressed ? 1 : 0); // TODO(bunnei): We're not doing anything with offset 0xA8 + 0x18 of HID SharedMemory, which diff --git a/src/core/hle/service/nwm/nwm_uds.cpp b/src/core/hle/service/nwm/nwm_uds.cpp index 6dbdff044..893bbb1e7 100644 --- a/src/core/hle/service/nwm/nwm_uds.cpp +++ b/src/core/hle/service/nwm/nwm_uds.cpp @@ -4,6 +4,7 @@ #include <array> #include <cstring> +#include <mutex> #include <unordered_map> #include <vector> #include "common/common_types.h" @@ -15,8 +16,10 @@ #include "core/hle/result.h" #include "core/hle/service/nwm/nwm_uds.h" #include "core/hle/service/nwm/uds_beacon.h" +#include "core/hle/service/nwm/uds_connection.h" #include "core/hle/service/nwm/uds_data.h" #include "core/memory.h" +#include "network/network.h" namespace Service { namespace NWM { @@ -51,6 +54,135 @@ static NetworkInfo network_info; // Event that will generate and send the 802.11 beacon frames. static int beacon_broadcast_event; +// Mutex to synchronize access to the list of received beacons between the emulation thread and the +// network thread. +static std::mutex beacon_mutex; + +// Number of beacons to store before we start dropping the old ones. +// TODO(Subv): Find a more accurate value for this limit. +constexpr size_t MaxBeaconFrames = 15; + +// List of the last <MaxBeaconFrames> beacons received from the network. +static std::deque<Network::WifiPacket> received_beacons; + +/** + * Returns a list of received 802.11 beacon frames from the specified sender since the last call. + */ +std::deque<Network::WifiPacket> GetReceivedBeacons(const MacAddress& sender) { + std::lock_guard<std::mutex> lock(beacon_mutex); + // TODO(Subv): Filter by sender. + return std::move(received_beacons); +} + +/// Sends a WifiPacket to the room we're currently connected to. +void SendPacket(Network::WifiPacket& packet) { + // TODO(Subv): Implement. +} + +// Inserts the received beacon frame in the beacon queue and removes any older beacons if the size +// limit is exceeded. +void HandleBeaconFrame(const Network::WifiPacket& packet) { + std::lock_guard<std::mutex> lock(beacon_mutex); + + received_beacons.emplace_back(packet); + + // Discard old beacons if the buffer is full. + if (received_beacons.size() > MaxBeaconFrames) + received_beacons.pop_front(); +} + +/* + * Returns an available index in the nodes array for the + * currently-hosted UDS network. + */ +static u16 GetNextAvailableNodeId() { + ASSERT_MSG(connection_status.status == static_cast<u32>(NetworkStatus::ConnectedAsHost), + "Can not accept clients if we're not hosting a network"); + + for (u16 index = 0; index < connection_status.max_nodes; ++index) { + if ((connection_status.node_bitmask & (1 << index)) == 0) + return index; + } + + // Any connection attempts to an already full network should have been refused. + ASSERT_MSG(false, "No available connection slots in the network"); +} + +/* + * Start a connection sequence with an UDS server. The sequence starts by sending an 802.11 + * authentication frame with SEQ1. + */ +void StartConnectionSequence(const MacAddress& server) { + ASSERT(connection_status.status == static_cast<u32>(NetworkStatus::NotConnected)); + + // TODO(Subv): Handle timeout. + + // Send an authentication frame with SEQ1 + using Network::WifiPacket; + WifiPacket auth_request; + auth_request.channel = network_channel; + auth_request.data = GenerateAuthenticationFrame(AuthenticationSeq::SEQ1); + auth_request.destination_address = server; + auth_request.type = WifiPacket::PacketType::Authentication; + + SendPacket(auth_request); +} + +/// Sends an Association Response frame to the specified mac address +void SendAssociationResponseFrame(const MacAddress& address) { + ASSERT_MSG(connection_status.status == static_cast<u32>(NetworkStatus::ConnectedAsHost)); + + using Network::WifiPacket; + WifiPacket assoc_response; + assoc_response.channel = network_channel; + // TODO(Subv): This will cause multiple clients to end up with the same association id, but + // we're not using that for anything. + u16 association_id = 1; + assoc_response.data = GenerateAssocResponseFrame(AssocStatus::Successful, association_id, + network_info.network_id); + assoc_response.destination_address = address; + assoc_response.type = WifiPacket::PacketType::AssociationResponse; + + SendPacket(assoc_response); +} + +/* + * Handles the authentication request frame and sends the authentication response and association + * response frames. Once an Authentication frame with SEQ1 is received by the server, it responds + * with an Authentication frame containing SEQ2, and immediately sends an Association response frame + * containing the details of the access point and the assigned association id for the new client. + */ +void HandleAuthenticationFrame(const Network::WifiPacket& packet) { + // Only the SEQ1 auth frame is handled here, the SEQ2 frame doesn't need any special behavior + if (GetAuthenticationSeqNumber(packet.data) == AuthenticationSeq::SEQ1) { + ASSERT_MSG(connection_status.status == static_cast<u32>(NetworkStatus::ConnectedAsHost)); + + // Respond with an authentication response frame with SEQ2 + using Network::WifiPacket; + WifiPacket auth_request; + auth_request.channel = network_channel; + auth_request.data = GenerateAuthenticationFrame(AuthenticationSeq::SEQ2); + auth_request.destination_address = packet.transmitter_address; + auth_request.type = WifiPacket::PacketType::Authentication; + + SendPacket(auth_request); + + SendAssociationResponseFrame(packet.transmitter_address); + } +} + +/// Callback to parse and handle a received wifi packet. +void OnWifiPacketReceived(const Network::WifiPacket& packet) { + switch (packet.type) { + case Network::WifiPacket::PacketType::Beacon: + HandleBeaconFrame(packet); + break; + case Network::WifiPacket::PacketType::Authentication: + HandleAuthenticationFrame(packet); + break; + } +} + /** * NWM_UDS::Shutdown service function * Inputs: @@ -111,8 +243,7 @@ static void RecvBeaconBroadcastData(Interface* self) { u32 total_size = sizeof(BeaconDataReplyHeader); // Retrieve all beacon frames that were received from the desired mac address. - std::deque<WifiPacket> beacons = - GetReceivedPackets(WifiPacket::PacketType::Beacon, mac_address); + auto beacons = GetReceivedBeacons(mac_address); BeaconDataReplyHeader data_reply_header{}; data_reply_header.total_entries = beacons.size(); @@ -193,6 +324,9 @@ static void InitializeWithVersion(Interface* self) { rb.Push(RESULT_SUCCESS); rb.PushCopyHandles(Kernel::g_handle_table.Create(connection_status_event).Unwrap()); + // TODO(Subv): Connect the OnWifiPacketReceived function to the wifi packet received callback of + // the room we're currently in. + LOG_DEBUG(Service_NWM, "called sharedmem_size=0x%08X, version=0x%08X, sharedmem_handle=0x%08X", sharedmem_size, version, sharedmem_handle); } @@ -610,32 +744,23 @@ static void BeaconBroadcastCallback(u64 userdata, int cycles_late) { if (connection_status.status != static_cast<u32>(NetworkStatus::ConnectedAsHost)) return; - // TODO(Subv): Actually send the beacon. std::vector<u8> frame = GenerateBeaconFrame(network_info, node_info); + using Network::WifiPacket; + WifiPacket packet; + packet.type = WifiPacket::PacketType::Beacon; + packet.data = std::move(frame); + packet.destination_address = Network::BroadcastMac; + packet.channel = network_channel; + + SendPacket(packet); + // Start broadcasting the network, send a beacon frame every 102.4ms. CoreTiming::ScheduleEvent(msToCycles(DefaultBeaconInterval * MillisecondsPerTU) - cycles_late, beacon_broadcast_event, 0); } /* - * Returns an available index in the nodes array for the - * currently-hosted UDS network. - */ -static u32 GetNextAvailableNodeId() { - ASSERT_MSG(connection_status.status == static_cast<u32>(NetworkStatus::ConnectedAsHost), - "Can not accept clients if we're not hosting a network"); - - for (unsigned index = 0; index < connection_status.max_nodes; ++index) { - if ((connection_status.node_bitmask & (1 << index)) == 0) - return index; - } - - // Any connection attempts to an already full network should have been refused. - ASSERT_MSG(false, "No available connection slots in the network"); -} - -/* * Called when a client connects to an UDS network we're hosting, * updates the connection status and signals the update event. * @param network_node_id Network Node Id of the connecting client. diff --git a/src/core/hle/service/nwm/nwm_uds.h b/src/core/hle/service/nwm/nwm_uds.h index 141f49f9c..f1caaf974 100644 --- a/src/core/hle/service/nwm/nwm_uds.h +++ b/src/core/hle/service/nwm/nwm_uds.h @@ -42,6 +42,7 @@ using NodeList = std::vector<NodeInfo>; enum class NetworkStatus { NotConnected = 3, ConnectedAsHost = 6, + Connecting = 7, ConnectedAsClient = 9, ConnectedAsSpectator = 10, }; @@ -85,6 +86,17 @@ static_assert(offsetof(NetworkInfo, oui_value) == 0xC, "oui_value is at the wron static_assert(offsetof(NetworkInfo, wlan_comm_id) == 0x10, "wlancommid is at the wrong offset."); static_assert(sizeof(NetworkInfo) == 0x108, "NetworkInfo has incorrect size."); +/// Additional block tag ids in the Beacon and Association Response frames +enum class TagId : u8 { + SSID = 0, + SupportedRates = 1, + DSParameterSet = 2, + TrafficIndicationMap = 5, + CountryInformation = 7, + ERPInformation = 42, + VendorSpecific = 221 +}; + class NWM_UDS final : public Interface { public: NWM_UDS(); diff --git a/src/core/hle/service/nwm/uds_beacon.cpp b/src/core/hle/service/nwm/uds_beacon.cpp index 6332b404c..552eaf65e 100644 --- a/src/core/hle/service/nwm/uds_beacon.cpp +++ b/src/core/hle/service/nwm/uds_beacon.cpp @@ -325,8 +325,5 @@ std::vector<u8> GenerateBeaconFrame(const NetworkInfo& network_info, const NodeL return buffer; } -std::deque<WifiPacket> GetReceivedPackets(WifiPacket::PacketType type, const MacAddress& sender) { - return {}; -} } // namespace NWM } // namespace Service diff --git a/src/core/hle/service/nwm/uds_beacon.h b/src/core/hle/service/nwm/uds_beacon.h index caacf4c6f..50cc76da2 100644 --- a/src/core/hle/service/nwm/uds_beacon.h +++ b/src/core/hle/service/nwm/uds_beacon.h @@ -17,17 +17,6 @@ namespace NWM { using MacAddress = std::array<u8, 6>; constexpr std::array<u8, 3> NintendoOUI = {0x00, 0x1F, 0x32}; -/// Additional block tag ids in the Beacon frames -enum class TagId : u8 { - SSID = 0, - SupportedRates = 1, - DSParameterSet = 2, - TrafficIndicationMap = 5, - CountryInformation = 7, - ERPInformation = 42, - VendorSpecific = 221 -}; - /** * Internal vendor-specific tag ids as stored inside * VendorSpecific blocks in the Beacon frames. @@ -135,20 +124,6 @@ struct BeaconData { static_assert(sizeof(BeaconData) == 0x12, "BeaconData has incorrect size."); -/// Information about a received WiFi packet. -/// Acts as our own 802.11 header. -struct WifiPacket { - enum class PacketType { Beacon, Data }; - - PacketType type; ///< The type of 802.11 frame, Beacon / Data. - - /// Raw 802.11 frame data, starting at the management frame header for management frames. - std::vector<u8> data; - MacAddress transmitter_address; ///< Mac address of the transmitter. - MacAddress destination_address; ///< Mac address of the receiver. - u8 channel; ///< WiFi channel where this frame was transmitted. -}; - /** * Decrypts the beacon data buffer for the network described by `network_info`. */ @@ -161,10 +136,5 @@ void DecryptBeaconData(const NetworkInfo& network_info, std::vector<u8>& buffer) */ std::vector<u8> GenerateBeaconFrame(const NetworkInfo& network_info, const NodeList& nodes); -/** - * Returns a list of received 802.11 frames from the specified sender - * matching the type since the last call. - */ -std::deque<WifiPacket> GetReceivedPackets(WifiPacket::PacketType type, const MacAddress& sender); } // namespace NWM } // namespace Service diff --git a/src/core/hle/service/nwm/uds_connection.cpp b/src/core/hle/service/nwm/uds_connection.cpp new file mode 100644 index 000000000..c8a76ec2a --- /dev/null +++ b/src/core/hle/service/nwm/uds_connection.cpp @@ -0,0 +1,79 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/hle/service/nwm/nwm_uds.h" +#include "core/hle/service/nwm/uds_connection.h" +#include "fmt/format.h" + +namespace Service { +namespace NWM { + +// Note: These values were taken from a packet capture of an o3DS XL +// broadcasting a Super Smash Bros. 4 lobby. +constexpr u16 DefaultExtraCapabilities = 0x0431; + +std::vector<u8> GenerateAuthenticationFrame(AuthenticationSeq seq) { + AuthenticationFrame frame{}; + frame.auth_seq = static_cast<u16>(seq); + + std::vector<u8> data(sizeof(frame)); + std::memcpy(data.data(), &frame, sizeof(frame)); + + return data; +} + +AuthenticationSeq GetAuthenticationSeqNumber(const std::vector<u8>& body) { + AuthenticationFrame frame; + std::memcpy(&frame, body.data(), sizeof(frame)); + + return static_cast<AuthenticationSeq>(frame.auth_seq); +} + +/** + * Generates an SSID tag of an 802.11 Beacon frame with an 8-byte character representation of the + * specified network id as the SSID value. + * @param network_id The network id to use. + * @returns A buffer with the SSID tag. + */ +static std::vector<u8> GenerateSSIDTag(u32 network_id) { + constexpr u8 SSIDSize = 8; + + struct { + u8 id = static_cast<u8>(TagId::SSID); + u8 size = SSIDSize; + } tag_header; + + std::vector<u8> buffer(sizeof(tag_header) + SSIDSize); + + std::memcpy(buffer.data(), &tag_header, sizeof(tag_header)); + + std::string network_name = fmt::format("{0:08X}", network_id); + + std::memcpy(buffer.data() + sizeof(tag_header), network_name.c_str(), SSIDSize); + + return buffer; +} + +std::vector<u8> GenerateAssocResponseFrame(AssocStatus status, u16 association_id, u32 network_id) { + AssociationResponseFrame frame{}; + frame.capabilities = DefaultExtraCapabilities; + frame.status_code = static_cast<u16>(status); + // The association id is ORed with this magic value (0xC000) + constexpr u16 AssociationIdMagic = 0xC000; + frame.assoc_id = association_id | AssociationIdMagic; + + std::vector<u8> data(sizeof(frame)); + std::memcpy(data.data(), &frame, sizeof(frame)); + + auto ssid_tag = GenerateSSIDTag(network_id); + data.insert(data.end(), ssid_tag.begin(), ssid_tag.end()); + + // TODO(Subv): Add the SupportedRates tag. + // TODO(Subv): Add the DSParameterSet tag. + // TODO(Subv): Add the ERPInformation tag. + return data; +} + +} // namespace NWM +} // namespace Service diff --git a/src/core/hle/service/nwm/uds_connection.h b/src/core/hle/service/nwm/uds_connection.h new file mode 100644 index 000000000..73f55a4fd --- /dev/null +++ b/src/core/hle/service/nwm/uds_connection.h @@ -0,0 +1,51 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include "common/common_types.h" +#include "common/swap.h" +#include "core/hle/service/service.h" + +namespace Service { +namespace NWM { + +/// Sequence number of the 802.11 authentication frames. +enum class AuthenticationSeq : u16 { SEQ1 = 1, SEQ2 = 2 }; + +enum class AuthAlgorithm : u16 { OpenSystem = 0 }; + +enum class AuthStatus : u16 { Successful = 0 }; + +enum class AssocStatus : u16 { Successful = 0 }; + +struct AuthenticationFrame { + u16_le auth_algorithm = static_cast<u16>(AuthAlgorithm::OpenSystem); + u16_le auth_seq; + u16_le status_code = static_cast<u16>(AuthStatus::Successful); +}; + +static_assert(sizeof(AuthenticationFrame) == 6, "AuthenticationFrame has wrong size"); + +struct AssociationResponseFrame { + u16_le capabilities; + u16_le status_code; + u16_le assoc_id; +}; + +static_assert(sizeof(AssociationResponseFrame) == 6, "AssociationResponseFrame has wrong size"); + +/// Generates an 802.11 authentication frame, starting at the frame body. +std::vector<u8> GenerateAuthenticationFrame(AuthenticationSeq seq); + +/// Returns the sequence number from the body of an Authentication frame. +AuthenticationSeq GetAuthenticationSeqNumber(const std::vector<u8>& body); + +/// Generates an 802.11 association response frame with the specified status, association id and +/// network id, starting at the frame body. +std::vector<u8> GenerateAssocResponseFrame(AssocStatus status, u16 association_id, u32 network_id); + +} // namespace NWM +} // namespace Service diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index b98938cb4..dfc36748c 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp @@ -1334,7 +1334,7 @@ void CallSVC(u32 immediate) { MICROPROFILE_SCOPE(Kernel_SVC); // Lock the global kernel mutex when we enter the kernel HLE. - std::lock_guard<std::mutex> lock(HLE::g_hle_lock); + std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); const FunctionDef* info = GetSVCInfo(immediate); if (info) { diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp index 74e336487..69cdc0867 100644 --- a/src/core/loader/3dsx.cpp +++ b/src/core/loader/3dsx.cpp @@ -270,6 +270,7 @@ ResultStatus AppLoader_THREEDSX::Load() { Kernel::g_current_process = Kernel::Process::Create(std::move(codeset)); Kernel::g_current_process->svc_access_mask.set(); Kernel::g_current_process->address_mappings = default_address_mappings; + Memory::current_page_table = &Kernel::g_current_process->vm_manager.page_table; // Attach the default resource limit (APPLICATION) to the process Kernel::g_current_process->resource_limit = diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index cfcde9167..2f27606a1 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -397,6 +397,7 @@ ResultStatus AppLoader_ELF::Load() { Kernel::g_current_process = Kernel::Process::Create(std::move(codeset)); Kernel::g_current_process->svc_access_mask.set(); Kernel::g_current_process->address_mappings = default_address_mappings; + Memory::current_page_table = &Kernel::g_current_process->vm_manager.page_table; // Attach the default resource limit (APPLICATION) to the process Kernel::g_current_process->resource_limit = diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 7aff7f29b..79ea50147 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -172,6 +172,7 @@ ResultStatus AppLoader_NCCH::LoadExec() { codeset->memory = std::make_shared<std::vector<u8>>(std::move(code)); Kernel::g_current_process = Kernel::Process::Create(std::move(codeset)); + Memory::current_page_table = &Kernel::g_current_process->vm_manager.page_table; // Attach a resource limit to the process based on the resource limit category Kernel::g_current_process->resource_limit = diff --git a/src/core/memory.cpp b/src/core/memory.cpp index a3c5f4a9d..68a6b1ac2 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -4,83 +4,31 @@ #include <array> #include <cstring> +#include "audio_core/audio_core.h" #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" #include "common/swap.h" +#include "core/hle/kernel/memory.h" #include "core/hle/kernel/process.h" #include "core/hle/lock.h" #include "core/memory.h" #include "core/memory_setup.h" -#include "core/mmio.h" #include "video_core/renderer_base.h" #include "video_core/video_core.h" namespace Memory { -enum class PageType { - /// Page is unmapped and should cause an access error. - Unmapped, - /// Page is mapped to regular memory. This is the only type you can get pointers to. - Memory, - /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and - /// invalidation - RasterizerCachedMemory, - /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions. - Special, - /// Page is mapped to a I/O region, but also needs to check for rasterizer cache flushing and - /// invalidation - RasterizerCachedSpecial, -}; - -struct SpecialRegion { - VAddr base; - u32 size; - MMIORegionPointer handler; -}; +static std::array<u8, Memory::VRAM_SIZE> vram; +static std::array<u8, Memory::N3DS_EXTRA_RAM_SIZE> n3ds_extra_ram; -/** - * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely - * mimics the way a real CPU page table works, but instead is optimized for minimal decoding and - * fetching requirements when accessing. In the usual case of an access to regular memory, it only - * requires an indexed fetch and a check for NULL. - */ -struct PageTable { - /** - * Array of memory pointers backing each page. An entry can only be non-null if the - * corresponding entry in the `attributes` array is of type `Memory`. - */ - std::array<u8*, PAGE_TABLE_NUM_ENTRIES> pointers; - - /** - * Contains MMIO handlers that back memory regions whose entries in the `attribute` array is of - * type `Special`. - */ - std::vector<SpecialRegion> special_regions; - - /** - * Array of fine grained page attributes. If it is set to any value other than `Memory`, then - * the corresponding entry in `pointers` MUST be set to null. - */ - std::array<PageType, PAGE_TABLE_NUM_ENTRIES> attributes; - - /** - * Indicates the number of externally cached resources touching a page that should be - * flushed before the memory is accessed - */ - std::array<u8, PAGE_TABLE_NUM_ENTRIES> cached_res_count; -}; - -/// Singular page table used for the singleton process -static PageTable main_page_table; -/// Currently active page table -static PageTable* current_page_table = &main_page_table; +PageTable* current_page_table = nullptr; std::array<u8*, PAGE_TABLE_NUM_ENTRIES>* GetCurrentPageTablePointers() { return ¤t_page_table->pointers; } -static void MapPages(u32 base, u32 size, u8* memory, PageType type) { +static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, PageType type) { LOG_DEBUG(HW_Memory, "Mapping %p onto %08X-%08X", memory, base * PAGE_SIZE, (base + size) * PAGE_SIZE); @@ -91,9 +39,9 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) { while (base != end) { ASSERT_MSG(base < PAGE_TABLE_NUM_ENTRIES, "out of range mapping at %08X", base); - current_page_table->attributes[base] = type; - current_page_table->pointers[base] = memory; - current_page_table->cached_res_count[base] = 0; + page_table.attributes[base] = type; + page_table.pointers[base] = memory; + page_table.cached_res_count[base] = 0; base += 1; if (memory != nullptr) @@ -101,30 +49,24 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) { } } -void InitMemoryMap() { - main_page_table.pointers.fill(nullptr); - main_page_table.attributes.fill(PageType::Unmapped); - main_page_table.cached_res_count.fill(0); -} - -void MapMemoryRegion(VAddr base, u32 size, u8* target) { +void MapMemoryRegion(PageTable& page_table, VAddr base, u32 size, u8* target) { ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: %08X", size); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: %08X", base); - MapPages(base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory); + MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory); } -void MapIoRegion(VAddr base, u32 size, MMIORegionPointer mmio_handler) { +void MapIoRegion(PageTable& page_table, VAddr base, u32 size, MMIORegionPointer mmio_handler) { ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: %08X", size); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: %08X", base); - MapPages(base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special); + MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special); - current_page_table->special_regions.emplace_back(SpecialRegion{base, size, mmio_handler}); + page_table.special_regions.emplace_back(SpecialRegion{base, size, mmio_handler}); } -void UnmapRegion(VAddr base, u32 size) { +void UnmapRegion(PageTable& page_table, VAddr base, u32 size) { ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: %08X", size); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: %08X", base); - MapPages(base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped); + MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped); } /** @@ -183,7 +125,7 @@ T Read(const VAddr vaddr) { } // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state - std::lock_guard<std::mutex> lock(HLE::g_hle_lock); + std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; switch (type) { @@ -224,7 +166,7 @@ void Write(const VAddr vaddr, const T data) { } // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state - std::lock_guard<std::mutex> lock(HLE::g_hle_lock); + std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; switch (type) { @@ -273,8 +215,7 @@ bool IsValidVirtualAddress(const VAddr vaddr) { } bool IsValidPhysicalAddress(const PAddr paddr) { - boost::optional<VAddr> vaddr = PhysicalToVirtualAddress(paddr); - return vaddr && IsValidVirtualAddress(*vaddr); + return GetPhysicalPointer(paddr) != nullptr; } u8* GetPointer(const VAddr vaddr) { @@ -306,9 +247,63 @@ std::string ReadCString(VAddr vaddr, std::size_t max_length) { } u8* GetPhysicalPointer(PAddr address) { - // TODO(Subv): This call should not go through the application's memory mapping. - boost::optional<VAddr> vaddr = PhysicalToVirtualAddress(address); - return vaddr ? GetPointer(*vaddr) : nullptr; + struct MemoryArea { + PAddr paddr_base; + u32 size; + }; + + static constexpr MemoryArea memory_areas[] = { + {VRAM_PADDR, VRAM_SIZE}, + {IO_AREA_PADDR, IO_AREA_SIZE}, + {DSP_RAM_PADDR, DSP_RAM_SIZE}, + {FCRAM_PADDR, FCRAM_N3DS_SIZE}, + {N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE}, + }; + + const auto area = + std::find_if(std::begin(memory_areas), std::end(memory_areas), [&](const auto& area) { + return address >= area.paddr_base && address < area.paddr_base + area.size; + }); + + if (area == std::end(memory_areas)) { + LOG_ERROR(HW_Memory, "unknown GetPhysicalPointer @ 0x%08X", address); + return nullptr; + } + + if (area->paddr_base == IO_AREA_PADDR) { + LOG_ERROR(HW_Memory, "MMIO mappings are not supported yet. phys_addr=0x%08X", address); + return nullptr; + } + + u32 offset_into_region = address - area->paddr_base; + + u8* target_pointer = nullptr; + switch (area->paddr_base) { + case VRAM_PADDR: + target_pointer = vram.data() + offset_into_region; + break; + case DSP_RAM_PADDR: + target_pointer = AudioCore::GetDspMemory().data() + offset_into_region; + break; + case FCRAM_PADDR: + for (const auto& region : Kernel::memory_regions) { + if (offset_into_region >= region.base && + offset_into_region < region.base + region.size) { + target_pointer = + region.linear_heap_memory->data() + offset_into_region - region.base; + break; + } + } + ASSERT_MSG(target_pointer != nullptr, "Invalid FCRAM address"); + break; + case N3DS_EXTRA_RAM_PADDR: + target_pointer = n3ds_extra_ram.data() + offset_into_region; + break; + default: + UNREACHABLE(); + } + + return target_pointer; } void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { diff --git a/src/core/memory.h b/src/core/memory.h index c8c56babd..b228a48c2 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -7,8 +7,10 @@ #include <array> #include <cstddef> #include <string> +#include <vector> #include <boost/optional.hpp> #include "common/common_types.h" +#include "core/mmio.h" namespace Memory { @@ -21,6 +23,59 @@ const u32 PAGE_MASK = PAGE_SIZE - 1; const int PAGE_BITS = 12; const size_t PAGE_TABLE_NUM_ENTRIES = 1 << (32 - PAGE_BITS); +enum class PageType { + /// Page is unmapped and should cause an access error. + Unmapped, + /// Page is mapped to regular memory. This is the only type you can get pointers to. + Memory, + /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and + /// invalidation + RasterizerCachedMemory, + /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions. + Special, + /// Page is mapped to a I/O region, but also needs to check for rasterizer cache flushing and + /// invalidation + RasterizerCachedSpecial, +}; + +struct SpecialRegion { + VAddr base; + u32 size; + MMIORegionPointer handler; +}; + +/** + * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely + * mimics the way a real CPU page table works, but instead is optimized for minimal decoding and + * fetching requirements when accessing. In the usual case of an access to regular memory, it only + * requires an indexed fetch and a check for NULL. + */ +struct PageTable { + /** + * Array of memory pointers backing each page. An entry can only be non-null if the + * corresponding entry in the `attributes` array is of type `Memory`. + */ + std::array<u8*, PAGE_TABLE_NUM_ENTRIES> pointers; + + /** + * Contains MMIO handlers that back memory regions whose entries in the `attribute` array is of + * type `Special`. + */ + std::vector<SpecialRegion> special_regions; + + /** + * Array of fine grained page attributes. If it is set to any value other than `Memory`, then + * the corresponding entry in `pointers` MUST be set to null. + */ + std::array<PageType, PAGE_TABLE_NUM_ENTRIES> attributes; + + /** + * Indicates the number of externally cached resources touching a page that should be + * flushed before the memory is accessed + */ + std::array<u8, PAGE_TABLE_NUM_ENTRIES> cached_res_count; +}; + /// Physical memory regions as seen from the ARM11 enum : PAddr { /// IO register area @@ -126,6 +181,9 @@ enum : VAddr { NEW_LINEAR_HEAP_VADDR_END = NEW_LINEAR_HEAP_VADDR + NEW_LINEAR_HEAP_SIZE, }; +/// Currently active page table +extern PageTable* current_page_table; + bool IsValidVirtualAddress(const VAddr addr); bool IsValidPhysicalAddress(const PAddr addr); @@ -169,8 +227,6 @@ boost::optional<VAddr> PhysicalToVirtualAddress(PAddr addr); /** * Gets a pointer to the memory region beginning at the specified physical address. - * - * @note This is currently implemented using PhysicalToVirtualAddress(). */ u8* GetPhysicalPointer(PAddr address); @@ -209,4 +265,4 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode); * retrieve the current page table for that purpose. */ std::array<u8*, PAGE_TABLE_NUM_ENTRIES>* GetCurrentPageTablePointers(); -} +} // namespace Memory diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h index 3fdf3a87d..c58baa50b 100644 --- a/src/core/memory_setup.h +++ b/src/core/memory_setup.h @@ -9,24 +9,24 @@ namespace Memory { -void InitMemoryMap(); - /** * Maps an allocated buffer onto a region of the emulated process address space. * + * @param page_table The page table of the emulated process. * @param base The address to start mapping at. Must be page-aligned. * @param size The amount of bytes to map. Must be page-aligned. * @param target Buffer with the memory backing the mapping. Must be of length at least `size`. */ -void MapMemoryRegion(VAddr base, u32 size, u8* target); +void MapMemoryRegion(PageTable& page_table, VAddr base, u32 size, u8* target); /** * Maps a region of the emulated process address space as a IO region. + * @param page_table The page table of the emulated process. * @param base The address to start mapping at. Must be page-aligned. * @param size The amount of bytes to map. Must be page-aligned. * @param mmio_handler The handler that backs the mapping. */ -void MapIoRegion(VAddr base, u32 size, MMIORegionPointer mmio_handler); +void MapIoRegion(PageTable& page_table, VAddr base, u32 size, MMIORegionPointer mmio_handler); -void UnmapRegion(VAddr base, u32 size); +void UnmapRegion(PageTable& page_table, VAddr base, u32 size); } diff --git a/src/core/settings.h b/src/core/settings.h index bf8014c5a..024f14666 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -81,6 +81,7 @@ struct Values { std::array<std::string, NativeButton::NumButtons> buttons; std::array<std::string, NativeAnalog::NumAnalogs> analogs; std::string motion_device; + std::string touch_device; // Core bool use_cpu_jit; diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index 1df6c5677..8384ce744 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp @@ -3,20 +3,30 @@ // Refer to the license.txt file included. #include "core/core.h" +#include "core/memory.h" #include "core/memory_setup.h" #include "tests/core/arm/arm_test_common.h" namespace ArmTests { +static Memory::PageTable page_table; + TestEnvironment::TestEnvironment(bool mutable_memory_) : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) { - Memory::MapIoRegion(0x00000000, 0x80000000, test_memory); - Memory::MapIoRegion(0x80000000, 0x80000000, test_memory); + + page_table.pointers.fill(nullptr); + page_table.attributes.fill(Memory::PageType::Unmapped); + page_table.cached_res_count.fill(0); + + Memory::MapIoRegion(page_table, 0x00000000, 0x80000000, test_memory); + Memory::MapIoRegion(page_table, 0x80000000, 0x80000000, test_memory); + + Memory::current_page_table = &page_table; } TestEnvironment::~TestEnvironment() { - Memory::UnmapRegion(0x80000000, 0x80000000); - Memory::UnmapRegion(0x00000000, 0x80000000); + Memory::UnmapRegion(page_table, 0x80000000, 0x80000000); + Memory::UnmapRegion(page_table, 0x00000000, 0x80000000); } void TestEnvironment::SetMemory64(VAddr vaddr, u64 value) { diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cffa4c952..82f47d8a9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,6 +1,7 @@ set(SRCS command_processor.cpp debug_utils/debug_utils.cpp + geometry_pipeline.cpp pica.cpp primitive_assembly.cpp regs.cpp @@ -29,6 +30,7 @@ set(SRCS set(HEADERS command_processor.h debug_utils/debug_utils.h + geometry_pipeline.h gpu_debugger.h pica.h pica_state.h diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index f98ca3302..fb65a3a0a 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -161,6 +161,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): g_state.immediate.current_attribute = 0; + g_state.immediate.reset_geometry_pipeline = true; default_attr_counter = 0; break; @@ -234,16 +235,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { shader_engine->Run(g_state.vs, shader_unit); shader_unit.WriteOutput(regs.vs, output); - // Send to renderer - using Pica::Shader::OutputVertex; - auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, - const OutputVertex& v2) { - VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); - }; - - g_state.primitive_assembler.SubmitVertex( - Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output), - AddTriangle); + // Send to geometry pipeline + if (g_state.immediate.reset_geometry_pipeline) { + g_state.geometry_pipeline.Reconfigure(); + g_state.immediate.reset_geometry_pipeline = false; + } + ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); + g_state.geometry_pipeline.Setup(shader_engine); + g_state.geometry_pipeline.SubmitVertex(output); } } } @@ -321,8 +320,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // The size has been tuned for optimal balance between hit-rate and the cost of lookup const size_t VERTEX_CACHE_SIZE = 32; std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; - std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; - Shader::OutputVertex output_vertex; + std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache; + Shader::AttributeBuffer vs_output; unsigned int vertex_cache_pos = 0; vertex_cache_ids.fill(-1); @@ -332,6 +331,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); + g_state.geometry_pipeline.Reconfigure(); + g_state.geometry_pipeline.Setup(shader_engine); + if (g_state.geometry_pipeline.NeedIndexInput()) + ASSERT(is_indexed); + for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { // Indexed rendering doesn't use the start offset unsigned int vertex = @@ -345,6 +349,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { bool vertex_cache_hit = false; if (is_indexed) { + if (g_state.geometry_pipeline.NeedIndexInput()) { + g_state.geometry_pipeline.SubmitIndex(vertex); + continue; + } + if (g_debug_context && Pica::g_debug_context->recorder) { int size = index_u16 ? 2 : 1; memory_accesses.AddAccess(base_address + index_info.offset + size * index, @@ -353,7 +362,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { if (vertex == vertex_cache_ids[i]) { - output_vertex = vertex_cache[i]; + vs_output = vertex_cache[i]; vertex_cache_hit = true; break; } @@ -362,7 +371,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (!vertex_cache_hit) { // Initialize data for the current vertex - Shader::AttributeBuffer input, output{}; + Shader::AttributeBuffer input; loader.LoadVertex(base_address, index, vertex, input, memory_accesses); // Send to vertex shader @@ -371,26 +380,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { (void*)&input); shader_unit.LoadInput(regs.vs, input); shader_engine->Run(g_state.vs, shader_unit); - shader_unit.WriteOutput(regs.vs, output); - - // Retrieve vertex from register data - output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output); + shader_unit.WriteOutput(regs.vs, vs_output); if (is_indexed) { - vertex_cache[vertex_cache_pos] = output_vertex; + vertex_cache[vertex_cache_pos] = vs_output; vertex_cache_ids[vertex_cache_pos] = vertex; vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; } } - // Send to renderer - using Pica::Shader::OutputVertex; - auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, - const OutputVertex& v2) { - VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); - }; - - primitive_assembler.SubmitVertex(output_vertex, AddTriangle); + // Send to geometry pipeline + g_state.geometry_pipeline.SubmitVertex(vs_output); } for (auto& range : memory_accesses.ranges) { diff --git a/src/video_core/geometry_pipeline.cpp b/src/video_core/geometry_pipeline.cpp new file mode 100644 index 000000000..b146e2ecb --- /dev/null +++ b/src/video_core/geometry_pipeline.cpp @@ -0,0 +1,274 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/geometry_pipeline.h" +#include "video_core/pica_state.h" +#include "video_core/regs.h" +#include "video_core/renderer_base.h" +#include "video_core/video_core.h" + +namespace Pica { + +/// An attribute buffering interface for different pipeline modes +class GeometryPipelineBackend { +public: + virtual ~GeometryPipelineBackend() = default; + + /// Checks if there is no incomplete data transfer + virtual bool IsEmpty() const = 0; + + /// Checks if the pipeline needs a direct input from index buffer + virtual bool NeedIndexInput() const = 0; + + /// Submits an index from index buffer + virtual void SubmitIndex(unsigned int val) = 0; + + /** + * Submits vertex attributes + * @param input attributes of a vertex output from vertex shader + * @return if the buffer is full and the geometry shader should be invoked + */ + virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0; +}; + +// In the Point mode, vertex attributes are sent to the input registers in the geometry shader unit. +// The size of vertex shader outputs and geometry shader inputs are constants. Geometry shader is +// invoked upon inputs buffer filled up by vertex shader outputs. For example, if we have a geometry +// shader that takes 6 inputs, and the vertex shader outputs 2 attributes, it would take 3 vertices +// for one geometry shader invocation. +// TODO: what happens when the input size is not divisible by the output size? +class GeometryPipeline_Point : public GeometryPipelineBackend { +public: + GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) { + ASSERT(regs.pipeline.variable_primitive == 0); + ASSERT(regs.gs.input_to_uniform == 0); + vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; + size_t gs_input_num = regs.gs.max_input_attribute_index + 1; + ASSERT(gs_input_num % vs_output_num == 0); + buffer_cur = attribute_buffer.attr; + buffer_end = attribute_buffer.attr + gs_input_num; + } + + bool IsEmpty() const override { + return buffer_cur == attribute_buffer.attr; + } + + bool NeedIndexInput() const override { + return false; + } + + void SubmitIndex(unsigned int val) override { + UNREACHABLE(); + } + + bool SubmitVertex(const Shader::AttributeBuffer& input) override { + buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + if (buffer_cur == buffer_end) { + buffer_cur = attribute_buffer.attr; + unit.LoadInput(regs.gs, attribute_buffer); + return true; + } + return false; + } + +private: + const Regs& regs; + Shader::GSUnitState& unit; + Shader::AttributeBuffer attribute_buffer; + Math::Vec4<float24>* buffer_cur; + Math::Vec4<float24>* buffer_end; + unsigned int vs_output_num; +}; + +// In VariablePrimitive mode, vertex attributes are buffered into the uniform registers in the +// geometry shader unit. The number of vertex is variable, which is specified by the first index +// value in the batch. This mode is usually used for subdivision. +class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend { +public: + GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup) + : regs(regs), setup(setup) { + ASSERT(regs.pipeline.variable_primitive == 1); + ASSERT(regs.gs.input_to_uniform == 1); + vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; + } + + bool IsEmpty() const override { + return need_index; + } + + bool NeedIndexInput() const override { + return need_index; + } + + void SubmitIndex(unsigned int val) override { + DEBUG_ASSERT(need_index); + + // The number of vertex input is put to the uniform register + float24 vertex_num = float24::FromFloat32(val); + setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num); + + // The second uniform register and so on are used for receiving input vertices + buffer_cur = setup.uniforms.f + 1; + + main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1; + total_vertex_num = val; + need_index = false; + } + + bool SubmitVertex(const Shader::AttributeBuffer& input) override { + DEBUG_ASSERT(!need_index); + if (main_vertex_num != 0) { + // For main vertices, receive all attributes + buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + --main_vertex_num; + } else { + // For other vertices, only receive the first attribute (usually the position) + *(buffer_cur++) = input.attr[0]; + } + --total_vertex_num; + + if (total_vertex_num == 0) { + need_index = true; + return true; + } + + return false; + } + +private: + bool need_index = true; + const Regs& regs; + Shader::ShaderSetup& setup; + unsigned int main_vertex_num; + unsigned int total_vertex_num; + Math::Vec4<float24>* buffer_cur; + unsigned int vs_output_num; +}; + +// In FixedPrimitive mode, vertex attributes are buffered into the uniform registers in the geometry +// shader unit. The number of vertex per shader invocation is constant. This is usually used for +// particle system. +class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend { +public: + GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup) + : regs(regs), setup(setup) { + ASSERT(regs.pipeline.variable_primitive == 0); + ASSERT(regs.gs.input_to_uniform == 1); + vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; + ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1); + size_t vertex_num = regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1; + buffer_cur = buffer_begin = setup.uniforms.f + regs.pipeline.gs_config.start_index; + buffer_end = buffer_begin + vs_output_num * vertex_num; + } + + bool IsEmpty() const override { + return buffer_cur == buffer_begin; + } + + bool NeedIndexInput() const override { + return false; + } + + void SubmitIndex(unsigned int val) override { + UNREACHABLE(); + } + + bool SubmitVertex(const Shader::AttributeBuffer& input) override { + buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + if (buffer_cur == buffer_end) { + buffer_cur = buffer_begin; + return true; + } + return false; + } + +private: + const Regs& regs; + Shader::ShaderSetup& setup; + Math::Vec4<float24>* buffer_begin; + Math::Vec4<float24>* buffer_cur; + Math::Vec4<float24>* buffer_end; + unsigned int vs_output_num; +}; + +GeometryPipeline::GeometryPipeline(State& state) : state(state) {} + +GeometryPipeline::~GeometryPipeline() = default; + +void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) { + this->vertex_handler = vertex_handler; +} + +void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) { + if (!backend) + return; + + this->shader_engine = shader_engine; + shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset); +} + +void GeometryPipeline::Reconfigure() { + ASSERT(!backend || backend->IsEmpty()); + + if (state.regs.pipeline.use_gs == PipelineRegs::UseGS::No) { + backend = nullptr; + return; + } + + ASSERT(state.regs.pipeline.use_gs == PipelineRegs::UseGS::Yes); + + // The following assumes that when geometry shader is in use, the shader unit 3 is configured as + // a geometry shader unit. + // TODO: what happens if this is not true? + ASSERT(state.regs.pipeline.gs_unit_exclusive_configuration == 1); + ASSERT(state.regs.gs.shader_mode == ShaderRegs::ShaderMode::GS); + + state.gs_unit.ConfigOutput(state.regs.gs); + + ASSERT(state.regs.pipeline.vs_outmap_total_minus_1_a == + state.regs.pipeline.vs_outmap_total_minus_1_b); + + switch (state.regs.pipeline.gs_config.mode) { + case PipelineRegs::GSMode::Point: + backend = std::make_unique<GeometryPipeline_Point>(state.regs, state.gs_unit); + break; + case PipelineRegs::GSMode::VariablePrimitive: + backend = std::make_unique<GeometryPipeline_VariablePrimitive>(state.regs, state.gs); + break; + case PipelineRegs::GSMode::FixedPrimitive: + backend = std::make_unique<GeometryPipeline_FixedPrimitive>(state.regs, state.gs); + break; + default: + UNREACHABLE(); + } +} + +bool GeometryPipeline::NeedIndexInput() const { + if (!backend) + return false; + return backend->NeedIndexInput(); +} + +void GeometryPipeline::SubmitIndex(unsigned int val) { + backend->SubmitIndex(val); +} + +void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) { + if (!backend) { + // No backend means the geometry shader is disabled, so we send the vertex shader output + // directly to the primitive assembler. + vertex_handler(input); + } else { + if (backend->SubmitVertex(input)) { + shader_engine->Run(state.gs, state.gs_unit); + + // The uniform b15 is set to true after every geometry shader invocation. This is useful + // for the shader to know if this is the first invocation in a batch, if the program set + // b15 to false first. + state.gs.uniforms.b[15] = true; + } + } +} + +} // namespace Pica diff --git a/src/video_core/geometry_pipeline.h b/src/video_core/geometry_pipeline.h new file mode 100644 index 000000000..91fdd3192 --- /dev/null +++ b/src/video_core/geometry_pipeline.h @@ -0,0 +1,49 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include "video_core/shader/shader.h" + +namespace Pica { + +struct State; + +class GeometryPipelineBackend; + +/// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler +class GeometryPipeline { +public: + explicit GeometryPipeline(State& state); + ~GeometryPipeline(); + + /// Sets the handler for receiving vertex outputs from vertex shader + void SetVertexHandler(Shader::VertexHandler vertex_handler); + + /** + * Setup the geometry shader unit if it is in use + * @param shader_engine the shader engine for the geometry shader to run + */ + void Setup(Shader::ShaderEngine* shader_engine); + + /// Reconfigures the pipeline according to current register settings + void Reconfigure(); + + /// Checks if the pipeline needs a direct input from index buffer + bool NeedIndexInput() const; + + /// Submits an index from index buffer. Call this only when NeedIndexInput returns true + void SubmitIndex(unsigned int val); + + /// Submits vertex attributes output from vertex shader + void SubmitVertex(const Shader::AttributeBuffer& input); + +private: + Shader::VertexHandler vertex_handler; + Shader::ShaderEngine* shader_engine; + std::unique_ptr<GeometryPipelineBackend> backend; + State& state; +}; +} // namespace Pica diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index b95148a6a..218e06883 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -3,9 +3,11 @@ // Refer to the license.txt file included. #include <cstring> +#include "video_core/geometry_pipeline.h" #include "video_core/pica.h" #include "video_core/pica_state.h" -#include "video_core/regs_pipeline.h" +#include "video_core/renderer_base.h" +#include "video_core/video_core.h" namespace Pica { @@ -24,6 +26,23 @@ void Zero(T& o) { memset(&o, 0, sizeof(o)); } +State::State() : geometry_pipeline(*this) { + auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) { + using Pica::Shader::OutputVertex; + auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1, + const OutputVertex& v2) { + VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); + }; + primitive_assembler.SubmitVertex( + Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle); + }; + + auto SetWinding = [this]() { primitive_assembler.SetWinding(); }; + + g_state.gs_unit.SetVertexHandler(SubmitVertex, SetWinding); + g_state.geometry_pipeline.SetVertexHandler(SubmitVertex); +} + void State::Reset() { Zero(regs); Zero(vs); diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 864a2c9e6..c6634a0bc 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -8,6 +8,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "common/vector_math.h" +#include "video_core/geometry_pipeline.h" #include "video_core/primitive_assembly.h" #include "video_core/regs.h" #include "video_core/shader/shader.h" @@ -16,6 +17,7 @@ namespace Pica { /// Struct used to describe current Pica state struct State { + State(); void Reset(); /// Pica registers @@ -137,8 +139,17 @@ struct State { Shader::AttributeBuffer input_vertex; // Index of the next attribute to be loaded into `input_vertex`. u32 current_attribute = 0; + // Indicates the immediate mode just started and the geometry pipeline needs to reconfigure + bool reset_geometry_pipeline = true; } immediate; + // the geometry shader needs to be kept in the global state because some shaders relie on + // preserved register value across shader invocation. + // TODO: also bring the three vertex shader units here and implement the shader scheduler. + Shader::GSUnitState gs_unit; + + GeometryPipeline geometry_pipeline; + // This is constructed with a dummy triangle topology PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; }; diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index acd2ac5e2..9c3dd4cab 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -17,15 +17,18 @@ template <typename VertexType> void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler) { switch (topology) { - // TODO: Figure out what's different with TriangleTopology::Shader. case PipelineRegs::TriangleTopology::List: case PipelineRegs::TriangleTopology::Shader: if (buffer_index < 2) { buffer[buffer_index++] = vtx; } else { buffer_index = 0; - - triangle_handler(buffer[0], buffer[1], vtx); + if (topology == PipelineRegs::TriangleTopology::Shader && winding) { + triangle_handler(buffer[1], buffer[0], vtx); + winding = false; + } else { + triangle_handler(buffer[0], buffer[1], vtx); + } } break; @@ -51,9 +54,15 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, } template <typename VertexType> +void PrimitiveAssembler<VertexType>::SetWinding() { + winding = true; +} + +template <typename VertexType> void PrimitiveAssembler<VertexType>::Reset() { buffer_index = 0; strip_ready = false; + winding = false; } template <typename VertexType> diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index e8eccdf27..12de8e3b9 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h @@ -30,6 +30,12 @@ struct PrimitiveAssembler { void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); /** + * Invert the vertex order of the next triangle. Called by geometry shader emitter. + * This only takes effect for TriangleTopology::Shader. + */ + void SetWinding(); + + /** * Resets the internal state of the PrimitiveAssembler. */ void Reset(); @@ -45,6 +51,7 @@ private: int buffer_index; VertexType buffer[2]; bool strip_ready = false; + bool winding = false; }; } // namespace diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h index 8b6369297..e78c3e331 100644 --- a/src/video_core/regs_pipeline.h +++ b/src/video_core/regs_pipeline.h @@ -147,7 +147,15 @@ struct PipelineRegs { // Number of vertices to render u32 num_vertices; - INSERT_PADDING_WORDS(0x1); + enum class UseGS : u32 { + No = 0, + Yes = 2, + }; + + union { + BitField<0, 2, UseGS> use_gs; + BitField<31, 1, u32> variable_primitive; + }; // The index of the first vertex to render u32 vertex_offset; @@ -218,7 +226,29 @@ struct PipelineRegs { GPUMode gpu_mode; - INSERT_PADDING_WORDS(0x18); + INSERT_PADDING_WORDS(0x4); + BitField<0, 4, u32> vs_outmap_total_minus_1_a; + INSERT_PADDING_WORDS(0x6); + BitField<0, 4, u32> vs_outmap_total_minus_1_b; + + enum class GSMode : u32 { + Point = 0, + VariablePrimitive = 1, + FixedPrimitive = 2, + }; + + union { + BitField<0, 8, GSMode> mode; + BitField<8, 4, u32> fixed_vertex_num_minus_1; + BitField<12, 4, u32> stride_minus_1; + BitField<16, 4, u32> start_index; + } gs_config; + + INSERT_PADDING_WORDS(0x1); + + u32 variable_vertex_main_num_minus_1; + + INSERT_PADDING_WORDS(0x9); enum class TriangleTopology : u32 { List = 0, diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h index 2874fd127..4fef00d76 100644 --- a/src/video_core/regs_rasterizer.h +++ b/src/video_core/regs_rasterizer.h @@ -5,10 +5,10 @@ #pragma once #include <array> - #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/pica_types.h" namespace Pica { @@ -31,7 +31,17 @@ struct RasterizerRegs { BitField<0, 24, u32> viewport_size_y; - INSERT_PADDING_WORDS(0x9); + INSERT_PADDING_WORDS(0x3); + + BitField<0, 1, u32> clip_enable; + BitField<0, 24, u32> clip_coef[4]; // float24 + + Math::Vec4<float24> GetClipCoef() const { + return {float24::FromRaw(clip_coef[0]), float24::FromRaw(clip_coef[1]), + float24::FromRaw(clip_coef[2]), float24::FromRaw(clip_coef[3])}; + } + + INSERT_PADDING_WORDS(0x1); BitField<0, 24, u32> viewport_depth_range; // float24 BitField<0, 24, u32> viewport_depth_near_plane; // float24 diff --git a/src/video_core/regs_shader.h b/src/video_core/regs_shader.h index ddb1ee451..c15d4d162 100644 --- a/src/video_core/regs_shader.h +++ b/src/video_core/regs_shader.h @@ -24,9 +24,16 @@ struct ShaderRegs { INSERT_PADDING_WORDS(0x4); + enum ShaderMode { + GS = 0x08, + VS = 0xA0, + }; + union { // Number of input attributes to shader unit - 1 BitField<0, 4, u32> max_input_attribute_index; + BitField<8, 8, u32> input_to_uniform; + BitField<24, 8, ShaderMode> shader_mode; }; // Offset to shader program entry point (in words) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index aa95ef21d..7b0cd1b66 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -169,6 +169,8 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); // Sync fixed function OpenGL state + SyncClipEnabled(); + SyncClipCoef(); SyncCullMode(); SyncBlendEnabled(); SyncBlendFuncs(); @@ -401,6 +403,18 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncCullMode(); break; + // Clipping plane + case PICA_REG_INDEX(rasterizer.clip_enable): + SyncClipEnabled(); + break; + + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[0], 0x48): + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[1], 0x49): + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[2], 0x4a): + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[3], 0x4b): + SyncClipCoef(); + break; + // Depth modifiers case PICA_REG_INDEX(rasterizer.viewport_depth_range): SyncDepthScale(); @@ -1280,6 +1294,20 @@ void RasterizerOpenGL::SetShader() { } } +void RasterizerOpenGL::SyncClipEnabled() { + state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0; +} + +void RasterizerOpenGL::SyncClipCoef() { + const auto raw_clip_coef = Pica::g_state.regs.rasterizer.GetClipCoef(); + const GLvec4 new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), + raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; + if (new_clip_coef != uniform_block_data.data.clip_coef) { + uniform_block_data.data.clip_coef = new_clip_coef; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncCullMode() { const auto& regs = Pica::g_state.regs; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 78e218efe..46c62961c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -151,14 +151,21 @@ private: LightSrc light_src[8]; alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages alignas(16) GLvec4 tev_combiner_buffer_color; + alignas(16) GLvec4 clip_coef; }; static_assert( - sizeof(UniformData) == 0x460, + sizeof(UniformData) == 0x470, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); + /// Syncs the clip enabled status to match the PICA register + void SyncClipEnabled(); + + /// Syncs the clip coefficients to match the PICA register + void SyncClipCoef(); + /// Sets the OpenGL shader in accordance with the current PICA register state void SetShader(); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 015e69da9..9fe183944 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/logging/log.h" +#include "core/core.h" #include "video_core/regs_framebuffer.h" #include "video_core/regs_lighting.h" #include "video_core/regs_rasterizer.h" @@ -24,6 +25,42 @@ using TevStageConfig = TexturingRegs::TevStageConfig; namespace GLShader { +static const std::string UniformBlockDef = R"( +#define NUM_TEV_STAGES 6 +#define NUM_LIGHTS 8 + +struct LightSrc { + vec3 specular_0; + vec3 specular_1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spot_direction; + float dist_atten_bias; + float dist_atten_scale; +}; + +layout (std140) uniform shader_data { + vec2 framebuffer_scale; + int alphatest_ref; + float depth_scale; + float depth_offset; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + vec3 fog_color; + vec2 proctex_noise_f; + vec2 proctex_noise_a; + vec2 proctex_noise_p; + vec3 lighting_global_ambient; + LightSrc light_src[NUM_LIGHTS]; + vec4 const_color[NUM_TEV_STAGES]; + vec4 tev_combiner_buffer_color; + vec4 clip_coef; +}; +)"; + PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { PicaShaderConfig res; @@ -594,8 +631,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // Note: even if the normal vector is modified by normal map, which is not the // normal of the tangent plane anymore, the half angle vector is still projected // using the modified normal vector. - std::string half_angle_proj = "normalize(half_vector) - normal / dot(normal, " - "normal) * dot(normal, normalize(half_vector))"; + std::string half_angle_proj = + "normalize(half_vector) - normal * dot(normal, normalize(half_vector))"; // Note: the half angle vector projection is confirmed not normalized before the dot // product. The result is in fact not cos(phi) as the name suggested. index = "dot(" + half_angle_proj + ", tangent)"; @@ -750,7 +787,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } // Fresnel - if (lighting.lut_fr.enable && + // Note: only the last entry in the light slots applies the Fresnel factor + if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value @@ -759,17 +797,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { lighting.lut_fr.type, lighting.lut_fr.abs_input); value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + value + ")"; - // Enabled for difffuse lighting alpha component + // Enabled for diffuse lighting alpha component if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - out += "diffuse_sum.a *= " + value + ";\n"; + out += "diffuse_sum.a = " + value + ";\n"; } // Enabled for the specular lighting alpha component if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::SecondaryAlpha || lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - out += "specular_sum.a *= " + value + ";\n"; + out += "specular_sum.a = " + value + ";\n"; } } @@ -1008,8 +1046,6 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { std::string out = R"( #version 330 core -#define NUM_TEV_STAGES 6 -#define NUM_LIGHTS 8 in vec4 primary_color; in vec2 texcoord[3]; @@ -1021,36 +1057,6 @@ in vec4 gl_FragCoord; out vec4 color; -struct LightSrc { - vec3 specular_0; - vec3 specular_1; - vec3 diffuse; - vec3 ambient; - vec3 position; - vec3 spot_direction; - float dist_atten_bias; - float dist_atten_scale; -}; - -layout (std140) uniform shader_data { - vec2 framebuffer_scale; - int alphatest_ref; - float depth_scale; - float depth_offset; - int scissor_x1; - int scissor_y1; - int scissor_x2; - int scissor_y2; - vec3 fog_color; - vec2 proctex_noise_f; - vec2 proctex_noise_a; - vec2 proctex_noise_p; - vec3 lighting_global_ambient; - LightSrc light_src[NUM_LIGHTS]; - vec4 const_color[NUM_TEV_STAGES]; - vec4 tev_combiner_buffer_color; -}; - uniform sampler2D tex[3]; uniform samplerBuffer lighting_lut; uniform samplerBuffer fog_lut; @@ -1059,7 +1065,11 @@ uniform samplerBuffer proctex_color_map; uniform samplerBuffer proctex_alpha_map; uniform samplerBuffer proctex_lut; uniform samplerBuffer proctex_diff_lut; +)"; + + out += UniformBlockDef; + out += R"( // Rotate the vector v by the quaternion q vec3 quaternion_rotate(vec4 q, vec3 v) { return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); @@ -1155,6 +1165,11 @@ vec4 secondary_fragment_color = vec4(0.0); // Blend the fog out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n"; + } else if (state.fog_mode == TexturingRegs::FogMode::Gas) { + Core::Telemetry().AddField(Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", + true); + LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode"); + UNIMPLEMENTED(); } out += "gl_FragDepth = depth;\n"; @@ -1190,6 +1205,12 @@ out float texcoord0_w; out vec4 normquat; out vec3 view; +)"; + + out += UniformBlockDef; + + out += R"( + void main() { primary_color = vert_color; texcoord[0] = vert_texcoord0; @@ -1200,7 +1221,7 @@ void main() { view = vert_view; gl_Position = vert_position; gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 - // TODO (wwylele): calculate gl_ClipDistance[1] from user-defined clipping plane + gl_ClipDistance[1] = dot(clip_coef, vert_position); } )"; diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 67ed19ba8..e9063e616 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -21,7 +21,8 @@ namespace Pica { namespace Shader { -OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) { +OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, + const AttributeBuffer& input) { // Setup output data union { OutputVertex ret{}; @@ -82,6 +83,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) { } } +UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {} + +GSEmitter::GSEmitter() { + handlers = new Handlers; +} + +GSEmitter::~GSEmitter() { + delete handlers; +} + +void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) { + ASSERT(vertex_id < 3); + std::copy(std::begin(vertex), std::end(vertex), buffer[vertex_id].begin()); + if (prim_emit) { + if (winding) + handlers->winding_setter(); + for (size_t i = 0; i < buffer.size(); ++i) { + AttributeBuffer output; + unsigned int output_i = 0; + for (unsigned int reg : Common::BitSet<u32>(output_mask)) { + output.attr[output_i++] = buffer[i][reg]; + } + handlers->vertex_handler(output); + } + } +} + +GSUnitState::GSUnitState() : UnitState(&emitter) {} + +void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) { + emitter.handlers->vertex_handler = std::move(vertex_handler); + emitter.handlers->winding_setter = std::move(winding_setter); +} + +void GSUnitState::ConfigOutput(const ShaderRegs& config) { + emitter.output_mask = config.output_mask; +} + MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); #ifdef ARCHITECTURE_x86_64 diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index e156f6aef..a3789da01 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -6,6 +6,7 @@ #include <array> #include <cstddef> +#include <functional> #include <type_traits> #include <nihstro/shader_bytecode.h> #include "common/assert.h" @@ -31,6 +32,12 @@ struct AttributeBuffer { alignas(16) Math::Vec4<float24> attr[16]; }; +/// Handler type for receiving vertex outputs from vertex shader or geometry shader +using VertexHandler = std::function<void(const AttributeBuffer&)>; + +/// Handler type for signaling to invert the vertex order of the next triangle +using WindingSetter = std::function<void()>; + struct OutputVertex { Math::Vec4<float24> pos; Math::Vec4<float24> quat; @@ -43,7 +50,8 @@ struct OutputVertex { INSERT_PADDING_WORDS(1); Math::Vec2<float24> tc2; - static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output); + static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, + const AttributeBuffer& output); }; #define ASSERT_POS(var, pos) \ static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ @@ -61,12 +69,36 @@ static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); /** + * This structure contains state information for primitive emitting in geometry shader. + */ +struct GSEmitter { + std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer; + u8 vertex_id; + bool prim_emit; + bool winding; + u32 output_mask; + + // Function objects are hidden behind a raw pointer to make the structure standard layout type, + // for JIT to use offsetof to access other members. + struct Handlers { + VertexHandler vertex_handler; + WindingSetter winding_setter; + } * handlers; + + GSEmitter(); + ~GSEmitter(); + void Emit(Math::Vec4<float24> (&vertex)[16]); +}; +static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type"); + +/** * This structure contains the state information that needs to be unique for a shader unit. The 3DS * has four shader units that process shaders in parallel. At the present, Citra only implements a * single shader unit that processes all shaders serially. Putting the state information in a struct * here will make it easier for us to parallelize the shader processing later. */ struct UnitState { + explicit UnitState(GSEmitter* emitter = nullptr); struct Registers { // The registers are accessed by the shader JIT using SSE instructions, and are therefore // required to be 16-byte aligned. @@ -82,6 +114,8 @@ struct UnitState { // TODO: How many bits do these actually have? s32 address_registers[3]; + GSEmitter* emitter_ptr; + static size_t InputOffset(const SourceRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Input: @@ -125,6 +159,19 @@ struct UnitState { void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); }; +/** + * This is an extended shader unit state that represents the special unit that can run both vertex + * shader and geometry shader. It contains an additional primitive emitter and utilities for + * geometry shader. + */ +struct GSUnitState : public UnitState { + GSUnitState(); + void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter); + void ConfigOutput(const ShaderRegs& config); + + GSEmitter emitter; +}; + struct ShaderSetup { struct { // The float uniforms are accessed by the shader JIT using SSE instructions, and are diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 206c0978a..9d4da4904 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -636,6 +636,22 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData break; } + case OpCode::Id::EMIT: { + GSEmitter* emitter = state.emitter_ptr; + ASSERT_MSG(emitter, "Execute EMIT on VS"); + emitter->Emit(state.registers.output); + break; + } + + case OpCode::Id::SETEMIT: { + GSEmitter* emitter = state.emitter_ptr; + ASSERT_MSG(emitter, "Execute SETEMIT on VS"); + emitter->vertex_id = instr.setemit.vertex_id; + emitter->prim_emit = instr.setemit.prim_emit != 0; + emitter->winding = instr.setemit.winding != 0; + break; + } + default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value().EffectiveOpCode(), diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 42a57aab1..1b31623bd 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -75,8 +75,8 @@ const JitFunction instr_table[64] = { &JitShader::Compile_IF, // ifu &JitShader::Compile_IF, // ifc &JitShader::Compile_LOOP, // loop - nullptr, // emit - nullptr, // sete + &JitShader::Compile_EMIT, // emit + &JitShader::Compile_SETE, // sete &JitShader::Compile_JMP, // jmpc &JitShader::Compile_JMP, // jmpu &JitShader::Compile_CMP, // cmp @@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) { } } +static void Emit(GSEmitter* emitter, Math::Vec4<float24> (*output)[16]) { + emitter->Emit(*output); +} + +void JitShader::Compile_EMIT(Instruction instr) { + Label have_emitter, end; + mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); + test(rax, rax); + jnz(have_emitter); + + ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute EMIT on VS")); + CallFarFunction(*this, LogCritical); + ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + jmp(end); + + L(have_emitter); + ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(ABI_PARAM1, rax); + mov(ABI_PARAM2, STATE); + add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output))); + CallFarFunction(*this, Emit); + ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + L(end); +} + +void JitShader::Compile_SETE(Instruction instr) { + Label have_emitter, end; + mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); + test(rax, rax); + jnz(have_emitter); + + ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute SETEMIT on VS")); + CallFarFunction(*this, LogCritical); + ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + jmp(end); + + L(have_emitter); + mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id); + mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit); + mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding); + L(end); +} + void JitShader::Compile_Block(unsigned end) { while (program_counter < end) { Compile_NextInstr(); diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index 31af0ca48..4aee56b1d 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -66,6 +66,8 @@ public: void Compile_JMP(Instruction instr); void Compile_CMP(Instruction instr); void Compile_MAD(Instruction instr); + void Compile_EMIT(Instruction instr); + void Compile_SETE(Instruction instr); private: void Compile_Block(unsigned end); diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index cdbc71502..a52129eb7 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -31,7 +31,7 @@ public: : coeffs(coeffs), bias(bias) {} bool IsInside(const Vertex& vertex) const { - return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); + return Math::Dot(vertex.pos + bias, coeffs) >= float24::FromFloat32(0); } bool IsOutSide(const Vertex& vertex) const { @@ -116,19 +116,18 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu static const float24 f0 = float24::FromFloat32(0.0); static const float24 f1 = float24::FromFloat32(1.0); static const std::array<ClippingEdge, 7> clipping_edges = {{ - {Math::MakeVec(f1, f0, f0, -f1)}, // x = +w - {Math::MakeVec(-f1, f0, f0, -f1)}, // x = -w - {Math::MakeVec(f0, f1, f0, -f1)}, // y = +w - {Math::MakeVec(f0, -f1, f0, -f1)}, // y = -w - {Math::MakeVec(f0, f0, f1, f0)}, // z = 0 - {Math::MakeVec(f0, f0, -f1, -f1)}, // z = -w - {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON + {Math::MakeVec(-f1, f0, f0, f1)}, // x = +w + {Math::MakeVec(f1, f0, f0, f1)}, // x = -w + {Math::MakeVec(f0, -f1, f0, f1)}, // y = +w + {Math::MakeVec(f0, f1, f0, f1)}, // y = -w + {Math::MakeVec(f0, f0, -f1, f0)}, // z = 0 + {Math::MakeVec(f0, f0, f1, f1)}, // z = -w + {Math::MakeVec(f0, f0, f0, f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON }}; // Simple implementation of the Sutherland-Hodgman clipping algorithm. // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) - for (auto edge : clipping_edges) { - + auto Clip = [&](const ClippingEdge& edge) { std::swap(input_list, output_list); output_list->clear(); @@ -147,12 +146,24 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu } reference_vertex = &vertex; } + }; + + for (auto edge : clipping_edges) { + Clip(edge); // Need to have at least a full triangle to continue... if (output_list->size() < 3) return; } + if (g_state.regs.rasterizer.clip_enable) { + ClippingEdge custom_edge{g_state.regs.rasterizer.GetClipCoef()}; + Clip(custom_edge); + + if (output_list->size() < 3) + return; + } + InitScreenCoordinates((*output_list)[0]); InitScreenCoordinates((*output_list)[1]); diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp index 39a3e396d..5fa748611 100644 --- a/src/video_core/swrasterizer/lighting.cpp +++ b/src/video_core/swrasterizer/lighting.cpp @@ -22,18 +22,37 @@ static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, - const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view) { + const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view, + const Math::Vec4<u8> (&texture_color)[4]) { - // TODO(Subv): Bump mapping - Math::Vec3<float> surface_normal = {0.0f, 0.0f, 1.0f}; + Math::Vec3<float> surface_normal; + Math::Vec3<float> surface_tangent; if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { - LOG_CRITICAL(HW_GPU, "unimplemented bump mapping"); - UNIMPLEMENTED(); + Math::Vec3<float> perturbation = + texture_color[lighting.config0.bump_selector].xyz().Cast<float>() / 127.5f - + Math::MakeVec(1.0f, 1.0f, 1.0f); + if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { + if (!lighting.config0.disable_bump_renorm) { + const float z_square = 1 - perturbation.xy().Length2(); + perturbation.z = std::sqrt(std::max(z_square, 0.0f)); + } + surface_normal = perturbation; + surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f); + } else if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { + surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f); + surface_tangent = perturbation; + } else { + LOG_ERROR(HW_GPU, "Unknown bump mode %u", lighting.config0.bump_mode.Value()); + } + } else { + surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f); + surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f); } // Use the normalized the quaternion when performing the rotation auto normal = Math::QuaternionRotate(normquat, surface_normal); + auto tangent = Math::QuaternionRotate(normquat, surface_tangent); Math::Vec4<float> diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; Math::Vec4<float> specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; @@ -102,6 +121,16 @@ std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( result = Math::Dot(light_vector, spot_dir.Cast<float>() / 2047.0f); break; } + case LightingRegs::LightingLutInput::CP: + if (lighting.config0.config == LightingRegs::LightingConfig::Config7) { + const Math::Vec3<float> norm_half_vector = half_vector.Normalized(); + const Math::Vec3<float> half_vector_proj = + norm_half_vector - normal * Math::Dot(normal, norm_half_vector); + result = Math::Dot(half_vector_proj, tangent); + } else { + result = 0.0f; + } + break; default: LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast<u32>(input)); UNIMPLEMENTED(); @@ -201,7 +230,8 @@ std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); // Fresnel - if (lighting.config1.disable_lut_fr == 0 && + // Note: only the last entry in the light slots applies the Fresnel factor + if (light_index == lighting.max_light_index && lighting.config1.disable_lut_fr == 0 && LightingRegs::IsLightingSamplerSupported(lighting.config0.config, LightingRegs::LightingSampler::Fresnel)) { @@ -213,14 +243,14 @@ std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( if (lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - diffuse_sum.a() *= lut_value; + diffuse_sum.a() = lut_value; } // Enabled for the specular lighting alpha component if (lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::SecondaryAlpha || lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - specular_sum.a() *= lut_value; + specular_sum.a() = lut_value; } } diff --git a/src/video_core/swrasterizer/lighting.h b/src/video_core/swrasterizer/lighting.h index 438dca926..d807a3d94 100644 --- a/src/video_core/swrasterizer/lighting.h +++ b/src/video_core/swrasterizer/lighting.h @@ -13,6 +13,7 @@ namespace Pica { std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, - const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view); + const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view, + const Math::Vec4<u8> (&texture_color)[4]); } // namespace Pica diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index fdc1df199..862135614 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -437,8 +437,8 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), }; - std::tie(primary_fragment_color, secondary_fragment_color) = - ComputeFragmentsColors(g_state.regs.lighting, g_state.lighting, normquat, view); + std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors( + g_state.regs.lighting, g_state.lighting, normquat, view, texture_color); } for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); |