summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt4
-rw-r--r--externals/microprofile/README.md7
-rw-r--r--externals/microprofile/microprofile.h3571
-rw-r--r--externals/microprofile/microprofile_html.h3868
-rw-r--r--externals/microprofile/microprofileui.h3348
-rw-r--r--src/citra/citra.cpp7
-rw-r--r--src/citra_qt/CMakeLists.txt2
-rw-r--r--src/citra_qt/bootmanager.cpp5
-rw-r--r--src/citra_qt/debugger/graphics.cpp4
-rw-r--r--src/citra_qt/debugger/graphics_cmdlists.cpp6
-rw-r--r--src/citra_qt/debugger/graphics_vertex_shader.cpp4
-rw-r--r--src/citra_qt/debugger/profiler.cpp202
-rw-r--r--src/citra_qt/debugger/profiler.h17
-rw-r--r--src/citra_qt/main.cpp40
-rw-r--r--src/citra_qt/main.h20
-rw-r--r--src/citra_qt/util/util.cpp13
-rw-r--r--src/citra_qt/util/util.h10
-rw-r--r--src/common/CMakeLists.txt3
-rw-r--r--src/common/common_funcs.h12
-rw-r--r--src/common/file_util.h2
-rw-r--r--src/common/logging/log.h15
-rw-r--r--src/common/microprofile.cpp7
-rw-r--r--src/common/microprofile.h25
-rw-r--r--src/common/microprofileui.h16
-rw-r--r--src/common/x64/emitter.cpp770
-rw-r--r--src/common/x64/emitter.h850
-rw-r--r--src/core/CMakeLists.txt4
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp103
-rw-r--r--src/core/arm/skyeye_common/armstate.cpp1
-rw-r--r--src/core/arm/skyeye_common/armsupp.cpp1
-rw-r--r--src/core/hle/config_mem.cpp7
-rw-r--r--src/core/hle/config_mem.h1
-rw-r--r--src/core/hle/function_wrappers.h8
-rw-r--r--src/core/hle/hle.cpp4
-rw-r--r--src/core/hle/kernel/kernel.cpp19
-rw-r--r--src/core/hle/kernel/memory.cpp136
-rw-r--r--src/core/hle/kernel/memory.h35
-rw-r--r--src/core/hle/kernel/process.cpp160
-rw-r--r--src/core/hle/kernel/process.h39
-rw-r--r--src/core/hle/kernel/resource_limit.cpp1
-rw-r--r--src/core/hle/kernel/shared_memory.cpp27
-rw-r--r--src/core/hle/kernel/shared_memory.h2
-rw-r--r--src/core/hle/kernel/thread.cpp4
-rw-r--r--src/core/hle/kernel/vm_manager.cpp118
-rw-r--r--src/core/hle/kernel/vm_manager.h38
-rw-r--r--src/core/hle/service/apt/apt.cpp21
-rw-r--r--src/core/hle/service/gsp_gpu.cpp31
-rw-r--r--src/core/hle/service/gsp_gpu.h11
-rw-r--r--src/core/hle/service/y2r_u.cpp1
-rw-r--r--src/core/hle/shared_page.cpp3
-rw-r--r--src/core/hle/shared_page.h1
-rw-r--r--src/core/hle/svc.cpp162
-rw-r--r--src/core/hw/gpu.cpp77
-rw-r--r--src/core/hw/gpu.h32
-rw-r--r--src/core/mem_map.cpp163
-rw-r--r--src/core/mem_map.h46
-rw-r--r--src/core/memory.cpp40
-rw-r--r--src/core/memory.h22
-rw-r--r--src/core/memory_setup.h3
-rw-r--r--src/core/system.cpp3
-rw-r--r--src/video_core/command_processor.cpp4
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp8
-rw-r--r--src/video_core/pica.h36
-rw-r--r--src/video_core/rasterizer.cpp61
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp38
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_state.h3
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h23
-rw-r--r--src/video_core/shader/shader.cpp5
-rw-r--r--src/video_core/shader/shader_interpreter.cpp79
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp252
-rw-r--r--src/video_core/shader/shader_jit_x64.h15
74 files changed, 13344 insertions, 1368 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 00d71dbdc..2ac94bc9f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -155,7 +155,8 @@ IF (APPLE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++")
ELSEIF(MINGW)
# GCC does not support codecvt, so use iconv instead
- set(PLATFORM_LIBRARIES winmm ws2_32 iconv)
+ # PSAPI is the Process Status API
+ set(PLATFORM_LIBRARIES winmm ws2_32 psapi iconv)
# WSAPoll functionality doesn't exist before WinNT 6.x (Vista and up)
add_definitions(-D_WIN32_WINNT=0x0600)
@@ -212,6 +213,7 @@ set(INI_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}/externals/inih")
include_directories(${INI_PREFIX})
add_subdirectory(${INI_PREFIX})
+include_directories(externals/microprofile)
include_directories(externals/nihstro/include)
if (MSVC)
diff --git a/externals/microprofile/README.md b/externals/microprofile/README.md
new file mode 100644
index 000000000..0a58d1c5a
--- /dev/null
+++ b/externals/microprofile/README.md
@@ -0,0 +1,7 @@
+# microprofile
+
+MicroProfile is an embeddable profiler in a single file, written in C++
+
+It can display profile information in the application, or by generating captures via a minimal built-in webserver.
+
+For more information see the project webpage at https://bitbucket.org/jonasmeyer/microprofile
diff --git a/externals/microprofile/microprofile.h b/externals/microprofile/microprofile.h
new file mode 100644
index 000000000..d1ae0c1c2
--- /dev/null
+++ b/externals/microprofile/microprofile.h
@@ -0,0 +1,3571 @@
+#pragma once
+// This is free and unencumbered software released into the public domain.
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+// In jurisdictions that recognize copyright laws, the author or authors
+// of this software dedicate any and all copyright interest in the
+// software to the public domain. We make this dedication for the benefit
+// of the public at large and to the detriment of our heirs and
+// successors. We intend this dedication to be an overt act of
+// relinquishment in perpetuity of all present and future rights to this
+// software under copyright law.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+// For more information, please refer to <http://unlicense.org/>
+//
+// ***********************************************************************
+//
+//
+//
+//
+// Howto:
+// Call these functions from your code:
+// MicroProfileOnThreadCreate
+// MicroProfileMouseButton
+// MicroProfileMousePosition
+// MicroProfileModKey
+// MicroProfileFlip <-- Call this once per frame
+// MicroProfileDraw <-- Call this once per frame
+// MicroProfileToggleDisplayMode <-- Bind to a key to toggle profiling
+// MicroProfileTogglePause <-- Bind to a key to toggle pause
+//
+// Use these macros in your code in blocks you want to time:
+//
+// MICROPROFILE_DECLARE
+// MICROPROFILE_DEFINE
+// MICROPROFILE_DECLARE_GPU
+// MICROPROFILE_DEFINE_GPU
+// MICROPROFILE_SCOPE
+// MICROPROFILE_SCOPEI
+// MICROPROFILE_SCOPEGPU
+// MICROPROFILE_SCOPEGPUI
+// MICROPROFILE_META_CPU
+// MICROPROFILE_META_GPU
+//
+//
+// Usage:
+//
+// {
+// MICROPROFILE_SCOPEI("GroupName", "TimerName", nColorRgb);
+// ..Code to be timed..
+// }
+//
+// MICROPROFILE_DECLARE / MICROPROFILE_DEFINE allows defining groups in a shared place, to ensure sorting of the timers
+//
+// (in global scope)
+// MICROPROFILE_DEFINE(g_ProfileFisk, "Fisk", "Skalle", nSomeColorRgb);
+//
+// (in some other file)
+// MICROPROFILE_DECLARE(g_ProfileFisk);
+//
+// void foo(){
+// MICROPROFILE_SCOPE(g_ProfileFisk);
+// }
+//
+// Once code is instrumented the gui is activated by calling MicroProfileToggleDisplayMode or by clicking in the upper left corner of
+// the screen
+//
+// The following functions must be implemented before the profiler is usable
+// debug render:
+// void MicroProfileDrawText(int nX, int nY, uint32_t nColor, const char* pText, uint32_t nNumCharacters);
+// void MicroProfileDrawBox(int nX, int nY, int nX1, int nY1, uint32_t nColor, MicroProfileBoxType = MicroProfileBoxTypeFlat);
+// void MicroProfileDrawLine2D(uint32_t nVertices, float* pVertices, uint32_t nColor);
+// Gpu time stamps: (See below for d3d/opengl helper)
+// uint32_t MicroProfileGpuInsertTimeStamp();
+// uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey);
+// uint64_t MicroProfileTicksPerSecondGpu();
+// threading:
+// const char* MicroProfileGetThreadName(); Threadnames in detailed view
+//
+// Default implementations of Gpu timestamp functions:
+// Opengl:
+// in .c file where MICROPROFILE_IMPL is defined:
+// #define MICROPROFILE_GPU_TIMERS_GL
+// call MicroProfileGpuInitGL() on startup
+// D3D11:
+// in .c file where MICROPROFILE_IMPL is defined:
+// #define MICROPROFILE_GPU_TIMERS_D3D11
+// call MicroProfileGpuInitD3D11(). Pass Device & ImmediateContext
+//
+// Limitations:
+// GPU timestamps can only be inserted from one thread.
+
+
+
+#ifndef MICROPROFILE_ENABLED
+#define MICROPROFILE_ENABLED 1
+#endif
+
+#include <stdint.h>
+typedef uint64_t MicroProfileToken;
+typedef uint16_t MicroProfileGroupId;
+
+#if 0 == MICROPROFILE_ENABLED
+
+#define MICROPROFILE_DECLARE(var)
+#define MICROPROFILE_DEFINE(var, group, name, color)
+// No-op stub; argument order (group, category, color) matches the enabled definition of this macro.
+#define MICROPROFILE_REGISTER_GROUP(group, category, color)
+#define MICROPROFILE_DECLARE_GPU(var)
+#define MICROPROFILE_DEFINE_GPU(var, name, color)
+#define MICROPROFILE_SCOPE(var) do{}while(0)
+#define MICROPROFILE_SCOPEI(group, name, color) do{}while(0)
+#define MICROPROFILE_SCOPEGPU(var) do{}while(0)
+#define MICROPROFILE_SCOPEGPUI( name, color) do{}while(0)
+#define MICROPROFILE_META_CPU(name, count)
+#define MICROPROFILE_META_GPU(name, count)
+#define MICROPROFILE_FORCEENABLECPUGROUP(s) do{} while(0)
+#define MICROPROFILE_FORCEDISABLECPUGROUP(s) do{} while(0)
+#define MICROPROFILE_FORCEENABLEGPUGROUP(s) do{} while(0)
+#define MICROPROFILE_FORCEDISABLEGPUGROUP(s) do{} while(0)
+#define MICROPROFILE_SCOPE_TOKEN(token)
+
+#define MicroProfileGetTime(group, name) 0.f
+#define MicroProfileOnThreadCreate(foo) do{}while(0)
+#define MicroProfileFlip() do{}while(0)
+#define MicroProfileSetAggregateFrames(a) do{}while(0)
+#define MicroProfileGetAggregateFrames() 0
+#define MicroProfileGetCurrentAggregateFrames() 0
+#define MicroProfileTogglePause() do{}while(0)
+#define MicroProfileToggleAllGroups() do{} while(0)
+#define MicroProfileDumpTimers() do{}while(0)
+#define MicroProfileShutdown() do{}while(0)
+#define MicroProfileSetForceEnable(a) do{} while(0)
+#define MicroProfileGetForceEnable() false
+#define MicroProfileSetEnableAllGroups(a) do{} while(0)
+#define MicroProfileEnableCategory(a) do{} while(0)
+#define MicroProfileDisableCategory(a) do{} while(0)
+#define MicroProfileGetEnableAllGroups() false
+#define MicroProfileSetForceMetaCounters(a)
+#define MicroProfileGetForceMetaCounters() 0
+#define MicroProfileEnableMetaCounter(c) do{}while(0)
+#define MicroProfileDisableMetaCounter(c) do{}while(0)
+#define MicroProfileDumpFile(html,csv) do{} while(0)
+#define MicroProfileWebServerPort() ((uint32_t)-1)
+
+#else
+
+#include <stdint.h>
+#include <string.h>
+#include <thread>
+#include <mutex>
+#include <atomic>
+
+#ifndef MICROPROFILE_API
+#define MICROPROFILE_API
+#endif
+
+MICROPROFILE_API int64_t MicroProfileTicksPerSecondCpu();
+
+
+#if defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#include <unistd.h>
+#include <libkern/OSAtomic.h>
+#include <TargetConditionals.h>
+#if TARGET_OS_IPHONE
+#define MICROPROFILE_IOS
+#endif
+
+#define MP_TICK() mach_absolute_time()
+// Returns the number of MP_TICK() (mach_absolute_time) ticks per second.
+// The value is derived from the mach timebase on first use and cached in a function-local static.
+inline int64_t MicroProfileTicksPerSecondCpu()
+{
+    // 0 doubles as the "not computed yet" marker.
+    static int64_t nCachedTicksPerSecond = 0;
+    if(nCachedTicksPerSecond != 0)
+    {
+        return nCachedTicksPerSecond;
+    }
+    mach_timebase_info_data_t sTimebase;
+    mach_timebase_info(&sTimebase);
+    nCachedTicksPerSecond = 1000000000ll * sTimebase.denom / sTimebase.numer;
+    return nCachedTicksPerSecond;
+}
+// Returns the 64-bit id that pthread_threadid_np reports for the calling thread.
+inline uint64_t MicroProfileGetCurrentThreadId()
+{
+    uint64_t nThreadId;
+    pthread_threadid_np(pthread_self(), &nThreadId);
+    return nThreadId;
+}
+
+#define MP_BREAK() __builtin_trap()
+#define MP_THREAD_LOCAL __thread
+#define MP_STRCASECMP strcasecmp
+#define MP_GETCURRENTTHREADID() MicroProfileGetCurrentThreadId()
+typedef uint64_t ThreadIdType;
+#elif defined(_WIN32)
+int64_t MicroProfileGetTick();
+#define MP_TICK() MicroProfileGetTick()
+#define MP_BREAK() __debugbreak()
+#define MP_THREAD_LOCAL __declspec(thread)
+#define MP_STRCASECMP _stricmp
+#define MP_GETCURRENTTHREADID() GetCurrentThreadId()
+typedef uint32_t ThreadIdType;
+
+#elif defined(__linux__)
+#include <unistd.h>
+#include <time.h>
+// Returns the CPU tick rate on Linux. MicroProfileGetTick() builds its value from
+// seconds * 1e9 + nanoseconds, so there are 1e9 ticks per second.
+inline int64_t MicroProfileTicksPerSecondCpu()
+{
+    const int64_t nNanosecondsPerSecond = 1000000000ll;
+    return nNanosecondsPerSecond;
+}
+
+// Returns the current CPU timestamp in nanoseconds (see MicroProfileTicksPerSecondCpu).
+// Only differences between two ticks are meaningful; the absolute value has no defined epoch.
+inline int64_t MicroProfileGetTick()
+{
+    // CLOCK_MONOTONIC is used instead of CLOCK_REALTIME: the realtime clock can jump
+    // backwards or forwards when the wall clock is adjusted, which would corrupt
+    // measured intervals.
+    timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return 1000000000ll * ts.tv_sec + ts.tv_nsec;
+}
+#define MP_TICK() MicroProfileGetTick()
+#define MP_BREAK() __builtin_trap()
+#define MP_THREAD_LOCAL __thread
+#define MP_STRCASECMP strcasecmp
+#define MP_GETCURRENTTHREADID() (uint64_t)pthread_self()
+typedef uint64_t ThreadIdType;
+#endif
+
+
+#ifndef MP_GETCURRENTTHREADID
+#define MP_GETCURRENTTHREADID() 0
+typedef uint32_t ThreadIdType;
+#endif
+
+
+#define MP_ASSERT(a) do{if(!(a)){MP_BREAK();} }while(0)
+#define MICROPROFILE_DECLARE(var) extern MicroProfileToken g_mp_##var
+#define MICROPROFILE_DEFINE(var, group, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu)
+#define MICROPROFILE_REGISTER_GROUP(group, category, color) MicroProfileRegisterGroup(group, category, color)
+#define MICROPROFILE_DECLARE_GPU(var) extern MicroProfileToken g_mp_##var
+#define MICROPROFILE_DEFINE_GPU(var, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu)
+#define MICROPROFILE_TOKEN_PASTE0(a, b) a ## b
+#define MICROPROFILE_TOKEN_PASTE(a, b) MICROPROFILE_TOKEN_PASTE0(a,b)
+#define MICROPROFILE_SCOPE(var) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var)
+#define MICROPROFILE_SCOPE_TOKEN(token) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(token)
+#define MICROPROFILE_SCOPEI(group, name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu); MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__))
+#define MICROPROFILE_SCOPEGPU(var) MicroProfileScopeGpuHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var)
+#define MICROPROFILE_SCOPEGPUI(name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu); MicroProfileScopeGpuHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__))
+#define MICROPROFILE_META_CPU(name, count) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__) = MicroProfileGetMetaToken(name); MicroProfileMetaUpdate(MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__), count, MicroProfileTokenTypeCpu)
+#define MICROPROFILE_META_GPU(name, count) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__) = MicroProfileGetMetaToken(name); MicroProfileMetaUpdate(MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__), count, MicroProfileTokenTypeGpu)
+
+
+#ifndef MICROPROFILE_USE_THREAD_NAME_CALLBACK
+#define MICROPROFILE_USE_THREAD_NAME_CALLBACK 0
+#endif
+
+#ifndef MICROPROFILE_PER_THREAD_BUFFER_SIZE
+#define MICROPROFILE_PER_THREAD_BUFFER_SIZE (2048<<10)
+#endif
+
+#ifndef MICROPROFILE_MAX_FRAME_HISTORY
+#define MICROPROFILE_MAX_FRAME_HISTORY 512
+#endif
+
+#ifndef MICROPROFILE_PRINTF
+#define MICROPROFILE_PRINTF printf
+#endif
+
+#ifndef MICROPROFILE_META_MAX
+#define MICROPROFILE_META_MAX 8
+#endif
+
+#ifndef MICROPROFILE_WEBSERVER_PORT
+#define MICROPROFILE_WEBSERVER_PORT 1338
+#endif
+
+#ifndef MICROPROFILE_WEBSERVER
+#define MICROPROFILE_WEBSERVER 1
+#endif
+
+#ifndef MICROPROFILE_WEBSERVER_MAXFRAMES
+#define MICROPROFILE_WEBSERVER_MAXFRAMES 30
+#endif
+
+#ifndef MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE
+#define MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE (16<<10)
+#endif
+
+#ifndef MICROPROFILE_GPU_TIMERS
+#define MICROPROFILE_GPU_TIMERS 1
+#endif
+
+#ifndef MICROPROFILE_GPU_FRAME_DELAY
+#define MICROPROFILE_GPU_FRAME_DELAY 3 //must be > 0
+#endif
+
+
+#ifndef MICROPROFILE_NAME_MAX_LEN
+#define MICROPROFILE_NAME_MAX_LEN 64
+#endif
+
+#define MICROPROFILE_FORCEENABLECPUGROUP(s) MicroProfileForceEnableGroup(s, MicroProfileTokenTypeCpu)
+#define MICROPROFILE_FORCEDISABLECPUGROUP(s) MicroProfileForceDisableGroup(s, MicroProfileTokenTypeCpu)
+#define MICROPROFILE_FORCEENABLEGPUGROUP(s) MicroProfileForceEnableGroup(s, MicroProfileTokenTypeGpu)
+#define MICROPROFILE_FORCEDISABLEGPUGROUP(s) MicroProfileForceDisableGroup(s, MicroProfileTokenTypeGpu)
+
+#define MICROPROFILE_INVALID_TICK ((uint64_t)-1)
+#define MICROPROFILE_GROUP_MASK_ALL 0xffffffffffff
+
+
+#define MICROPROFILE_INVALID_TOKEN (uint64_t)-1
+
+// Distinguishes CPU timers from GPU timers. Passed as the last argument of
+// MicroProfileGetToken and to the MicroProfileForceEnableGroup/ForceDisableGroup helpers.
+enum MicroProfileTokenType
+{
+ MicroProfileTokenTypeCpu,
+ MicroProfileTokenTypeGpu,
+};
+
+// Box style argument of the user-implemented MicroProfileDrawBox callback
+// (the documented callback signature defaults it to MicroProfileBoxTypeFlat).
+enum MicroProfileBoxType
+{
+ MicroProfileBoxTypeBar,
+ MicroProfileBoxTypeFlat,
+};
+
+
+
+struct MicroProfile;
+
+MICROPROFILE_API void MicroProfileInit();
+MICROPROFILE_API void MicroProfileShutdown();
+MICROPROFILE_API MicroProfileToken MicroProfileFindToken(const char* sGroup, const char* sName);
+MICROPROFILE_API MicroProfileToken MicroProfileGetToken(const char* sGroup, const char* sName, uint32_t nColor, MicroProfileTokenType Token = MicroProfileTokenTypeCpu);
+MICROPROFILE_API MicroProfileToken MicroProfileGetMetaToken(const char* pName);
+MICROPROFILE_API void MicroProfileMetaUpdate(MicroProfileToken, int nCount, MicroProfileTokenType eTokenType);
+MICROPROFILE_API uint64_t MicroProfileEnter(MicroProfileToken nToken);
+MICROPROFILE_API void MicroProfileLeave(MicroProfileToken nToken, uint64_t nTick);
+MICROPROFILE_API uint64_t MicroProfileGpuEnter(MicroProfileToken nToken);
+MICROPROFILE_API void MicroProfileGpuLeave(MicroProfileToken nToken, uint64_t nTick);
+// Extracts the timer index stored in the low 16 bits of a token.
+inline uint16_t MicroProfileGetTimerIndex(MicroProfileToken t)
+{
+    return static_cast<uint16_t>(t & 0xffff);
+}
+// Extracts the group bitmask stored above the 16-bit timer index of a token.
+inline uint64_t MicroProfileGetGroupMask(MicroProfileToken t)
+{
+    const uint64_t nShifted = t >> 16;
+    return nShifted & MICROPROFILE_GROUP_MASK_ALL;
+}
+// Packs a group bitmask and a timer index into a token: mask in the upper bits,
+// timer index in the low 16 bits.
+inline MicroProfileToken MicroProfileMakeToken(uint64_t nGroupMask, uint16_t nTimer)
+{
+    const uint64_t nMaskBits = nGroupMask << 16;
+    return nMaskBits | nTimer;
+}
+
+MICROPROFILE_API void MicroProfileFlip(); //! call once per frame.
+MICROPROFILE_API void MicroProfileTogglePause();
+MICROPROFILE_API void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type);
+MICROPROFILE_API void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type);
+MICROPROFILE_API float MicroProfileGetTime(const char* pGroup, const char* pName);
+MICROPROFILE_API void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu);
+MICROPROFILE_API void MicroProfileOnThreadCreate(const char* pThreadName); //should be called from newly created threads
+MICROPROFILE_API void MicroProfileOnThreadExit(); //call on exit to reuse log
+MICROPROFILE_API void MicroProfileInitThreadLog();
+MICROPROFILE_API void MicroProfileSetForceEnable(bool bForceEnable);
+MICROPROFILE_API bool MicroProfileGetForceEnable();
+MICROPROFILE_API void MicroProfileSetEnableAllGroups(bool bEnable);
+MICROPROFILE_API void MicroProfileEnableCategory(const char* pCategory);
+MICROPROFILE_API void MicroProfileDisableCategory(const char* pCategory);
+MICROPROFILE_API bool MicroProfileGetEnableAllGroups();
+MICROPROFILE_API void MicroProfileSetForceMetaCounters(bool bEnable);
+MICROPROFILE_API bool MicroProfileGetForceMetaCounters();
+MICROPROFILE_API void MicroProfileEnableMetaCounter(const char* pMet);
+MICROPROFILE_API void MicroProfileDisableMetaCounter(const char* pMet);
+MICROPROFILE_API void MicroProfileSetAggregateFrames(int frames);
+MICROPROFILE_API int MicroProfileGetAggregateFrames();
+MICROPROFILE_API int MicroProfileGetCurrentAggregateFrames();
+MICROPROFILE_API MicroProfile* MicroProfileGet();
+MICROPROFILE_API void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2]);
+MICROPROFILE_API std::recursive_mutex& MicroProfileGetMutex();
+MICROPROFILE_API void MicroProfileStartContextSwitchTrace();
+MICROPROFILE_API void MicroProfileStopContextSwitchTrace();
+MICROPROFILE_API bool MicroProfileIsLocalThread(uint32_t nThreadId);
+
+
+#if MICROPROFILE_WEBSERVER
+MICROPROFILE_API void MicroProfileDumpFile(const char* pHtml, const char* pCsv);
+MICROPROFILE_API uint32_t MicroProfileWebServerPort();
+#else
+// No-op stubs used when the web server is compiled out. They take the same number of
+// arguments as the real functions above so that existing call sites keep compiling.
+#define MicroProfileDumpFile(html,csv) do{} while(0)
+#define MicroProfileWebServerPort() ((uint32_t)-1)
+#endif
+
+
+
+
+#if MICROPROFILE_GPU_TIMERS
+MICROPROFILE_API uint32_t MicroProfileGpuInsertTimeStamp();
+MICROPROFILE_API uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey);
+MICROPROFILE_API uint64_t MicroProfileTicksPerSecondGpu();
+MICROPROFILE_API int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu);
+#else
+#define MicroProfileGpuInsertTimeStamp() 1
+#define MicroProfileGpuGetTimeStamp(a) 0
+#define MicroProfileTicksPerSecondGpu() 1
+#define MicroProfileGetGpuTickReference(a,b) 0
+#endif
+
+#if MICROPROFILE_GPU_TIMERS_D3D11
+#define MICROPROFILE_D3D_MAX_QUERIES (8<<10)
+MICROPROFILE_API void MicroProfileGpuInitD3D11(void* pDevice, void* pDeviceContext);
+#endif
+
+#if MICROPROFILE_GPU_TIMERS_GL
+#define MICROPROFILE_GL_MAX_QUERIES (8<<10)
+MICROPROFILE_API void MicroProfileGpuInitGL();
+#endif
+
+
+
+#if MICROPROFILE_USE_THREAD_NAME_CALLBACK
+MICROPROFILE_API const char* MicroProfileGetThreadName();
+#else
+#define MicroProfileGetThreadName() "<implement MicroProfileGetThreadName to get threadnames>"
+#endif
+
+#if !defined(MICROPROFILE_THREAD_NAME_FROM_ID)
+#define MICROPROFILE_THREAD_NAME_FROM_ID(a) ""
+#endif
+
+
+// Scope guard for CPU timers: the constructor calls MicroProfileEnter and stores the tick
+// it returns, the destructor passes that tick to MicroProfileLeave. Instantiated by the
+// MICROPROFILE_SCOPE / MICROPROFILE_SCOPEI / MICROPROFILE_SCOPE_TOKEN macros.
+struct MicroProfileScopeHandler
+{
+    MicroProfileToken nToken;
+    uint64_t nTick;
+    MicroProfileScopeHandler(MicroProfileToken Token):nToken(Token)
+    {
+        nTick = MicroProfileEnter(nToken);
+    }
+    ~MicroProfileScopeHandler()
+    {
+        MicroProfileLeave(nToken, nTick);
+    }
+    // Not copyable: a copy would call MicroProfileLeave a second time for a single MicroProfileEnter.
+    MicroProfileScopeHandler(const MicroProfileScopeHandler&) = delete;
+    MicroProfileScopeHandler& operator=(const MicroProfileScopeHandler&) = delete;
+};
+
+// Scope guard for GPU timers: the constructor calls MicroProfileGpuEnter and stores the
+// value it returns, the destructor passes it to MicroProfileGpuLeave. Instantiated by the
+// MICROPROFILE_SCOPEGPU / MICROPROFILE_SCOPEGPUI macros.
+struct MicroProfileScopeGpuHandler
+{
+    MicroProfileToken nToken;
+    uint64_t nTick;
+    MicroProfileScopeGpuHandler(MicroProfileToken Token):nToken(Token)
+    {
+        nTick = MicroProfileGpuEnter(nToken);
+    }
+    ~MicroProfileScopeGpuHandler()
+    {
+        MicroProfileGpuLeave(nToken, nTick);
+    }
+    // Not copyable: a copy would call MicroProfileGpuLeave a second time for a single MicroProfileGpuEnter.
+    MicroProfileScopeGpuHandler(const MicroProfileScopeGpuHandler&) = delete;
+    MicroProfileScopeGpuHandler& operator=(const MicroProfileScopeGpuHandler&) = delete;
+};
+
+
+
+#define MICROPROFILE_MAX_TIMERS 1024
+#define MICROPROFILE_MAX_GROUPS 48 //don't bump! number of bits used in the group bitmask (MICROPROFILE_GROUP_MASK_ALL is 48 bits wide)
+#define MICROPROFILE_MAX_CATEGORIES 16
+#define MICROPROFILE_MAX_GRAPHS 5
+#define MICROPROFILE_GRAPH_HISTORY 128
+#define MICROPROFILE_BUFFER_SIZE ((MICROPROFILE_PER_THREAD_BUFFER_SIZE)/sizeof(MicroProfileLogEntry))
+#define MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS 256
+#define MICROPROFILE_STACK_MAX 32
+//#define MICROPROFILE_MAX_PRESETS 5
+#define MICROPROFILE_ANIM_DELAY_PRC 0.5f
+#define MICROPROFILE_GAP_TIME 50 //extra ms to fetch to close timers from earlier frames
+
+
+#ifndef MICROPROFILE_MAX_THREADS
+#define MICROPROFILE_MAX_THREADS 32
+#endif
+
+#ifndef MICROPROFILE_UNPACK_RED
+#define MICROPROFILE_UNPACK_RED(c) ((c)>>16)
+#endif
+
+#ifndef MICROPROFILE_UNPACK_GREEN
+#define MICROPROFILE_UNPACK_GREEN(c) ((c)>>8)
+#endif
+
+#ifndef MICROPROFILE_UNPACK_BLUE
+#define MICROPROFILE_UNPACK_BLUE(c) ((c))
+#endif
+
+#ifndef MICROPROFILE_DEFAULT_PRESET
+#define MICROPROFILE_DEFAULT_PRESET "Default"
+#endif
+
+
+#ifndef MICROPROFILE_CONTEXT_SWITCH_TRACE
+#if defined(_WIN32)
+#define MICROPROFILE_CONTEXT_SWITCH_TRACE 1
+#elif defined(__APPLE__)
+#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0 //disabled until dtrace script is working.
+#else
+#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0
+#endif
+#endif
+
+#if MICROPROFILE_CONTEXT_SWITCH_TRACE
+#define MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE (128*1024) //2mb with 16 byte entry size
+#else
+#define MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE (1)
+#endif
+
+#ifndef MICROPROFILE_MINIZ
+#define MICROPROFILE_MINIZ 0
+#endif
+
+#ifdef _WIN32
+#include <basetsd.h>
+typedef UINT_PTR MpSocket;
+#else
+typedef int MpSocket;
+#endif
+
+
+#if defined(__APPLE__) || defined(__linux__)
+typedef pthread_t MicroProfileThread;
+#elif defined(_WIN32)
+typedef HANDLE MicroProfileThread;
+#else
+typedef std::thread* MicroProfileThread;
+#endif
+
+
+
+// Display mode values. NOTE(review): presumably the values stored in MicroProfile::nDisplay - confirm.
+// MP_DRAW_HIDDEN has the same numeric value as MP_DRAW_BARS | MP_DRAW_DETAILED.
+enum MicroProfileDrawMask
+{
+ MP_DRAW_OFF = 0x0,
+ MP_DRAW_BARS = 0x1,
+ MP_DRAW_DETAILED = 0x2,
+ MP_DRAW_HIDDEN = 0x3,
+};
+
+// Bit flags: every enumerator except MP_DRAW_ALL is a single distinct bit.
+// NOTE(review): presumably these select the columns shown in bar view (MicroProfile::nBars) - confirm.
+enum MicroProfileDrawBarsMask
+{
+ MP_DRAW_TIMERS = 0x1,
+ MP_DRAW_AVERAGE = 0x2,
+ MP_DRAW_MAX = 0x4,
+ MP_DRAW_CALL_COUNT = 0x8,
+ MP_DRAW_TIMERS_EXCLUSIVE = 0x10,
+ MP_DRAW_AVERAGE_EXCLUSIVE = 0x20,
+ MP_DRAW_MAX_EXCLUSIVE = 0x40,
+ // Highest named bit; presumably the first of the per-meta-counter bits - confirm.
+ MP_DRAW_META_FIRST = 0x80,
+ // All 32 bits set.
+ MP_DRAW_ALL = 0xffffffff,
+
+};
+
+typedef uint64_t MicroProfileLogEntry;
+
+// Tick total plus a counter for one timer. Element type of the AccumTimers, Frame and
+// Aggregate arrays in MicroProfile.
+struct MicroProfileTimer
+{
+ uint64_t nTicks;
+ uint32_t nCount; // presumably the number of timed scopes that contributed to nTicks - confirm
+};
+
+// A named category. Element type of MicroProfile::CategoryInfo.
+struct MicroProfileCategory
+{
+ char pName[MICROPROFILE_NAME_MAX_LEN];
+ uint64_t nGroupMask; // presumably one bit per group belonging to this category - confirm
+};
+
+// Description of one timer group. Element type of MicroProfile::GroupInfo.
+struct MicroProfileGroupInfo
+{
+ char pName[MICROPROFILE_NAME_MAX_LEN];
+ uint32_t nNameLen;
+ uint32_t nGroupIndex;
+ uint32_t nNumTimers;
+ uint32_t nMaxTimerNameLen;
+ uint32_t nColor;
+ uint32_t nCategory; // presumably an index into MicroProfile::CategoryInfo - confirm
+ MicroProfileTokenType Type; // CPU or GPU group
+};
+
+// Description of one timer. Element type of MicroProfile::TimerInfo.
+struct MicroProfileTimerInfo
+{
+ MicroProfileToken nToken;
+ uint32_t nTimerIndex;
+ uint32_t nGroupIndex;
+ char pName[MICROPROFILE_NAME_MAX_LEN];
+ uint32_t nNameLen;
+ uint32_t nColor;
+ bool bGraph; // presumably set while the timer is shown as a graph - confirm
+};
+
+// State of one graph: MICROPROFILE_GRAPH_HISTORY samples for the timer identified by nToken.
+// Element type of MicroProfile::Graph.
+struct MicroProfileGraphState
+{
+ int64_t nHistory[MICROPROFILE_GRAPH_HISTORY];
+ MicroProfileToken nToken;
+ int32_t nKey;
+};
+
+// One context-switch record. Element type of MicroProfile::ContextSwitch.
+struct MicroProfileContextSwitch
+{
+ ThreadIdType nThreadOut;
+ ThreadIdType nThreadIn;
+ // The two bit-fields below share a single 64-bit unit: 8 bits of cpu, 56 bits of tick.
+ int64_t nCpu : 8;
+ int64_t nTicks : 56;
+};
+
+
+// Per-frame record kept in the MicroProfile::Frames history: CPU and GPU start ticks of the
+// frame plus one log position per thread slot.
+struct MicroProfileFrameState
+{
+ int64_t nFrameStartCpu;
+ int64_t nFrameStartGpu;
+ uint32_t nLogStart[MICROPROFILE_MAX_THREADS]; // presumably each thread log's position at frame start - confirm
+};
+
+// Per-thread log of timer events together with its bookkeeping.
+// MicroProfile::Pool holds pointers to up to MICROPROFILE_MAX_THREADS of these.
+struct MicroProfileThreadLog
+{
+    // Event storage; sized from MICROPROFILE_PER_THREAD_BUFFER_SIZE.
+    MicroProfileLogEntry Log[MICROPROFILE_BUFFER_SIZE];
+
+    // Positions into Log. NOTE(review): presumably write (nPut) and read (nGet) cursors of a ring buffer - confirm.
+    std::atomic<uint32_t> nPut;
+    std::atomic<uint32_t> nGet;
+    uint32_t nActive;
+    uint32_t nGpu;
+    ThreadIdType nThreadId;
+
+    // Stacks with room for MICROPROFILE_STACK_MAX entries each.
+    uint32_t nStack[MICROPROFILE_STACK_MAX];
+    int64_t nChildTickStack[MICROPROFILE_STACK_MAX];
+    uint32_t nStackPos;
+
+
+    // Per-group bookkeeping, one slot per possible group.
+    uint8_t nGroupStackPos[MICROPROFILE_MAX_GROUPS];
+    int64_t nGroupTicks[MICROPROFILE_MAX_GROUPS];
+    int64_t nAggregateGroupTicks[MICROPROFILE_MAX_GROUPS];
+    enum
+    {
+        THREAD_MAX_LEN = 64,
+    };
+    // Sized with THREAD_MAX_LEN (same 64 bytes as before) so the buffer and its limit cannot drift apart.
+    char ThreadName[THREAD_MAX_LEN];
+    int nFreeListNext;
+};
+
+// Backend-specific GPU timer state, embedded in MicroProfile as the member GPU.
+// Exactly one definition of MicroProfileGpuTimerState is compiled: D3D11, GL, or an empty placeholder.
+#if MICROPROFILE_GPU_TIMERS_D3D11
+// Query bookkeeping for one in-flight frame of the D3D11 backend.
+struct MicroProfileD3D11Frame
+{
+ uint32_t m_nQueryStart;
+ uint32_t m_nQueryCount;
+ uint32_t m_nRateQueryStarted;
+ void* m_pRateQuery;
+};
+
+// D3D11 backend: device pointers are stored as void*, with room for
+// MICROPROFILE_D3D_MAX_QUERIES queries and MICROPROFILE_GPU_FRAME_DELAY frames.
+struct MicroProfileGpuTimerState
+{
+ uint32_t bInitialized;
+ void* m_pDevice;
+ void* m_pDeviceContext;
+ void* m_pQueries[MICROPROFILE_D3D_MAX_QUERIES];
+ int64_t m_nQueryResults[MICROPROFILE_D3D_MAX_QUERIES];
+ uint32_t m_nQueryPut;
+ uint32_t m_nQueryGet;
+ uint32_t m_nQueryFrame;
+ int64_t m_nQueryFrequency;
+ MicroProfileD3D11Frame m_QueryFrames[MICROPROFILE_GPU_FRAME_DELAY];
+};
+#elif MICROPROFILE_GPU_TIMERS_GL
+// OpenGL backend: MICROPROFILE_GL_MAX_QUERIES timer slots plus a position into them.
+struct MicroProfileGpuTimerState
+{
+ uint32_t GLTimers[MICROPROFILE_GL_MAX_QUERIES];
+ uint32_t GLTimerPos;
+};
+#else
+// No GPU timer backend selected: empty placeholder so MicroProfile::GPU still has a type.
+struct MicroProfileGpuTimerState{};
+#endif
+
+// Global profiler state; MicroProfileGet() returns a pointer to it.
+// Field meanings noted below are limited to what the declarations themselves show.
+struct MicroProfile
+{
+ // Counts of registered timers, groups and categories, followed by aggregation bookkeeping.
+ uint32_t nTotalTimers;
+ uint32_t nGroupCount;
+ uint32_t nCategoryCount;
+ uint32_t nAggregateClear;
+ uint32_t nAggregateFlip;
+ uint32_t nAggregateFlipCount;
+ uint32_t nAggregateFrames;
+
+ uint64_t nAggregateFlipTick;
+
+ uint32_t nDisplay;
+ uint32_t nBars;
+ uint64_t nActiveGroup;
+ uint32_t nActiveBars;
+
+ uint64_t nForceGroup;
+ uint32_t nForceEnable;
+ uint32_t nForceMetaCounters;
+
+ uint64_t nForceGroupUI;
+ uint64_t nActiveGroupWanted;
+ uint32_t nAllGroupsWanted;
+ uint32_t nAllThreadsWanted;
+
+ uint32_t nOverflow;
+
+ uint64_t nGroupMask;
+ uint32_t nRunning;
+ uint32_t nToggleRunning;
+ uint32_t nMaxGroupSize;
+ uint32_t nDumpFileNextFrame;
+ uint32_t nAutoClearFrames;
+ // Fixed-size path buffers. NOTE(review): presumably filled by MicroProfileDumpFile - confirm.
+ char HtmlDumpPath[512];
+ char CsvDumpPath[512];
+
+ int64_t nPauseTicks;
+
+ float fReferenceTime;
+ float fRcpReferenceTime;
+
+ // Registration tables: one entry per category, group and timer.
+ MicroProfileCategory CategoryInfo[MICROPROFILE_MAX_CATEGORIES];
+ MicroProfileGroupInfo GroupInfo[MICROPROFILE_MAX_GROUPS];
+ MicroProfileTimerInfo TimerInfo[MICROPROFILE_MAX_TIMERS];
+ // Group index of each timer; read by MicroProfileGetGroupIndex.
+ uint8_t TimerToGroup[MICROPROFILE_MAX_TIMERS];
+
+ // Per-timer statistics, indexed by timer index.
+ MicroProfileTimer AccumTimers[MICROPROFILE_MAX_TIMERS];
+ uint64_t AccumMaxTimers[MICROPROFILE_MAX_TIMERS];
+ uint64_t AccumTimersExclusive[MICROPROFILE_MAX_TIMERS];
+ uint64_t AccumMaxTimersExclusive[MICROPROFILE_MAX_TIMERS];
+
+ MicroProfileTimer Frame[MICROPROFILE_MAX_TIMERS];
+ uint64_t FrameExclusive[MICROPROFILE_MAX_TIMERS];
+
+ MicroProfileTimer Aggregate[MICROPROFILE_MAX_TIMERS];
+ uint64_t AggregateMax[MICROPROFILE_MAX_TIMERS];
+ uint64_t AggregateExclusive[MICROPROFILE_MAX_TIMERS];
+ uint64_t AggregateMaxExclusive[MICROPROFILE_MAX_TIMERS];
+
+
+ // Per-group statistics, indexed by group index.
+ uint64_t FrameGroup[MICROPROFILE_MAX_GROUPS];
+ uint64_t AccumGroup[MICROPROFILE_MAX_GROUPS];
+ uint64_t AccumGroupMax[MICROPROFILE_MAX_GROUPS];
+
+ uint64_t AggregateGroup[MICROPROFILE_MAX_GROUPS];
+ uint64_t AggregateGroupMax[MICROPROFILE_MAX_GROUPS];
+
+
+ // Meta counters: up to MICROPROFILE_META_MAX named counters, each with per-timer values and sums.
+ struct
+ {
+ uint64_t nCounters[MICROPROFILE_MAX_TIMERS];
+
+ uint64_t nAccum[MICROPROFILE_MAX_TIMERS];
+ uint64_t nAccumMax[MICROPROFILE_MAX_TIMERS];
+
+ uint64_t nAggregate[MICROPROFILE_MAX_TIMERS];
+ uint64_t nAggregateMax[MICROPROFILE_MAX_TIMERS];
+
+ uint64_t nSum;
+ uint64_t nSumAccum;
+ uint64_t nSumAccumMax;
+ uint64_t nSumAggregate;
+ uint64_t nSumAggregateMax;
+
+ const char* pName;
+ } MetaCounters[MICROPROFILE_META_MAX];
+
+ MicroProfileGraphState Graph[MICROPROFILE_MAX_GRAPHS];
+ uint32_t nGraphPut;
+
+ // Thread log slots: Pool holds pointers to the per-thread logs.
+ uint32_t nThreadActive[MICROPROFILE_MAX_THREADS];
+ MicroProfileThreadLog* Pool[MICROPROFILE_MAX_THREADS];
+ uint32_t nNumLogs;
+ uint32_t nMemUsage;
+ int nFreeListHead;
+
+ // Frame history with room for MICROPROFILE_MAX_FRAME_HISTORY frames, plus positions into it.
+ uint32_t nFrameCurrent;
+ uint32_t nFrameCurrentIndex;
+ uint32_t nFramePut;
+ uint64_t nFramePutIndex;
+
+ MicroProfileFrameState Frames[MICROPROFILE_MAX_FRAME_HISTORY];
+
+ uint64_t nFlipTicks;
+ uint64_t nFlipAggregate;
+ uint64_t nFlipMax;
+ uint64_t nFlipAggregateDisplay;
+ uint64_t nFlipMaxDisplay;
+
+ // Context-switch tracing state. The ContextSwitch buffer has a single element when
+ // MICROPROFILE_CONTEXT_SWITCH_TRACE is 0.
+ MicroProfileThread ContextSwitchThread;
+ bool bContextSwitchRunning;
+ bool bContextSwitchStop;
+ bool bContextSwitchAllThreads;
+ bool bContextSwitchNoBars;
+ uint32_t nContextSwitchUsage;
+ uint32_t nContextSwitchLastPut;
+
+ int64_t nContextSwitchHoverTickIn;
+ int64_t nContextSwitchHoverTickOut;
+ uint32_t nContextSwitchHoverThread;
+ uint32_t nContextSwitchHoverThreadBefore;
+ uint32_t nContextSwitchHoverThreadAfter;
+ uint8_t nContextSwitchHoverCpu;
+ uint8_t nContextSwitchHoverCpuNext;
+
+ uint32_t nContextSwitchPut;
+ MicroProfileContextSwitch ContextSwitch[MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE];
+
+
+ // Web server state: listening socket, port, and a buffer of MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE bytes.
+ MpSocket ListenerSocket;
+ uint32_t nWebServerPort;
+
+ char WebServerBuffer[MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE];
+ uint32_t WebServerPut;
+
+ uint64_t nWebServerDataSent;
+
+ // Backend-specific GPU timer state (empty struct when no GPU timer backend is selected).
+ MicroProfileGpuTimerState GPU;
+
+
+};
+
+#define MP_LOG_TICK_MASK 0x0000ffffffffffff
+#define MP_LOG_INDEX_MASK 0x3fff000000000000
+#define MP_LOG_BEGIN_MASK 0xc000000000000000
+#define MP_LOG_GPU_EXTRA 0x3
+#define MP_LOG_META 0x2
+#define MP_LOG_ENTER 0x1
+#define MP_LOG_LEAVE 0x0
+
+
+// Returns the 2-bit entry type (MP_LOG_LEAVE, MP_LOG_ENTER, MP_LOG_META or MP_LOG_GPU_EXTRA)
+// stored in the top two bits of a log entry.
+inline int MicroProfileLogType(MicroProfileLogEntry Index)
+{
+    const uint64_t nTypeBits = (Index & MP_LOG_BEGIN_MASK) >> 62;
+    return nTypeBits & 0x3;
+}
+
+// Returns the 14-bit timer index stored in bits 48..61 of a log entry.
+inline uint64_t MicroProfileLogTimerIndex(MicroProfileLogEntry Index)
+{
+    const uint64_t nShifted = Index >> 48;
+    return nShifted & 0x3fff;
+}
+
+// Packs a log entry: nBegin (entry type) in the top 2 bits, the low 14 bits of nToken in
+// bits 48..61, and the low 48 bits of nTick at the bottom. Asserts that the type and
+// timer index read back from the packed value match the inputs.
+inline MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick)
+{
+    const uint64_t nTypeBits = nBegin << 62;
+    const uint64_t nIndexBits = (0x3fff & nToken) << 48;
+    const uint64_t nTickBits = MP_LOG_TICK_MASK & nTick;
+    const MicroProfileLogEntry Entry = nTypeBits | nIndexBits | nTickBits;
+
+    const int nStoredType = MicroProfileLogType(Entry);
+    const uint64_t nStoredIndex = MicroProfileLogTimerIndex(Entry);
+    MP_ASSERT(nStoredType == nBegin);
+    MP_ASSERT(nStoredIndex == (nToken & 0x3fff));
+    return Entry;
+}
+
+// Returns End's tick minus Start's tick as a signed value.
+// Both entries are shifted left by 16 so the 48-bit tick fills the top of the word (dropping
+// the type/index bits); the difference is then shifted back down as a signed 64-bit value.
+inline int64_t MicroProfileLogTickDifference(MicroProfileLogEntry Start, MicroProfileLogEntry End)
+{
+    const uint64_t nShiftedStart = static_cast<uint64_t>(Start) << 16;
+    const uint64_t nShiftedEnd = static_cast<uint64_t>(End) << 16;
+    const int64_t nShiftedDelta = nShiftedEnd - nShiftedStart;
+    return nShiftedDelta >> 16;
+}
+
+// Returns the 48-bit tick stored in the low bits of a log entry.
+inline int64_t MicroProfileLogGetTick(MicroProfileLogEntry e)
+{
+    const uint64_t nTickBits = e & MP_LOG_TICK_MASK;
+    return nTickBits;
+}
+
+// Returns entry e with its 48-bit tick payload replaced by nTick; the type and
+// index bits of e are kept.
+inline int64_t MicroProfileLogSetTick(MicroProfileLogEntry e, int64_t nTick)
+{
+    const auto nKeptBits = e & ~MP_LOG_TICK_MASK;
+    const auto nNewTickBits = MP_LOG_TICK_MASK & nTick;
+    return nNewTickBits | nKeptBits;
+}
+
+// Returns a when a < b, otherwise b.
+template<typename T>
+T MicroProfileMin(T a, T b)
+{
+    if(a < b)
+    {
+        return a;
+    }
+    return b;
+}
+
+// Returns a when a > b, otherwise b.
+template<typename T>
+T MicroProfileMax(T a, T b)
+{
+    if(a > b)
+    {
+        return a;
+    }
+    return b;
+}
+
+// Converts a duration in milliseconds to ticks at the given tick rate.
+// The product is computed in float and truncated toward zero.
+inline int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond)
+{
+    return static_cast<int64_t>(fMs*0.001f*nTicksPerSecond);
+}
+
+// Returns the factor that converts a tick count at the given rate to milliseconds.
+inline float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond)
+{
+    const float fTicksPerSecond = (float)nTicksPerSecond;
+    return 1000.f / fTicksPerSecond;
+}
+
+// Looks up the group a timer token belongs to via the TimerToGroup table.
+inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t)
+{
+    MicroProfile* pState = MicroProfileGet();
+    const auto nTimer = MicroProfileGetTimerIndex(t);
+    return (uint16_t)pState->TimerToGroup[nTimer];
+}
+
+
+
+#ifdef MICROPROFILE_IMPL
+
+#ifdef _WIN32
+#include <windows.h>
+#define snprintf _snprintf
+
+#pragma warning(push)
+#pragma warning(disable: 4244)
+// Returns the QueryPerformanceCounter frequency, querying it while the cached value is still zero.
+int64_t MicroProfileTicksPerSecondCpu()
+{
+    static int64_t nFrequency = 0;
+    if(0 != nFrequency)
+    {
+        return nFrequency;
+    }
+    QueryPerformanceFrequency((LARGE_INTEGER*)&nFrequency);
+    return nFrequency;
+}
+// Reads the current QueryPerformanceCounter value.
+int64_t MicroProfileGetTick()
+{
+    LARGE_INTEGER Counter;
+    QueryPerformanceCounter(&Counter);
+    return Counter.QuadPart;
+}
+
+#endif
+
+#if defined(MICROPROFILE_WEBSERVER) || defined(MICROPROFILE_CONTEXT_SWITCH_TRACE)
+
+
+typedef void* (*MicroProfileThreadFunc)(void*);
+
+#if defined(__APPLE__) || defined(__linux__)
+typedef pthread_t MicroProfileThread;
+// Starts Func on a new POSIX thread and stores the thread handle in *pThread.
+// Func is invoked with a null argument.
+void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
+{
+    pthread_attr_t Attr;
+    int r = pthread_attr_init(&Attr);
+    MP_ASSERT(r == 0);
+    r = pthread_create(pThread, &Attr, Func, 0);
+    // The attribute object is only needed for the create call; destroy it so
+    // whatever pthread_attr_init set up is released again.
+    pthread_attr_destroy(&Attr);
+    MP_ASSERT(r == 0);
+    (void)r; // keeps r "used" if MP_ASSERT expands to nothing
+}
+// Blocks until the thread behind *pThread has finished.
+void MicroProfileThreadJoin(MicroProfileThread* pThread)
+{
+    int nJoinResult = pthread_join(*pThread, 0);
+    MP_ASSERT(nJoinResult == 0);
+}
+#elif defined(_WIN32)
+typedef HANDLE MicroProfileThread;
+// Win32 thread entry point: pFunc carries the MicroProfileThreadFunc to run.
+// The function is called with a null argument and its result is returned as
+// the thread exit code.
+DWORD _stdcall ThreadTrampoline(void* pFunc)
+{
+    MicroProfileThreadFunc F = (MicroProfileThreadFunc)pFunc;
+    // Go through uintptr_t: casting a pointer straight to a 32-bit integer is a
+    // truncating pointer cast that 64-bit compilers warn about or reject.
+    return (DWORD)(uintptr_t)F(0);
+}
+
+// Starts Func on a new Win32 thread (via ThreadTrampoline) and stores the handle in *pThread.
+void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
+{
+    HANDLE hThread = CreateThread(0, 0, ThreadTrampoline, Func, 0, 0);
+    *pThread = hThread;
+}
+// Waits for the thread behind *pThread to finish, then closes its handle.
+void MicroProfileThreadJoin(MicroProfileThread* pThread)
+{
+    HANDLE hThread = *pThread;
+    WaitForSingleObject(hThread, INFINITE);
+    CloseHandle(hThread);
+}
+#else
+#include <thread>
+typedef std::thread* MicroProfileThread;
+// Starts Func (with a null argument) on a heap-allocated std::thread and stores
+// the pointer in *pThread; MicroProfileThreadJoin deletes it.
+inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
+{
+    std::thread* pStarted = new std::thread(Func, nullptr);
+    *pThread = pStarted;
+}
+// Joins the std::thread behind *pThread and frees it.
+inline void MicroProfileThreadJoin(MicroProfileThread* pThread)
+{
+    std::thread* pRunning = *pThread;
+    pRunning->join();
+    delete pRunning;
+}
+#endif
+#endif
+
+#if MICROPROFILE_WEBSERVER
+
+#ifdef _WIN32
+#define MP_INVALID_SOCKET(f) (f == INVALID_SOCKET)
+#endif
+
+#if defined(__APPLE__)
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <fcntl.h>
+#define MP_INVALID_SOCKET(f) (f < 0)
+#endif
+
+
+void MicroProfileWebServerStart();
+void MicroProfileWebServerStop();
+bool MicroProfileWebServerUpdate();
+void MicroProfileDumpToFile();
+
+#else
+
+#define MicroProfileWebServerStart() do{}while(0)
+#define MicroProfileWebServerStop() do{}while(0)
+#define MicroProfileWebServerUpdate() false
+#define MicroProfileDumpToFile() do{} while(0)
+#endif
+
+
+#if MICROPROFILE_GPU_TIMERS_D3D11
+void MicroProfileGpuFlip();
+void MicroProfileGpuShutdown();
+#else
+#define MicroProfileGpuFlip() do{}while(0)
+#define MicroProfileGpuShutdown() do{}while(0)
+#endif
+
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <algorithm>
+
+
+#ifndef MICROPROFILE_DEBUG
+#define MICROPROFILE_DEBUG 0
+#endif
+
+
+#define S g_MicroProfile
+
+MicroProfile g_MicroProfile;
+MicroProfileThreadLog* g_MicroProfileGpuLog = 0;
+#ifdef MICROPROFILE_IOS
+// iOS doesn't support __thread
+static pthread_key_t g_MicroProfileThreadLogKey;
+static pthread_once_t g_MicroProfileThreadLogKeyOnce = PTHREAD_ONCE_INIT;
+// pthread_once callback: creates the thread-specific key that holds each
+// thread's log pointer. No destructor callback is registered for the key.
+static void MicroProfileCreateThreadLogKey()
+{
+    pthread_key_t* pKey = &g_MicroProfileThreadLogKey;
+    pthread_key_create(pKey, NULL);
+}
+#else
+MP_THREAD_LOCAL MicroProfileThreadLog* g_MicroProfileThreadLog = 0;
+#endif
+static bool g_bUseLock = false; /// Locking stays off until MicroProfileOnThreadCreate turns it on, because Windows does not support using mutexes during DLL init (which is where global initialization is handled)
+
+
+MICROPROFILE_DEFINE(g_MicroProfileFlip, "MicroProfile", "MicroProfileFlip", 0x3355ee);
+MICROPROFILE_DEFINE(g_MicroProfileThreadLoop, "MicroProfile", "ThreadLoop", 0x3355ee);
+MICROPROFILE_DEFINE(g_MicroProfileClear, "MicroProfile", "Clear", 0x3355ee);
+MICROPROFILE_DEFINE(g_MicroProfileAccumulate, "MicroProfile", "Accumulate", 0x3355ee);
+MICROPROFILE_DEFINE(g_MicroProfileContextSwitchSearch,"MicroProfile", "ContextSwitchSearch", 0xDD7300);
+
+// Returns the single recursive mutex guarding the profiler state. It is a
+// function-local static, so it is constructed on the first call.
+inline std::recursive_mutex& MicroProfileMutex()
+{
+    static std::recursive_mutex s_ProfilerMutex;
+    return s_ProfilerMutex;
+}
+// Non-inline accessor for the profiler mutex returned by MicroProfileMutex().
+std::recursive_mutex& MicroProfileGetMutex()
+{
+    std::recursive_mutex& ProfilerMutex = MicroProfileMutex();
+    return ProfilerMutex;
+}
+
+// Returns a pointer to the global profiler state object.
+MICROPROFILE_API MicroProfile* MicroProfileGet()
+{
+    MicroProfile* pState = &g_MicroProfile;
+    return pState;
+}
+
+
+MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName);
+
+
+// One-time initialization of the global profiler state S. Only the first call
+// does any work; later calls just return. The profiler mutex is taken only
+// when g_bUseLock is set.
+// The first call zeroes S, installs the "default" category, resets the frame
+// history, creates the "GPU" thread log in pool slot 0, and marks the web
+// server as not yet started (nWebServerDataSent == (uint64_t)-1).
+void MicroProfileInit()
+{
+    std::recursive_mutex& mutex = MicroProfileMutex();
+    bool bUseLock = g_bUseLock;
+    if(bUseLock)
+        mutex.lock();
+    static bool bOnce = true;
+    if(bOnce)
+    {
+        bOnce = false;
+        memset(&S, 0, sizeof(S));
+        // Must come after the memset above, which would otherwise zero the count again.
+        S.nMemUsage += sizeof(S);
+        for(int i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
+        {
+            S.GroupInfo[i].pName[0] = '\0';
+        }
+        for(int i = 0; i < MICROPROFILE_MAX_CATEGORIES; ++i)
+        {
+            S.CategoryInfo[i].pName[0] = '\0';
+            S.CategoryInfo[i].nGroupMask = 0;
+        }
+        // Category 0 always exists and is the one new groups start in.
+        strcpy(&S.CategoryInfo[0].pName[0], "default");
+        S.nCategoryCount = 1;
+        for(int i = 0; i < MICROPROFILE_MAX_TIMERS; ++i)
+        {
+            S.TimerInfo[i].pName[0] = '\0';
+        }
+        S.nGroupCount = 0;
+        S.nAggregateFlipTick = MP_TICK();
+        S.nActiveGroup = 0;
+        S.nActiveBars = 0;
+        S.nForceGroup = 0;
+        S.nAllGroupsWanted = 0;
+        S.nActiveGroupWanted = 0;
+        S.nAllThreadsWanted = 1;
+        S.nAggregateFlip = 0;
+        S.nTotalTimers = 0;
+        for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
+        {
+            S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN;
+        }
+        S.nRunning = 1;
+        S.fReferenceTime = 33.33f;
+        S.fRcpReferenceTime = 1.f / S.fReferenceTime;
+        S.nFreeListHead = -1;
+        int64_t nTick = MP_TICK();
+        for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
+        {
+            S.Frames[i].nFrameStartCpu = nTick;
+            S.Frames[i].nFrameStartGpu = -1;
+        }
+
+        // The GPU log is the first log created, so it lands in pool slot 0.
+        MicroProfileThreadLog* pGpu = MicroProfileCreateThreadLog("GPU");
+        g_MicroProfileGpuLog = pGpu;
+        MP_ASSERT(S.Pool[0] == pGpu);
+        pGpu->nGpu = 1;
+        pGpu->nThreadId = 0;
+
+        // Sentinel checked by MicroProfileFlip to start the web server on the first flip.
+        S.nWebServerDataSent = (uint64_t)-1;
+    }
+    if(bUseLock)
+        mutex.unlock();
+}
+
+// Stops the web server, the context switch trace and the GPU timers while
+// holding the profiler mutex.
+void MicroProfileShutdown()
+{
+    std::recursive_mutex& ProfilerMutex = MicroProfileMutex();
+    std::lock_guard<std::recursive_mutex> Guard(ProfilerMutex);
+    MicroProfileWebServerStop();
+    MicroProfileStopContextSwitchTrace();
+    MicroProfileGpuShutdown();
+}
+
+#ifdef MICROPROFILE_IOS
+// Returns the calling thread's log pointer from thread-specific storage,
+// creating the storage key on first use.
+inline MicroProfileThreadLog* MicroProfileGetThreadLog()
+{
+    pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
+    void* pStored = pthread_getspecific(g_MicroProfileThreadLogKey);
+    return (MicroProfileThreadLog*)pStored;
+}
+
+// Stores pLog as the calling thread's log pointer in thread-specific storage,
+// creating the storage key on first use.
+inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
+{
+    pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
+    const void* pValue = pLog;
+    pthread_setspecific(g_MicroProfileThreadLogKey, pValue);
+}
+#else
+// Returns the calling thread's log pointer (null until one has been set).
+MicroProfileThreadLog* MicroProfileGetThreadLog()
+{
+    MicroProfileThreadLog* pCurrent = g_MicroProfileThreadLog;
+    return pCurrent;
+}
+// Makes pLog the calling thread's log.
+inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
+{
+    MicroProfileThreadLog*& pSlot = g_MicroProfileThreadLog;
+    pSlot = pLog;
+}
+#endif
+
+
+// Returns a zeroed, active thread log named pName (truncated to fit ThreadName)
+// and tagged with the calling thread's id. A log from the free list is reused
+// when one is available; otherwise a new one is allocated and appended to S.Pool.
+// Callers in this file hold the profiler mutex (or run during init) when calling this.
+MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName)
+{
+    MicroProfileThreadLog* pLog = 0;
+    if(S.nFreeListHead != -1)
+    {
+        // Recycle a log that MicroProfileOnThreadExit put on the free list.
+        pLog = S.Pool[S.nFreeListHead];
+        MP_ASSERT(pLog->nPut.load() == 0);
+        MP_ASSERT(pLog->nGet.load() == 0);
+        S.nFreeListHead = S.Pool[S.nFreeListHead]->nFreeListNext;
+    }
+    else
+    {
+        // The pool is scanned with MICROPROFILE_MAX_THREADS as its bound
+        // everywhere else; appending beyond that would write past its end.
+        MP_ASSERT(S.nNumLogs < MICROPROFILE_MAX_THREADS);
+        pLog = new MicroProfileThreadLog;
+        S.nMemUsage += sizeof(MicroProfileThreadLog);
+        S.Pool[S.nNumLogs++] = pLog;
+    }
+    memset(pLog, 0, sizeof(*pLog));
+    // Copy at most sizeof(ThreadName)-1 characters and always terminate.
+    int len = (int)strlen(pName);
+    int maxlen = sizeof(pLog->ThreadName)-1;
+    len = len < maxlen ? len : maxlen;
+    memcpy(&pLog->ThreadName[0], pName, len);
+    pLog->ThreadName[len] = '\0';
+    pLog->nThreadId = MP_GETCURRENTTHREADID();
+    pLog->nFreeListNext = -1;
+    pLog->nActive = 1;
+    return pLog;
+}
+
+// Registers the calling thread with the profiler: enables locking, makes sure
+// the profiler is initialized, and attaches a fresh thread log to the thread.
+// When pThreadName is null the name comes from MicroProfileGetThreadName().
+void MicroProfileOnThreadCreate(const char* pThreadName)
+{
+    g_bUseLock = true;
+    MicroProfileInit();
+    std::lock_guard<std::recursive_mutex> Guard(MicroProfileMutex());
+    MP_ASSERT(MicroProfileGetThreadLog() == 0);
+    const char* pLogName = pThreadName;
+    if(!pLogName)
+    {
+        pLogName = MicroProfileGetThreadName();
+    }
+    MicroProfileThreadLog* pNewLog = MicroProfileCreateThreadLog(pLogName);
+    MP_ASSERT(pNewLog);
+    MicroProfileSetThreadLog(pNewLog);
+}
+
+// Releases the calling thread's log: marks it inactive, resets its read/write
+// positions and per-group state, clears its frame-history start offsets, and
+// pushes it onto the free list for reuse by MicroProfileCreateThreadLog.
+// Does nothing when the thread has no log.
+void MicroProfileOnThreadExit()
+{
+    std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
+    MicroProfileThreadLog* pLog = MicroProfileGetThreadLog();
+    if(pLog)
+    {
+        int32_t nLogIndex = -1;
+        for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
+        {
+            if(pLog == S.Pool[i])
+            {
+                nLogIndex = i;
+                break;
+            }
+        }
+        // Slot 0 is excluded: MicroProfileInit places the GPU log there.
+        MP_ASSERT(nLogIndex < MICROPROFILE_MAX_THREADS && nLogIndex > 0);
+        pLog->nFreeListNext = S.nFreeListHead;
+        pLog->nActive = 0;
+        pLog->nPut.store(0);
+        pLog->nGet.store(0);
+        S.nFreeListHead = nLogIndex;
+        for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
+        {
+            S.Frames[i].nLogStart[nLogIndex] = 0;
+        }
+        memset(pLog->nGroupStackPos, 0, sizeof(pLog->nGroupStackPos));
+        memset(pLog->nGroupTicks, 0, sizeof(pLog->nGroupTicks));
+        // Detach the log from this thread. Without this the thread keeps a
+        // pointer to a recycled log, and a second call would link the free
+        // list entry to itself.
+        MicroProfileSetThreadLog(0);
+    }
+}
+
+// Registers the calling thread without an explicit name (a null name is
+// passed on to MicroProfileOnThreadCreate).
+void MicroProfileInitThreadLog()
+{
+    const char* pNoExplicitName = nullptr;
+    MicroProfileOnThreadCreate(pNoExplicitName);
+}
+
+
+// Scope guard that locks the given mutex only when g_bUseLock is set at
+// construction time, and unlocks it in the destructor in exactly that case.
+// The decision is captured once so a later change of g_bUseLock cannot
+// unbalance lock/unlock.
+struct MicroProfileScopeLock
+{
+    bool bUseLock;
+    std::recursive_mutex& m;
+    MicroProfileScopeLock(std::recursive_mutex& m) : bUseLock(g_bUseLock), m(m)
+    {
+        if(bUseLock)
+            m.lock();
+    }
+    ~MicroProfileScopeLock()
+    {
+        if(bUseLock)
+            m.unlock();
+    }
+    // A copy would unlock the mutex a second time in its destructor.
+    MicroProfileScopeLock(const MicroProfileScopeLock&) = delete;
+    MicroProfileScopeLock& operator=(const MicroProfileScopeLock&) = delete;
+};
+
+// Looks up an existing timer by group and timer name (both compared with
+// MP_STRCASECMP). Returns its token, or MICROPROFILE_INVALID_TOKEN when no
+// registered timer matches.
+MicroProfileToken MicroProfileFindToken(const char* pGroup, const char* pName)
+{
+    MicroProfileInit();
+    MicroProfileScopeLock L(MicroProfileMutex());
+    MicroProfileToken nFound = MICROPROFILE_INVALID_TOKEN;
+    for(uint32_t nTimer = 0; nTimer < S.nTotalTimers; ++nTimer)
+    {
+        if(0 != MP_STRCASECMP(pName, S.TimerInfo[nTimer].pName))
+            continue;
+        if(0 != MP_STRCASECMP(pGroup, S.GroupInfo[S.TimerToGroup[nTimer]].pName))
+            continue;
+        nFound = S.TimerInfo[nTimer].nToken;
+        break;
+    }
+    return nFound;
+}
+
+// Returns the index of the group named pGroup (compared with MP_STRCASECMP),
+// creating the group when it does not exist yet. A new group gets the given
+// Type, a default color, starts in category 0, and has its bit added to
+// S.nGroupMask. No locking is done here.
+uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
+{
+    for(uint32_t i = 0; i < S.nGroupCount; ++i)
+    {
+        if(!MP_STRCASECMP(pGroup, S.GroupInfo[i].pName))
+        {
+            return i;
+        }
+    }
+    // Check the capacity before touching S.GroupInfo, not after the write.
+    MP_ASSERT(S.nGroupCount < MICROPROFILE_MAX_GROUPS);
+    const uint16_t nGroupIndex = (uint16_t)S.nGroupCount;
+    auto& Group = S.GroupInfo[nGroupIndex];
+    // Copy the name, truncated to MICROPROFILE_NAME_MAX_LEN-1 characters.
+    uint32_t nLen = (uint32_t)strlen(pGroup);
+    if(nLen > MICROPROFILE_NAME_MAX_LEN-1)
+        nLen = MICROPROFILE_NAME_MAX_LEN-1;
+    memcpy(&Group.pName[0], pGroup, nLen);
+    Group.pName[nLen] = '\0';
+    Group.nNameLen = nLen;
+    Group.nNumTimers = 0;
+    Group.nGroupIndex = S.nGroupCount;
+    Group.Type = Type;
+    Group.nMaxTimerNameLen = 0;
+    Group.nColor = 0x88888888;
+    Group.nCategory = 0;
+    S.CategoryInfo[0].nGroupMask |= (1ll << (uint64_t)S.nGroupCount);
+    S.nGroupCount++;
+    S.nGroupMask = (S.nGroupMask<<1)|1;
+    return nGroupIndex;
+}
+
+// Sets the color of group pGroup (creating the group if needed) and moves it
+// into category pCategory. The category is created when it does not exist and
+// there is room; if it cannot be created the group keeps its current category.
+// A group named "gpu" (case-insensitive) is created as a GPU group, any other
+// name as a CPU group.
+void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor)
+{
+    int nCategoryIndex = -1;
+    for(uint32_t nCat = 0; nCat < S.nCategoryCount; ++nCat)
+    {
+        if(0 == MP_STRCASECMP(pCategory, S.CategoryInfo[nCat].pName))
+        {
+            nCategoryIndex = (int)nCat;
+            break;
+        }
+    }
+    const bool bHasRoom = S.nCategoryCount < MICROPROFILE_MAX_CATEGORIES;
+    if(-1 == nCategoryIndex && bHasRoom)
+    {
+        MP_ASSERT(S.CategoryInfo[S.nCategoryCount].pName[0] == '\0');
+        nCategoryIndex = (int)S.nCategoryCount;
+        S.nCategoryCount++;
+        uint32_t nCopyLen = (uint32_t)strlen(pCategory);
+        if(nCopyLen > MICROPROFILE_NAME_MAX_LEN-1)
+            nCopyLen = MICROPROFILE_NAME_MAX_LEN-1;
+        auto& NewCategory = S.CategoryInfo[nCategoryIndex];
+        memcpy(&NewCategory.pName[0], pCategory, nCopyLen);
+        NewCategory.pName[nCopyLen] = '\0';
+    }
+    const bool bIsGpuGroup = (0 == MP_STRCASECMP(pGroup, "gpu"));
+    uint16_t nGroup = MicroProfileGetGroup(pGroup, bIsGpuGroup ? MicroProfileTokenTypeGpu : MicroProfileTokenTypeCpu);
+    S.GroupInfo[nGroup].nColor = nColor;
+    if(nCategoryIndex < 0)
+    {
+        return;
+    }
+    // Move the group's bit from its old category mask to the new one.
+    const uint64_t nBit = 1ll << nGroup;
+    const uint32_t nOldCategory = S.GroupInfo[nGroup].nCategory;
+    S.CategoryInfo[nOldCategory].nGroupMask &= ~nBit;
+    S.CategoryInfo[nCategoryIndex].nGroupMask |= nBit;
+    S.GroupInfo[nGroup].nCategory = nCategoryIndex;
+}
+
+// Returns the token for timer pName in group pGroup, registering the timer
+// (and the group, if needed) on first use. An existing timer is returned
+// as-is; nColor and Type only apply when the timer is created. The stored
+// name is truncated to MICROPROFILE_NAME_MAX_LEN-1 characters.
+MicroProfileToken MicroProfileGetToken(const char* pGroup, const char* pName, uint32_t nColor, MicroProfileTokenType Type)
+{
+    MicroProfileInit();
+    MicroProfileScopeLock L(MicroProfileMutex());
+    MicroProfileToken ret = MicroProfileFindToken(pGroup, pName);
+    if(ret != MICROPROFILE_INVALID_TOKEN)
+        return ret;
+    uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type);
+    // S.TimerInfo is initialized for MICROPROFILE_MAX_TIMERS entries in
+    // MicroProfileInit; make sure the new slot is inside that range.
+    MP_ASSERT(S.nTotalTimers < MICROPROFILE_MAX_TIMERS);
+    uint16_t nTimerIndex = (uint16_t)(S.nTotalTimers++);
+    uint64_t nGroupMask = 1ll << nGroupIndex;
+    MicroProfileToken nToken = MicroProfileMakeToken(nGroupMask, nTimerIndex);
+    const uint32_t nFullNameLen = (uint32_t)strlen(pName);
+    S.GroupInfo[nGroupIndex].nNumTimers++;
+    // The untruncated length is what feeds nMaxTimerNameLen.
+    S.GroupInfo[nGroupIndex].nMaxTimerNameLen = MicroProfileMax(S.GroupInfo[nGroupIndex].nMaxTimerNameLen, nFullNameLen);
+    MP_ASSERT(S.GroupInfo[nGroupIndex].Type == Type); //dont mix cpu & gpu timers in the same group
+    S.nMaxGroupSize = MicroProfileMax(S.nMaxGroupSize, S.GroupInfo[nGroupIndex].nNumTimers);
+    S.TimerInfo[nTimerIndex].nToken = nToken;
+    uint32_t nLen = nFullNameLen;
+    if(nLen > MICROPROFILE_NAME_MAX_LEN-1)
+        nLen = MICROPROFILE_NAME_MAX_LEN-1;
+    memcpy(&S.TimerInfo[nTimerIndex].pName[0], pName, nLen);
+    S.TimerInfo[nTimerIndex].pName[nLen] = '\0';
+    S.TimerInfo[nTimerIndex].nNameLen = nLen;
+    S.TimerInfo[nTimerIndex].nColor = nColor&0xffffff;
+    S.TimerInfo[nTimerIndex].nGroupIndex = nGroupIndex;
+    S.TimerInfo[nTimerIndex].nTimerIndex = nTimerIndex;
+    S.TimerToGroup[nTimerIndex] = nGroupIndex;
+    return nToken;
+}
+
+// Returns the slot index of the meta counter named pName, claiming the first
+// unused slot when the name is not registered yet. The pointer pName itself is
+// stored (not a copy of the string). Asserts and returns (MicroProfileToken)-1
+// when all MICROPROFILE_META_MAX slots hold other names.
+MicroProfileToken MicroProfileGetMetaToken(const char* pName)
+{
+    MicroProfileInit();
+    MicroProfileScopeLock L(MicroProfileMutex());
+    for(uint32_t nSlot = 0; nSlot < MICROPROFILE_META_MAX; ++nSlot)
+    {
+        const char* pSlotName = S.MetaCounters[nSlot].pName;
+        if(0 == pSlotName)
+        {
+            // First free slot: claim it for this name.
+            S.MetaCounters[nSlot].pName = pName;
+            return nSlot;
+        }
+        if(0 == MP_STRCASECMP(pName, pSlotName))
+        {
+            return nSlot;
+        }
+    }
+    MP_ASSERT(0);//out of slots, increase MICROPROFILE_META_MAX
+    return (MicroProfileToken)-1;
+}
+
+
+// Appends one entry to pLog's ring buffer. When advancing the write position
+// would make it equal to the read position, the entry is dropped and
+// S.nOverflow is set to 100 instead.
+inline void MicroProfileLogPut(MicroProfileToken nToken_, uint64_t nTick, uint64_t nBegin, MicroProfileThreadLog* pLog)
+{
+    MP_ASSERT(pLog != 0); //hit when the thread has no log, i.e. MicroProfileOnThreadCreate was never called for it
+    MP_ASSERT(pLog->nActive);
+    const uint32_t nWritePos = pLog->nPut.load(std::memory_order_relaxed);
+    const uint32_t nAdvanced = (nWritePos+1) % MICROPROFILE_BUFFER_SIZE;
+    const bool bFull = (nAdvanced == pLog->nGet.load(std::memory_order_relaxed));
+    if(bFull)
+    {
+        S.nOverflow = 100;
+        return;
+    }
+    pLog->Log[nWritePos] = MicroProfileMakeLogIndex(nBegin, nToken_, nTick);
+    // Publish the entry: release pairs with the acquire load of nPut in MicroProfileFlip.
+    pLog->nPut.store(nAdvanced, std::memory_order_release);
+}
+
+// Logs an ENTER entry for the timer if its group is active and returns the
+// tick that was logged. Returns MICROPROFILE_INVALID_TICK (and logs nothing)
+// when the group is inactive. A thread log is created on demand.
+uint64_t MicroProfileEnter(MicroProfileToken nToken_)
+{
+    const bool bGroupActive = 0 != (MicroProfileGetGroupMask(nToken_) & S.nActiveGroup);
+    if(!bGroupActive)
+    {
+        return MICROPROFILE_INVALID_TICK;
+    }
+    if(0 == MicroProfileGetThreadLog())
+    {
+        MicroProfileInitThreadLog();
+    }
+    const uint64_t nNow = MP_TICK();
+    MicroProfileLogPut(nToken_, nNow, MP_LOG_ENTER, MicroProfileGetThreadLog());
+    return nNow;
+}
+
+// Logs a META entry carrying nCount for meta counter nToken, but only while
+// that counter's bit is set in S.nActiveBars. CPU updates go to the calling
+// thread's log, all others to the GPU log; nothing is logged when that log is null.
+void MicroProfileMetaUpdate(MicroProfileToken nToken, int nCount, MicroProfileTokenType eTokenType)
+{
+    if(0 == ((MP_DRAW_META_FIRST<<nToken) & S.nActiveBars))
+    {
+        return;
+    }
+    MicroProfileThreadLog* pTargetLog = 0;
+    if(MicroProfileTokenTypeCpu == eTokenType)
+    {
+        pTargetLog = MicroProfileGetThreadLog();
+    }
+    else
+    {
+        pTargetLog = g_MicroProfileGpuLog;
+    }
+    if(!pTargetLog)
+    {
+        return;
+    }
+    MP_ASSERT(nToken < MICROPROFILE_META_MAX);
+    MicroProfileLogPut(nToken, nCount, MP_LOG_META, pTargetLog);
+}
+
+
+// Logs a LEAVE entry for the timer at the current tick. Does nothing when
+// nTickStart is MICROPROFILE_INVALID_TICK, i.e. when the matching
+// MicroProfileEnter did not log. A thread log is created on demand.
+void MicroProfileLeave(MicroProfileToken nToken_, uint64_t nTickStart)
+{
+    if(MICROPROFILE_INVALID_TICK == nTickStart)
+    {
+        return;
+    }
+    if(0 == MicroProfileGetThreadLog())
+    {
+        MicroProfileInitThreadLog();
+    }
+    const uint64_t nNow = MP_TICK();
+    MicroProfileThreadLog* pThreadLog = MicroProfileGetThreadLog();
+    MicroProfileLogPut(nToken_, nNow, MP_LOG_LEAVE, pThreadLog);
+}
+
+
+// Logs a GPU ENTER entry (holding the value returned by
+// MicroProfileGpuInsertTimeStamp) followed by a GPU_EXTRA entry with the
+// current CPU tick, both into the GPU log. Returns 1 when logged, 0 when the
+// timer's group is inactive.
+uint64_t MicroProfileGpuEnter(MicroProfileToken nToken_)
+{
+    if(0 == (MicroProfileGetGroupMask(nToken_) & S.nActiveGroup))
+    {
+        return 0;
+    }
+    uint64_t nGpuStamp = MicroProfileGpuInsertTimeStamp();
+    MicroProfileLogPut(nToken_, nGpuStamp, MP_LOG_ENTER, g_MicroProfileGpuLog);
+    MicroProfileLogPut(nToken_, MP_TICK(), MP_LOG_GPU_EXTRA, g_MicroProfileGpuLog);
+    return 1;
+}
+
+// Logs a GPU LEAVE entry plus a GPU_EXTRA entry with the current CPU tick into
+// the GPU log. Does nothing when nTickStart is 0, the value MicroProfileGpuEnter
+// returns when it did not log.
+void MicroProfileGpuLeave(MicroProfileToken nToken_, uint64_t nTickStart)
+{
+    if(0 == nTickStart)
+    {
+        return;
+    }
+    uint64_t nGpuStamp = MicroProfileGpuInsertTimeStamp();
+    MicroProfileLogPut(nToken_, nGpuStamp, MP_LOG_LEAVE, g_MicroProfileGpuLog);
+    MicroProfileLogPut(nToken_, MP_TICK(), MP_LOG_GPU_EXTRA, g_MicroProfileGpuLog);
+}
+
+// Copies *pContextSwitch into the context switch ring buffer and advances the
+// put index. While the profiler is not running, only records whose tick is not
+// later than S.nPauseTicks are stored.
+void MicroProfileContextSwitchPut(MicroProfileContextSwitch* pContextSwitch)
+{
+    const bool bAccept = S.nRunning || pContextSwitch->nTicks <= S.nPauseTicks;
+    if(!bAccept)
+    {
+        return;
+    }
+    uint32_t nSlot = S.nContextSwitchPut;
+    S.ContextSwitch[nSlot] = *pContextSwitch;
+    S.nContextSwitchPut = (S.nContextSwitchPut+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
+}
+
+
+// Converts a ring buffer interval [nGet, nPut) into up to two half-open index
+// spans: nRange[0] and nRange[1], each as {start, end}. When the interval wraps,
+// span 0 runs to MICROPROFILE_BUFFER_SIZE and span 1 starts at 0. When
+// nPut == nGet, nRange is left untouched.
+void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2])
+{
+    if(nPut == nGet)
+    {
+        return;
+    }
+    if(nPut > nGet)
+    {
+        // No wrap: one span, the second stays empty.
+        nRange[0][0] = nGet;
+        nRange[0][1] = nPut;
+        nRange[1][0] = 0;
+        nRange[1][1] = 0;
+        return;
+    }
+    // Wrapped: tail of the buffer first, then the head.
+    MP_ASSERT(nGet != MICROPROFILE_BUFFER_SIZE);
+    const uint32_t nTailCount = MICROPROFILE_BUFFER_SIZE - nGet;
+    nRange[0][0] = nGet;
+    nRange[0][1] = nGet + nTailCount;
+    nRange[1][0] = 0;
+    nRange[1][1] = nPut;
+}
+
+void MicroProfileFlip() // Per-frame update: advances the frame ring, folds each thread's log entries into per-timer/per-group totals, and publishes aggregates.
+{
+ #if 0
+ //verify LogEntry wraps correctly
+ MicroProfileLogEntry c = MP_LOG_TICK_MASK-5000;
+ for(int i = 0; i < 10000; ++i, c += 1)
+ {
+ MicroProfileLogEntry l2 = (c+2500) & MP_LOG_TICK_MASK;
+ MP_ASSERT(2500 == MicroProfileLogTickDifference(c, l2));
+ }
+ #endif
+ MICROPROFILE_SCOPE(g_MicroProfileFlip);
+ std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex()); // held for the rest of the function
+
+
+ MicroProfileGpuFlip();
+
+ if(S.nToggleRunning) // a pause/resume request is pending
+ {
+ S.nRunning = !S.nRunning;
+ if(!S.nRunning)
+ S.nPauseTicks = MP_TICK(); // remember when profiling was paused
+ S.nToggleRunning = 0;
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
+ {
+ MicroProfileThreadLog* pLog = S.Pool[i];
+ if(pLog)
+ {
+ pLog->nStackPos = 0; // discard the open-scope stack of every pooled log
+ }
+ }
+ }
+ uint32_t nAggregateClear = S.nAggregateClear || S.nAutoClearFrames, nAggregateFlip = 0;
+ if(S.nDumpFileNextFrame)
+ {
+ MicroProfileDumpToFile();
+ S.nDumpFileNextFrame = 0;
+ S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; //hide spike from dumping webpage
+ }
+ if(S.nWebServerDataSent == (uint64_t)-1) // value stored by MicroProfileInit: start the web server on the first flip
+ {
+ MicroProfileWebServerStart();
+ S.nWebServerDataSent = 0;
+ }
+
+ if(MicroProfileWebServerUpdate())
+ {
+ S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; //hide spike from dumping webpage
+ }
+
+ if(S.nAutoClearFrames) // while counting down, force an aggregate flip and clear on every frame
+ {
+ nAggregateClear = 1;
+ nAggregateFlip = 1;
+ S.nAutoClearFrames -= 1;
+ }
+
+
+ if(S.nRunning || S.nForceEnable)
+ {
+ S.nFramePutIndex++;
+ S.nFramePut = (S.nFramePut+1) % MICROPROFILE_MAX_FRAME_HISTORY;
+ MP_ASSERT((S.nFramePutIndex % MICROPROFILE_MAX_FRAME_HISTORY) == S.nFramePut);
+ S.nFrameCurrent = (S.nFramePut + MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 1) % MICROPROFILE_MAX_FRAME_HISTORY; // the frame processed below trails the put frame by MICROPROFILE_GPU_FRAME_DELAY+1
+ S.nFrameCurrentIndex++;
+ uint32_t nFrameNext = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY;
+
+ uint32_t nContextSwitchPut = S.nContextSwitchPut;
+ if(S.nContextSwitchLastPut < nContextSwitchPut) // count of context switch entries written since the last flip
+ {
+ S.nContextSwitchUsage = (nContextSwitchPut - S.nContextSwitchLastPut);
+ }
+ else
+ {
+ S.nContextSwitchUsage = MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - S.nContextSwitchLastPut + nContextSwitchPut; // put index is not ahead of the last one: count as wrapped around the ring
+ }
+ S.nContextSwitchLastPut = nContextSwitchPut;
+
+ MicroProfileFrameState* pFramePut = &S.Frames[S.nFramePut];
+ MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent];
+ MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext];
+
+ pFramePut->nFrameStartCpu = MP_TICK();
+ pFramePut->nFrameStartGpu = (uint32_t)MicroProfileGpuInsertTimeStamp();
+ if(pFrameNext->nFrameStartGpu != (uint64_t)-1) // -1 is the initial value from MicroProfileInit; otherwise resolve the stored value via MicroProfileGpuGetTimeStamp
+ pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu);
+
+ if(pFrameCurrent->nFrameStartGpu == (uint64_t)-1)
+ pFrameCurrent->nFrameStartGpu = pFrameNext->nFrameStartGpu + 1;
+
+ uint64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu;
+ uint64_t nFrameEndCpu = pFrameNext->nFrameStartCpu;
+
+ {
+ uint64_t nTick = nFrameEndCpu - nFrameStartCpu; // CPU duration of the frame being processed
+ S.nFlipTicks = nTick;
+ S.nFlipAggregate += nTick;
+ S.nFlipMax = MicroProfileMax(S.nFlipMax, nTick);
+ }
+
+ uint8_t* pTimerToGroup = &S.TimerToGroup[0];
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
+ {
+ MicroProfileThreadLog* pLog = S.Pool[i];
+ if(!pLog)
+ {
+ pFramePut->nLogStart[i] = 0;
+ }
+ else
+ {
+ uint32_t nPut = pLog->nPut.load(std::memory_order_acquire);
+ pFramePut->nLogStart[i] = nPut; // the new frame's entries start at the thread's current write position
+ MP_ASSERT(nPut< MICROPROFILE_BUFFER_SIZE);
+ //need to keep last frame around to close timers. timers more than 1 frame old is ditched.
+ pLog->nGet.store(nPut, std::memory_order_relaxed);
+ }
+ }
+
+ if(S.nRunning) // per-frame accumulation only happens while running
+ {
+ uint64_t* pFrameGroup = &S.FrameGroup[0];
+ {
+ MICROPROFILE_SCOPE(g_MicroProfileClear);
+ for(uint32_t i = 0; i < S.nTotalTimers; ++i)
+ {
+ S.Frame[i].nTicks = 0;
+ S.Frame[i].nCount = 0;
+ S.FrameExclusive[i] = 0;
+ }
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
+ {
+ pFrameGroup[i] = 0;
+ }
+ for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j)
+ {
+ if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j)))
+ {
+ auto& Meta = S.MetaCounters[j];
+ for(uint32_t i = 0; i < S.nTotalTimers; ++i)
+ {
+ Meta.nCounters[i] = 0;
+ }
+ }
+ }
+
+ }
+ {
+ MICROPROFILE_SCOPE(g_MicroProfileThreadLoop);
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
+ {
+ MicroProfileThreadLog* pLog = S.Pool[i];
+ if(!pLog)
+ continue;
+
+ uint8_t* pGroupStackPos = &pLog->nGroupStackPos[0];
+ int64_t nGroupTicks[MICROPROFILE_MAX_GROUPS] = {0};
+
+
+ uint32_t nPut = pFrameNext->nLogStart[i]; // entries of the processed frame end where the next frame's begin
+ uint32_t nGet = pFrameCurrent->nLogStart[i];
+ uint32_t nRange[2][2] = { {0, 0}, {0, 0}, };
+ MicroProfileGetRange(nPut, nGet, nRange); // up to two contiguous spans when the ring buffer wraps
+
+
+ //fetch gpu results.
+ if(pLog->nGpu)
+ {
+ for(uint32_t j = 0; j < 2; ++j)
+ {
+ uint32_t nStart = nRange[j][0];
+ uint32_t nEnd = nRange[j][1];
+ for(uint32_t k = nStart; k < nEnd; ++k)
+ {
+ MicroProfileLogEntry L = pLog->Log[k];
+ if(MicroProfileLogType(L) < MP_LOG_META) // LEAVE/ENTER entries only
+ {
+ pLog->Log[k] = MicroProfileLogSetTick(L, MicroProfileGpuGetTimeStamp((uint32_t)MicroProfileLogGetTick(L))); // replace the stored value with the result of MicroProfileGpuGetTimeStamp
+ }
+ }
+ }
+ }
+
+
+ uint32_t* pStack = &pLog->nStack[0];
+ int64_t* pChildTickStack = &pLog->nChildTickStack[0];
+ uint32_t nStackPos = pLog->nStackPos;
+
+ for(uint32_t j = 0; j < 2; ++j)
+ {
+ uint32_t nStart = nRange[j][0];
+ uint32_t nEnd = nRange[j][1];
+ for(uint32_t k = nStart; k < nEnd; ++k)
+ {
+ MicroProfileLogEntry LE = pLog->Log[k];
+ int nType = MicroProfileLogType(LE);
+
+ if(MP_LOG_ENTER == nType)
+ {
+ int nTimer = MicroProfileLogTimerIndex(LE);
+ uint8_t nGroup = pTimerToGroup[nTimer];
+ MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX);
+ MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
+ pGroupStackPos[nGroup]++;
+ pStack[nStackPos++] = k; // push the log index of this ENTER
+ pChildTickStack[nStackPos] = 0; // child time of the new scope starts at zero
+
+ }
+ else if(MP_LOG_META == nType)
+ {
+ if(nStackPos) // meta counts are attributed to the innermost open timer; ignored when no scope is open
+ {
+ int64_t nMetaIndex = MicroProfileLogTimerIndex(LE);
+ int64_t nMetaCount = MicroProfileLogGetTick(LE);
+ MP_ASSERT(nMetaIndex < MICROPROFILE_META_MAX);
+ int64_t nCounter = MicroProfileLogTimerIndex(pLog->Log[pStack[nStackPos-1]]);
+ S.MetaCounters[nMetaIndex].nCounters[nCounter] += nMetaCount;
+ }
+ }
+ else if(MP_LOG_LEAVE == nType)
+ {
+ int nTimer = MicroProfileLogTimerIndex(LE);
+ uint8_t nGroup = pTimerToGroup[nTimer];
+ MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
+ if(nStackPos) // a LEAVE with no open scope is ignored
+ {
+ int64_t nTickStart = pLog->Log[pStack[nStackPos-1]]; // the entry on top of the stack is used as the matching ENTER
+ int64_t nTicks = MicroProfileLogTickDifference(nTickStart, LE);
+ int64_t nChildTicks = pChildTickStack[nStackPos];
+ nStackPos--;
+ pChildTickStack[nStackPos] += nTicks; // charge this scope's time to its parent as child time
+
+ uint32_t nTimerIndex = MicroProfileLogTimerIndex(LE);
+ S.Frame[nTimerIndex].nTicks += nTicks;
+ S.FrameExclusive[nTimerIndex] += (nTicks-nChildTicks); // exclusive time = inclusive time minus time spent in children
+ S.Frame[nTimerIndex].nCount += 1;
+
+ MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
+ uint8_t nGroupStackPos = pGroupStackPos[nGroup];
+ if(nGroupStackPos)
+ {
+ nGroupStackPos--;
+ if(0 == nGroupStackPos) // outermost open scope of this group closed: count its time for the group
+ {
+ nGroupTicks[nGroup] += nTicks;
+ }
+ pGroupStackPos[nGroup] = nGroupStackPos;
+ }
+ }
+ }
+ }
+ }
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
+ {
+ pLog->nGroupTicks[i] += nGroupTicks[i];
+ pFrameGroup[i] += nGroupTicks[i];
+ }
+ pLog->nStackPos = nStackPos; // scopes still open carry over to the next flip
+ }
+ }
+ {
+ MICROPROFILE_SCOPE(g_MicroProfileAccumulate);
+ for(uint32_t i = 0; i < S.nTotalTimers; ++i)
+ {
+ S.AccumTimers[i].nTicks += S.Frame[i].nTicks;
+ S.AccumTimers[i].nCount += S.Frame[i].nCount;
+ S.AccumMaxTimers[i] = MicroProfileMax(S.AccumMaxTimers[i], S.Frame[i].nTicks);
+ S.AccumTimersExclusive[i] += S.FrameExclusive[i];
+ S.AccumMaxTimersExclusive[i] = MicroProfileMax(S.AccumMaxTimersExclusive[i], S.FrameExclusive[i]);
+ }
+
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
+ {
+ S.AccumGroup[i] += pFrameGroup[i];
+ S.AccumGroupMax[i] = MicroProfileMax(S.AccumGroupMax[i], pFrameGroup[i]);
+ }
+
+ for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j)
+ {
+ if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j)))
+ {
+ auto& Meta = S.MetaCounters[j];
+ uint64_t nSum = 0;;
+ for(uint32_t i = 0; i < S.nTotalTimers; ++i)
+ {
+ uint64_t nCounter = Meta.nCounters[i];
+ Meta.nAccumMax[i] = MicroProfileMax(Meta.nAccumMax[i], nCounter);
+ Meta.nAccum[i] += nCounter;
+ nSum += nCounter;
+ }
+ Meta.nSumAccum += nSum;
+ Meta.nSumAccumMax = MicroProfileMax(Meta.nSumAccumMax, nSum);
+ }
+ }
+ }
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
+ {
+ if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN)
+ {
+ MicroProfileToken nToken = S.Graph[i].nToken;
+ S.Graph[i].nHistory[S.nGraphPut] = S.Frame[MicroProfileGetTimerIndex(nToken)].nTicks;
+ }
+ }
+ S.nGraphPut = (S.nGraphPut+1) % MICROPROFILE_GRAPH_HISTORY;
+
+ }
+
+
+ if(S.nRunning && S.nAggregateFlip <= ++S.nAggregateFlipCount) // enough frames accumulated: publish the aggregates below
+ {
+ nAggregateFlip = 1;
+ if(S.nAggregateFlip) // if 0 accumulate indefinitely
+ {
+ nAggregateClear = 1;
+ }
+ }
+ }
+ if(nAggregateFlip) // copy the accumulators into the Aggregate* arrays
+ {
+ memcpy(&S.Aggregate[0], &S.AccumTimers[0], sizeof(S.Aggregate[0]) * S.nTotalTimers);
+ memcpy(&S.AggregateMax[0], &S.AccumMaxTimers[0], sizeof(S.AggregateMax[0]) * S.nTotalTimers);
+ memcpy(&S.AggregateExclusive[0], &S.AccumTimersExclusive[0], sizeof(S.AggregateExclusive[0]) * S.nTotalTimers);
+ memcpy(&S.AggregateMaxExclusive[0], &S.AccumMaxTimersExclusive[0], sizeof(S.AggregateMaxExclusive[0]) * S.nTotalTimers);
+
+ memcpy(&S.AggregateGroup[0], &S.AccumGroup[0], sizeof(S.AggregateGroup));
+ memcpy(&S.AggregateGroupMax[0], &S.AccumGroupMax[0], sizeof(S.AggregateGroup));
+
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
+ {
+ MicroProfileThreadLog* pLog = S.Pool[i];
+ if(!pLog)
+ continue;
+
+ memcpy(&pLog->nAggregateGroupTicks[0], &pLog->nGroupTicks[0], sizeof(pLog->nAggregateGroupTicks));
+
+ if(nAggregateClear)
+ {
+ memset(&pLog->nGroupTicks[0], 0, sizeof(pLog->nGroupTicks));
+ }
+ }
+
+ for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j)
+ {
+ if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j)))
+ {
+ auto& Meta = S.MetaCounters[j];
+ memcpy(&Meta.nAggregateMax[0], &Meta.nAccumMax[0], sizeof(Meta.nAggregateMax[0]) * S.nTotalTimers);
+ memcpy(&Meta.nAggregate[0], &Meta.nAccum[0], sizeof(Meta.nAggregate[0]) * S.nTotalTimers);
+ Meta.nSumAggregate = Meta.nSumAccum;
+ Meta.nSumAggregateMax = Meta.nSumAccumMax;
+ if(nAggregateClear)
+ {
+ memset(&Meta.nAccumMax[0], 0, sizeof(Meta.nAccumMax[0]) * S.nTotalTimers);
+ memset(&Meta.nAccum[0], 0, sizeof(Meta.nAccum[0]) * S.nTotalTimers);
+ Meta.nSumAccum = 0;
+ Meta.nSumAccumMax = 0;
+ }
+ }
+ }
+
+
+
+
+
+ S.nAggregateFrames = S.nAggregateFlipCount;
+ S.nFlipAggregateDisplay = S.nFlipAggregate;
+ S.nFlipMaxDisplay = S.nFlipMax;
+ if(nAggregateClear) // restart accumulation from zero
+ {
+ memset(&S.AccumTimers[0], 0, sizeof(S.Aggregate[0]) * S.nTotalTimers);
+ memset(&S.AccumMaxTimers[0], 0, sizeof(S.AccumMaxTimers[0]) * S.nTotalTimers);
+ memset(&S.AccumTimersExclusive[0], 0, sizeof(S.AggregateExclusive[0]) * S.nTotalTimers);
+ memset(&S.AccumMaxTimersExclusive[0], 0, sizeof(S.AccumMaxTimersExclusive[0]) * S.nTotalTimers);
+ memset(&S.AccumGroup[0], 0, sizeof(S.AggregateGroup));
+ memset(&S.AccumGroupMax[0], 0, sizeof(S.AggregateGroup));
+
+ S.nAggregateFlipCount = 0;
+ S.nFlipAggregate = 0;
+ S.nFlipMax = 0;
+
+ S.nAggregateFlipTick = MP_TICK();
+ }
+ }
+ S.nAggregateClear = 0;
+
+ uint64_t nNewActiveGroup = 0; // recompute which groups record timers from the wanted/forced settings
+ if(S.nForceEnable || (S.nDisplay && S.nRunning))
+ nNewActiveGroup = S.nAllGroupsWanted ? S.nGroupMask : S.nActiveGroupWanted;
+ nNewActiveGroup |= S.nForceGroup;
+ nNewActiveGroup |= S.nForceGroupUI;
+ if(S.nActiveGroup != nNewActiveGroup)
+ S.nActiveGroup = nNewActiveGroup;
+ uint32_t nNewActiveBars = 0; // recompute the active bar mask the same way
+ if(S.nDisplay && S.nRunning)
+ nNewActiveBars = S.nBars;
+ if(S.nForceMetaCounters)
+ {
+ for(int i = 0; i < MICROPROFILE_META_MAX; ++i)
+ {
+ if(S.MetaCounters[i].pName)
+ {
+ nNewActiveBars |= (MP_DRAW_META_FIRST<<i);
+ }
+ }
+ }
+ if(nNewActiveBars != S.nActiveBars)
+ S.nActiveBars = nNewActiveBars;
+}
+
+// Stores the force-enable flag as 1 or 0.
+void MicroProfileSetForceEnable(bool bEnable)
+{
+    if(bEnable)
+    {
+        S.nForceEnable = 1;
+    }
+    else
+    {
+        S.nForceEnable = 0;
+    }
+}
+// Returns whether the force-enable flag is set.
+bool MicroProfileGetForceEnable()
+{
+    const bool bForced = (0 != S.nForceEnable);
+    return bForced;
+}
+
+// Stores the "all groups wanted" flag as 1 or 0.
+void MicroProfileSetEnableAllGroups(bool bEnableAllGroups)
+{
+    if(bEnableAllGroups)
+    {
+        S.nAllGroupsWanted = 1;
+    }
+    else
+    {
+        S.nAllGroupsWanted = 0;
+    }
+}
+
+// Adds (bEnabled) or removes (!bEnabled) all groups of category pCategory
+// to/from the wanted-group mask. The category name is matched with
+// MP_STRCASECMP; an unknown category is ignored.
+void MicroProfileEnableCategory(const char* pCategory, bool bEnabled)
+{
+    for(uint32_t nCat = 0; nCat < S.nCategoryCount; ++nCat)
+    {
+        if(0 != MP_STRCASECMP(pCategory, S.CategoryInfo[nCat].pName))
+        {
+            continue;
+        }
+        // Only the first matching category is used.
+        if(bEnabled)
+        {
+            S.nActiveGroupWanted |= S.CategoryInfo[nCat].nGroupMask;
+        }
+        else
+        {
+            S.nActiveGroupWanted &= ~S.CategoryInfo[nCat].nGroupMask;
+        }
+        return;
+    }
+}
+
+
+// Convenience overload: enables all groups of category pCategory.
+void MicroProfileEnableCategory(const char* pCategory)
+{
+    const bool bEnabled = true;
+    MicroProfileEnableCategory(pCategory, bEnabled);
+}
+// Disables all groups of category pCategory.
+void MicroProfileDisableCategory(const char* pCategory)
+{
+    const bool bEnabled = false;
+    MicroProfileEnableCategory(pCategory, bEnabled);
+}
+
+// Returns whether the "all groups wanted" flag is set.
+bool MicroProfileGetEnableAllGroups()
+{
+    const bool bAllWanted = (S.nAllGroupsWanted != 0);
+    return bAllWanted;
+}
+
+// Stores the force-meta-counters flag as 1 or 0.
+void MicroProfileSetForceMetaCounters(bool bForce)
+{
+    if(bForce)
+    {
+        S.nForceMetaCounters = 1;
+    }
+    else
+    {
+        S.nForceMetaCounters = 0;
+    }
+}
+
+// Returns whether the force-meta-counters flag is set.
+bool MicroProfileGetForceMetaCounters()
+{
+    const bool bForced = (S.nForceMetaCounters != 0);
+    return bForced;
+}
+
+// Sets the bar bit of the first registered meta counter whose name matches
+// pMeta (compared with MP_STRCASECMP). Unknown names are ignored.
+void MicroProfileEnableMetaCounter(const char* pMeta)
+{
+    for(uint32_t nSlot = 0; nSlot < MICROPROFILE_META_MAX; ++nSlot)
+    {
+        const char* pSlotName = S.MetaCounters[nSlot].pName;
+        if(!pSlotName)
+        {
+            continue;
+        }
+        if(0 != MP_STRCASECMP(pSlotName, pMeta))
+        {
+            continue;
+        }
+        S.nBars |= (MP_DRAW_META_FIRST<<nSlot);
+        return;
+    }
+}
+// Clears the bar bit of the first registered meta counter whose name matches
+// pMeta (compared with MP_STRCASECMP). Unknown names are ignored.
+void MicroProfileDisableMetaCounter(const char* pMeta)
+{
+    for(uint32_t nSlot = 0; nSlot < MICROPROFILE_META_MAX; ++nSlot)
+    {
+        const char* pSlotName = S.MetaCounters[nSlot].pName;
+        if(!pSlotName)
+        {
+            continue;
+        }
+        if(0 != MP_STRCASECMP(pSlotName, pMeta))
+        {
+            continue;
+        }
+        S.nBars &= ~(MP_DRAW_META_FIRST<<nSlot);
+        return;
+    }
+}
+
+
+// Sets the number of frames per aggregate. Passing 0 also requests a clear of
+// the accumulated data on the next flip.
+void MicroProfileSetAggregateFrames(int nFrames)
+{
+    const bool bRequestClear = (0 == nFrames);
+    S.nAggregateFlip = (uint32_t)nFrames;
+    if(bRequestClear)
+    {
+        S.nAggregateClear = 1;
+    }
+}
+
+// Returns the configured number of frames per aggregate.
+int MicroProfileGetAggregateFrames()
+{
+    const int nConfigured = S.nAggregateFlip;
+    return nConfigured;
+}
+
+// Returns the configured frames-per-aggregate when it is non-zero, otherwise
+// the number of frames accumulated so far.
+int MicroProfileGetCurrentAggregateFrames()
+{
+    if(S.nAggregateFlip)
+    {
+        return int(S.nAggregateFlip);
+    }
+    return int(S.nAggregateFlipCount);
+}
+
+
+// Sets the force bit of group pGroup, creating the group with the given Type
+// if it does not exist yet.
+void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type)
+{
+    MicroProfileInit();
+    std::lock_guard<std::recursive_mutex> Guard(MicroProfileMutex());
+    const uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type);
+    const auto nGroupBit = (1ll << nGroupIndex);
+    S.nForceGroup |= nGroupBit;
+}
+
+// Clears the force bit of group pGroup, creating the group with the given Type
+// if it does not exist yet.
+void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type)
+{
+    MicroProfileInit();
+    std::lock_guard<std::recursive_mutex> Guard(MicroProfileMutex());
+    const uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type);
+    const auto nGroupBit = (1ll << nGroupIndex);
+    S.nForceGroup &= ~nGroupBit;
+}
+
+
+// Fills the eight output arrays with per-timer statistics for the first
+// min(S.nTotalTimers, nSize) timers. Each array holds two floats per timer:
+//   [2*i]   the value in milliseconds
+//   [2*i+1] the value multiplied by S.fRcpReferenceTime and clamped to 1.0
+//           (pTotal is the exception: its second slot is always 0)
+// Ticks are converted with the GPU or CPU tick rate depending on the type of
+// the group the timer belongs to. Averages use integer tick division.
+void MicroProfileCalcAllTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize)
+{
+    // Writes one (milliseconds, fraction) pair into an output array.
+    auto StorePair = [](float* pOut, uint32_t nSlot, float fMsValue, float fPrcValue)
+    {
+        pOut[nSlot] = fMsValue;
+        pOut[nSlot+1] = fPrcValue;
+    };
+
+    for(uint32_t nTimer = 0; nTimer < S.nTotalTimers && nTimer < nSize; ++nTimer)
+    {
+        const uint32_t nGroup = S.TimerInfo[nTimer].nGroupIndex;
+        const float fTickToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroup].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
+        const uint32_t nSlot = nTimer * 2;
+        // Guard the divisors against zero.
+        const uint32_t nFrameDivisor = S.nAggregateFrames ? S.nAggregateFrames : 1;
+        const uint32_t nCallDivisor = S.Aggregate[nTimer].nCount ? S.Aggregate[nTimer].nCount : 1;
+        const float fMsToPrc = S.fRcpReferenceTime;
+
+        // Inclusive times.
+        const float fFrameMs = fTickToMs * (S.Frame[nTimer].nTicks);
+        const float fFramePrc = MicroProfileMin(fFrameMs * fMsToPrc, 1.f);
+        const float fAvgMs = fTickToMs * (S.Aggregate[nTimer].nTicks / nFrameDivisor);
+        const float fAvgPrc = MicroProfileMin(fAvgMs * fMsToPrc, 1.f);
+        const float fPeakMs = fTickToMs * (S.AggregateMax[nTimer]);
+        const float fPeakPrc = MicroProfileMin(fPeakMs * fMsToPrc, 1.f);
+        const float fPerCallMs = fTickToMs * (S.Aggregate[nTimer].nTicks / nCallDivisor);
+        const float fPerCallPrc = MicroProfileMin(fPerCallMs * fMsToPrc, 1.f);
+
+        // Exclusive times.
+        const float fFrameExclMs = fTickToMs * (S.FrameExclusive[nTimer]);
+        const float fFrameExclPrc = MicroProfileMin(fFrameExclMs * fMsToPrc, 1.f);
+        const float fAvgExclMs = fTickToMs * (S.AggregateExclusive[nTimer] / nFrameDivisor);
+        const float fAvgExclPrc = MicroProfileMin(fAvgExclMs * fMsToPrc, 1.f);
+        const float fPeakExclMs = fTickToMs * (S.AggregateMaxExclusive[nTimer]);
+        const float fPeakExclPrc = MicroProfileMin(fPeakExclMs * fMsToPrc, 1.f);
+
+        const float fSumMs = fTickToMs * S.Aggregate[nTimer].nTicks;
+
+        // All values are computed before anything is stored.
+        StorePair(pTimers, nSlot, fFrameMs, fFramePrc);
+        StorePair(pAverage, nSlot, fAvgMs, fAvgPrc);
+        StorePair(pMax, nSlot, fPeakMs, fPeakPrc);
+        StorePair(pCallAverage, nSlot, fPerCallMs, fPerCallPrc);
+        StorePair(pExclusive, nSlot, fFrameExclMs, fFrameExclPrc);
+        StorePair(pAverageExclusive, nSlot, fAvgExclMs, fAvgExclPrc);
+        StorePair(pMaxExclusive, nSlot, fPeakExclMs, fPeakExclPrc);
+        StorePair(pTotal, nSlot, fSumMs, 0.f);
+    }
+}
+
+// Raises the S.nToggleRunning flag; the toggle itself is handled wherever
+// that flag is consumed.
+void MicroProfileTogglePause()
+{
+    S.nToggleRunning = 1;
+}
+
+// Returns the current frame time, in milliseconds, of the timer identified by
+// (pGroup, pName). Returns 0 when MicroProfileFindToken does not find a token.
+// Ticks are converted with the GPU or CPU tick rate depending on the group type.
+float MicroProfileGetTime(const char* pGroup, const char* pName)
+{
+    const MicroProfileToken nToken = MicroProfileFindToken(pGroup, pName);
+    if(nToken != MICROPROFILE_INVALID_TOKEN)
+    {
+        const uint32_t nTimer = MicroProfileGetTimerIndex(nToken);
+        const uint32_t nGroup = MicroProfileGetGroupIndex(nToken);
+        const bool bGpuGroup = S.GroupInfo[nGroup].Type == MicroProfileTokenTypeGpu;
+        const float fTickToMs = MicroProfileTickToMsMultiplier(bGpuGroup ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
+        return S.Frame[nTimer].nTicks * fTickToMs;
+    }
+    return 0.f;
+}
+
+
+// Scans the whole context-switch ring buffer backwards, starting at the slot
+// just before S.nContextSwitchPut, and reports two slot indices:
+//   *pContextSwitchStart - the last visited slot whose tick is later than
+//                          nBaseTicksCpu minus 30 ms
+//   *pContextSwitchEnd   - the last visited slot whose tick is later than
+//                          nBaseTicksEndCpu plus 30 ms
+// Both default to the slot just before the put cursor when nothing qualifies.
+void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu)
+{
+    MICROPROFILE_SCOPE(g_MicroProfileContextSwitchSearch);
+    const uint32_t nPut = S.nContextSwitchPut;
+    const uint64_t nInitialSlot = (nPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
+    uint64_t nFoundStart = nInitialSlot;
+    uint64_t nFoundEnd = nInitialSlot;
+    // Widen the searched window by 30 ms on each side.
+    const int64_t nWindowEnd = nBaseTicksEndCpu + MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu());
+    const int64_t nWindowBegin = nBaseTicksCpu - MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu());
+    for(uint32_t nBack = 1; nBack <= MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; ++nBack)
+    {
+        const uint32_t nSlot = (nPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - nBack) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
+        const auto nSlotTicks = S.ContextSwitch[nSlot].nTicks;
+        if(nSlotTicks > nWindowEnd)
+        {
+            nFoundEnd = nSlot;
+        }
+        if(nSlotTicks > nWindowBegin)
+        {
+            nFoundStart = nSlot;
+        }
+    }
+    *pContextSwitchStart = nFoundStart;
+    *pContextSwitchEnd = nFoundEnd;
+}
+
+
+
+#if MICROPROFILE_WEBSERVER
+
+// NOTE(review): presumably signals that the HTML page is compiled into the
+// binary; the consumer of this macro is not visible here - confirm.
+#define MICROPROFILE_EMBED_HTML
+
+// Chunks of the embedded HTML page, defined in another translation unit.
+// MicroProfileDumpHtml writes every "begin" chunk before the generated data and
+// every "end" chunk after it, sending sizes[i]-1 bytes of chunk i.
+extern const char* g_MicroProfileHtml_begin[];
+extern size_t g_MicroProfileHtml_begin_sizes[];
+extern size_t g_MicroProfileHtml_begin_count;
+extern const char* g_MicroProfileHtml_end[];
+extern size_t g_MicroProfileHtml_end_sizes[];
+extern size_t g_MicroProfileHtml_end_count;
+
+// Output sink used by the dump functions: receives `size` bytes at pData plus
+// the opaque Handle supplied by the caller (a FILE* for MicroProfileWriteFile,
+// a pointer to a socket for MicroProfileWriteSocket).
+typedef void MicroProfileWriteCallback(void* Handle, size_t size, const char* pData);
+
+// Returns S.nWebServerPort. MicroProfileWebServerStart sets it to the bound
+// port, or to (uint32_t)-1 when no port could be bound.
+uint32_t MicroProfileWebServerPort()
+{
+    const uint32_t nPort = S.nWebServerPort;
+    return nPort;
+}
+
+// Records the output paths for a later file dump and flags which dumps are
+// wanted in S.nDumpFileNextFrame (bit 0 = HTML, bit 1 = CSV); the flags are
+// consumed by MicroProfileDumpToFile.
+//   pHtml - path for the HTML dump, or null to skip it
+//   pCsv  - path for the CSV dump, or null to skip it
+// A path that does not fit its destination buffer (including the terminator)
+// aborts the call at that point: flags set earlier in the call stay set, later
+// paths are not processed.
+void MicroProfileDumpFile(const char* pHtml, const char* pCsv)
+{
+    S.nDumpFileNextFrame = 0;
+    if(pHtml)
+    {
+        // size_t, not uint32_t: a narrowed length could wrap for huge strings,
+        // pass the bounds check and copy an unterminated prefix.
+        const size_t nLen = strlen(pHtml);
+        if(nLen > sizeof(S.HtmlDumpPath)-1)
+        {
+            return;
+        }
+        memcpy(S.HtmlDumpPath, pHtml, nLen+1);
+        S.nDumpFileNextFrame |= 1;
+    }
+    if(pCsv)
+    {
+        const size_t nLen = strlen(pCsv);
+        if(nLen > sizeof(S.CsvDumpPath)-1)
+        {
+            return;
+        }
+        memcpy(S.CsvDumpPath, pCsv, nLen+1);
+        S.nDumpFileNextFrame |= 2;
+    }
+}
+
+// printf-style helper: formats into a 32 KiB stack buffer and hands the result
+// to the write callback.
+//   CB     - sink that receives the formatted bytes
+//   Handle - opaque pointer forwarded to CB
+//   pFmt   - printf format string, followed by its arguments
+// Output longer than the buffer is truncated. Nothing is written when
+// formatting fails.
+void MicroProfilePrintf(MicroProfileWriteCallback CB, void* Handle, const char* pFmt, ...)
+{
+    char buffer[32*1024];
+    const size_t nCapacity = sizeof(buffer)-1;
+    va_list args;
+    va_start (args, pFmt);
+#ifdef _WIN32
+    int nFormatted = vsprintf_s(buffer, pFmt, args);
+#else
+    int nFormatted = vsnprintf(buffer, nCapacity, pFmt, args);
+#endif
+    va_end (args);
+    if(nFormatted < 0)
+    {
+        // Formatting error: a negative count would become a huge size_t.
+        return;
+    }
+    size_t size = (size_t)nFormatted;
+    if(size > nCapacity - 1)
+    {
+        // vsnprintf reports the length the full output would have had; only
+        // nCapacity-1 characters were actually stored.
+        size = nCapacity - 1;
+    }
+    CB(Handle, size, &buffer[0]);
+}
+
+// Inside MicroProfileDumpCsv, printf is redirected to the write callback.
+#define printf(...) MicroProfilePrintf(CB, Handle, __VA_ARGS__)
+// Writes the aggregated profiling data as CSV text through CB.
+//   CB        - sink that receives the generated text
+//   Handle    - opaque pointer forwarded to CB
+//   nMaxFrames - currently unused
+// Sections, in order: frame count, per-timer table, per-group table,
+// per-group/per-thread table, CPU frame times, GPU frame times, meta counters.
+void MicroProfileDumpCsv(MicroProfileWriteCallback CB, void* Handle, int nMaxFrames)
+{
+    uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
+    float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
+    float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
+
+    printf("frames,%d\n", nAggregateFrames);
+    printf("group,name,average,max,callaverage\n");
+
+    // One stack allocation holding the eight output arrays of
+    // MicroProfileCalcAllTimers (two floats per timer each).
+    uint32_t nNumTimers = S.nTotalTimers;
+    uint32_t nBlockSize = 2 * nNumTimers;
+    float* pTimers = (float*)alloca(nBlockSize * 8 * sizeof(float));
+    float* pAverage = pTimers + nBlockSize;
+    float* pMax = pTimers + 2 * nBlockSize;
+    float* pCallAverage = pTimers + 3 * nBlockSize;
+    float* pTimersExclusive = pTimers + 4 * nBlockSize;
+    float* pAverageExclusive = pTimers + 5 * nBlockSize;
+    float* pMaxExclusive = pTimers + 6 * nBlockSize;
+    float* pTotal = pTimers + 7 * nBlockSize;
+
+    MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers);
+
+    for(uint32_t i = 0; i < S.nTotalTimers; ++i)
+    {
+        uint32_t nIdx = i * 2;
+        // Group first, then timer name, matching the "group,name" header.
+        printf("\"%s\",\"%s\",%f,%f,%f\n", S.GroupInfo[S.TimerInfo[i].nGroupIndex].pName, S.TimerInfo[i].pName, pAverage[nIdx], pMax[nIdx], pCallAverage[nIdx]);
+    }
+
+    printf("\n\n");
+
+    printf("group,average,max,total\n");
+    for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
+    {
+        const char* pGroupName = S.GroupInfo[j].pName;
+        float fToMs = S.GroupInfo[j].Type == MicroProfileTokenTypeGpu ? fToMsGPU : fToMsCPU;
+        if(pGroupName[0] != '\0')
+        {
+            // The max column comes from S.AggregateGroupMax, not the average.
+            printf("\"%s\",%.3f,%.3f,%.3f\n", pGroupName, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroupMax[j], fToMs * S.AggregateGroup[j]);
+        }
+    }
+
+    printf("\n\n");
+    printf("group,thread,average,total\n");
+    for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
+    {
+        for(uint32_t i = 0; i < S.nNumLogs; ++i)
+        {
+            if(S.Pool[i])
+            {
+                const char* pThreadName = &S.Pool[i]->ThreadName[0];
+                float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
+                uint64_t nTicks = S.Pool[i]->nAggregateGroupTicks[j];
+                float fTime = nTicks / nAggregateFrames * fToMs;
+                float fTimeTotal = nTicks * fToMs;
+                // Skip group/thread pairs with a negligible total.
+                if(fTimeTotal > 0.01f)
+                {
+                    const char* pGroupName = S.GroupInfo[j].pName;
+                    printf("\"%s\",\"%s\",%.3f,%.3f\n", pGroupName, pThreadName, fTime, fTimeTotal);
+                }
+            }
+        }
+    }
+
+    printf("\n\n");
+    printf("frametimecpu\n");
+
+    // Walk the frame history from oldest to newest, ending at S.nFrameCurrent;
+    // each value is the distance between two consecutive frame starts.
+    const uint32_t nCount = MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3;
+    const uint32_t nStart = S.nFrameCurrent;
+    for(uint32_t i = nCount; i > 0; i--)
+    {
+        uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
+        uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
+        uint64_t nTicks = S.Frames[nFrameNext].nFrameStartCpu - S.Frames[nFrame].nFrameStartCpu;
+        printf("%f,", nTicks * fToMsCPU);
+    }
+    printf("\n");
+
+    printf("\n\n");
+    printf("frametimegpu\n");
+
+    for(uint32_t i = nCount; i > 0; i--)
+    {
+        uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
+        uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
+        uint64_t nTicks = S.Frames[nFrameNext].nFrameStartGpu - S.Frames[nFrame].nFrameStartGpu;
+        printf("%f,", nTicks * fToMsGPU);
+    }
+    printf("\n\n");
+    printf("Meta\n");//only single frame snapshot
+    printf("name,average,max,total\n");
+    for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
+    {
+        if(S.MetaCounters[j].pName)
+        {
+            printf("\"%s\",%f,%lld,%lld\n",S.MetaCounters[j].pName, S.MetaCounters[j].nSumAggregate / (float)nAggregateFrames, S.MetaCounters[j].nSumAggregateMax,S.MetaCounters[j].nSumAggregate);
+        }
+    }
+}
+#undef printf
+
+// Writes a self-contained HTML capture through CB.
+//   CB         - sink that receives the generated text
+//   Handle     - opaque pointer forwarded to CB
+//   nMaxFrames - upper bound on the number of history frames written
+//   pHost      - host string stored in the DumpHost variable; null is written as ''
+// Output order: the embedded "begin" HTML chunks, generated script variables
+// (DumpHost, AggregateInfo, CategoryInfo, GroupInfo, TimerInfo, thread tables,
+// MetaNames, Frames, context switches), the embedded "end" HTML chunks, and a
+// trailing HTML comment with marker counts per group and per timer.
+// S.nRunning and S.nActiveGroup are zeroed for the duration of the dump and
+// restored before returning.
+void MicroProfileDumpHtml(MicroProfileWriteCallback CB, void* Handle, int nMaxFrames, const char* pHost)
+{
+ uint32_t nRunning = S.nRunning;
+ S.nRunning = 0;
+ //stall pushing of timers
+ uint64_t nActiveGroup = S.nActiveGroup;
+ S.nActiveGroup = 0;
+ S.nPauseTicks = MP_TICK();
+
+
+ // Static page header; each chunk is sent without its last byte (size-1).
+ for(size_t i = 0; i < g_MicroProfileHtml_begin_count; ++i)
+ {
+ CB(Handle, g_MicroProfileHtml_begin_sizes[i]-1, g_MicroProfileHtml_begin[i]);
+ }
+ //dump info
+ uint64_t nTicks = MP_TICK();
+
+ float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
+ float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
+ float fAggregateMs = fToMsCPU * (nTicks - S.nAggregateFlipTick);
+ MicroProfilePrintf(CB, Handle, "var DumpHost = '%s';\n", pHost ? pHost : "");
+ time_t CaptureTime;
+ time(&CaptureTime);
+ MicroProfilePrintf(CB, Handle, "var DumpUtcCaptureTime = %ld;\n", CaptureTime);
+ MicroProfilePrintf(CB, Handle, "var AggregateInfo = {'Frames':%d, 'Time':%f};\n", S.nAggregateFrames, fAggregateMs);
+
+ //categories
+ MicroProfilePrintf(CB, Handle, "var CategoryInfo = Array(%d);\n",S.nCategoryCount);
+ for(uint32_t i = 0; i < S.nCategoryCount; ++i)
+ {
+ MicroProfilePrintf(CB, Handle, "CategoryInfo[%d] = \"%s\";\n", i, S.CategoryInfo[i].pName);
+ }
+
+ //groups
+ MicroProfilePrintf(CB, Handle, "var GroupInfo = Array(%d);\n\n",S.nGroupCount);
+ // Avoid dividing by zero when no frames have been aggregated yet.
+ uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
+ float fRcpAggregateFrames = 1.f / nAggregateFrames;
+ for(uint32_t i = 0; i < S.nGroupCount; ++i)
+ {
+ MP_ASSERT(i == S.GroupInfo[i].nGroupIndex);
+ float fToMs = S.GroupInfo[i].Type == MicroProfileTokenTypeCpu ? fToMsCPU : fToMsGPU;
+ MicroProfilePrintf(CB, Handle, "GroupInfo[%d] = MakeGroup(%d, \"%s\", %d, %d, %d, %f, %f, %f, '#%02x%02x%02x');\n",
+ S.GroupInfo[i].nGroupIndex,
+ S.GroupInfo[i].nGroupIndex,
+ S.GroupInfo[i].pName,
+ S.GroupInfo[i].nCategory,
+ S.GroupInfo[i].nNumTimers,
+ S.GroupInfo[i].Type == MicroProfileTokenTypeGpu?1:0,
+ fToMs * S.AggregateGroup[i],
+ fToMs * S.AggregateGroup[i] / nAggregateFrames,
+ fToMs * S.AggregateGroupMax[i],
+ MICROPROFILE_UNPACK_RED(S.GroupInfo[i].nColor) & 0xff,
+ MICROPROFILE_UNPACK_GREEN(S.GroupInfo[i].nColor) & 0xff,
+ MICROPROFILE_UNPACK_BLUE(S.GroupInfo[i].nColor) & 0xff);
+ }
+ //timers
+
+ // One stack allocation holding the eight output arrays of
+ // MicroProfileCalcAllTimers (two floats per timer each).
+ uint32_t nNumTimers = S.nTotalTimers;
+ uint32_t nBlockSize = 2 * nNumTimers;
+ float* pTimers = (float*)alloca(nBlockSize * 8 * sizeof(float));
+ float* pAverage = pTimers + nBlockSize;
+ float* pMax = pTimers + 2 * nBlockSize;
+ float* pCallAverage = pTimers + 3 * nBlockSize;
+ float* pTimersExclusive = pTimers + 4 * nBlockSize;
+ float* pAverageExclusive = pTimers + 5 * nBlockSize;
+ float* pMaxExclusive = pTimers + 6 * nBlockSize;
+ float* pTotal = pTimers + 7 * nBlockSize;
+
+ MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers);
+
+ MicroProfilePrintf(CB, Handle, "\nvar TimerInfo = Array(%d);\n\n", S.nTotalTimers);
+ for(uint32_t i = 0; i < S.nTotalTimers; ++i)
+ {
+ uint32_t nIdx = i * 2;
+ MP_ASSERT(i == S.TimerInfo[i].nTimerIndex);
+ // Per-timer meta counter arrays (current, average, max); only named
+ // counters are written, and bOnce controls the comma separator.
+ MicroProfilePrintf(CB, Handle, "var Meta%d = [", i);
+ bool bOnce = true;
+ for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
+ {
+ if(S.MetaCounters[j].pName)
+ {
+ uint32_t lala = S.MetaCounters[j].nCounters[i];
+ MicroProfilePrintf(CB, Handle, bOnce ? "%d" : ",%d", lala);
+ bOnce = false;
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+ MicroProfilePrintf(CB, Handle, "var MetaAvg%d = [", i);
+ bOnce = true;
+ for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
+ {
+ if(S.MetaCounters[j].pName)
+ {
+ MicroProfilePrintf(CB, Handle, bOnce ? "%f" : ",%f", fRcpAggregateFrames * S.MetaCounters[j].nAggregate[i]);
+ bOnce = false;
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+ MicroProfilePrintf(CB, Handle, "var MetaMax%d = [", i);
+ bOnce = true;
+ for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
+ {
+ if(S.MetaCounters[j].pName)
+ {
+ MicroProfilePrintf(CB, Handle, bOnce ? "%d" : ",%d", S.MetaCounters[j].nAggregateMax[i]);
+ bOnce = false;
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+
+
+ uint32_t nColor = S.TimerInfo[i].nColor;
+ // Halve every 8-bit channel to derive the second color written below.
+ uint32_t nColorDark = (nColor >> 1) & ~0x80808080;
+ MicroProfilePrintf(CB, Handle, "TimerInfo[%d] = MakeTimer(%d, \"%s\", %d, '#%02x%02x%02x','#%02x%02x%02x', %f, %f, %f, %f, %f, %d, %f, Meta%d, MetaAvg%d, MetaMax%d);\n", S.TimerInfo[i].nTimerIndex, S.TimerInfo[i].nTimerIndex, S.TimerInfo[i].pName, S.TimerInfo[i].nGroupIndex,
+ MICROPROFILE_UNPACK_RED(nColor) & 0xff,
+ MICROPROFILE_UNPACK_GREEN(nColor) & 0xff,
+ MICROPROFILE_UNPACK_BLUE(nColor) & 0xff,
+ MICROPROFILE_UNPACK_RED(nColorDark) & 0xff,
+ MICROPROFILE_UNPACK_GREEN(nColorDark) & 0xff,
+ MICROPROFILE_UNPACK_BLUE(nColorDark) & 0xff,
+ pAverage[nIdx],
+ pMax[nIdx],
+ pAverageExclusive[nIdx],
+ pMaxExclusive[nIdx],
+ pCallAverage[nIdx],
+ S.Aggregate[i].nCount,
+ pTotal[nIdx],
+ i,i,i);
+
+ }
+
+ // Thread names; empty pool slots get a placeholder name.
+ MicroProfilePrintf(CB, Handle, "\nvar ThreadNames = [");
+ for(uint32_t i = 0; i < S.nNumLogs; ++i)
+ {
+ if(S.Pool[i])
+ {
+ MicroProfilePrintf(CB, Handle, "'%s',", S.Pool[i]->ThreadName);
+ }
+ else
+ {
+ MicroProfilePrintf(CB, Handle, "'Thread %d',", i);
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "];\n\n");
+
+
+ // Per-thread group times averaged over the aggregate frame count
+ // (integer division of ticks before conversion to milliseconds).
+ for(uint32_t i = 0; i < S.nNumLogs; ++i)
+ {
+ if(S.Pool[i])
+ {
+ MicroProfilePrintf(CB, Handle, "var ThreadGroupTime%d = [", i);
+ float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
+ for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
+ {
+ MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j]/nAggregateFrames * fToMs);
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "\nvar ThreadGroupTimeArray = [");
+ for(uint32_t i = 0; i < S.nNumLogs; ++i)
+ {
+ if(S.Pool[i])
+ {
+ MicroProfilePrintf(CB, Handle, "ThreadGroupTime%d,", i);
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+
+
+ // Per-thread group times, total over the aggregate.
+ for(uint32_t i = 0; i < S.nNumLogs; ++i)
+ {
+ if(S.Pool[i])
+ {
+ MicroProfilePrintf(CB, Handle, "var ThreadGroupTimeTotal%d = [", i);
+ float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
+ for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
+ {
+ MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j] * fToMs);
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "\nvar ThreadGroupTimeTotalArray = [");
+ for(uint32_t i = 0; i < S.nNumLogs; ++i)
+ {
+ if(S.Pool[i])
+ {
+ MicroProfilePrintf(CB, Handle, "ThreadGroupTimeTotal%d,", i);
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "];");
+
+
+
+
+ // Thread ids; a zero id and an empty pool slot are both written as -1.
+ MicroProfilePrintf(CB, Handle, "\nvar ThreadIds = [");
+ for(uint32_t i = 0; i < S.nNumLogs; ++i)
+ {
+ if(S.Pool[i])
+ {
+ ThreadIdType ThreadId = S.Pool[i]->nThreadId;
+ if(!ThreadId)
+ {
+ ThreadId = (ThreadIdType)-1;
+ }
+ // NOTE(review): %d assumes ThreadIdType is int-sized - confirm.
+ MicroProfilePrintf(CB, Handle, "%d,", ThreadId);
+ }
+ else
+ {
+ // NOTE(review): the format string has no conversion, so the extra argument i is unused.
+ MicroProfilePrintf(CB, Handle, "-1,", i);
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "];\n\n");
+
+ MicroProfilePrintf(CB, Handle, "\nvar MetaNames = [");
+ for(int i = 0; i < MICROPROFILE_META_MAX; ++i)
+ {
+ if(S.MetaCounters[i].pName)
+ {
+ MicroProfilePrintf(CB, Handle, "'%s',", S.MetaCounters[i].pName);
+ }
+ }
+
+
+ MicroProfilePrintf(CB, Handle, "];\n\n");
+
+
+
+ uint32_t nNumFrames = (MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3); //leave a few to not overwrite
+ nNumFrames = MicroProfileMin(nNumFrames, (uint32_t)nMaxFrames);
+
+
+ // The dumped window is the nNumFrames history slots ending at S.nFrameCurrent.
+ uint32_t nFirstFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
+ uint32_t nLastFrame = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
+ MP_ASSERT(nLastFrame == (S.nFrameCurrent % MICROPROFILE_MAX_FRAME_HISTORY));
+ MP_ASSERT(nFirstFrame < MICROPROFILE_MAX_FRAME_HISTORY);
+ MP_ASSERT(nLastFrame < MICROPROFILE_MAX_FRAME_HISTORY);
+ const int64_t nTickStart = S.Frames[nFirstFrame].nFrameStartCpu;
+ const int64_t nTickEnd = S.Frames[nLastFrame].nFrameStartCpu;
+ int64_t nTickStartGpu = S.Frames[nFirstFrame].nFrameStartGpu;
+
+ int64_t nTickReferenceCpu, nTickReferenceGpu;
+ int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
+ int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
+ int nTickReference = 0;
+ // When a CPU/GPU reference pair is available, express the CPU start tick
+ // in GPU ticks relative to that reference.
+ if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu))
+ {
+ nTickStartGpu = (nTickStart - nTickReferenceCpu) * nTicksPerSecondGpu / nTicksPerSecondCpu + nTickReferenceGpu;
+ nTickReference = 1;
+ }
+
+
+#if MICROPROFILE_DEBUG
+ printf("dumping %d frames\n", nNumFrames);
+ printf("dumping frame %d to %d\n", nFirstFrame, nLastFrame);
+#endif
+
+
+ // Number of log entries seen per timer; reported in the trailing comment.
+ uint32_t* nTimerCounter = (uint32_t*)alloca(sizeof(uint32_t)* S.nTotalTimers);
+ memset(nTimerCounter, 0, sizeof(uint32_t) * S.nTotalTimers);
+
+ MicroProfilePrintf(CB, Handle, "var Frames = Array(%d);\n", nNumFrames);
+ for(uint32_t i = 0; i < nNumFrames; ++i)
+ {
+ uint32_t nFrameIndex = (nFirstFrame + i) % MICROPROFILE_MAX_FRAME_HISTORY;
+ uint32_t nFrameIndexNext = (nFrameIndex + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
+
+ // For every thread log, write three parallel arrays covering the log
+ // entries of this frame: ts_ (time in ms), tt_ (entry type), ti_ (timer index).
+ for(uint32_t j = 0; j < S.nNumLogs; ++j)
+ {
+ // NOTE(review): unlike the loops above, S.Pool[j] is dereferenced without a null check - confirm it cannot be null here.
+ MicroProfileThreadLog* pLog = S.Pool[j];
+ int64_t nStartTickBase = pLog->nGpu ? nTickStartGpu : nTickStart;
+ uint32_t nLogStart = S.Frames[nFrameIndex].nLogStart[j];
+ uint32_t nLogEnd = S.Frames[nFrameIndexNext].nLogStart[j];
+
+ float fToMsCpu = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
+ float fToMsBase = MicroProfileTickToMsMultiplier(pLog->nGpu ? nTicksPerSecondGpu : nTicksPerSecondCpu);
+ MicroProfilePrintf(CB, Handle, "var ts_%d_%d = [", i, j);
+ if(nLogStart != nLogEnd)
+ {
+ // The first entry is written without a leading comma; the loop
+ // below writes the rest, wrapping around the log ring buffer.
+ // MP_LOG_GPU_EXTRA entries use the CPU time base, MP_LOG_META entries get time 0.
+ uint32_t k = nLogStart;
+ uint32_t nLogType = MicroProfileLogType(pLog->Log[k]);
+ float fToMs = nLogType == MP_LOG_GPU_EXTRA ? fToMsCpu : fToMsBase;
+ int64_t nStartTick = nLogType == MP_LOG_GPU_EXTRA ? nTickStart : nStartTickBase;
+ float fTime = nLogType == MP_LOG_META ? 0.f : MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs;
+ MicroProfilePrintf(CB, Handle, "%f", fTime);
+ for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE)
+ {
+ uint32_t nLogType = MicroProfileLogType(pLog->Log[k]);
+ float fToMs = nLogType == MP_LOG_GPU_EXTRA ? fToMsCpu : fToMsBase;
+ nStartTick = nLogType == MP_LOG_GPU_EXTRA ? nTickStart : nStartTickBase;
+ float fTime = nLogType == MP_LOG_META ? 0.f : MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs;
+ MicroProfilePrintf(CB, Handle, ",%f", fTime);
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+ MicroProfilePrintf(CB, Handle, "var tt_%d_%d = [", i, j);
+ if(nLogStart != nLogEnd)
+ {
+ uint32_t k = nLogStart;
+ MicroProfilePrintf(CB, Handle, "%d", MicroProfileLogType(pLog->Log[k]));
+ for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE)
+ {
+ uint32_t nLogType = MicroProfileLogType(pLog->Log[k]);
+ if(nLogType == MP_LOG_META)
+ {
+ //for meta, store the count + 3, which is the tick part
+ nLogType = 3 + MicroProfileLogGetTick(pLog->Log[k]);
+ }
+ MicroProfilePrintf(CB, Handle, ",%d", nLogType);
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+
+ MicroProfilePrintf(CB, Handle, "var ti_%d_%d = [", i, j);
+ if(nLogStart != nLogEnd)
+ {
+ uint32_t k = nLogStart;
+ // NOTE(review): the first entry is written but not added to nTimerCounter, unlike the entries in the loop - confirm this is intended.
+ MicroProfilePrintf(CB, Handle, "%d", (uint32_t)MicroProfileLogTimerIndex(pLog->Log[k]));
+ for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE)
+ {
+ uint32_t nTimerIndex = (uint32_t)MicroProfileLogTimerIndex(pLog->Log[k]);
+ MicroProfilePrintf(CB, Handle, ",%d", nTimerIndex);
+ nTimerCounter[nTimerIndex]++;
+ }
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+
+ }
+
+ // Per-frame arrays that collect the per-thread arrays written above.
+ MicroProfilePrintf(CB, Handle, "var ts%d = [", i);
+ for(uint32_t j = 0; j < S.nNumLogs; ++j)
+ {
+ MicroProfilePrintf(CB, Handle, "ts_%d_%d,", i, j);
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+ MicroProfilePrintf(CB, Handle, "var tt%d = [", i);
+ for(uint32_t j = 0; j < S.nNumLogs; ++j)
+ {
+ MicroProfilePrintf(CB, Handle, "tt_%d_%d,", i, j);
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+
+ MicroProfilePrintf(CB, Handle, "var ti%d = [", i);
+ for(uint32_t j = 0; j < S.nNumLogs; ++j)
+ {
+ MicroProfilePrintf(CB, Handle, "ti_%d_%d,", i, j);
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+
+
+ int64_t nFrameStart = S.Frames[nFrameIndex].nFrameStartCpu;
+ int64_t nFrameEnd = S.Frames[nFrameIndexNext].nFrameStartCpu;
+
+ // Frame start/end in ms relative to the start of the dumped window; the
+ // GPU values stay 0 when no CPU/GPU tick reference was obtained.
+ float fToMs = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
+ float fFrameMs = MicroProfileLogTickDifference(nTickStart, nFrameStart) * fToMs;
+ float fFrameEndMs = MicroProfileLogTickDifference(nTickStart, nFrameEnd) * fToMs;
+ float fFrameGpuMs = 0;
+ float fFrameGpuEndMs = 0;
+ if(nTickReference)
+ {
+ fFrameGpuMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndex].nFrameStartGpu) * fToMsGPU;
+ fFrameGpuEndMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndexNext].nFrameStartGpu) * fToMsGPU;
+ }
+ MicroProfilePrintf(CB, Handle, "Frames[%d] = MakeFrame(%d, %f, %f, %f, %f, ts%d, tt%d, ti%d);\n", i, 0, fFrameMs, fFrameEndMs, fFrameGpuMs, fFrameGpuEndMs, i, i, i);
+ }
+
+ // Context switches in the range reported by MicroProfileContextSwitchSearch
+ // for the dumped tick window.
+ uint32_t nContextSwitchStart = 0;
+ uint32_t nContextSwitchEnd = 0;
+ MicroProfileContextSwitchSearch(&nContextSwitchStart, &nContextSwitchEnd, nTickStart, nTickEnd);
+
+ // S.nWebServerDataSent is sampled before and after to report the size of this section.
+ uint32_t nWrittenBefore = S.nWebServerDataSent;
+ MicroProfilePrintf(CB, Handle, "var CSwitchThreadInOutCpu = [");
+ for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
+ {
+ MicroProfileContextSwitch CS = S.ContextSwitch[j];
+ int nCpu = CS.nCpu;
+ MicroProfilePrintf(CB, Handle, "%d,%d,%d,", CS.nThreadIn, CS.nThreadOut, nCpu);
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+ MicroProfilePrintf(CB, Handle, "var CSwitchTime = [");
+ float fToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
+ for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
+ {
+ MicroProfileContextSwitch CS = S.ContextSwitch[j];
+ float fTime = MicroProfileLogTickDifference(nTickStart, CS.nTicks) * fToMsCpu;
+ MicroProfilePrintf(CB, Handle, "%f,", fTime);
+ }
+ MicroProfilePrintf(CB, Handle, "];\n");
+ uint32_t nWrittenAfter = S.nWebServerDataSent;
+ MicroProfilePrintf(CB, Handle, "//CSwitch Size %d\n", nWrittenAfter - nWrittenBefore);
+
+
+ // Static page footer; each chunk is sent without its last byte (size-1).
+ for(size_t i = 0; i < g_MicroProfileHtml_end_count; ++i)
+ {
+ CB(Handle, g_MicroProfileHtml_end_sizes[i]-1, g_MicroProfileHtml_end[i]);
+ }
+
+ // Sum the per-timer marker counts into per-group counts.
+ uint32_t* nGroupCounter = (uint32_t*)alloca(sizeof(uint32_t)* S.nGroupCount);
+
+ memset(nGroupCounter, 0, sizeof(uint32_t) * S.nGroupCount);
+ for(uint32_t i = 0; i < S.nTotalTimers; ++i)
+ {
+ uint32_t nGroupIndex = S.TimerInfo[i].nGroupIndex;
+ nGroupCounter[nGroupIndex] += nTimerCounter[i];
+ }
+
+ // Index arrays, sorted below by descending marker count.
+ uint32_t* nGroupCounterSort = (uint32_t*)alloca(sizeof(uint32_t)* S.nGroupCount);
+ uint32_t* nTimerCounterSort = (uint32_t*)alloca(sizeof(uint32_t)* S.nTotalTimers);
+ for(uint32_t i = 0; i < S.nGroupCount; ++i)
+ {
+ nGroupCounterSort[i] = i;
+ }
+ for(uint32_t i = 0; i < S.nTotalTimers; ++i)
+ {
+ nTimerCounterSort[i] = i;
+ }
+ std::sort(nGroupCounterSort, nGroupCounterSort + S.nGroupCount,
+ [nGroupCounter](const uint32_t l, const uint32_t r)
+ {
+ return nGroupCounter[l] > nGroupCounter[r];
+ }
+ );
+
+ std::sort(nTimerCounterSort, nTimerCounterSort + S.nTotalTimers,
+ [nTimerCounter](const uint32_t l, const uint32_t r)
+ {
+ return nTimerCounter[l] > nTimerCounter[r];
+ }
+ );
+
+ // Trailing HTML comment listing the marker counts.
+ MicroProfilePrintf(CB, Handle, "\n<!--\nMarker Per Group\n");
+ for(uint32_t i = 0; i < S.nGroupCount; ++i)
+ {
+ uint32_t idx = nGroupCounterSort[i];
+ MicroProfilePrintf(CB, Handle, "%8d:%s\n", nGroupCounter[idx], S.GroupInfo[idx].pName);
+ }
+ MicroProfilePrintf(CB, Handle, "Marker Per Timer\n");
+ for(uint32_t i = 0; i < S.nTotalTimers; ++i)
+ {
+ uint32_t idx = nTimerCounterSort[i];
+ MicroProfilePrintf(CB, Handle, "%8d:%s(%s)\n", nTimerCounter[idx], S.TimerInfo[idx].pName, S.GroupInfo[S.TimerInfo[idx].nGroupIndex].pName);
+ }
+ MicroProfilePrintf(CB, Handle, "\n-->\n");
+
+ // Restore the state saved at the top of the function.
+ S.nActiveGroup = nActiveGroup;
+ S.nRunning = nRunning;
+
+#if MICROPROFILE_DEBUG
+ int64_t nTicksEnd = MP_TICK();
+ float fMs = fToMsCpu * (nTicksEnd - S.nPauseTicks);
+ printf("html dump took %6.2fms\n", fMs);
+#endif
+
+
+}
+
+// MicroProfileWriteCallback implementation for files: Handle is the FILE* to
+// write to; the nSize bytes at pData are written as a single fwrite item.
+void MicroProfileWriteFile(void* Handle, size_t nSize, const char* pData)
+{
+    FILE* pFile = (FILE*)Handle;
+    fwrite(pData, nSize, 1, pFile);
+}
+
+// Performs the file dumps requested through MicroProfileDumpFile while holding
+// the profiler mutex: bit 0 of S.nDumpFileNextFrame selects the HTML dump to
+// S.HtmlDumpPath, bit 1 the CSV dump to S.CsvDumpPath. A file that cannot be
+// opened is silently skipped. The request bits are not cleared here.
+void MicroProfileDumpToFile()
+{
+    std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
+    if(S.nDumpFileNextFrame&1)
+    {
+        if(FILE* pHtmlFile = fopen(S.HtmlDumpPath, "w"))
+        {
+            MicroProfileDumpHtml(MicroProfileWriteFile, pHtmlFile, MICROPROFILE_WEBSERVER_MAXFRAMES, S.HtmlDumpPath);
+            fclose(pHtmlFile);
+        }
+    }
+    if(S.nDumpFileNextFrame&2)
+    {
+        if(FILE* pCsvFile = fopen(S.CsvDumpPath, "w"))
+        {
+            MicroProfileDumpCsv(MicroProfileWriteFile, pCsvFile, MICROPROFILE_WEBSERVER_MAXFRAMES);
+            fclose(pCsvFile);
+        }
+    }
+}
+
+// Sends the S.WebServerPut bytes currently staged in S.WebServerBuffer to
+// Socket and resets the staging cursor. The result of send() is not checked.
+void MicroProfileFlushSocket(MpSocket Socket)
+{
+    auto* pStaged = &S.WebServerBuffer[0];
+    send(Socket, pStaged, S.WebServerPut, 0);
+    S.WebServerPut = 0;
+}
+
+// MicroProfileWriteCallback implementation for sockets: Handle points to the
+// MpSocket to write to. Payloads larger than half the staging buffer are sent
+// directly, after flushing what is already staged. Smaller payloads are
+// appended to S.WebServerBuffer, which is flushed once it is more than half
+// full. S.nWebServerDataSent counts every byte passed in.
+void MicroProfileWriteSocket(void* Handle, size_t nSize, const char* pData)
+{
+    S.nWebServerDataSent += nSize;
+    MpSocket Socket = *(MpSocket*)Handle;
+    const bool bSendDirect = nSize > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE / 2;
+    if(bSendDirect)
+    {
+        // Flush first so that output stays in order.
+        MicroProfileFlushSocket(Socket);
+        send(Socket, pData, nSize, 0);
+        return;
+    }
+
+    memcpy(&S.WebServerBuffer[S.WebServerPut], pData, nSize);
+    S.WebServerPut += nSize;
+    if(S.WebServerPut > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE/2)
+    {
+        MicroProfileFlushSocket(Socket);
+    }
+}
+
+#if MICROPROFILE_MINIZ
+// Total size of the compression staging memory (256 KiB unless overridden);
+// it is split evenly between the input and the output buffer below.
+#ifndef MICROPROFILE_COMPRESS_BUFFER_SIZE
+#define MICROPROFILE_COMPRESS_BUFFER_SIZE (256<<10)
+#endif
+
+#define MICROPROFILE_COMPRESS_CHUNK (MICROPROFILE_COMPRESS_BUFFER_SIZE/2)
+// State of one deflate-compressed socket transfer; used as the Handle of
+// MicroProfileCompressedWriteSocket.
+struct MicroProfileCompressedSocketState
+{
+ // Compressed output staged for send(); Stream.next_out points into it.
+ unsigned char DeflateOut[MICROPROFILE_COMPRESS_CHUNK];
+ // Uncompressed input staged for mz_deflate; Stream.next_in points into it.
+ unsigned char DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
+ // miniz deflate stream.
+ mz_stream Stream;
+ // Destination socket.
+ MpSocket Socket;
+ // Statistics: uncompressed bytes written, compressed bytes sent, number of
+ // flushes triggered from the write loop, bytes moved when compacting the input.
+ uint32_t nSize;
+ uint32_t nCompressedSize;
+ uint32_t nFlushes;
+ uint32_t nMemmoveBytes;
+};
+
+// Sends whatever compressed data has accumulated in DeflateOut to the socket,
+// adds it to nCompressedSize and rewinds the stream's output cursor to the
+// start of DeflateOut. The result of send() is not checked.
+void MicroProfileCompressedSocketFlush(MicroProfileCompressedSocketState* pState)
+{
+    mz_stream& Stream = pState->Stream;
+    unsigned char* pOutBegin = &pState->DeflateOut[0];
+    unsigned char* pOutEnd = &pState->DeflateOut[MICROPROFILE_COMPRESS_CHUNK - Stream.avail_out];
+    const auto nPending = pOutEnd - pOutBegin;
+    if(nPending != 0)
+    {
+        send(pState->Socket, (const char*)pOutBegin, nPending, 0);
+        pState->nCompressedSize += nPending;
+    }
+    Stream.avail_out = MICROPROFILE_COMPRESS_CHUNK;
+    Stream.next_out = &pState->DeflateOut[0];
+}
+// Prepares pState for a new compressed transfer to Socket: resets the
+// statistics, zeroes the miniz stream, points it at the empty input and output
+// buffers and initialises deflate with the default compression level.
+void MicroProfileCompressedSocketStart(MicroProfileCompressedSocketState* pState, MpSocket Socket)
+{
+    pState->Socket = Socket;
+    pState->nSize = 0;
+    pState->nCompressedSize = 0;
+    pState->nFlushes = 0;
+    pState->nMemmoveBytes = 0;
+
+    mz_stream& Stream = pState->Stream;
+    memset(&Stream, 0, sizeof(Stream));
+    Stream.next_in = &pState->DeflateIn[0];
+    Stream.avail_in = 0;
+    Stream.next_out = &pState->DeflateOut[0];
+    Stream.avail_out = MICROPROFILE_COMPRESS_CHUNK;
+    mz_deflateInit(&Stream, Z_DEFAULT_COMPRESSION);
+}
+// Ends the compressed transfer: sends pending output, finishes the deflate
+// stream with MZ_FINISH, sends what that produced and releases the stream.
+// Unexpected miniz results are only caught by MP_ASSERT.
+void MicroProfileCompressedSocketFinish(MicroProfileCompressedSocketState* pState)
+{
+    mz_stream& Stream = pState->Stream;
+
+    MicroProfileCompressedSocketFlush(pState);
+    int nResult = mz_deflate(&Stream, MZ_FINISH);
+    MP_ASSERT(nResult == MZ_STREAM_END);
+
+    MicroProfileCompressedSocketFlush(pState);
+    nResult = mz_deflateEnd(&Stream);
+    MP_ASSERT(nResult == MZ_OK);
+}
+
+// MicroProfileWriteCallback implementation for compressed sockets: Handle is a
+// MicroProfileCompressedSocketState*. Data is staged in DeflateIn; deflate only
+// runs once a write no longer fits behind the input that is already staged.
+void MicroProfileCompressedWriteSocket(void* Handle, size_t nSize, const char* pData)
+{
+ MicroProfileCompressedSocketState* pState = (MicroProfileCompressedSocketState*)Handle;
+ mz_stream& Stream = pState->Stream;
+ // End of the staged, not yet consumed input, and the bounds of DeflateIn.
+ const unsigned char* pDeflateInEnd = Stream.next_in + Stream.avail_in;
+ const unsigned char* pDeflateInStart = &pState->DeflateIn[0];
+ const unsigned char* pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
+ pState->nSize += nSize;
+ // Fast path: the data fits in the free tail of DeflateIn, so just append it.
+ if(nSize <= pDeflateInRealEnd - pDeflateInEnd)
+ {
+ memcpy((void*)pDeflateInEnd, pData, nSize);
+ Stream.avail_in += nSize;
+ MP_ASSERT(Stream.next_in + Stream.avail_in <= pDeflateInRealEnd);
+ return;
+ }
+ // Slow path: alternate between staging as much as fits and deflating.
+ int Flush = 0;
+ while(nSize)
+ {
+ pDeflateInEnd = Stream.next_in + Stream.avail_in;
+ if(Flush)
+ {
+ // The previous round could not make progress: send the compressed
+ // output and, if the staged input reaches the end of DeflateIn, move
+ // the unconsumed part to the front to free space.
+ pState->nFlushes++;
+ MicroProfileCompressedSocketFlush(pState);
+ pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
+ if(pDeflateInEnd == pDeflateInRealEnd)
+ {
+ if(Stream.avail_in)
+ {
+ MP_ASSERT(pDeflateInStart != Stream.next_in);
+ memmove((void*)pDeflateInStart, Stream.next_in, Stream.avail_in);
+ pState->nMemmoveBytes += Stream.avail_in;
+ }
+ Stream.next_in = pDeflateInStart;
+ pDeflateInEnd = Stream.next_in + Stream.avail_in;
+ }
+ }
+ // Stage as many bytes as fit in the free tail of DeflateIn.
+ size_t nSpace = pDeflateInRealEnd - pDeflateInEnd;
+ size_t nBytes = MicroProfileMin(nSpace, nSize);
+ MP_ASSERT(nBytes + pDeflateInEnd <= pDeflateInRealEnd);
+ memcpy((void*)pDeflateInEnd, pData, nBytes);
+ Stream.avail_in += nBytes;
+ nSize -= nBytes;
+ pData += nBytes;
+ int r = mz_deflate(&Stream, MZ_NO_FLUSH);
+ // Request a flush next round when deflate reported a buffer error, nothing
+ // could be staged, or the output buffer is full.
+ Flush = r == MZ_BUF_ERROR || nBytes == 0 || Stream.avail_out == 0 ? 1 : 0;
+ MP_ASSERT(r == MZ_BUF_ERROR || r == MZ_OK);
+ if(r == MZ_BUF_ERROR)
+ {
+ r = mz_deflate(&Stream, MZ_SYNC_FLUSH);
+ }
+ }
+}
+#endif
+
+
+#ifndef MicroProfileSetNonBlocking // fcntl doesn't work on some unix-like platforms, so this can be overridden with a macro
+// Switches Socket to non-blocking mode when NonBlocking is non-zero and back
+// to blocking mode otherwise. Uses ioctlsocket(FIONBIO) on Windows and the
+// O_NONBLOCK file status flag elsewhere. Errors are not reported.
+void MicroProfileSetNonBlocking(MpSocket Socket, int NonBlocking)
+{
+#ifdef _WIN32
+    u_long nMode = 0;
+    if(NonBlocking)
+    {
+        nMode = 1;
+    }
+    ioctlsocket(Socket, FIONBIO, &nMode);
+#else
+    const int nCurrentFlags = fcntl(Socket, F_GETFL);
+    const int nWantedFlags = NonBlocking ? (nCurrentFlags|O_NONBLOCK) : (nCurrentFlags&(~O_NONBLOCK));
+    fcntl(Socket, F_SETFL, nWantedFlags);
+#endif
+}
+#endif
+
+// Creates the non-blocking listener socket of the embedded web server and
+// binds it to the first free port in
+// [MICROPROFILE_WEBSERVER_PORT, MICROPROFILE_WEBSERVER_PORT+20).
+// The bound port is stored in S.nWebServerPort, which stays (uint32_t)-1 when
+// none of the ports could be bound. On Windows, a failing WSAStartup sets
+// S.ListenerSocket to -1 and returns early.
+void MicroProfileWebServerStart()
+{
+#ifdef _WIN32
+    WSADATA wsa;
+    if(WSAStartup(MAKEWORD(2, 2), &wsa))
+    {
+        S.ListenerSocket = -1;
+        return;
+    }
+#endif
+
+    // Protocol 6 is TCP.
+    S.ListenerSocket = socket(PF_INET, SOCK_STREAM, 6);
+    MP_ASSERT(!MP_INVALID_SOCKET(S.ListenerSocket));
+    MicroProfileSetNonBlocking(S.ListenerSocket, 1);
+
+    S.nWebServerPort = (uint32_t)-1;
+    struct sockaddr_in Addr;
+    // Zero the whole structure so the padding (sin_zero) handed to bind() is
+    // not left uninitialised.
+    memset(&Addr, 0, sizeof(Addr));
+    Addr.sin_family = AF_INET;
+    Addr.sin_addr.s_addr = INADDR_ANY;
+    for(int i = 0; i < 20; ++i)
+    {
+        Addr.sin_port = htons(MICROPROFILE_WEBSERVER_PORT+i);
+        if(0 == bind(S.ListenerSocket, (sockaddr*)&Addr, sizeof(Addr)))
+        {
+            S.nWebServerPort = MICROPROFILE_WEBSERVER_PORT+i;
+            break;
+        }
+    }
+    listen(S.ListenerSocket, 8);
+}
+
+// Closes the listener socket. On Windows this also calls WSACleanup(), pairing
+// with the WSAStartup() call in MicroProfileWebServerStart.
+void MicroProfileWebServerStop()
+{
+#ifndef _WIN32
+    close(S.ListenerSocket);
+#else
+    closesocket(S.ListenerSocket);
+    WSACleanup();
+#endif
+}
+
+// Parses the path part of a GET request as a frame count.
+//   pGet - null-terminated string taken from the request
+// Returns 0 when the string contains anything other than decimal digits,
+// MICROPROFILE_WEBSERVER_MAXFRAMES when it is empty or evaluates to zero, and
+// the parsed value otherwise. The value saturates at 0x7fffffff, so an
+// arbitrarily long digit string from the network cannot overflow.
+int MicroProfileParseGet(const char* pGet)
+{
+    const long long nLimit = 0x7fffffff;
+    long long nValue = 0;
+    for(const char* pCur = pGet; *pCur != '\0'; ++pCur)
+    {
+        if(*pCur < '0' || *pCur > '9')
+        {
+            return 0;
+        }
+        // Once the limit is exceeded, keep validating but stop accumulating.
+        if(nValue <= nLimit)
+        {
+            nValue = nValue * 10 + (*pCur - '0');
+        }
+    }
+    if(nValue > nLimit)
+    {
+        nValue = nLimit;
+    }
+    if(nValue)
+    {
+        return (int)nValue;
+    }
+    return MICROPROFILE_WEBSERVER_MAXFRAMES;
+}
+// Accepts at most one connection on S.ListenerSocket and, when the request is a GET
+// whose path MicroProfileParseGet maps to a non-zero frame count, sends the HTTP header
+// followed by the HTML dump on that connection. The connection is always closed before
+// returning.
+// Returns bServed.
+// NOTE(review): bServed is initialised to false and never assigned again in this
+// function, so the return value is always false - confirm whether callers rely on it.
+bool MicroProfileWebServerUpdate()
+{
+ MICROPROFILE_SCOPEI("MicroProfile", "Webserver-update", -1);
+ MpSocket Connection = accept(S.ListenerSocket, 0, 0);
+ bool bServed = false;
+ if(!MP_INVALID_SOCKET(Connection))
+ {
+ // The lock is held for the whole request, including the socket writes below.
+ std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
+ char Req[8192];
+ // The accepted connection is switched to blocking mode before reading.
+ MicroProfileSetNonBlocking(Connection, 0);
+ // A single recv is issued; one byte of Req is reserved for the terminator written below.
+ int nReceived = recv(Connection, Req, sizeof(Req)-1, 0);
+ if(nReceived > 0)
+ {
+ Req[nReceived] = '\0';
+ // Response header; the miniz build additionally announces deflate content encoding.
+#if MICROPROFILE_MINIZ
+#define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nContent-Encoding: deflate\r\nExpires: Tue, 01 Jan 2199 16:00:00 GMT\r\n\r\n"
+#else
+#define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nExpires: Tue, 01 Jan 2199 16:00:00 GMT\r\n\r\n"
+#endif
+ char* pHttp = strstr(Req, "HTTP/");
+ char* pGet = strstr(Req, "GET /");
+ char* pHost = strstr(Req, "Host: ");
+ // Cuts pString at the first '\r', '\n' or ' ' by overwriting it with a terminator.
+ auto Terminate = [](char* pString)
+ {
+ char* pEnd = pString;
+ while(*pEnd != '\0')
+ {
+ if(*pEnd == '\r' || *pEnd == '\n' || *pEnd == ' ')
+ {
+ *pEnd = '\0';
+ return;
+ }
+ pEnd++;
+ }
+ };
+ // pHost stays null when the request has no "Host: " text; it is passed on unchanged below.
+ if(pHost)
+ {
+ pHost += sizeof("Host: ")-1;
+ Terminate(pHost);
+ }
+
+ if(pHttp && pGet)
+ {
+ // Truncate the request at "HTTP/" and isolate the text following "GET /".
+ *pHttp = '\0';
+ pGet += sizeof("GET /")-1;
+ Terminate(pGet);
+ // nFrames is 0 when the path contains a non-digit; nothing is sent in that case.
+ int nFrames = MicroProfileParseGet(pGet);
+ if(nFrames)
+ {
+ uint64_t nTickStart = MP_TICK();
+ send(Connection, MICROPROFILE_HTML_HEADER, sizeof(MICROPROFILE_HTML_HEADER)-1, 0);
+ uint64_t nDataStart = S.nWebServerDataSent;
+ S.WebServerPut = 0;
+ // Uncompressed path: the dump is written through MicroProfileWriteSocket.
+ #if 0 == MICROPROFILE_MINIZ
+ MicroProfileDumpHtml(MicroProfileWriteSocket, &Connection, nFrames, pHost);
+ uint64_t nDataEnd = S.nWebServerDataSent;
+ uint64_t nTickEnd = MP_TICK();
+ uint64_t nDiff = (nTickEnd - nTickStart);
+ float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
+ int nKb = ((nDataEnd-nDataStart)>>10) + 1;
+ // Only read by the MICROPROFILE_DEBUG printf below.
+ int nCompressedKb = nKb;
+ MicroProfilePrintf(MicroProfileWriteSocket, &Connection, "\n<!-- Sent %dkb in %.2fms-->\n\n",nKb, fMs);
+ MicroProfileFlushSocket(Connection);
+ #else
+ // Compressed path: the dump is written through MicroProfileCompressedWriteSocket.
+ MicroProfileCompressedSocketState CompressState;
+ MicroProfileCompressedSocketStart(&CompressState, Connection);
+ MicroProfileDumpHtml(MicroProfileCompressedWriteSocket, &CompressState, nFrames, pHost);
+ S.nWebServerDataSent += CompressState.nSize;
+ uint64_t nDataEnd = S.nWebServerDataSent;
+ uint64_t nTickEnd = MP_TICK();
+ uint64_t nDiff = (nTickEnd - nTickStart);
+ float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
+ int nKb = ((nDataEnd-nDataStart)>>10) + 1;
+ int nCompressedKb = ((CompressState.nCompressedSize)>>10) + 1;
+ MicroProfilePrintf(MicroProfileCompressedWriteSocket, &CompressState, "\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
+ MicroProfileCompressedSocketFinish(&CompressState);
+ MicroProfileFlushSocket(Connection);
+ #endif
+
+ #if MICROPROFILE_DEBUG
+ printf("\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
+ #endif
+ }
+ }
+ }
+#ifdef _WIN32
+ closesocket(Connection);
+#else
+ close(Connection);
+#endif
+ }
+ return bServed;
+}
+#endif
+
+
+
+
+#if MICROPROFILE_CONTEXT_SWITCH_TRACE
+//functions that need to be implemented per platform.
+void* MicroProfileTraceThread(void* unused);
+bool MicroProfileIsLocalThread(uint32_t nThreadId);
+
+
+// Starts the context switch trace thread unless S.bContextSwitchRunning is already set.
+// Marks the trace as running and clears the stop request before launching the thread.
+void MicroProfileStartContextSwitchTrace()
+{
+    if(S.bContextSwitchRunning)
+    {
+        return;
+    }
+    S.bContextSwitchRunning = true;
+    S.bContextSwitchStop = false;
+    MicroProfileThreadStart(&S.ContextSwitchThread, MicroProfileTraceThread);
+}
+
+// Requests the context switch trace thread to stop and joins it.
+// Does nothing when S.bContextSwitchRunning is not set.
+void MicroProfileStopContextSwitchTrace()
+{
+    if(!S.bContextSwitchRunning)
+    {
+        return;
+    }
+    S.bContextSwitchStop = true;
+    MicroProfileThreadJoin(&S.ContextSwitchThread);
+}
+
+
+#ifdef _WIN32
+#define INITGUID
+#include <evntrace.h>
+#include <evntcons.h>
+#include <strsafe.h>
+
+
+static GUID g_MicroProfileThreadClassGuid = { 0x3d6fa8d1, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c };
+
+// Payload of a context switch trace event. MicroProfileContextSwitchCallback casts
+// pEvent->MofData to this struct, so field order and sizes must not be changed.
+// Only NewThreadId and OldThreadId are read by the code in this file section.
+// NOTE(review): presumably mirrors the CSwitch event layout of the Windows kernel
+// logger - confirm against the Microsoft documentation before touching it.
+struct MicroProfileSCSwitch
+{
+ uint32_t NewThreadId;
+ uint32_t OldThreadId;
+ int8_t NewThreadPriority;
+ int8_t OldThreadPriority;
+ uint8_t PreviousCState;
+ int8_t SpareByte;
+ int8_t OldThreadWaitReason;
+ int8_t OldThreadWaitMode;
+ int8_t OldThreadState;
+ int8_t OldThreadWaitIdealProcessor;
+ uint32_t NewThreadWaitTime;
+ uint32_t Reserved;
+};
+
+
+// Event callback installed by MicroProfileTraceThread. Events whose GUID is
+// g_MicroProfileThreadClassGuid and whose class type is 36 are decoded as
+// MicroProfileSCSwitch and forwarded to MicroProfileContextSwitchPut; everything
+// else, and records where both thread ids are zero, is ignored.
+VOID WINAPI MicroProfileContextSwitchCallback(PEVENT_TRACE pEvent)
+{
+    if(!(pEvent->Header.Guid == g_MicroProfileThreadClassGuid))
+    {
+        return;
+    }
+    if(pEvent->Header.Class.Type != 36)
+    {
+        return;
+    }
+    const MicroProfileSCSwitch* pPayload = (const MicroProfileSCSwitch*)pEvent->MofData;
+    if(pPayload->NewThreadId == 0 && pPayload->OldThreadId == 0)
+    {
+        return;
+    }
+
+    MicroProfileContextSwitch Entry;
+    Entry.nTicks = pEvent->Header.TimeStamp.QuadPart;
+    Entry.nCpu = pEvent->BufferContext.ProcessorNumber;
+    Entry.nThreadIn = pPayload->NewThreadId;
+    Entry.nThreadOut = pPayload->OldThreadId;
+    MicroProfileContextSwitchPut(&Entry);
+}
+
+// Buffer callback installed by MicroProfileTraceThread. Returns TRUE while the trace
+// is flagged as running and no stop has been requested, FALSE otherwise.
+ULONG WINAPI MicroProfileBufferCallback(PEVENT_TRACE_LOGFILE Buffer)
+{
+    const bool bKeepProcessing = !S.bContextSwitchStop && S.bContextSwitchRunning;
+    if(bKeepProcessing)
+    {
+        return TRUE;
+    }
+    return FALSE;
+}
+
+
+// EVENT_TRACE_PROPERTIES extended by sizeof(KERNEL_LOGGER_NAME) bytes of storage.
+// The users of this struct set LoggerNameOffset to sizeof(EVENT_TRACE_PROPERTIES),
+// i.e. they point the logger name at the dummy member that follows the base struct.
+struct MicroProfileKernelTraceProperties : public EVENT_TRACE_PROPERTIES
+{
+ char dummy[sizeof(KERNEL_LOGGER_NAME)];
+};
+
+// Issues EVENT_TRACE_CONTROL_STOP for the session named KERNEL_LOGGER_NAME.
+// Called by MicroProfileTraceThread both before starting and after finishing its trace.
+void MicroProfileContextSwitchShutdownTrace()
+{
+ // Stays 0: ControlTrace below is given the session name alongside this handle.
+ TRACEHANDLE SessionHandle = 0;
+ MicroProfileKernelTraceProperties sessionProperties;
+
+ ZeroMemory(&sessionProperties, sizeof(sessionProperties));
+ sessionProperties.Wnode.BufferSize = sizeof(sessionProperties);
+ sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID;
+ sessionProperties.Wnode.ClientContext = 1; //QPC clock resolution
+ sessionProperties.Wnode.Guid = SystemTraceControlGuid;
+ sessionProperties.BufferSize = 1;
+ sessionProperties.NumberOfBuffers = 128;
+ sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH;
+ sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
+ sessionProperties.MaximumFileSize = 0;
+ // The logger name lives directly behind the base struct (the dummy member).
+ sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
+ sessionProperties.LogFileNameOffset = 0;
+
+ EVENT_TRACE_LOGFILE log;
+ ZeroMemory(&log, sizeof(log));
+ log.LoggerName = KERNEL_LOGGER_NAME;
+ log.ProcessTraceMode = 0;
+ TRACEHANDLE hLog = OpenTrace(&log);
+ // NOTE(review): OpenTrace is documented to report failure with
+ // INVALID_PROCESSTRACE_HANDLE rather than 0, so this test may be true even when
+ // the open failed - confirm before changing, since the stop request depends on it.
+ if (hLog)
+ {
+ ControlTrace(SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties, EVENT_TRACE_CONTROL_STOP);
+ }
+ // Executed regardless of whether the open above succeeded.
+ CloseTrace(hLog);
+
+
+}
+
+// Thread entry point of the Windows context switch trace.
+// Stops the session named KERNEL_LOGGER_NAME, starts a new real-time session with
+// context switch and process events enabled, and processes its events through
+// MicroProfileContextSwitchCallback until ProcessTrace returns. Clears
+// S.bContextSwitchRunning on every exit path. Always returns 0; the argument is unused.
+void* MicroProfileTraceThread(void* unused)
+{
+    MicroProfileContextSwitchShutdownTrace();
+
+    MicroProfileKernelTraceProperties Props;
+    ZeroMemory(&Props, sizeof(Props));
+    Props.Wnode.BufferSize = sizeof(Props);
+    Props.Wnode.Guid = SystemTraceControlGuid;
+    Props.Wnode.ClientContext = 1; //QPC clock resolution
+    Props.Wnode.Flags = WNODE_FLAG_TRACED_GUID;
+    Props.LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
+    Props.EnableFlags = EVENT_TRACE_FLAG_CSWITCH|EVENT_TRACE_FLAG_PROCESS;
+    Props.NumberOfBuffers = 128;
+    Props.BufferSize = 1;
+    Props.MaximumFileSize = 0;
+    Props.LogFileNameOffset = 0;
+    Props.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
+
+    TRACEHANDLE hSession = 0;
+    const ULONG nStartStatus = StartTrace((PTRACEHANDLE) &hSession, KERNEL_LOGGER_NAME, &Props);
+    if(nStartStatus != ERROR_SUCCESS)
+    {
+        S.bContextSwitchRunning = false;
+        return 0;
+    }
+
+    EVENT_TRACE_LOGFILE Consumer;
+    ZeroMemory(&Consumer, sizeof(Consumer));
+    Consumer.LoggerName = KERNEL_LOGGER_NAME;
+    Consumer.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
+    Consumer.BufferCallback = MicroProfileBufferCallback;
+    Consumer.EventCallback = MicroProfileContextSwitchCallback;
+
+    TRACEHANDLE hConsumer = OpenTrace(&Consumer);
+    ProcessTrace(&hConsumer, 1, 0, 0);
+    CloseTrace(hConsumer);
+    MicroProfileContextSwitchShutdownTrace();
+
+    S.bContextSwitchRunning = false;
+    return 0;
+}
+
+// Returns true when the thread with id nThreadId belongs to the calling process.
+// Returns false when the thread cannot be opened.
+bool MicroProfileIsLocalThread(uint32_t nThreadId)
+{
+    HANDLE hThread = OpenThread(THREAD_QUERY_LIMITED_INFORMATION, FALSE, nThreadId);
+    if(NULL == hThread)
+    {
+        return false;
+    }
+    const DWORD nOwnerProcessId = GetProcessIdOfThread(hThread);
+    CloseHandle(hThread);
+    return nOwnerProcessId == GetCurrentProcessId();
+}
+
+#elif defined(__APPLE__)
+#include <sys/time.h>
+// Thread entry point of the context switch trace on Apple platforms.
+// Reads text lines from the file "mypipe". A line is used when it contains three 'X'
+// markers, each followed by a decimal number: cpu, thread id and a timestamp. Every
+// such line becomes one MicroProfileContextSwitch record; the outgoing thread is the
+// thread last seen on the same cpu. Lines that do not match are skipped.
+// Runs until the file ends, a read fails, or S.bContextSwitchStop is set, then
+// releases the file and the line buffer and clears S.bContextSwitchRunning.
+// Always returns 0; the argument is unused.
+void* MicroProfileTraceThread(void* unused)
+{
+    FILE* pFile = fopen("mypipe", "r");
+    if(!pFile)
+    {
+        printf("CONTEXT SWITCH FAILED TO OPEN FILE: make sure to run dtrace script\n");
+        S.bContextSwitchRunning = false;
+        return 0;
+    }
+    printf("STARTING TRACE THREAD\n");
+    char* pLine = 0;
+    size_t cap = 0;
+    // getline reports end-of-file and errors as -1, so the result must stay signed;
+    // stored in a size_t it would compare greater than zero and the loop would never end.
+    ssize_t len = 0;
+    struct timeval tv;
+
+    gettimeofday(&tv, NULL);
+
+    uint64_t nsSinceEpoch = ((uint64_t)(tv.tv_sec) * 1000000 + (uint64_t)(tv.tv_usec)) * 1000;
+    uint64_t nTickEpoch = MP_TICK();
+    uint32_t nLastThread[MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS] = {0};
+    mach_timebase_info_data_t sTimebaseInfo;
+    mach_timebase_info(&sTimebaseInfo);
+    S.bContextSwitchRunning = true;
+
+    uint64_t nProcessed = 0;
+    uint64_t nProcessedLast = 0;
+    while((len = getline(&pLine, &cap, pFile)) > 0 && !S.bContextSwitchStop)
+    {
+        nProcessed += (uint64_t)len;
+        if(nProcessed - nProcessedLast > 10<<10)
+        {
+            // Print before updating nProcessedLast, otherwise the first number is always 0.
+            printf("processed %llukb %llukb\n", (unsigned long long)((nProcessed-nProcessedLast)>>10), (unsigned long long)(nProcessed >>10));
+            nProcessedLast = nProcessed;
+        }
+
+        // Locate all three markers up front so that a short or malformed line is
+        // skipped instead of dereferencing a null pointer.
+        char* pX = strchr(pLine, 'X');
+        char* pX2 = pX ? strchr(pX + 1, 'X') : 0;
+        char* pX3 = pX2 ? strchr(pX2 + 1, 'X') : 0;
+        if(!pX3)
+        {
+            continue;
+        }
+        int cpu = atoi(pX+1);
+        int thread = atoi(pX2+1);
+        int64_t timestamp = strtoll(pX3 + 1, 0, 10);
+
+        //convert to ticks.
+        // NOTE(review): the names suggest the timestamp is in nanoseconds since the
+        // epoch, and the conversion multiplies by numer/denom; confirm both against the
+        // script that produces the lines and against the definition of MP_TICK.
+        uint64_t nDeltaNsSinceEpoch = timestamp - nsSinceEpoch;
+        uint64_t nDeltaTickSinceEpoch = sTimebaseInfo.numer * nDeltaNsSinceEpoch / sTimebaseInfo.denom;
+        uint64_t nTicks = nDeltaTickSinceEpoch + nTickEpoch;
+        // cpu indexes nLastThread, so negative values must be rejected as well.
+        if(cpu >= 0 && cpu < MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS)
+        {
+            MicroProfileContextSwitch Switch;
+            Switch.nThreadOut = nLastThread[cpu];
+            Switch.nThreadIn = thread;
+            nLastThread[cpu] = thread;
+            Switch.nCpu = cpu;
+            Switch.nTicks = nTicks;
+            MicroProfileContextSwitchPut(&Switch);
+        }
+    }
+    // getline allocates pLine; release it together with the file.
+    free(pLine);
+    fclose(pFile);
+    printf("EXITING TRACE THREAD\n");
+    S.bContextSwitchRunning = false;
+    return 0;
+}
+
+// Apple variant: reports every thread id as not belonging to this process.
+bool MicroProfileIsLocalThread(uint32_t nThreadId)
+{
+    (void)nThreadId; // not inspected by this variant
+    return false;
+}
+
+#endif
+#else
+
+// Stand-ins used when MICROPROFILE_CONTEXT_SWITCH_TRACE is disabled.
+
+// Reports every thread id as not belonging to this process.
+bool MicroProfileIsLocalThread(uint32_t nThreadId)
+{
+    (void)nThreadId;
+    return false;
+}
+
+// No trace thread exists in this configuration; nothing to stop.
+void MicroProfileStopContextSwitchTrace()
+{
+}
+
+// No trace thread exists in this configuration; nothing to start.
+void MicroProfileStartContextSwitchTrace()
+{
+}
+
+#endif
+
+
+
+
+#if MICROPROFILE_GPU_TIMERS_D3D11
+// Issues a timestamp query for the current frame and returns the index of the query
+// slot used. Returns (uint32_t)-1 when the current frame's rate query has not been
+// started, or when taking another slot would make the put position reach
+// S.GPU.m_nQueryGet.
+uint32_t MicroProfileGpuInsertTimeStamp()
+{
+    const uint32_t nNoSlot = (uint32_t)-1;
+    MicroProfileD3D11Frame& Frame = S.GPU.m_QueryFrames[S.GPU.m_nQueryFrame];
+    if(!Frame.m_nRateQueryStarted)
+    {
+        return nNoSlot;
+    }
+
+    const uint32_t nSlot = (Frame.m_nQueryStart + Frame.m_nQueryCount) % MICROPROFILE_D3D_MAX_QUERIES;
+    const uint32_t nFollowing = (nSlot + 1) % MICROPROFILE_D3D_MAX_QUERIES;
+    if(nFollowing == S.GPU.m_nQueryGet)
+    {
+        return nNoSlot;
+    }
+
+    Frame.m_nQueryCount++;
+    ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)S.GPU.m_pDeviceContext;
+    ID3D11Query* pQuery = (ID3D11Query*)S.GPU.m_pQueries[nSlot];
+    pContext->End(pQuery);
+    S.GPU.m_nQueryPut = nFollowing;
+    return nSlot;
+}
+
+// Returns the stored result of the timestamp query in slot nIndex.
+// The slot value (uint32_t)-1 is answered with (uint64_t)-1 without touching the
+// result table. Asserts that the stored result is not -1.
+uint64_t MicroProfileGpuGetTimeStamp(uint32_t nIndex)
+{
+    if((uint32_t)-1 != nIndex)
+    {
+        int64_t nStored = S.GPU.m_nQueryResults[nIndex];
+        MP_ASSERT(nStored != -1);
+        return nStored;
+    }
+    return (uint64_t)-1;
+}
+
+// Fetches the result of pQuery into pData (nDataSize bytes), calling GetData again
+// for as long as it answers S_FALSE.
+// Returns false, after MP_BREAK(), when the final result is
+// DXGI_ERROR_DEVICE_REMOVED, DXGI_ERROR_INVALID_CALL or E_INVALIDARG; true otherwise.
+bool MicroProfileGpuGetData(void* pQuery, void* pData, uint32_t nDataSize)
+{
+    HRESULT hr = S_FALSE;
+    while(hr == S_FALSE)
+    {
+        hr = ((ID3D11DeviceContext*)S.GPU.m_pDeviceContext)->GetData((ID3D11Query*)pQuery, pData, nDataSize, 0);
+    }
+
+    const bool bFailed = hr == DXGI_ERROR_DEVICE_REMOVED
+        || hr == DXGI_ERROR_INVALID_CALL
+        || hr == E_INVALIDARG;
+    if(bFailed)
+    {
+        MP_BREAK();
+        return false;
+    }
+    return true;
+}
+
+// Returns S.GPU.m_nQueryFrequency, the GPU timestamp frequency last stored by
+// MicroProfileGpuFlip (0 after MicroProfileGpuInitD3D11 until a rate query was read).
+uint64_t MicroProfileTicksPerSecondGpu()
+{
+    const uint64_t nTicksPerSecond = S.GPU.m_nQueryFrequency;
+    return nTicksPerSecond;
+}
+
+// Advances the GPU query ring by one frame:
+//  1. ends the rate query of the frame that was current,
+//  2. reads back the frame slot that is about to be reused: its rate query (updating
+//     S.GPU.m_nQueryFrequency) and every timestamp query it issued; results that
+//     cannot be read are stored as -1, and S.GPU.m_nQueryGet moves past them,
+//  3. makes that slot current, begins its rate query and resets its query range.
+// The bDisjoint member of the rate result is fetched but not inspected.
+void MicroProfileGpuFlip()
+{
+    ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)S.GPU.m_pDeviceContext;
+
+    MicroProfileD3D11Frame& Ending = S.GPU.m_QueryFrames[S.GPU.m_nQueryFrame];
+    if(Ending.m_nRateQueryStarted)
+    {
+        pContext->End((ID3D11Query*)Ending.m_pRateQuery);
+    }
+
+    const uint32_t nReusedSlot = (S.GPU.m_nQueryFrame + 1) % MICROPROFILE_GPU_FRAME_DELAY;
+    MicroProfileD3D11Frame& Reused = S.GPU.m_QueryFrames[nReusedSlot];
+    if(Reused.m_nRateQueryStarted)
+    {
+        struct RateQueryResult
+        {
+            uint64_t nFrequency;
+            BOOL bDisjoint;
+        };
+        RateQueryResult Rate;
+        const bool bRateValid = MicroProfileGpuGetData(Reused.m_pRateQuery, &Rate, sizeof(Rate));
+        if(bRateValid && S.GPU.m_nQueryFrequency != (int64_t)Rate.nFrequency)
+        {
+            if(S.GPU.m_nQueryFrequency)
+            {
+                OutputDebugString("Query freq changing");
+            }
+            S.GPU.m_nQueryFrequency = Rate.nFrequency;
+        }
+
+        // Without a valid rate result no timestamp is read at all; every slot is marked -1.
+        const uint32_t nFirst = Reused.m_nQueryStart;
+        const uint32_t nTotal = Reused.m_nQueryCount;
+        for(uint32_t n = 0; n < nTotal; ++n)
+        {
+            const uint32_t nSlot = (nFirst + n) % MICROPROFILE_D3D_MAX_QUERIES;
+            if(!bRateValid || !MicroProfileGpuGetData(S.GPU.m_pQueries[nSlot], &S.GPU.m_nQueryResults[nSlot], sizeof(uint64_t)))
+            {
+                S.GPU.m_nQueryResults[nSlot] = -1;
+            }
+        }
+        S.GPU.m_nQueryGet = (Reused.m_nQueryStart + Reused.m_nQueryCount) % MICROPROFILE_D3D_MAX_QUERIES;
+    }
+
+    S.GPU.m_nQueryFrame = nReusedSlot;
+    MicroProfileD3D11Frame& Starting = S.GPU.m_QueryFrames[nReusedSlot];
+    pContext->Begin((ID3D11Query*)Starting.m_pRateQuery);
+    Starting.m_nQueryStart = S.GPU.m_nQueryPut;
+    Starting.m_nQueryCount = 0;
+    Starting.m_nRateQueryStarted = 1;
+}
+
+// Sets up GPU timing for D3D11.
+//   pDevice_        - ID3D11Device* used to create the queries.
+//   pDeviceContext_ - ID3D11DeviceContext* stored in S.GPU.m_pDeviceContext.
+// Creates MICROPROFILE_D3D_MAX_QUERIES timestamp queries (results preset to -1) and
+// one timestamp-disjoint query per frame slot, and resets all ring positions and the
+// query frequency to 0. Query creation is only checked through MP_ASSERT.
+void MicroProfileGpuInitD3D11(void* pDevice_, void* pDeviceContext_)
+{
+    ID3D11Device* pDevice = (ID3D11Device*)pDevice_;
+    S.GPU.m_pDeviceContext = pDeviceContext_;
+
+    D3D11_QUERY_DESC QueryDesc;
+    QueryDesc.MiscFlags = 0;
+
+    QueryDesc.Query = D3D11_QUERY_TIMESTAMP;
+    for(uint32_t nSlot = 0; nSlot < MICROPROFILE_D3D_MAX_QUERIES; ++nSlot)
+    {
+        HRESULT hr = pDevice->CreateQuery(&QueryDesc, (ID3D11Query**)&S.GPU.m_pQueries[nSlot]);
+        MP_ASSERT(hr == S_OK);
+        S.GPU.m_nQueryResults[nSlot] = -1;
+    }
+
+    S.GPU.m_nQueryFrequency = 0;
+    S.GPU.m_nQueryFrame = 0;
+    S.GPU.m_nQueryGet = 0;
+    S.GPU.m_nQueryPut = 0;
+
+    QueryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
+    for(uint32_t nFrame = 0; nFrame < MICROPROFILE_GPU_FRAME_DELAY; ++nFrame)
+    {
+        MicroProfileD3D11Frame& Frame = S.GPU.m_QueryFrames[nFrame];
+        Frame.m_nQueryStart = 0;
+        Frame.m_nQueryCount = 0;
+        Frame.m_nRateQueryStarted = 0;
+        HRESULT hr = pDevice->CreateQuery(&QueryDesc, (ID3D11Query**)&Frame.m_pRateQuery);
+        MP_ASSERT(hr == S_OK);
+    }
+}
+
+
+// Releases every query created by MicroProfileGpuInitD3D11 and clears the stored
+// pointers. Slots that hold no query (creation failed, or shutdown already ran) are
+// skipped, so calling this more than once is harmless.
+void MicroProfileGpuShutdown()
+{
+    for(uint32_t i = 0; i < MICROPROFILE_D3D_MAX_QUERIES; ++i)
+    {
+        // m_pQueries[i] holds the query pointer itself; the previous code cast the
+        // address of the slot to ID3D11Query* and called Release() through that.
+        ID3D11Query* pQuery = (ID3D11Query*)S.GPU.m_pQueries[i];
+        if(pQuery)
+        {
+            pQuery->Release();
+        }
+        S.GPU.m_pQueries[i] = 0;
+    }
+    for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i)
+    {
+        ID3D11Query* pRateQuery = (ID3D11Query*)S.GPU.m_QueryFrames[i].m_pRateQuery;
+        if(pRateQuery)
+        {
+            pRateQuery->Release();
+        }
+        S.GPU.m_QueryFrames[i].m_pRateQuery = 0;
+    }
+}
+
+// D3D11 variant: never provides a CPU/GPU tick pair. Always returns 0 and leaves
+// both output parameters untouched.
+int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu)
+{
+    (void)pOutCPU;
+    (void)pOutGpu;
+    return 0;
+}
+
+
+#elif MICROPROFILE_GPU_TIMERS_GL
+// Generates MICROPROFILE_GL_MAX_QUERIES query objects into S.GPU.GLTimers and
+// resets the timer position to 0.
+void MicroProfileGpuInitGL()
+{
+    glGenQueries(MICROPROFILE_GL_MAX_QUERIES, &S.GPU.GLTimers[0]);
+    S.GPU.GLTimerPos = 0;
+}
+
+// Advances the timer position by one (wrapping at MICROPROFILE_GL_MAX_QUERIES),
+// records a GL_TIMESTAMP into the query at the new position and returns that position.
+uint32_t MicroProfileGpuInsertTimeStamp()
+{
+    const uint32_t nSlot = (S.GPU.GLTimerPos + 1) % MICROPROFILE_GL_MAX_QUERIES;
+    S.GPU.GLTimerPos = nSlot;
+    glQueryCounter(S.GPU.GLTimers[nSlot], GL_TIMESTAMP);
+    return nSlot;
+}
+// Returns the GL_QUERY_RESULT of the timestamp query stored at index nKey.
+// The result starts out as 0 so that a failing GL call, which leaves the output
+// untouched, yields a defined value instead of an uninitialised one.
+uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey)
+{
+    uint64_t result = 0;
+    glGetQueryObjectui64v(S.GPU.GLTimers[nKey], GL_QUERY_RESULT, &result);
+    return result;
+}
+
+// GL variant: reports a fixed rate of 1,000,000,000 GPU ticks per second.
+uint64_t MicroProfileTicksPerSecondGpu()
+{
+    const uint64_t nTicksPerSecond = 1000000000ll;
+    return nTicksPerSecond;
+}
+
+// Samples a matching pair of CPU and GPU timestamps.
+//   pOutCpu - receives MP_TICK() taken right after the GPU timestamp was read.
+//   pOutGpu - receives the value queried with GL_TIMESTAMP.
+// Returns 1 when the pair was written, 0 when the GPU timestamp is zero; the outputs
+// are left untouched in that case. The timestamp starts out as 0 so that a failing
+// GL call is reported as 0 instead of reading an uninitialised value.
+int MicroProfileGetGpuTickReference(int64_t* pOutCpu, int64_t* pOutGpu)
+{
+    int64_t nGpuTimeStamp = 0;
+    glGetInteger64v(GL_TIMESTAMP, &nGpuTimeStamp);
+    if(!nGpuTimeStamp)
+    {
+        return 0;
+    }
+    *pOutCpu = MP_TICK();
+    *pOutGpu = nGpuTimeStamp;
+    #if 0 //debug test if timestamp diverges
+    static int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
+    static int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
+    static int64_t nGpuStart = 0;
+    static int64_t nCpuStart = 0;
+    if(!nCpuStart)
+    {
+        nCpuStart = *pOutCpu;
+        nGpuStart = *pOutGpu;
+    }
+    static int nCountDown = 100;
+    if(0 == nCountDown--)
+    {
+        int64_t nCurCpu = *pOutCpu;
+        int64_t nCurGpu = *pOutGpu;
+        double fDistanceCpu = (nCurCpu - nCpuStart) / (double)nTicksPerSecondCpu;
+        double fDistanceGpu = (nCurGpu - nGpuStart) / (double)nTicksPerSecondGpu;
+
+        char buf[254];
+        snprintf(buf, sizeof(buf)-1,"Distance %f %f diff %f\n", fDistanceCpu, fDistanceGpu, fDistanceCpu-fDistanceGpu);
+        OutputDebugString(buf);
+        nCountDown = 100;
+    }
+    #endif
+    return 1;
+}
+
+
+#endif
+
+#undef S
+
+#ifdef _WIN32
+#pragma warning(pop)
+#endif
+
+
+
+
+
+#endif
+#endif
+#ifdef MICROPROFILE_EMBED_HTML
+#include "microprofile_html.h"
+#endif
diff --git a/externals/microprofile/microprofile_html.h b/externals/microprofile/microprofile_html.h
new file mode 100644
index 000000000..01b624b60
--- /dev/null
+++ b/externals/microprofile/microprofile_html.h
@@ -0,0 +1,3868 @@
+///start file generated from microprofile.html
+#ifdef MICROPROFILE_EMBED_HTML
+const char g_MicroProfileHtml_begin_0[] =
+"<!DOCTYPE HTML>\n"
+"<html>\n"
+"<head>\n"
+"<title>MicroProfile Capture</title>\n"
+"<style>\n"
+"/* about css: http://bit.ly/1eMQ42U */\n"
+"body {margin: 0px;padding: 0px; font: 12px Courier New;background-color:#474747; color:white;overflow:hidden;}\n"
+"ul {list-style-type: none;margin: 0;padding: 0;}\n"
+"li{display: inline; float:left;border:5px; position:relative;text-align:center;}\n"
+"a {\n"
+" float:left;\n"
+" text-decoration:none;\n"
+" display: inline;\n"
+" text-align: center;\n"
+" padding:5px;\n"
+" padding-bottom:0px;\n"
+" padding-top:0px;\n"
+" color: #FFFFFF;\n"
+" background-color: #474747;\n"
+"}\n"
+"a:hover, a:active{\n"
+" background-color: #000000;\n"
+"}\n"
+"\n"
+"ul ul {\n"
+" position:absolute;\n"
+" left:0;\n"
+" top:100%;\n"
+" margin-left:-999em;\n"
+"}\n"
+"li:hover ul {\n"
+" margin-left:0;\n"
+" margin-right:0;\n"
+"}\n"
+"ul li ul{ display:block;float:none;width:100%;}\n"
+"ul li ul li{ display:block;float:none;width:100%;}\n"
+"li li a{ display:block;float:none;width:100%;text-align:left;}\n"
+"#nav li:hover div {margin-left:0;}\n"
+".help {position:absolute;z-index:5;text-align:left;padding:2px;margin-left:-999em;background-color: #313131;width:300px;}\n"
+".helpstart {position:absolute;z-index:5;text-align:left;padding:2px;background-color: #313131;width:300px;display:none}\n"
+".root {z-index:1;position:absolute;top:0px;left:0px;}\n"
+"</style>\n"
+"</head>\n"
+"<body style=\"\">\n"
+"<canvas id=\"History\" height=\"70\" style=\"background-color:#474747;margin:0px;padding:0px;\"></canvas><canvas id=\"DetailedView\" height=\"200\" style=\"background-color:#474747;margin:0px;padding:0px;\"></canvas>\n"
+"<div id=\"root\" class=\"root\">\n"
+"<ul id=\"nav\">\n"
+"<li><a href=\"javascript:void(0)\" onclick=\"ToggleDebugMode();\">?</a>\n"
+"<div class=\"helpstart\" id=\"helpwindow\" style=\"left:20px;top:20px\">\n"
+"History View:<br>\n"
+"Click + Drag: Pan View<br>\n"
+"Right Click + Drag : Zoom on region<br>\n"
+"Click Frame : Center on frame<br>\n"
+"<hr>\n"
+"Main View:<br>\n"
+"Ctrl + Mouse up/down: Zoom<br>\n"
+"Mousewheel : Zoom<br>\n"
+"Right Click + Drag: Zoom to region<br>\n"
+"Ctrl + Drag: Pan<br>\n"
+"Click + Drag: Pan<br>\n"
+"<hr>\n"
+"<table style=\"width:100%\">\n"
+"<tr>\n"
+"<td width=\"50%\" align=\"left\"><a href=\'javascript:void(0)\' onclick=\"ShowHelp(0, 0);\">Close</a></td>\n"
+"<td width=\"50%\" align=\"right\"><a href=\'javascript:void(0)\' onclick=\"ShowHelp(0, 1);\">Close, Never Show</a></td>\n"
+"</tr>\n"
+"</table>\n"
+"</div>\n"
+"<div class=\"help\" id=\"divFrameInfo\" style=\"left:20px;top:300px;width:auto;\">\n"
+"</div>\n"
+"</li>\n"
+"<li><a id=\'ModeSubMenuText\'>Mode</a>\n"
+" <ul id=\'ModeSubMenu\'>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetMode(\'timers\', 0);\" id=\"buttonTimers\">Timers</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetMode(\'timers\', 1);\" id=\"buttonGroups\">Groups</a></li> \n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetMode(\'timers\', 2);\" id=\"buttonThreads\">Threads</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetMode(\'detailed\', 0);\" id=\"buttonDetailed\">Detailed</a></li>\n"
+" </ul>\n"
+"</li>\n"
+"<li><a>Reference</a>\n"
+" <ul id=\'ReferenceSubMenu\'>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'5ms\');\">5ms</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'10ms\');\">10ms</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'15ms\');\">15ms</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'20ms\');\">20ms</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'33ms\');\">33ms</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'50ms\');\">50ms</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'100ms\');\">100ms</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'250ms\');\">250ms</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'500ms\');\">500ms</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'1000ms\');\">1000ms</a></li>\n"
+" </ul>\n"
+"</li>\n"
+"<li id=\"ilThreads\"><a>Threads</a>\n"
+" <ul id=\"ThreadSubMenu\">\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"ToggleThread();\">All</a></li>\n"
+" <li><a>---</a></li>\n"
+" </ul>\n"
+"</li>\n"
+"<li id=\"ilGroups\"><a>Groups</a>\n"
+" <ul id=\"GroupSubMenu\">\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"ToggleGroup();\">All</a></li>\n"
+" <li><a>---</a></li>\n"
+" </ul>\n"
+"</li>\n"
+"<li id=\"ilOptions\"><a>Options&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</a>\n"
+" <ul id=\'OptionsMenu\'>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"ToggleContextSwitch();\">Context Switch</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"ToggleDisableMerge();\">MergeDisable</a></li>\n"
+" <li><a href=\"javascript:void(0)\" onclick=\"ToggleDisableLod();\">LodDisable</a></li>\n"
+" <li id=\'GroupColors\'><a href=\"javascript:void(0)\" onclick=\"ToggleGroupColors();\">Group Colors</a></li>\n"
+" <li id=\'TimersMeta\'><a href=\"javascript:void(0)\" onclick=\"ToggleTimersMeta();\">Meta</a></li>\n"
+" <li id=\'ShowHelp\'><a href=\"javascript:void(0)\" onclick=\"ShowHelp(1,1);\">Help</a></li>\n"
+"<!-- <li><a href=\"javascript:void(0)\" onclick=\"ToggleDebug();\">DEBUG</a></li> -->\n"
+" </ul>\n"
+"</li>\n"
+"</ul>\n"
+"</div>\n"
+"<script>\n"
+"function InvertColor(hexTripletColor) {\n"
+" var color = hexTripletColor;\n"
+" color = color.substring(1); // remove #\n"
+" color = parseInt(color, 16); // convert to integer\n"
+" var R = ((color >> 16) % 256)/255.0;\n"
+" var G = ((color >> 8) % 256)/255.0;\n"
+" var B = ((color >> 0) % 256)/255.0;\n"
+" var lum = (0.2126*R + 0.7152*G + 0.0722*B);\n"
+" if(lum < 0.7)\n"
+" {\n"
+" return \'#ffffff\';\n"
+" }\n"
+" else\n"
+" {\n"
+" return \'#333333\';\n"
+" }\n"
+"}\n"
+"function InvertColorIndex(hexTripletColor) {\n"
+" var color = hexTripletColor;\n"
+" color = color.substring(1); // remove #\n"
+" color = parseInt(color, 16); // convert to integer\n"
+" var R = ((color >> 16) % 256)/255.0;\n"
+" var G = ((color >> 8) % 256)/255.0;\n"
+" var B = ((color >> 0) % 256)/255.0;\n"
+" var lum = (0.2126*R + 0.7152*G + 0.0722*B);\n"
+" if(lum < 0.7)\n"
+" {\n"
+" return 0;\n"
+" }\n"
+" else\n"
+" {\n"
+" return 1;\n"
+" }\n"
+"}\n"
+"function MakeGroup(id, name, category, numtimers, isgpu, total, average, max, color)\n"
+"{\n"
+" var group = {\"id\":id, \"name\":name, \"category\":category, \"numtimers\":numtimers, \"isgpu\":isgpu, \"total\": total, \"average\" : average, \"max\" : max, \"color\":color};\n"
+" return group;\n"
+"}\n"
+"\n"
+"function MakeTimer(id, name, group, color, colordark, average, max, exclaverage, exclmax, callaverage, callcount, total, meta, metaavg, metamax)\n"
+"{\n"
+" var timer = {\"id\":id, \"name\":name, \"len\":name.length, \"color\":color, \"colordark\":colordark,\"timercolor\":color, \"textcolor\":InvertColor(color), \"group\":group, \"average\":average, \"max\":max, \"exclaverage\":exclaverage, \"exclmax\":exclmax, \"callaverage\":callaverage, \"callcount\":callcount, \"total\":total, \"meta\":meta, \"textcolorindex\":InvertColorIndex(color), \"metaavg\":metaavg, \"metamax\":metamax};\n"
+" return timer;\n"
+"}\n"
+"function MakeFrame(id, framestart, frameend, framestartgpu, frameendgpu, ts, tt, ti)\n"
+"{\n"
+" var frame = {\"id\":id, \"framestart\":framestart, \"frameend\":frameend, \"framestartgpu\":framestartgpu, \"frameendgpu\":frameendgpu, \"ts\":ts, \"tt\":tt, \"ti\":ti};\n"
+" return frame;\n"
+"}\n"
+"\n"
+"";
+
+// Size in bytes of the first (and only) "begin" chunk; sizeof counts the string's
+// terminating NUL as well.
+const size_t g_MicroProfileHtml_begin_0_size = sizeof(g_MicroProfileHtml_begin_0);
+// Table of pointers to the "begin" chunks of the embedded HTML page.
+const char* g_MicroProfileHtml_begin[] = {
+&g_MicroProfileHtml_begin_0[0],
+};
+// Size of each chunk in g_MicroProfileHtml_begin, in the same order (terminating NUL included).
+size_t g_MicroProfileHtml_begin_sizes[] = {
+sizeof(g_MicroProfileHtml_begin_0),
+};
+// Number of entries in the two tables above.
+size_t g_MicroProfileHtml_begin_count = 1;
+const char g_MicroProfileHtml_end_0[] =
+"\n"
+"\n"
+"\n"
+"var CanvasDetailedView = document.getElementById(\'DetailedView\');\n"
+"var CanvasHistory = document.getElementById(\'History\');\n"
+"var CanvasDetailedOffscreen = document.createElement(\'canvas\');\n"
+"var g_Msg = \'0\';\n"
+"\n"
+"var Initialized = 0;\n"
+"var fDetailedOffset = Frames[0].framestart;\n"
+"var fDetailedRange = Frames[0].frameend - fDetailedOffset;\n"
+"var nWidth = CanvasDetailedView.width;\n"
+"var nHeight = CanvasDetailedView.height;\n"
+"var ReferenceTime = 33;\n"
+"var nHistoryHeight = 70;\n"
+"var nOffsetY = 0;\n"
+"var nOffsetBarsX = 0;\n"
+"var nOffsetBarsY = 0;\n"
+"var nBarsWidth = 80;\n"
+"var NameWidth = 200;\n"
+"var MouseButtonState = [0,0,0,0,0,0,0,0];\n"
+"var KeyShiftDown = 0;\n"
+"var MouseDragButton = 0;\n"
+"var KeyCtrlDown = 0;\n"
+"var FlipToolTip = 0;\n"
+"var DetailedViewMouseX = 0;\n"
+"var DetailedViewMouseY = 0;\n"
+"var HistoryViewMouseX = -1;\n"
+"var HistoryViewMouseY = -1;\n"
+"var MouseHistory = 0;\n"
+"var MouseDetailed = 0;\n"
+"var FontHeight = 10;\n"
+"var FontWidth = 1;\n"
+"var FontAscent = 3; //Set manually\n"
+"var Font = \'Bold \' + FontHeight + \'px Courier New\';\n"
+"var FontFlash = \'Bold \' + 35 + \'px Courier New\';\n"
+"var BoxHeight = FontHeight + 2;\n"
+"var ThreadsActive = new Object();\n"
+"var ThreadsAllActive = 1;\n"
+"var GroupsActive = new Object();\n"
+"var GroupsAllActive = 1;\n"
+"var nMinWidth = 0.01;//subpixel width\n"
+"var nMinWidthPan = 1.0;//subpixel width when panning\n"
+"var nContextSwitchEnabled = 1;\n"
+"var DisableLod = 0;\n"
+"var DisableMerge = 0;\n"
+"var GroupColors = 0;\n"
+"var nModDown = 0;\n"
+"var g_MSG = \'no\';\n"
+"var nDrawCount = 0;\n"
+"var nBackColors = [\'#474747\', \'#313131\' ];\n"
+"var nBackColorOffset = \'#606060\';\n"
+"var CSwitchColors =[\"#9DD8AF\",\"#D7B6DA\",\"#EAAC76\",\"#DBDA61\",\"#8AD5E1\",\"#8CE48B\",\"#C4D688\",\"#57E5C4\"];//generated by http://tools.medialab.sciences-po.fr/iwanthue/index.php\n"
+"var CSwitchHeight = 5;\n"
+"var FRAME_HISTORY_COLOR_CPU = \'#ff7f27\';\n"
+"var FRAME_HISTORY_COLOR_GPU = \'#ffffff\';\n"
+"var ZOOM_TIME = 0.5;\n"
+"var AnimationActive = false;\n"
+"var nHoverCSCpu = -1;\n"
+"var nHoverCSCpuNext = -1;\n"
+"var nHoverCSToolTip = null;\n"
+"var nHoverToken = -1;\n"
+"var nHoverFrame = -1;\n"
+"var nHoverTokenIndex = -1;\n"
+"var nHoverTokenLogIndex = -1;\n"
+"var nHoverCounter = 0;\n"
+"var nHoverCounterDelta = 8;\n"
+"var nHoverTokenNext = -1;\n"
+"var nHoverTokenLogIndexNext = -1;\n"
+"var nHoverTokenIndexNext = -1;\n"
+"var nHideHelp = 0;\n"
+"\n"
+"\n"
+"var fFrameScale = 33.33;\n"
+"var fRangeBegin = 0;\n"
+"var fRangeEnd = -1;\n"
+"var fRangeBeginNext = 0;\n"
+"var fRangeEndNext = 0;\n"
+"var fRangeBeginGpuNext = 0;\n"
+"var fRangeEndGpuNext = 0;\n"
+"var fRangeBeginHistory = -1;\n"
+"var fRangeEndHistory = -1;\n"
+"var fRangeBeginHistoryGpu = -1;\n"
+"var fRangeEndHistoryGpu = -1;\n"
+"var fRangeBeginSelect = 0;\n"
+"var fRangeEndSelect = -1;\n"
+"\n"
+"var ModeDetailed = 0;\n"
+"var ModeTimers = 1;\n"
+"var Mode = ModeDetailed;\n"
+"\n"
+"var DebugDrawQuadCount = 0;\n"
+"var DebugDrawTextCount = 0;\n"
+"var ProfileMode = 0;\n"
+"var ProfileFps = 0;\n"
+"var ProfileFpsAggr = 0;\n"
+"var ProfileFpsCount = 0;\n"
+"var ProfileLastTimeStamp = new Date();\n"
+"\n"
+"var CSwitchCache = {};\n"
+"var CSwitchOnlyThreads = [];\n"
+"var ProfileData = {};\n"
+"var ProfileStackTime = {};\n"
+"var ProfileStackName = {};\n"
+"var Debug = 1;\n"
+"\n"
+"var g_MaxStack = Array();\n"
+"var g_TypeArray;\n"
+"var g_TimeArray;\n"
+"var g_IndexArray;\n"
+"var LodData = new Array();\n"
+"var NumLodSplits = 10;\n"
+"var SplitMin = 100;\n"
+"var SPLIT_LIMIT = 1e20;\n"
+"var DPR = 1;\n"
+"var DetailedRedrawState = {};\n"
+"var OffscreenData;\n"
+"var DetailedFrameCounter = 0;\n"
+"var Invalidate = 0;\n"
+"var GroupOrder = Array();\n"
+"var ThreadOrder = Array();\n"
+"var TimersGroups = 0;\n"
+"var TimersMeta = 1;\n"
+"var MetaLengths = Array();\n"
+"var MetaLengthsAvg = Array();\n"
+"var MetaLengthsMax = Array();\n"
+"\n"
+"\n"
+"function ProfileModeClear()\n"
+"{\n"
+" if(ProfileMode)\n"
+" {\n"
+" ProfileData = new Object();\n"
+" ProfileStackTime = new Array();\n"
+" ProfileStackName = new Array();\n"
+" }\n"
+"}\n"
+"function ProfileEnter(Name)\n"
+"{\n"
+" if(ProfileMode)\n"
+" {\n"
+" ProfileStackTime.push(new Date());\n"
+" ProfileStackName.push(Name);\n"
+" }\n"
+"}\n"
+"function ProfileLeave()\n"
+"{\n"
+" if(ProfileMode)\n"
+" {\n"
+" var Time = new Date();\n"
+" var Delta = Time - ProfileStackTime.pop();\n"
+" var Name = ProfileStackName.pop();\n"
+" var Obj = ProfileData[Name];\n"
+" if(!Obj)\n"
+" {\n"
+" Obj = new Object();\n"
+" Obj.Count = 0;\n"
+" Obj.Name = Name;\n"
+" Obj.Time = 0;\n"
+" ProfileData[Name] = Obj;\n"
+" }\n"
+" Obj.Time += Delta;\n"
+" Obj.Count += 1;\n"
+" }\n"
+"}\n"
+"\n"
+"function ProfilePlot(s)\n"
+"{\n"
+" if(ProfileMode)\n"
+" {\n"
+" var A = ProfileData.Plot;\n"
+" if(!A)\n"
+" {\n"
+" ProfileData.Plot = Array();\n"
+" A = ProfileData.Plot;\n"
+" }\n"
+" if(A.length<10)\n"
+" {\n"
+" A.push(s);\n"
+" }\n"
+" }\n"
+"}\n"
+"function ProfileModeDump()\n"
+"{\n"
+" for(var idx in ProfileData)\n"
+" {\n"
+" var Timer = ProfileData[idx];\n"
+" console.log(Timer.Name + \" \" + Timer.Time + \"ms \" + Timer.Count);\n"
+" }\n"
+"\n"
+"}\n"
+"function ProfileModeDraw(Canvas)\n" // Draws the self-profiling overlay (timers, frame delta, optional FPS, queued plot values) as a tooltip on Canvas.
+"{\n"
+" if(ProfileMode)\n"
+" {\n"
+" var StringArray = [];\n" // flat list of label/value pairs consumed by DrawToolTip
+" for(var idx in ProfileData)\n"
+" {\n"
+" if(idx == \"Plot\")\n" // the plot queue is not a timer record; it is emitted separately below
+" continue;\n"
+" var Timer = ProfileData[idx];\n"
+" StringArray.push(Timer.Name);\n"
+" StringArray.push(Timer.Time + \"ms\");\n"
+" StringArray.push(\"#\");\n"
+" StringArray.push(\"\" + Timer.Count);\n"
+" }\n"
+" StringArray.push(\"debug\");\n"
+" StringArray.push(Debug);\n"
+" var Time = new Date();\n"
+" var Delta = Time - ProfileLastTimeStamp;\n" // milliseconds since the previous call
+" ProfileLastTimeStamp = Time;\n"
+" StringArray.push(\"Frame Delta\");\n"
+" StringArray.push(Delta + \"ms\");\n"
+" if(ProfileMode == 2)\n"
+" {\n"
+" ProfileFpsAggr += Delta;\n"
+" ProfileFpsCount ++ ;\n"
+" var AggrFrames = 10;\n" // FPS is refreshed once every AggrFrames calls
+" if(ProfileFpsCount == AggrFrames)\n"
+" {\n"
+" ProfileFps = 1000 / (ProfileFpsAggr / AggrFrames);\n"
+" ProfileFpsAggr = 0;\n"
+" ProfileFpsCount = 0;\n"
+" }\n"
+" StringArray.push(\"FPS\");\n"
+" StringArray.push(\"\" + ProfileFps.toFixed(2));\n"
+" }\n"
+"\n"
+"\n"
+" for(var i = 0; ProfileData.Plot && i < ProfileData.Plot.length; ++i)\n" // fix: bound by the queue length (was compared against the array itself); tolerate a queue that was never created
+" {\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(ProfileData.Plot[i]);\n"
+" }\n"
+" ProfileData.Plot = Array();\n" // the queue is consumed once per draw
+" DrawToolTip(StringArray, Canvas, 0, 200);\n"
+" }\n"
+"}\n"
+"\n"
+"function ToggleDebugMode()\n" // Advances ProfileMode to the next of its four states (0..3) and logs the new value.
+"{\n"
+" ProfileMode = (ProfileMode+1)%4;\n"
+" console.log(\'Toggle Debug Mode \' + ProfileMode);\n"
+"}\n"
+"\n"
+"function DetailedTotal()\n" // Returns the sum of (frameend - framestart) over every entry of Frames.
+"{\n"
+" var Total = 0;\n"
+" for(var i = 0; i < Frames.length; i++)\n"
+" {\n"
+" var frfr = Frames[i];\n"
+" Total += frfr.frameend - frfr.framestart;\n"
+" }\n"
+" return Total;\n"
+"}\n"
+"\n"
+"function InitFrameInfo()\n" // Fills the 'divFrameInfo' element with frame count and total time for the Timers and Detailed views.
+"{\n"
+"\n"
+" var div = document.getElementById(\'divFrameInfo\');\n"
+" var txt = \'\';\n"
+" txt = txt + \'Timers View\' + \'<br>\';\n"
+" txt = txt + \'Frames:\' + AggregateInfo.Frames +\'<br>\';\n"
+" txt = txt + \'Time:\' + AggregateInfo.Time.toFixed(2) +\'ms<br>\';\n"
+" txt = txt + \'<hr>\';\n"
+" txt = txt + \'Detailed View\' + \'<br>\';\n"
+" txt = txt + \'Frames:\' + Frames.length +\'<br>\';\n"
+" txt = txt + \'Time:\' + DetailedTotal().toFixed(2) +\'ms<br>\';\n"
+" div.innerHTML = txt;\n" // assigned as HTML so the <br>/<hr> markup above is rendered
+"}\n"
+"function InitGroups()\n" // Stores on each GroupInfo entry a TimerArray listing the ids of the timers that belong to that group.
+"{\n"
+" for(var groupid in GroupInfo)\n" // fix: declare the iteration variable locally instead of leaking a global
+" {\n"
+" var TimerArray = Array();\n"
+" for(var timerid in TimerInfo)\n" // fix: local instead of implicit global
+" {\n"
+" if(TimerInfo[timerid].group == groupid)\n" // loose equality on purpose: for-in keys are strings, group is compared as-is
+" {\n"
+" TimerArray.push(timerid);\n"
+" }\n"
+" }\n"
+" GroupInfo[groupid].TimerArray = TimerArray;\n"
+" }\n"
+"}\n"
+"\n"
+"function InitThreadMenu()\n" // Appends one clickable <li> per thread name, sorted by name, to the 'ThreadSubMenu' list and sizes all items.
+"{\n"
+" var ulThreadMenu = document.getElementById(\'ThreadSubMenu\');\n"
+" var MaxLen = 7;\n" // minimum width, in characters, used when sizing the menu
+" ThreadOrder = CreateOrderArray(ThreadNames, function(a){return a;});\n" // indices of ThreadNames in sorted order; stored in the global ThreadOrder
+" for(var idx in ThreadOrder)\n"
+" {\n"
+" var name = ThreadNames[ThreadOrder[idx]];\n"
+" var li = document.createElement(\'li\');\n"
+" if(name.length > MaxLen)\n"
+" {\n"
+" MaxLen = name.length;\n"
+" }\n"
+" li.innerText = name;\n" // set as text, then read back as innerHTML to obtain the HTML-escaped label
+" var asText = li.innerHTML;\n"
+" var html = \'<a href=\"javascript:void(0)\" onclick=\"ToggleThread(\\'\' + name + \'\\');\">\' + asText + \'</a>\';\n" // NOTE(review): name is concatenated unescaped into the onclick attribute; a quote in a thread name would break the handler
+" li.innerHTML = html;\n"
+" ulThreadMenu.appendChild(li);\n"
+" }\n"
+" var LenStr = (5+(1+MaxLen) * (1+FontWidth)) + \'px\';\n"
+" var Lis = ulThreadMenu.getElementsByTagName(\'li\');\n"
+" for(var i = 0; i < Lis.length; ++i)\n" // every <li> in the list, including any already present, gets the same width
+" {\n"
+" Lis[i].style[\'width\'] = LenStr;\n"
+" }\n"
+"}\n"
+"\n"
+"function UpdateThreadMenu()\n" // Underlines the links in 'ThreadSubMenu' whose thread (or the 'All' entry) is currently active.
+"{\n"
+" var ulThreadMenu = document.getElementById(\'ThreadSubMenu\');\n"
+" var as = ulThreadMenu.getElementsByTagName(\'a\');\n"
+" for(var i = 0; i < as.length; ++i)\n"
+" {\n"
+" var elem = as[i];\n"
+" var inner = elem.innerText;\n"
+" var bActive = false;\n"
+" if(i < 2)\n" // the first two links are not looked up by thread name; only one labelled 'All' can be active
+" {\n"
+" if(inner == \'All\')\n"
+" {\n"
+" bActive = ThreadsAllActive;\n"
+" }\n"
+" }\n"
+" else\n"
+" {\n"
+" bActive = ThreadsActive[inner];\n" // remaining links are keyed by their visible text
+" }\n"
+" if(bActive)\n"
+" {\n"
+" elem.style[\'text-decoration\'] = \'underline\';\n"
+" }\n"
+" else\n"
+" {\n"
+" elem.style[\'text-decoration\'] = \'none\';\n"
+" }\n"
+" }\n"
+"}\n"
+"\n"
+"function ToggleThread(ThreadName)\n" // Flips the active flag of ThreadName, or of the 'all threads' flag when ThreadName is falsy, then refreshes menu, cookie and view.
+"{\n"
+" if(ThreadName)\n"
+" {\n"
+" if(ThreadsActive[ThreadName])\n"
+" {\n"
+" ThreadsActive[ThreadName] = false;\n"
+" }\n"
+" else\n"
+" {\n"
+" ThreadsActive[ThreadName] = true;\n"
+" }\n"
+" }\n"
+" else\n"
+" {\n"
+" if(ThreadsAllActive)\n"
+" {\n"
+" ThreadsAllActive = 0;\n"
+" }\n"
+" else\n"
+" {\n"
+" ThreadsAllActive = 1;\n"
+" }\n"
+" }\n"
+" Invalidate = 0;\n"
+" UpdateThreadMenu();\n"
+" WriteCookie();\n"
+" Draw(1);\n"
+"\n"
+"}\n"
+"\n"
+"function CreateOrderArray(Source, NameFunc)\n" // Returns the indices of Source ordered by the lower-cased string NameFunc yields for each element.
+"{\n"
+" var Temp = Array(Source.length);\n"
+" for(var i = 0; i < Source.length; ++i)\n"
+" {\n"
+" Temp[i] = {};\n"
+" Temp[i].index = i;\n" // remember the original position so it survives the sort
+" Temp[i].namezz = NameFunc(Source[i]).toLowerCase();\n" // sort key; comparison is therefore case-insensitive
+" }\n"
+" Temp.sort(function(l, r)\n"
+" { \n"
+" if(r.namezz<l.namezz)\n"
+" {return 1;}\n"
+" if(l.namezz<r.namezz)\n"
+" {return -1;} \n"
+" return 0;\n"
+" } );\n"
+" var OrderArray = Array(Source.length);\n"
+" for(var i = 0; i < Source.length; ++i)\n"
+" {\n"
+" OrderArray[i] = Temp[i].index;\n"
+" }\n"
+" return OrderArray;\n"
+"}\n"
+"\n"
+"\n"
+"function InitGroupMenu()\n" // Populates 'GroupSubMenu' with one link per group and one '[name]' link per category, sorted so a category precedes its groups.
+"{\n"
+" var ulGroupMenu = document.getElementById(\'GroupSubMenu\');\n"
+" var MaxLen = 7;\n" // minimum width, in characters, used when sizing the menu
+" var MenuArray = Array();\n" // groups first, then categories; ordering is applied via MenuOrder below
+" for(var i = 0; i < GroupInfo.length; ++i)\n"
+" {\n"
+" var x = {};\n"
+" x.IsCategory = 0;\n"
+" x.category = GroupInfo[i].category;\n"
+" x.name = GroupInfo[i].name;\n"
+" x.index = i;\n"
+" MenuArray.push(x);\n"
+" }\n"
+" for(var i = 0; i < CategoryInfo.length; ++i)\n"
+" {\n"
+" var x = {};\n"
+" x.IsCategory = 1;\n"
+" x.category = i;\n"
+" x.name = CategoryInfo[i];\n"
+" x.index = i;\n"
+" MenuArray.push(x);\n"
+" }\n"
+" var OrderFunction = function(a){ return a.category + \"__\" + a.name; };\n"
+" var OrderFunctionMenu = function(a){ return a.IsCategory ? (a.category + \'\') : (a.category + \"__\" + a.name); };\n" // a category's key is a prefix of its groups' keys, so it sorts ahead of them
+" GroupOrder = CreateOrderArray(GroupInfo, OrderFunction);\n" // stored in the global GroupOrder
+" var MenuOrder = CreateOrderArray(MenuArray, OrderFunctionMenu);\n"
+"\n"
+" for(var idx in MenuOrder)\n"
+" {\n"
+" var MenuItem = MenuArray[MenuOrder[idx]];\n"
+" var name = MenuItem.name;\n"
+" var li = document.createElement(\'li\');\n"
+" if(name.length > MaxLen)\n"
+" {\n"
+" MaxLen = name.length;\n"
+" }\n"
+" var jsfunc = \'\';\n"
+" if(MenuItem.IsCategory)\n"
+" { \n"
+" li.innerText = \'[\' + name + \']\';\n" // categories are shown bracketed; UpdateGroupMenu relies on this to recognise them
+" jsfunc = \"ToggleCategory\";\n"
+" }\n"
+" else\n"
+" {\n"
+" li.innerText = name;\n"
+" jsfunc = \"ToggleGroup\";\n"
+" }\n"
+" var asText = li.innerHTML;\n" // HTML-escaped form of the label set above
+" var html = \'<a href=\"javascript:void(0)\" onclick=\"\' + jsfunc + \'(\\'\' + name + \'\\');\">\' + asText + \'</a>\';\n" // NOTE(review): name is concatenated unescaped into the onclick attribute; a quote in a name would break the handler
+" li.innerHTML = html;\n"
+" ulGroupMenu.appendChild(li);\n"
+" }\n"
+" var LenStr = (5+(1+MaxLen) * FontWidth) + \'px\';\n"
+" var Lis = ulGroupMenu.getElementsByTagName(\'li\');\n"
+" for(var i = 0; i < Lis.length; ++i)\n"
+" {\n"
+" Lis[i].style[\'width\'] = LenStr;\n"
+" }\n"
+" UpdateGroupMenu();\n"
+"}\n"
+"\n"
+"function UpdateGroupMenu()\n" // Underlines the links in 'GroupSubMenu' whose group, category or 'All' entry is currently active.
+"{\n"
+" var ulThreadMenu = document.getElementById(\'GroupSubMenu\');\n"
+" var as = ulThreadMenu.getElementsByTagName(\'a\');\n"
+" for(var i = 0; i < as.length; ++i)\n"
+" {\n"
+" var elem = as[i];\n"
+" var inner = elem.innerText;\n"
+" var bActive = false;\n"
+" if(i < 2)\n" // the first two links are not looked up by name; only one labelled 'All' can be active
+" {\n"
+" if(inner == \'All\')\n"
+" {\n"
+" bActive = GroupsAllActive;\n"
+" }\n"
+" }\n"
+" else\n"
+" {\n"
+" var CategoryString = inner.length>2 ? inner.substring(1, inner.length-1) : \"\";\n" // fix: substring's end index is exclusive, so length-1 strips exactly the surrounding brackets (length-2 also cut the name's last character)
+" var CategoryIdx = CategoryIndex(CategoryString);\n"
+" if(inner[0] == \'[\' && inner[inner.length-1] == \']\' && CategoryIdx >= 0)\n" // bracketed label that names a known category
+" {\n"
+" bActive = IsCategoryActive(CategoryIdx);\n"
+" }\n"
+" else\n"
+" {\n"
+" bActive = GroupsActive[inner];\n" // otherwise the label is a group name
+" }\n"
+" }\n"
+" if(bActive)\n"
+" {\n"
+" elem.style[\'text-decoration\'] = \'underline\';\n"
+" }\n"
+" else\n"
+" {\n"
+" elem.style[\'text-decoration\'] = \'none\';\n"
+" }\n"
+" }\n"
+"}\n"
+"function CategoryIndex(CategoryName)\n" // Returns the index of CategoryName in CategoryInfo, or -1 when it is not present.
+"{\n"
+" for(var i = 0; i < CategoryInfo.length; ++i)\n"
+" {\n"
+" if(CategoryInfo[i] == CategoryName)\n"
+" {\n"
+" return i;\n"
+" }\n"
+" }\n"
+" return -1;\n"
+"}\n"
+"function IsCategoryActive(CategoryIdx)\n" // Returns true when every group whose category equals CategoryIdx is active (also true when no group matches).
+"{\n"
+" for(var i = 0; i < GroupInfo.length; ++i)\n"
+" {\n"
+" if(GroupInfo[i].category == CategoryIdx)\n"
+" {\n"
+" var Name = GroupInfo[i].name;\n"
+" if(!GroupsActive[Name])\n" // a single inactive member makes the whole category inactive
+" {\n"
+" return false;\n"
+" }\n"
+" }\n"
+" }\n"
+" return true;\n"
+"\n"
+"}\n"
+"function ToggleCategory(CategoryName)\n" // Sets every group of the named category to the opposite of the category's current active state; unknown names are ignored.
+"{\n"
+" var CategoryIdx = CategoryIndex(CategoryName);\n"
+" if(CategoryIdx < 0)\n"
+" return;\n"
+" var CategoryActive = IsCategoryActive(CategoryIdx);\n" // sampled once, before any group is changed
+" for(var i = 0; i < GroupInfo.length; ++i)\n"
+" {\n"
+" if(GroupInfo[i].category == CategoryIdx)\n"
+" {\n"
+" var Name = GroupInfo[i].name;\n"
+" if(CategoryActive)\n"
+" {\n"
+" GroupsActive[Name] = false;\n"
+" }\n"
+" else\n"
+" {\n"
+" GroupsActive[Name] = true;\n"
+" }\n"
+" }\n"
+" }\n"
+" UpdateGroupMenu();\n"
+" WriteCookie();\n"
+" RequestRedraw();\n"
+"}\n"
+"\n"
+"function ToggleGroup(GroupName)\n" // Flips the active flag of GroupName, or of the 'all groups' flag when GroupName is falsy, then refreshes menu, cookie and view.
+"{\n"
+" if(GroupName)\n"
+" {\n"
+" if(GroupsActive[GroupName])\n"
+" {\n"
+" GroupsActive[GroupName] = false;\n"
+" }\n"
+" else\n"
+" {\n"
+" GroupsActive[GroupName] = true;\n"
+" }\n"
+" }\n"
+" else\n"
+" {\n"
+" if(GroupsAllActive)\n"
+" {\n"
+" GroupsAllActive = 0;\n"
+" }\n"
+" else\n"
+" {\n"
+" GroupsAllActive = 1;\n"
+" }\n"
+" }\n"
+" UpdateGroupMenu();\n"
+" WriteCookie();\n"
+" RequestRedraw();\n"
+"}\n"
+"function UpdateGroupColors()\n" // Assigns each timer's display color (its group's color when GroupColors is set, else its own timercolor) and the matching text color index.
+"{\n"
+" for(var i = 0; i < TimerInfo.length; ++i)\n"
+" {\n"
+" if(GroupColors)\n"
+" {\n"
+" TimerInfo[i].color = GroupInfo[TimerInfo[i].group].color;\n"
+" }\n"
+" else\n"
+" {\n"
+" TimerInfo[i].color = TimerInfo[i].timercolor;\n"
+" }\n"
+" TimerInfo[i].textcolorindex = InvertColorIndex(TimerInfo[i].color);\n" // recomputed from whichever color was just chosen
+" }\n"
+"}\n"
+"\n"
+"function ToggleGroupColors()\n" // Inverts the GroupColors option, reapplies timer colors, and refreshes options menu, cookie and view.
+"{\n"
+" GroupColors = !GroupColors;\n"
+" UpdateGroupColors();\n"
+" UpdateOptionsMenu();\n"
+" WriteCookie();\n"
+" RequestRedraw();\n"
+"}\n"
+"\n"
+"function UpdateOptionsMenu()\n" // Underlines the 'TimersMeta' and 'GroupColors' elements according to the options of the same name.
+"{\n"
+" var ulTimersMeta = document.getElementById(\'TimersMeta\');\n"
+" ulTimersMeta.style[\'text-decoration\'] = TimersMeta ? \'underline\' : \'none\';\n"
+" var ulGroupColors = document.getElementById(\'GroupColors\');\n"
+" ulGroupColors.style[\'text-decoration\'] = GroupColors ? \'underline\' : \'none\';\n"
+"}\n"
+"\n"
+"function ToggleTimersMeta()\n" // Flips the TimersMeta option between 0 and 1, then refreshes cookie, options menu and view.
+"{\n"
+" TimersMeta = TimersMeta ? 0 : 1;\n"
+" WriteCookie();\n"
+" UpdateOptionsMenu();\n"
+" RequestRedraw();\n"
+"}\n"
+"\n"
+"function ShowHelp(Show, Forever)\n" // Shows or hides the 'helpwindow' element; when Forever is truthy the choice is also stored in nHideHelp and the cookie.
+"{\n"
+" var HelpWindow = document.getElementById(\'helpwindow\');\n"
+" if(Show)\n"
+" {\n"
+" HelpWindow.style[\'display\'] = \'block\';\n"
+" }\n"
+" else\n"
+" {\n"
+" HelpWindow.style[\'display\'] = \'none\';\n"
+" }\n"
+" if(Forever)\n"
+" {\n"
+" nHideHelp = Show ? 0 : 1;\n" // stored inverted: 1 means keep the help hidden
+" WriteCookie();\n"
+" }\n"
+"}\n"
+"function SetMode(NewMode, Groups)\n" // Switches between the timers view (Groups selects plain/groups/threads) and the detailed view, updating button underlines, sub-menu visibility and the mode label.
+"{\n"
+" var buttonTimers = document.getElementById(\'buttonTimers\');\n"
+" var buttonDetailed = document.getElementById(\'buttonDetailed\');\n"
+" var buttonGroups = document.getElementById(\'buttonGroups\');\n"
+" var buttonThreads = document.getElementById(\'buttonThreads\');\n"
+" var ilThreads = document.getElementById(\'ilThreads\');\n"
+" var ilGroups = document.getElementById(\'ilGroups\');\n"
+" var ModeElement = null;\n" // button whose text is shown in the mode label; stays null for an unrecognised NewMode
+" if(NewMode == \'timers\' || NewMode == ModeTimers)\n" // accepts either the string name or the ModeTimers constant
+" {\n"
+" TimersGroups = Groups;\n"
+" buttonTimers.style[\'text-decoration\'] = TimersGroups ? \'none\' : \'underline\';\n"
+" buttonGroups.style[\'text-decoration\'] = TimersGroups == 1 ? \'underline\' : \'none\';\n"
+" buttonThreads.style[\'text-decoration\'] = TimersGroups == 2 ? \'underline\' : \'none\';\n"
+" buttonDetailed.style[\'text-decoration\'] = \'none\';\n"
+" if(TimersGroups == 0)\n"
+" {\n"
+" ilThreads.style[\'display\'] = \'none\';\n"
+" }\n"
+" else\n"
+" {\n"
+" ilThreads.style[\'display\'] = \'block\';\n"
+" }\n"
+" ilGroups.style[\'display\'] = \'block\';\n"
+" Mode = ModeTimers;\n"
+" ModeElement = TimersGroups == 2 ? buttonThreads : TimersGroups == 1 ? buttonGroups : buttonTimers;\n"
+" }\n"
+" else if(NewMode == \'detailed\' || NewMode == ModeDetailed)\n"
+" {\n"
+" buttonTimers.style[\'text-decoration\'] = \'none\';\n"
+" buttonGroups.style[\'text-decoration\'] = \'none\';\n"
+" buttonThreads.style[\'text-decoration\'] = \'none\';\n"
+" buttonDetailed.style[\'text-decoration\'] = \'underline\';\n"
+" ilThreads.style[\'display\'] = \'block\';\n"
+" ilGroups.style[\'display\'] = \'none\';\n"
+" Mode = ModeDetailed;\n"
+" ModeElement = buttonDetailed;\n"
+" }\n"
+" if(ModeElement)\n" // fix: an unrecognised NewMode previously dereferenced null here and aborted before the cookie/redraw calls
+" {\n"
+" var ModeSubMenuText = document.getElementById(\'ModeSubMenuText\');\n"
+" ModeSubMenuText.innerText = \'Mode[\' + ModeElement.innerText + \']\';\n"
+" }\n"
+"\n"
+" WriteCookie();\n"
+" RequestRedraw();\n"
+"\n"
+"}\n"
+"\n"
+"function SetReferenceTime(TimeString)\n" // Parses TimeString into ReferenceTime and underlines the 'ReferenceSubMenu' links whose HTML starts with TimeString.
+"{\n"
+" ReferenceTime = parseInt(TimeString);\n" // parseInt stops at the first non-digit, so a unit suffix is ignored
+" var ReferenceMenu = document.getElementById(\'ReferenceSubMenu\');\n"
+" var Links = ReferenceMenu.getElementsByTagName(\'a\');\n"
+" for(var i = 0; i < Links.length; ++i)\n"
+" {\n"
+" if(Links[i].innerHTML.match(\'^\' + TimeString))\n" // NOTE(review): TimeString is used as a regular-expression prefix; a bare number would also match longer labels beginning with it
+" {\n"
+" Links[i].style[\'text-decoration\'] = \'underline\';\n"
+" }\n"
+" else\n"
+" {\n"
+" Links[i].style[\'text-decoration\'] = \'none\';\n"
+" }\n"
+" }\n"
+" WriteCookie();\n"
+" RequestRedraw();\n"
+"\n"
+"}\n"
+"\n"
+"function ToggleContextSwitch()\n" // Inverts the context-switch option by delegating to SetContextSwitch.
+"{\n"
+" SetContextSwitch(nContextSwitchEnabled ? 0 : 1);\n"
+"}\n"
+"function SetContextSwitch(Enabled)\n" // Stores Enabled as 0/1 in nContextSwitchEnabled, underlines the first 'OptionsMenu' link accordingly, then refreshes cookie and view.
+"{\n"
+" nContextSwitchEnabled = Enabled ? 1 : 0;\n"
+" var ReferenceMenu = document.getElementById(\'OptionsMenu\');\n"
+" var Links = ReferenceMenu.getElementsByTagName(\'a\');\n"
+" Links[0].style[\'text-decoration\'] = nContextSwitchEnabled ? \'underline\' : \'none\';\n" // this option is addressed by position: link 0 of the options menu
+" WriteCookie();\n"
+" RequestRedraw();\n"
+"}\n"
+"\n"
+"function ToggleDebug()\n" // Flips the Debug flag between 0 and 1.
+"{\n"
+" Debug = (Debug + 1) % 2;\n"
+"}\n"
+"\n"
+"function ToggleDisableMerge()\n" // Flips DisableMerge between 0 and 1 and underlines the second 'OptionsMenu' link while it is set.
+"{\n"
+" DisableMerge = DisableMerge ? 0 : 1;\n"
+" var ReferenceMenu = document.getElementById(\'OptionsMenu\');\n"
+" var Links = ReferenceMenu.getElementsByTagName(\'a\');\n"
+" if(DisableMerge)\n"
+" {\n"
+" Links[1].style[\'text-decoration\'] = \'underline\';\n" // this option is addressed by position: link 1 of the options menu
+" }\n"
+" else\n"
+" {\n"
+" Links[1].style[\'text-decoration\'] = \'none\';\n"
+" }\n"
+"\n" // note: unlike the neighbouring toggles, no cookie write or redraw request is made here
+"}\n"
+"\n"
+"function ToggleDisableLod()\n" // Flips DisableLod between 0 and 1 and underlines the third 'OptionsMenu' link while it is set.
+"{\n"
+" DisableLod = DisableLod ? 0 : 1;\n"
+" var ReferenceMenu = document.getElementById(\'OptionsMenu\');\n"
+" var Links = ReferenceMenu.getElementsByTagName(\'a\');\n"
+" if(DisableLod)\n"
+" {\n"
+" Links[2].style[\'text-decoration\'] = \'underline\';\n" // this option is addressed by position: link 2 of the options menu
+" }\n"
+" else\n"
+" {\n"
+" Links[2].style[\'text-decoration\'] = \'none\';\n"
+" }\n"
+"\n" // note: unlike the neighbouring toggles, no cookie write or redraw request is made here
+"}\n"
+"\n"
+"function GatherHoverMetaCounters(TimerIndex, StartIndex, nLog, nFrameLast)\n" // Walks log nLog backwards from entry StartIndex-1 of frame nFrameLast, summing meta counters until the enclosing enter of TimerIndex is reached; returns the sums keyed by MetaNames entry.
+"{\n"
+" var HoverInfo = new Object();\n"
+" var StackPos = 1;\n" // starts at depth 1, so the matching enter brings it back to 0
+" //search backwards, count meta counters \n"
+" for(var i = nFrameLast; i >= 0; i--)\n"
+" {\n"
+" var fr = Frames[i];\n"
+" var ts = fr.ts[nLog];\n"
+" var ti = fr.ti[nLog];\n"
+" var tt = fr.tt[nLog];\n"
+" var start = i == nFrameLast ? StartIndex-1 : ts.length-1;\n" // earlier frames are scanned from their last entry
+"\n"
+" for(var j = start; j >= 0; j--)\n"
+" {\n"
+" var type = tt[j];\n"
+" var index = ti[j];\n"
+" var time = ts[j];\n"
+" if(type == 1)\n" // entry type 1 closes one nesting level when walking backwards
+" {\n"
+" StackPos--;\n"
+" if(StackPos == 0 && index == TimerIndex)\n"
+" {\n"
+" return HoverInfo;\n"
+" }\n"
+" }\n"
+" else if(type == 0)\n" // entry type 0 opens one nesting level when walking backwards
+" {\n"
+" StackPos++;\n"
+" }\n"
+" else if(type > 3)\n" // types above 3 are meta counter entries carrying a count of (type - 3)
+" {\n"
+" var nMetaCount = type - 3;\n"
+" var nMetaIndex = MetaNames[index];\n"
+" if(nMetaIndex in HoverInfo)\n"
+" {\n"
+" HoverInfo[nMetaIndex] += nMetaCount;\n"
+" }\n"
+" else\n"
+" {\n"
+" HoverInfo[nMetaIndex] = nMetaCount;\n"
+" }\n"
+" }\n"
+" }\n"
+" }\n"
+"}\n" // NOTE(review): when no matching entry is found the function falls off the end and returns undefined, not HoverInfo - confirm callers tolerate that
+"function CalculateAllTimers(fBegin, fEnd)\n" // Sums per-timer time and call counts over all frames, clamping enter times to fBegin and ignoring enters at or after fEnd; returns {Sorted, Sum, Count} with Sorted ordered by descending Sum.
+"{\n"
+" var Sum = [];\n"
+" var Count = [];\n"
+" var Sorted = [];\n"
+" for(var i = 0; i < TimerInfo.length; ++i)\n"
+" {\n"
+" Sum.push(0.0);\n"
+" Count.push(0);\n"
+" Sorted.push(i);\n"
+" }\n"
+" var nFrameFirst = 0;\n"
+" var nFrameLast = Frames.length;\n"
+"\n"
+" var nNumLogs = Frames.length ? Frames[0].ts.length : 0;\n" // fix: with no frames there are no logs; previously Frames[0] threw
+" var StackPosArray = Array(nNumLogs);\n" // per-log stack depth, carried across frames
+" var StackArray = Array(nNumLogs);\n" // per-log stack of pending enter times
+" for(var i = 0; i < nNumLogs; ++i)\n"
+" {\n"
+" StackPosArray[i] = 0;\n"
+" StackArray[i] = Array(20);\n"
+" }\n"
+"\n"
+" for(var i = nFrameFirst; i < nFrameLast; i++)\n"
+" {\n"
+" var fr = Frames[i];\n"
+" for(var nLog = 0; nLog < nNumLogs; nLog++)\n" // fix: loop counter declared locally instead of leaking a global
+" {\n"
+" var StackPos = StackPosArray[nLog];\n"
+" var Stack = StackArray[nLog];\n"
+" var ts = fr.ts[nLog];\n"
+" var ti = fr.ti[nLog];\n"
+" var tt = fr.tt[nLog];\n"
+" var count = ts.length;\n"
+" for(var j = 0; j < count; j++)\n" // fix: loop counter declared locally instead of leaking a global
+" {\n"
+" var type = tt[j];\n"
+" var index = ti[j];\n"
+" var time = ts[j];\n"
+" if(type == 1 && time < fEnd) //enter\n"
+" {\n"
+" var entertime = time < fBegin ? fBegin : time;\n" // enter times before the range are clamped to its start
+" Stack[StackPos] = entertime;\n"
+" if(StackArray[nLog][StackPos] != entertime)\n" // fix: sanity check compares against the stored (clamped) value, so clamping no longer logs a false failure
+" {\n"
+" console.log(\'fail fail fail\');\n"
+" }\n"
+" StackPos++;\n"
+" }\n"
+" else if(type == 0) // leave\n"
+" {\n"
+" if(StackPos>0)\n" // leaves without a recorded enter are ignored
+" {\n"
+" var timeend = time;\n"
+" StackPos--;\n"
+" var timestart = Stack[StackPos];\n" // fix: declared locally instead of leaking a global
+" var TimeDelta = timeend - timestart;\n"
+" Sum[index] += TimeDelta;\n"
+" Count[index]++;\n"
+" }\n"
+" }\n"
+" }\n"
+" StackPosArray[nLog] = StackPos;\n"
+" }\n"
+" }\n"
+" Sorted.sort(function(a,b){ return Sum[b] - Sum[a]; } );\n" // timer indices, largest total first
+" var Result = {\"Sorted\" : Sorted, \"Sum\" : Sum, \"Count\" : Count};\n"
+" return Result;\n"
+"}\n"
+"function CalculateTimers(Result, TimerIndex, nFrameFirst, nFrameLast)\n"
+"{\n"
+" if(!nFrameFirst || nFrameFirst < 0)\n"
+" nFrameFirst = 0;\n"
+" if(!nFrameLast || nFrameLast > Frames.length)\n"
+" nFrameLast = Frames.length;\n"
+" var FrameCount = nFrameLast - nFrameFirst;\n"
+" if(0 == FrameCount)\n"
+" return;\n"
+" var CallCount = 0;\n"
+" var Sum = 0;\n"
+" var Max = 0;\n"
+" var FrameMax = 0;\n"
+"\n"
+" var nNumLogs = Frames[0].ts.length;\n"
+" var StackPosArray = Array(nNumLogs);\n"
+" var StackArray = Array(nNumLogs);\n"
+" for(var i = 0; i < nNumLogs; ++i)\n"
+" {\n"
+" StackPosArray[i] = 0;\n"
+" StackArray[i] = Array(20);\n"
+" }\n"
+"\n"
+" for(var i = nFrameFirst; i < nFrameLast; i++)\n"
+" {\n"
+" var FrameSum = 0;\n"
+" var fr = Frames[i];\n"
+" for(nLog = 0; nLog < nNumLogs; nLog++)\n"
+" {\n"
+" var StackPos = StackPosArray[nLog];\n"
+" var Stack = StackArray[nLog];\n"
+" var ts = fr.ts[nLog];\n"
+" var ti = fr.ti[nLog];\n"
+" var tt = fr.tt[nLog];\n"
+" var count = ts.length;\n"
+" for(j = 0; j < count; j++)\n"
+" {\n"
+" var type = tt[j];\n"
+" var index = ti[j];\n"
+" var time = ts[j];\n"
+" if(type == 1) //enter\n"
+" {\n"
+" //push\n"
+" Stack[StackPos] = time;\n"
+" if(StackArray[nLog][StackPos] != time)\n"
+" {\n"
+" console.log(\'fail fail fail\');\n"
+" }\n"
+" StackPos++;\n"
+" }\n"
+" else if(type == 0) // leave\n"
+" {\n"
+" var timestart;\n"
+" var timeend = time;\n"
+" if(StackPos>0)\n"
+" {\n"
+" StackPos--;\n"
+" timestart = Stack[StackPos];\n"
+" }\n"
+" else\n"
+" {\n"
+" timestart = Frames[nFrameFirst].framestart;\n"
+" }\n"
+" if(index == TimerIndex)\n"
+" {\n"
+" var TimeDelta = timeend - timestart;\n"
+" CallCount++;\n"
+" FrameSum += TimeDelta;\n"
+" Sum += TimeDelta;\n"
+" if(TimeDelta > Max)\n"
+" Max = TimeDelta;\n"
+" }\n"
+" }\n"
+" else\n"
+" {\n"
+" //meta\n"
+" }\n"
+" }\n"
+" if(FrameSum > FrameMax)\n"
+" {\n"
+" FrameMax = FrameSum;\n"
+" }\n"
+" StackPosArray[nLog] = StackPos;\n"
+" }\n"
+" }\n"
+"\n"
+" Result.CallCount = CallCount;\n"
+" Result.Sum = Sum.toFixed(3);\n"
+" Result.Max = Max.toFixed(3);\n"
+" Result.Average = (Sum / CallCount).toFixed(3);\n"
+" Result.FrameAverage = (Sum / FrameCount).toFixed(3);\n"
+" Result.FrameCallAverage = (CallCount / FrameCount).toFixed(3);\n"
+" Result.FrameMax = FrameMax.toFixed(3);\n"
+" return Result;\n"
+"}\n"
+"\n"
+"function PreprocessCalculateAllTimers()\n" // Computes CallCount, Sum, Max, Average, FrameAverage, FrameCallAverage and FrameMax for every timer over all frames and stores them on the TimerInfo entries.
+"{\n"
+" ProfileEnter(\"CalculateAllTimers\");\n"
+" var nFrameFirst = 0;\n"
+" var nFrameLast = Frames.length;\n"
+" var FrameCount = nFrameLast - nFrameFirst;\n"
+" if(0 == FrameCount)\n"
+" {\n"
+" ProfileLeave();\n" // fix: close the profiling scope opened above before bailing out, keeping enter/leave balanced
+" return;\n"
+" }\n"
+" for(var j = 0; j < TimerInfo.length; j++)\n"
+" {\n"
+" TimerInfo[j].CallCount = 0;\n"
+" TimerInfo[j].Sum = 0;\n"
+" TimerInfo[j].Max = 0;\n"
+" TimerInfo[j].FrameMax = 0;\n"
+" }\n"
+"\n"
+"\n"
+" var nNumLogs = Frames[0].ts.length;\n"
+" var StackPosArray = Array(nNumLogs);\n" // per-log stack depth, carried across frames
+" var StackArray = Array(nNumLogs);\n" // per-log stack of pending enter times
+" for(var i = 0; i < nNumLogs; ++i)\n"
+" {\n"
+" StackPosArray[i] = 0;\n"
+" StackArray[i] = Array(20);\n"
+" }\n"
+"\n"
+" for(var i = nFrameFirst; i < nFrameLast; i++)\n"
+" {\n"
+" for(var j = 0; j < TimerInfo.length; j++)\n"
+" {\n"
+" TimerInfo[j].FrameSum = 0;\n" // per-frame accumulator, reset for each frame
+" }\n"
+"\n"
+" var fr = Frames[i];\n"
+" for(var nLog = 0; nLog < nNumLogs; nLog++)\n" // fix: loop counter declared locally instead of leaking a global
+" {\n"
+" var StackPos = StackPosArray[nLog];\n"
+" var Stack = StackArray[nLog];\n"
+" var ts = fr.ts[nLog];\n"
+" var ti = fr.ti[nLog];\n"
+" var tt = fr.tt[nLog];\n"
+" var count = ts.length;\n"
+" for(j = 0; j < count; j++)\n"
+" {\n"
+" var type = tt[j];\n"
+" var index = ti[j];\n"
+" var time = ts[j];\n"
+" if(type == 1) //enter\n"
+" {\n"
+" //push\n"
+" Stack[StackPos] = time;\n"
+" if(StackArray[nLog][StackPos] != time)\n"
+" {\n"
+" console.log(\'fail fail fail\');\n"
+" }\n"
+" StackPos++;\n"
+" }\n"
+" else if(type == 0) // leave\n"
+" {\n"
+" var timestart;\n"
+" var timeend = time;\n"
+" if(StackPos>0)\n"
+" {\n"
+" StackPos--;\n"
+" timestart = Stack[StackPos];\n"
+" }\n"
+" else\n"
+" {\n"
+" timestart = Frames[nFrameFirst].framestart;\n" // a leave without a recorded enter is measured from the start of the first frame
+" }\n"
+" // if(index == TimerIndex)\n"
+" {\n"
+" var TimeDelta = timeend - timestart;\n"
+" TimerInfo[index].CallCount++;\n"
+" TimerInfo[index].FrameSum += TimeDelta;\n"
+" TimerInfo[index].Sum += TimeDelta;\n"
+" if(TimeDelta > TimerInfo[index].Max)\n"
+" TimerInfo[index].Max = TimeDelta;\n"
+" }\n"
+" }\n"
+" else\n"
+" {\n"
+" //meta\n"
+" }\n"
+" }\n"
+" for(var j = 0; j < TimerInfo.length; j++)\n"
+" {\n"
+" if(TimerInfo[j].FrameSum > TimerInfo[j].FrameMax)\n"
+" {\n"
+" TimerInfo[j].FrameMax = TimerInfo[j].FrameSum;\n"
+" }\n"
+" }\n"
+" StackPosArray[nLog] = StackPos;\n"
+" }\n"
+"\n"
+"\n"
+" }\n"
+"\n"
+" for(var j = 0; j < TimerInfo.length; j++)\n" // final pass: replace the numeric accumulators with formatted strings
+" {\n"
+" var CallCount = TimerInfo[j].CallCount;\n"
+" var Sum = TimerInfo[j].Sum.toFixed(3);\n"
+" var Max = TimerInfo[j].Max.toFixed(3);\n"
+" var Average = (TimerInfo[j].Sum / TimerInfo[j].CallCount).toFixed(3);\n" // NOTE(review): yields the string \"NaN\" for timers that were never called
+" var FrameAverage = (TimerInfo[j].Sum / FrameCount).toFixed(3);\n"
+" var FrameCallAverage = (TimerInfo[j].CallCount / FrameCount).toFixed(3);\n"
+" var FrameMax = TimerInfo[j].FrameMax.toFixed(3);\n"
+" TimerInfo[j].CallCount = CallCount;\n"
+" TimerInfo[j].Sum = Sum;\n"
+" TimerInfo[j].Max = Max ;\n"
+" TimerInfo[j].Average = Average;\n"
+" TimerInfo[j].FrameAverage = FrameAverage;\n"
+" TimerInfo[j].FrameCallAverage = FrameCallAverage;\n"
+" TimerInfo[j].FrameMax = FrameMax;\n"
+" }\n"
+" ProfileLeave();\n"
+"}\n"
+"\n"
+"var FlashFrames = 10;\n" // divisor applied to the counter inside the sine that modulates the flash message opacity
+"var FlashFrameCounter = 0;\n" // remaining draws for the flash message; decremented by DrawFlashMessage
+"var FlashMessage = \'\';\n" // text currently shown by DrawFlashMessage
+"function TimeString(Diff)\n" // Formats a duration in seconds as a whole number of the largest fitting unit (sec, min, hr or day); values below 1 give \"0 sec\".
+"{\n"
+" var DiffString = \"0 sec\";\n"
+" var DiffTable = [1,60,60*60,60*60*24];\n" // unit sizes in seconds, ascending
+" var DiffNameTable = [\"sec\", \"min\", \"hr\", \"day\"];\n"
+" for(var i = 0; i < DiffTable.length; ++i)\n" // later (larger) units overwrite earlier ones, so the largest fitting unit wins
+" {\n"
+" if(Diff >= DiffTable[i])\n"
+" {\n"
+" DiffString = Math.floor(Diff / DiffTable[i]) + \" \" + DiffNameTable[i];\n"
+" }\n"
+" }\n"
+" return DiffString;\n"
+"\n"
+"}\n"
+"function ShowFlashMessage(Message, FrameCount)\n" // Sets the flash message text and the number of draws for which DrawFlashMessage will count down.
+"{\n"
+" FlashMessage = Message;\n"
+" FlashFrameCounter = FrameCount;\n"
+"}\n"
+"function OnPageReady()\n" // Flashes a 'Captured ... ago' notice for captures older than ten minutes and opens the help window unless nHideHelp is set.
+"{\n"
+" var DumpDate = DumpUtcCaptureTime;\n"
+" var CurrentDate = Date.now() / 1000;\n" // current time in seconds, to match DumpDate
+" var Diff = CurrentDate - DumpDate;\n"
+" var Limit = 10*60;//flash old message when loading captures older than 10 minutes \n"
+" if(Diff > Limit)\n"
+" {\n"
+" ShowFlashMessage(\"Captured \" + TimeString(Diff) + \" ago\", 100);\n"
+" }\n"
+" if(!nHideHelp)\n"
+" {\n"
+" ShowHelp(1,0);\n" // show without persisting the choice
+" }\n"
+"}\n"
+"\n"
+"function DrawFlashMessage(context)\n" // Draws the pending flash message in red with a pulsing opacity, and counts FlashFrameCounter down by one per call.
+"{\n"
+" if(FlashFrameCounter > 0)\n"
+" {\n"
+" if(FlashFrameCounter>1)\n" // the final countdown step only decrements, without drawing
+" {\n"
+" var FlashPrc = Math.sin(FlashFrameCounter / FlashFrames);\n"
+" context.font = FontFlash;\n"
+" context.globalAlpha = FlashPrc * 0.35 + 0.5;\n" // opacity oscillates between 0.15 and 0.85
+" context.textAlign = \'center\';\n"
+" context.fillStyle = \'red\';\n"
+" context.fillText(FlashMessage, nWidth * 0.5, 50);\n"
+" context.globalAlpha = 1;\n" // put alpha, alignment and font back for subsequent drawing
+" context.textAlign = \'left\';\n"
+" context.font = Font;\n"
+" }\n"
+" FlashFrameCounter -= 1;\n"
+"\n"
+" }\n"
+"}\n"
+"\n"
+"function DrawCaptureInfo(context)\n" // Draws right-aligned capture details (capture date, frame count for the current mode, host, age) and then any flash message.
+"{\n"
+" context.fillStyle = \'white\';\n"
+" context.textAlign = \'right\';\n"
+" context.font = Font;\n"
+" var DumpDate = DumpUtcCaptureTime;\n"
+" var CurrentDate = Date.now() / 1000;\n" // current time in seconds, to match DumpDate
+" var Diff = CurrentDate - DumpDate;\n"
+" var DiffString = TimeString(Diff) + \" ago\";\n"
+" context.fillText(new Date(DumpDate*1000).toLocaleString(), nWidth, FontHeight);\n" // Date expects milliseconds, hence the scaling
+" if(Mode == ModeTimers)\n"
+" {\n"
+" context.fillText(\"Timer Frames: \" + AggregateInfo.Frames, nWidth, FontHeight*2);\n"
+" }\n"
+" else\n"
+" {\n"
+" context.fillText(\"Detailed Frames \"+ Frames.length, nWidth, FontHeight*2);\n"
+" }\n"
+" context.fillText(DumpHost, nWidth, FontHeight*3);\n"
+" context.fillText(DiffString, nWidth, FontHeight*4);\n"
+" context.textAlign = \'left\';\n"
+" DrawFlashMessage(context);\n"
+"}\n"
+"\n"
+"function DrawDetailedFrameHistory()\n" // Redraws the frame-history strip: one bar per frame scaled against ReferenceTime, the currently viewed range highlighted in green, capture info, and a tooltip for the hovered frame.
+"{\n"
+" ProfileEnter(\"DrawDetailedFrameHistory\");\n"
+" var x = HistoryViewMouseX;\n"
+"\n"
+" var context = CanvasHistory.getContext(\'2d\');\n"
+" context.clearRect(0, 0, CanvasHistory.width, CanvasHistory.height);\n"
+"\n"
+" var fHeight = nHistoryHeight;\n"
+" var fWidth = nWidth / Frames.length;\n" // horizontal space per frame bar
+" var fHeightScale = fHeight / ReferenceTime;\n" // a frame lasting ReferenceTime fills the full height
+" var fX = 0;\n"
+" var FrameIndex = -1;\n" // index of the hovered frame, -1 when none
+" var MouseDragging = MouseDragState != MouseDragOff;\n"
+" fRangeBeginHistory = fRangeEndHistory = -1;\n"
+" fRangeBeginHistoryGpu = fRangeEndHistoryGpu = -1;\n"
+"\n"
+" var FrameFirst = -1;\n" // x position where the viewed range starts; default is off the left edge
+" var FrameLast = nWidth;\n" // x position where the viewed range ends; default is the right edge
+" var fDetailedOffsetEnd = fDetailedOffset + fDetailedRange;\n"
+" for(var i = 0; i < Frames.length; i++)\n" // fix: loop counter declared locally instead of leaking a global
+" {\n"
+" var fMs = Frames[i].frameend - Frames[i].framestart;\n"
+" if(fDetailedOffset <= Frames[i].frameend && fDetailedOffset >= Frames[i].framestart)\n"
+" {\n"
+" var lerp = (fDetailedOffset - Frames[i].framestart) / (Frames[i].frameend - Frames[i].framestart);\n" // fraction of this frame at which the view begins
+" FrameFirst = fX + fWidth * lerp;\n"
+" }\n"
+" if(fDetailedOffsetEnd <= Frames[i].frameend && fDetailedOffsetEnd >= Frames[i].framestart)\n"
+" {\n"
+" var lerp = (fDetailedOffsetEnd - Frames[i].framestart) / (Frames[i].frameend - Frames[i].framestart);\n" // fraction of this frame at which the view ends
+" FrameLast = fX + fWidth * lerp;\n"
+" }\n"
+" var fH = fHeightScale * fMs;\n"
+" var bMouse = x > fX && x < fX + fWidth;\n"
+" if(bMouse && !MouseDragging)\n" // hovered frame: highlight it and publish its time range
+" {\n"
+" context.fillStyle = FRAME_HISTORY_COLOR_GPU;\n"
+" fRangeBeginHistory = Frames[i].framestart;\n"
+" fRangeEndHistory = Frames[i].frameend;\n"
+" if(Frames[i].framestartgpu)\n"
+" {\n"
+" fRangeBeginHistoryGpu = Frames[i].framestartgpu;\n"
+" fRangeEndHistoryGpu = Frames[i].frameendgpu;\n"
+" }\n"
+" FrameIndex = i;\n"
+" }\n"
+" else\n"
+" {\n"
+" context.fillStyle = FRAME_HISTORY_COLOR_CPU;\n"
+" }\n"
+" context.fillRect(fX, fHeight - fH, fWidth-1, fH);\n" // bars grow upward from the bottom, with a 1px gap between them
+" fX += fWidth;\n"
+" }\n"
+"\n"
+" var fRangeHistoryBegin = FrameFirst;\n"
+" var fRangeHistoryEnd = FrameLast;\n"
+" var X = fRangeHistoryBegin;\n"
+" var Y = 0;\n"
+" var W = fRangeHistoryEnd - fRangeHistoryBegin;\n"
+" context.globalAlpha = 0.35;\n"
+" context.fillStyle = \'#009900\';\n"
+" context.fillRect(X, Y, W, fHeight);\n" // translucent overlay marking the viewed range
+" context.globalAlpha = 1;\n"
+" context.strokeStyle = \'#00ff00\';\n"
+" context.beginPath();\n"
+" context.moveTo(X, Y);\n"
+" context.lineTo(X, Y+fHeight);\n"
+" context.moveTo(X+W, Y);\n"
+" context.lineTo(X+W, Y+fHeight);\n"
+" context.stroke();\n"
+"\n"
+"\n"
+"\n"
+"\n"
+" DrawCaptureInfo(context);\n"
+"\n"
+" if(FrameIndex>=0 && !MouseDragging)\n"
+" {\n"
+" var StringArray = [];\n"
+" StringArray.push(\"Frame\");\n"
+" StringArray.push(\"\" + FrameIndex);\n"
+" StringArray.push(\"Time\");\n"
+" StringArray.push(\"\" + (Frames[FrameIndex].frameend - Frames[FrameIndex].framestart).toFixed(3));\n"
+"\n"
+" DrawToolTip(StringArray, CanvasHistory, HistoryViewMouseX, HistoryViewMouseY+20);\n"
+"\n"
+" }\n"
+" ProfileLeave();\n"
+"}\n"
+"function TimeToMsString(Time)\n" // Formats Time with three decimals followed by \"ms\".
+"{\n"
+" return Time.toFixed(3) + \"ms\";\n"
+"}\n"
+"function TimeToString(Time)\n" // Formats a millisecond value as a whole number in the best-fitting unit: s, ms, us or ns.
+"{\n"
+" if(Time > 1000)\n"
+" {\n"
+" return (Time/1000.0).toFixed(0) +\"s\";\n"
+" }\n"
+" else if(Time > 0.9)\n" // thresholds sit slightly below 1 so values that round up to 1 use the larger unit
+" {\n"
+" return Time.toFixed(0) + \"ms\";\n"
+" }\n"
+" else if(Time > 0.0009)\n"
+" {\n"
+" return (Time*1000).toFixed(0) + \"us\";\n"
+" }\n"
+" else\n"
+" {\n"
+" return (Time*1000000).toFixed(0) + \"ns\";\n"
+" }\n"
+"}\n"
+"\n"
+"function DrawDetailedBackground(context)\n" // Paints the detailed view's background: alternating time bands labelled with the band width, plus a vertical line at each visible frame start.
+"{\n"
+" var fMs = fDetailedRange;\n"
+" var fMsEnd = fMs + fDetailedOffset;\n"
+" var fMsToScreen = nWidth / fMs;\n"
+" var fRate = Math.floor(2*((Math.log(fMs)/Math.log(10))-1))/2;\n" // log10 of the range minus one, rounded down to a half step
+" var fStep = Math.pow(10, fRate);\n" // band width in the same unit as fDetailedRange
+" var fRcpStep = 1.0 / fStep;\n"
+" var nColorIndex = Math.floor(fDetailedOffset * fRcpStep) % 2;\n" // band parity at the left edge, so colors stay put while scrolling
+" if(nColorIndex < 0)\n" // the remainder is negative for negative offsets
+" nColorIndex = -nColorIndex;\n"
+" var fStart = Math.floor(fDetailedOffset * fRcpStep) * fStep;\n" // offset snapped down to a band boundary
+" var fHeight = CanvasDetailedView.height;\n"
+" var fScaleX = nWidth / fDetailedRange; \n"
+" var HeaderString = TimeToString(fStep);\n"
+" context.textAlign = \'center\';\n"
+" for(var f = fStart; f < fMsEnd; )\n" // fix: loop variable declared locally instead of leaking a global
+" {\n"
+" var fNext = f + fStep;\n"
+" var X = (f - fDetailedOffset) * fScaleX;\n"
+" var W = (fNext-f)*fScaleX;\n"
+" context.fillStyle = nBackColors[nColorIndex];\n"
+" context.fillRect(X, 0, W+2, fHeight);\n"
+" nColorIndex = 1 - nColorIndex;\n"
+" context.fillStyle = \'#777777\';\n" // fix: explicit statement terminator instead of relying on automatic semicolon insertion
+" context.fillText(HeaderString, X + W * 0.5, 10);\n"
+" context.fillText(HeaderString, X + W * 0.5, nHeight - 10);\n"
+" f = fNext;\n"
+" }\n"
+" context.textAlign = \'left\';\n"
+" var fScaleX = nWidth / fDetailedRange; \n"
+" context.globalAlpha = 0.5;\n"
+" context.strokeStyle = \'#bbbbbb\';\n"
+" context.beginPath();\n"
+" for(var i = 0; i < Frames.length; i++)\n"
+" {\n"
+" var frfr = Frames[i];\n"
+" if(frfr.frameend < fDetailedOffset || frfr.framestart > fDetailedOffset + fDetailedRange)\n" // frame lies entirely outside the viewed range
+" {\n"
+" continue;\n"
+" }\n"
+" var X = (frfr.framestart - fDetailedOffset) * fScaleX;\n"
+" if(X >= 0 && X < nWidth)\n"
+" {\n"
+" context.moveTo(X, 0);\n"
+" context.lineTo(X, nHeight);\n"
+" }\n"
+" }\n"
+" context.stroke();\n"
+" context.globalAlpha = 1;\n"
+"\n"
+"}\n"
+"function DrawToolTip(StringArray, Canvas, x, y)\n" // Draws a black tooltip box at (x, y) on Canvas; StringArray holds label/value pairs, labels left-aligned and values right-aligned, one pair per row.
+"{\n"
+" var context = Canvas.getContext(\'2d\');\n"
+" context.font = Font;\n"
+" var WidthArray = Array(StringArray.length);\n" // measured pixel width of every string
+" var nMaxWidth = 0;\n"
+" var nHeight = 0;\n" // local box height; shadows the global of the same name
+" for(var i = 0; i < StringArray.length; i += 2)\n" // fix: loop counter declared locally so callers' own counters are not clobbered
+" {\n"
+" var nWidth0 = context.measureText(StringArray[i]).width;\n"
+" var nWidth1 = context.measureText(StringArray[i+1]).width;\n"
+" var nSum = nWidth0 + nWidth1;\n"
+" WidthArray[i] = nWidth0;\n"
+" WidthArray[i+1] = nWidth1;\n"
+" if(nSum > nMaxWidth)\n"
+" {\n"
+" nMaxWidth = nSum;\n"
+" }\n"
+" nHeight += BoxHeight;\n"
+" }\n"
+" nMaxWidth += 15;\n" // gap between the label and value columns
+" //bounds check.\n"
+" var CanvasRect = Canvas.getBoundingClientRect();\n"
+" if(y + nHeight > CanvasRect.height)\n"
+" {\n"
+" y = CanvasRect.height - nHeight;\n"
+" x += 20;\n" // shifted right when the box had to be moved up
+" }\n"
+" if(x + nMaxWidth > CanvasRect.width)\n"
+" {\n"
+" x = CanvasRect.width - nMaxWidth;\n"
+" }\n"
+"\n"
+" context.fillStyle = \'black\';\n"
+" context.fillRect(x-1, y, nMaxWidth+2, nHeight);\n"
+" context.fillStyle = \'white\';\n"
+"\n"
+" var XPos = x;\n"
+" var XPosRight = x + nMaxWidth;\n"
+" var YPos = y + BoxHeight-2;\n"
+" for(i = 0; i < StringArray.length; i += 2)\n"
+" {\n"
+" context.fillText(StringArray[i], XPos, YPos);\n"
+" context.fillText(StringArray[i+1], XPosRight - WidthArray[i+1], YPos);\n" // right-align the value using its measured width
+" YPos += BoxHeight;\n"
+" }\n"
+"}\n"
+"function DrawHoverToolTip()\n" // DrawHoverToolTip: collects label/value strings for whatever is hovered (a timer, or else a context-switch bar) and passes them to DrawToolTip.
+"{\n"
+" ProfileEnter(\"DrawHoverToolTip\");\n"
+" if(nHoverToken != -1)\n" // a timer is hovered
+" {\n"
+" var StringArray = [];\n" // filled in pairs: label first, then value
+" var groupid = TimerInfo[nHoverToken].group;\n"
+" StringArray.push(\"Timer\");\n"
+" StringArray.push(TimerInfo[nHoverToken].name);\n"
+" StringArray.push(\"Group\");\n"
+" StringArray.push(GroupInfo[groupid].name);\n"
+"\n"
+" var bShowTimers = Mode == ModeTimers;\n" // aggregate timer statistics are the default in timer mode
+" if(FlipToolTip)\n" // FlipToolTip inverts which of the two layouts is shown
+" {\n"
+" bShowTimers = !bShowTimers;\n"
+" }\n"
+" if(bShowTimers)\n"
+" {\n"
+"\n"
+" StringArray.push(\"\");\n" // an empty label/value pair; presumably rendered as a blank spacer row
+" StringArray.push(\"\");\n"
+" var Timer = TimerInfo[nHoverToken];\n"
+" StringArray.push(\"Average\");\n"
+" StringArray.push(Timer.average);\n"
+" StringArray.push(\"Max\");\n"
+" StringArray.push(Timer.max);\n"
+" StringArray.push(\"Excl Max\");\n"
+" StringArray.push(Timer.exclmax);\n"
+" StringArray.push(\"Excl Average\");\n"
+" StringArray.push(Timer.exclaverage);\n"
+" StringArray.push(\"Call Average\");\n"
+" StringArray.push(Timer.callaverage);\n"
+" StringArray.push(\"Call Count\");\n"
+" StringArray.push(Timer.callcount);\n"
+"\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"\");\n"
+"\n"
+"\n"
+" StringArray.push(\"Group\");\n" // group section: name is repeated here, followed by the group aggregates
+" StringArray.push(GroupInfo[groupid].name);\n"
+" StringArray.push(\"Average\");\n"
+" StringArray.push(GroupInfo[groupid].average);\n"
+" StringArray.push(\"Max\");\n"
+" StringArray.push(GroupInfo[groupid].max);\n"
+"\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"\");\n"
+"\n"
+" StringArray.push(\""; // C++ chunk boundary: this JS statement continues in g_MicroProfileHtml_end_1 below, so the two arrays must be emitted back to back
+
+const size_t g_MicroProfileHtml_end_0_size = sizeof(g_MicroProfileHtml_end_0); // presumably a const char[] like g_MicroProfileHtml_end_1, in which case sizeof also counts the terminating NUL
+const char g_MicroProfileHtml_end_1[] =
+"Timer Capture\");\n" // completes the push of the label Timer Capture started at the end of the previous chunk
+" StringArray.push(\"\");\n"
+" StringArray.push(\"Frames\");\n"
+" StringArray.push(AggregateInfo.Frames);\n"
+" StringArray.push(\"Time\");\n"
+" StringArray.push(AggregateInfo.Time.toFixed(2) + \"ms\");\n"
+"\n"
+"\n"
+"\n"
+"\n"
+" }\n"
+" else\n" // second layout: statistics taken from the capitalised TimerInfo fields (Sum, Max, Average, ...)
+" {\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"\");\n"
+"\n"
+"\n"
+"\n"
+" StringArray.push(\"Time\");\n"
+" StringArray.push((fRangeEnd-fRangeBegin).toFixed(3));\n" // length of the currently hovered range, 3 decimals
+" StringArray.push(\"\");\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"Total\");\n"
+" StringArray.push(\"\" + TimerInfo[nHoverToken].Sum);\n" // the empty-string prefix coerces the value to a string
+" StringArray.push(\"Max\");\n"
+" StringArray.push(\"\" + TimerInfo[nHoverToken].Max);\n"
+" StringArray.push(\"Average\");\n"
+" StringArray.push(\"\" + TimerInfo[nHoverToken].Average);\n"
+" StringArray.push(\"Count\");\n"
+" StringArray.push(\"\" + TimerInfo[nHoverToken].CallCount);\n"
+"\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"\");\n"
+"\n"
+" StringArray.push(\"Max/Frame\");\n"
+" StringArray.push(\"\" + TimerInfo[nHoverToken].FrameMax);\n"
+"\n"
+" StringArray.push(\"Average Time/Frame\");\n"
+" StringArray.push(\"\" + TimerInfo[nHoverToken].FrameAverage);\n"
+"\n"
+" StringArray.push(\"Average Count/Frame\");\n"
+" StringArray.push(\"\" + TimerInfo[nHoverToken].FrameCallAverage);\n"
+"\n"
+"\n"
+"\n"
+"\n"
+"\n"
+" \n"
+" if(nHoverFrame != -1)\n" // extra section only when a specific frame is hovered
+" {\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"Frame \" + nHoverFrame);\n"
+" StringArray.push(\"\");\n"
+"\n"
+" var FrameTime = new Object();\n"
+" CalculateTimers(FrameTime, nHoverToken, nHoverFrame, nHoverFrame+1);\n" // CalculateTimers is defined elsewhere; presumably fills Sum/CallCount/Average/Max for that single frame - confirm
+" StringArray.push(\"Total\");\n"
+" StringArray.push(\"\" + FrameTime.Sum);\n"
+" StringArray.push(\"Count\");\n"
+" StringArray.push(\"\" + FrameTime.CallCount);\n"
+" StringArray.push(\"Average\");\n"
+" StringArray.push(\"\" + FrameTime.Average);\n"
+" StringArray.push(\"Max\");\n"
+" StringArray.push(\"\" + FrameTime.Max);\n"
+" }\n"
+"\n"
+" var HoverInfo = GatherHoverMetaCounters(nHoverToken, nHoverTokenIndex, nHoverTokenLogIndex, nHoverFrame);\n"
+" var Header = 0;\n" // becomes 1 once the Meta heading has been emitted
+" for(index in HoverInfo)\n" // NOTE(review): index has no var here, so it is assigned as an implicit global
+" {\n"
+" if(0 == Header)\n" // heading is only added when HoverInfo has at least one key
+" {\n"
+" Header = 1;\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"Meta\");\n"
+" StringArray.push(\"\");\n"
+"\n"
+" }\n"
+" StringArray.push(\"\"+index);\n"
+" StringArray.push(\"\"+HoverInfo[index]);\n"
+" }\n"
+"\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"\");\n"
+"\n"
+" StringArray.push(\"Detailed Capture\");\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"Frames\");\n"
+" StringArray.push(Frames.length);\n"
+" StringArray.push(\"Time\");\n"
+" StringArray.push(DetailedTotal().toFixed(2) + \"ms\");\n"
+"\n"
+"\n"
+" }\n"
+" DrawToolTip(StringArray, CanvasDetailedView, DetailedViewMouseX, DetailedViewMouseY+20);\n" // anchored 20 units below the mouse position
+" }\n"
+" else if(nHoverCSCpu >= 0)\n" // no timer hovered, but a context-switch bar is
+" {\n"
+" var StringArray = [];\n"
+" StringArray.push(\"Context Switch\");\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"\");\n"
+" StringArray.push(\"Cpu\");\n"
+" StringArray.push(\"\" + nHoverCSCpu);\n"
+" StringArray.push(\"Begin\");\n"
+" StringArray.push(\"\" + fRangeBegin);\n"
+" StringArray.push(\"End\");\n"
+" StringArray.push(\"\" + fRangeEnd);\n"
+" DrawToolTip(StringArray, CanvasDetailedView, DetailedViewMouseX, DetailedViewMouseY+20);\n"
+" }\n"
+" ProfileLeave();\n"
+"}\n"
+"\n"
+"function FormatMeta(Value, Dec)\n" // FormatMeta: returns Value as a string with Dec decimals; every falsy Value (0, undefined, null, NaN, empty string) yields the string 0.
+"{\n"
+" if(!Value)\n"
+" {\n"
+" Value = \"0\";\n"
+" }\n"
+" else\n"
+" {\n"
+" Value = \'\' + Value.toFixed(Dec);\n" // toFixed requires Value to be a number here
+" }\n"
+" return Value;\n"
+"}\n"
+"\n"
+"function DrawBarView()\n" // DrawBarView: paints the bar table into CanvasDetailedView. Rows are threads+groups (TimersGroups == 2), groups+threads (other truthy TimersGroups) or groups+timers (falsy); the header row is painted last, on top.
+"{\n"
+" ProfileEnter(\"DrawBarView\");\n"
+" Invalidate++;\n"
+" nHoverToken = -1;\n" // hover state is recomputed below while the rows are drawn
+" nHoverFrame = -1;\n"
+" var context = CanvasDetailedView.getContext(\'2d\');\n"
+" context.clearRect(0, 0, nWidth, nHeight);\n"
+"\n"
+" var Height = BoxHeight;\n" // height of one row
+" var Width = nWidth;\n"
+"\n"
+" //clamp offset to prevent scrolling into the void\n"
+" var nTotalRows = 0;\n"
+" for(var groupid in GroupInfo)\n"
+" {\n"
+" if(GroupsAllActive || GroupsActive[GroupInfo[groupid].name])\n"
+" {\n"
+" nTotalRows += GroupInfo[groupid].TimerArray.length + 1;\n" // +1 for the row of the group itself
+" }\n"
+" }\n"
+" var nTotalRowPixels = nTotalRows * Height;\n"
+" var nFrameRows = nHeight - BoxHeight;\n" // vertical space left below the header row
+" if(nOffsetBarsY + nFrameRows > nTotalRowPixels && nTotalRowPixels > nFrameRows)\n"
+" {\n"
+" nOffsetBarsY = nTotalRowPixels - nFrameRows;\n"
+" }\n"
+"\n"
+"\n"
+" var Y = -nOffsetBarsY + BoxHeight;\n" // first row starts below the header, shifted by the vertical scroll offset
+" if(TimersGroups)\n" // horizontal scrolling is reset in both group/thread layouts
+" {\n"
+" nOffsetBarsX = 0;\n"
+" }\n"
+" var XBase = -nOffsetBarsX;\n"
+" var nColorIndex = 0;\n" // toggles between 0 and 1 to alternate row background colors
+"\n"
+" context.fillStyle = \'white\';\n"
+" context.font = Font;\n"
+" var bMouseIn = 0;\n"
+" var RcpReferenceTime = 1.0 / ReferenceTime;\n" // reciprocal, so bar length is Value * RcpReferenceTime
+" var CountWidth = 8 * FontWidth;\n"
+" var nMetaLen = TimerInfo[0].meta.length;\n" // number of meta counters is taken from the first timer
+" var nMetaCharacters = 10;\n" // lower bound; raised to the longest meta name below
+" for(var i = 0; i < nMetaLen; ++i)\n"
+" {\n"
+" if(nMetaCharacters < MetaNames[i].length)\n"
+" nMetaCharacters = MetaNames[i].length;\n"
+" }\n"
+" var nWidthMeta = nMetaCharacters * FontWidth + 6;\n"
+" function DrawHeaderSplit(Header)\n" // header helper: label at X, advance X past one bar column plus one value column, then draw a 1px separator. X, Height etc. are the var-hoisted locals of DrawBarView.
+" {\n"
+" context.fillStyle = \'white\';\n"
+" context.fillText(Header, X, Height-FontAscent);\n"
+" X += nWidthBars;\n"
+" context.fillStyle = nBackColorOffset;\n"
+" X += nWidthMs;\n"
+" if(X >= NameWidth)\n" // separator is skipped while it would fall inside the name column
+" {\n"
+" context.fillRect(X-3, 0, 1, nHeight);\n"
+" }\n"
+" }\n"
+" function DrawHeaderSplitSingle(Header, Width)\n" // header helper for a single column of the given pixel Width
+" {\n"
+" context.fillStyle = \'white\';\n"
+" context.fillText(Header, X, Height-FontAscent);\n"
+" X += Width;\n"
+" context.fillStyle = nBackColorOffset;\n"
+" if(X >= NameWidth)\n"
+" {\n"
+" context.fillRect(X-3, 0, 1, nHeight);\n"
+" }\n"
+" }\n"
+" function DrawHeaderSplitLeftRight(HeaderLeft, HeaderRight, Width)\n" // header helper: one column with a left-aligned and a right-aligned label
+" {\n"
+" context.textAlign = \'left\';\n"
+" context.fillStyle = \'white\';\n"
+" context.fillText(HeaderLeft, X, Height-FontAscent);\n"
+" X += Width;\n"
+" context.textAlign = \'right\';\n"
+" context.fillText(HeaderRight, X-5, Height-FontAscent);\n"
+" context.textAlign = \'left\';\n" // restore the default alignment for subsequent drawing
+" context.fillStyle = nBackColorOffset;\n"
+" if(X >= NameWidth)\n"
+" {\n"
+" context.fillRect(X-3, 0, 1, nHeight);\n"
+" }\n"
+" }\n"
+" function DrawTimer(Value, Color)\n" // row helper: draws one bar plus its numeric value at the current X/Y and advances X by both columns
+" {\n"
+" var Prc = Value * RcpReferenceTime;\n"
+" var YText = Y+Height-FontAscent;\n"
+" if(Prc > 1)\n" // bar is clamped to the full bar width
+" {\n"
+" Prc = 1;\n"
+" }\n"
+" context.fillStyle = Color;\n"
+" context.fillRect(X+1, Y+1, Prc * nBarsWidth, InnerBoxHeight);\n"
+" X += nWidthBars;\n"
+" context.fillStyle = \'white\';\n"
+" context.fillText((\" \" + Value.toFixed(2)).slice(-TimerLen), X, YText);\n" // keeps at most the last TimerLen characters of the padded value
+" X += nWidthMs;\n"
+" }\n"
+" function DrawMeta(Value, Width, Dec)\n" // row helper: right-aligned meta value in a column Width characters wide; YText is the var-hoisted local set by the calling row code
+" {\n"
+" Value = FormatMeta(Value, Dec);\n"
+" X += (FontWidth*Width);\n"
+" context.textAlign = \'right\';\n"
+" context.fillText(Value, X-FontWidth, YText);\n"
+" context.textAlign = \'left\';\n"
+" }\n"
+" var InnerBoxHeight = BoxHeight-2;\n" // these layout values are assigned before any of the helpers above is first called
+" var TimerLen = 6;\n"
+" var TimerWidth = TimerLen * FontWidth;\n"
+" var nWidthBars = nBarsWidth+2;\n"
+" var nWidthMs = TimerWidth+2+10;\n"
+"\n"
+"\n"
+" if(2 == TimersGroups)\n" // layout A: one row per active thread, followed by that thread's groups
+" {\n"
+" for(var i = 0; i < ThreadNames.length; ++i)\n"
+" {\n"
+" if(ThreadsActive[ThreadNames[i]] || ThreadsAllActive)\n"
+" {\n"
+" var X = 0;\n"
+" var YText = Y+Height-FontAscent;\n"
+" bMouseIn = DetailedViewMouseY >= Y && DetailedViewMouseY < Y + BoxHeight;\n" // row under the mouse gets the highlight background
+" nColorIndex = 1-nColorIndex;\n"
+" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n"
+" context.fillRect(0, Y, Width, Height);\n"
+" var ThreadColor = CSwitchColors[i % CSwitchColors.length];\n"
+" context.fillStyle = ThreadColor;\n"
+" context.fillText(ThreadNames[i], 1, YText);\n"
+" context.textAlign = \'left\';\n"
+" Y += Height;\n"
+" for(var idx in GroupOrder)\n"
+" {\n"
+" var groupid = GroupOrder[idx];\n"
+" var Group = GroupInfo[groupid];\n"
+" var PerThreadTimer = ThreadGroupTimeArray[i][groupid];\n"
+" var PerThreadTimerTotal = ThreadGroupTimeTotalArray[i][groupid];\n"
+" if((PerThreadTimer > 0.0001|| PerThreadTimerTotal>0.1) && (GroupsAllActive || GroupsActive[Group.name]))\n" // groups with negligible time on this thread are not listed
+" {\n"
+" var GColor = GroupColors ? GroupInfo[groupid].color : \'white\';\n"
+" var X = 0;\n"
+" nColorIndex = 1-nColorIndex;\n"
+" bMouseIn = DetailedViewMouseY >= Y && DetailedViewMouseY < Y + BoxHeight;\n"
+" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n"
+" context.fillRect(0, Y, Width, nHeight);\n" // NOTE(review): height argument is nHeight here but Height for the thread rows; rows drawn later paint over the excess
+" context.fillStyle = GColor;\n"
+" context.textAlign = \'right\';\n"
+" context.fillText(Group.name, NameWidth - 5, Y+Height-FontAscent);\n"
+" context.textAlign = \'left\';\n"
+" X += NameWidth;\n"
+" DrawTimer(PerThreadTimer, GColor);\n"
+" X += nWidthBars + nWidthMs; \n" // leaves one bar+value column empty before the total
+" DrawTimer(PerThreadTimerTotal, GColor);\n"
+"\n"
+" Y += Height;\n"
+" }\n"
+" }\n"
+" }\n"
+" }\n"
+" }\n"
+" else\n" // layouts B and C: one row per active group, followed by its threads or its timers
+" {\n"
+" for(var idx in GroupOrder)\n"
+" {\n"
+" var groupid = GroupOrder[idx];\n"
+" var Group = GroupInfo[groupid];\n"
+" var GColor = GroupColors ? GroupInfo[groupid].color : \'white\';\n"
+" if(GroupsAllActive || GroupsActive[Group.name])\n"
+" {\n"
+" var TimerArray = Group.TimerArray;\n"
+" var X = XBase;\n"
+" nColorIndex = 1-nColorIndex;\n"
+" bMouseIn = DetailedViewMouseY >= Y && DetailedViewMouseY < Y + BoxHeight;\n"
+" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n"
+" context.fillRect(0, Y, Width, nHeight);\n" // NOTE(review): nHeight rather than Height, same as the group rows of layout A
+" context.fillStyle = GColor;\n"
+" context.fillText(Group.name, 1, Y+Height-FontAscent);\n"
+" X += NameWidth;\n"
+" DrawTimer(Group.average, GColor);\n"
+" DrawTimer(Group.max, GColor);\n"
+" DrawTimer(Group.total, GColor);\n"
+"\n"
+" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n" // name column is repainted after the bars; presumably so scrolled bars (negative XBase) do not show through it
+" context.fillRect(0, Y, NameWidth, nHeight);\n"
+" context.fillStyle = GColor;\n"
+" context.fillText(Group.name, 1, Y+Height-FontAscent);\n"
+"\n"
+"\n"
+"\n"
+" Y += Height;\n"
+" if(TimersGroups)\n" // layout B: threads listed under each group
+" {\n"
+" for(var i = 0; i < ThreadNames.length; ++i)\n"
+" {\n"
+" var PerThreadTimer = ThreadGroupTimeArray[i][groupid];\n"
+" var PerThreadTimerTotal = ThreadGroupTimeTotalArray[i][groupid];\n"
+" if((PerThreadTimer > 0.0001|| PerThreadTimerTotal>0.1) && (ThreadsActive[ThreadNames[i]] || ThreadsAllActive))\n"
+" {\n"
+" var YText = Y+Height-FontAscent;\n"
+" bMouseIn = DetailedViewMouseY >= Y && DetailedViewMouseY < Y + BoxHeight;\n"
+" nColorIndex = 1-nColorIndex;\n"
+" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n"
+" context.fillRect(0, Y, Width, Height);\n"
+" var ThreadColor = CSwitchColors[i % CSwitchColors.length];\n"
+" context.fillStyle = ThreadColor;\n"
+" context.textAlign = \'right\';\n"
+" context.fillText(ThreadNames[i], NameWidth - 5, YText);\n"
+" context.textAlign = \'left\';\n"
+" X = NameWidth;\n"
+" DrawTimer(PerThreadTimer, ThreadColor);\n"
+" X += nWidthBars + nWidthMs; \n" // leaves one bar+value column empty before the total
+" DrawTimer(PerThreadTimerTotal, ThreadColor);\n"
+" Y += Height;\n"
+" }\n"
+" }\n"
+" }\n"
+" else\n" // layout C: timers listed under each group
+" {\n"
+" for(var timerindex in TimerArray)\n"
+" {\n"
+" var timerid = TimerArray[timerindex];\n"
+" var Timer = TimerInfo[timerid];\n"
+" var Average = Timer.average;\n"
+" var Max = Timer.max;\n"
+" var ExclusiveMax = Timer.exclmax;\n"
+" var ExclusiveAverage = Timer.exclaverage;\n"
+" var CallAverage = Timer.callaverage;\n"
+" var CallCount = Timer.callcount;\n"
+" var YText = Y+Height-FontAscent;\n"
+" X = NameWidth + XBase;\n"
+"\n"
+" nColorIndex = 1-nColorIndex;\n"
+" bMouseIn = DetailedViewMouseY >= Y && DetailedViewMouseY < Y + BoxHeight;\n"
+" if(bMouseIn)\n" // only timer rows set the hover token; group and thread rows do not
+" {\n"
+" nHoverToken = timerid;\n"
+" }\n"
+" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n"
+" context.fillRect(0, Y, Width, Height);\n"
+"\n"
+" DrawTimer(Average, Timer.color);\n" // column order must match the header drawn at the end of this function
+" DrawTimer(Max,Timer.color);\n"
+" DrawTimer(Timer.total,Timer.color);\n"
+" DrawTimer(CallAverage,Timer.color);\n"
+" context.fillStyle = \'white\';\n"
+" context.fillText(CallCount, X, YText);\n"
+" X += CountWidth;\n"
+" DrawTimer(ExclusiveAverage,Timer.color);\n"
+" DrawTimer(ExclusiveMax,Timer.color);\n"
+"\n"
+" if(TimersMeta)\n"
+" {\n"
+" context.fillStyle = \'white\';\n"
+" for(var j = 0; j < nMetaLen; ++j)\n"
+" {\n"
+" // var Len = MetaNames[j].length + 1;\n"
+" DrawMeta(Timer.meta[j], MetaLengths[j], 0);\n" // three columns per meta counter: value, average (2 decimals), max
+" DrawMeta(Timer.metaavg[j], MetaLengthsAvg[j], 2);\n"
+" DrawMeta(Timer.metamax[j], MetaLengthsMax[j], 0);\n"
+" }\n"
+" }\n"
+" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n" // name column painted last so it covers anything drawn left of NameWidth
+" context.fillRect(0, Y, NameWidth, Height);\n"
+" context.textAlign = \'right\';\n"
+" context.fillStyle = Timer.color;\n"
+" context.fillText(Timer.name, NameWidth - 5, YText);\n"
+" context.textAlign = \'left\';\n"
+"\n"
+"\n"
+" Y += Height;\n"
+" } \n"
+" }\n"
+" }\n"
+" }\n"
+" }\n"
+" X = 0;\n" // header row: painted after all rows so it stays on top
+" context.fillStyle = nBackColorOffset;\n"
+" context.fillRect(0, 0, Width, Height);\n"
+" context.fillStyle = \'white\';\n"
+" if(TimersGroups)\n"
+" {\n"
+" if(2 == TimersGroups)\n"
+" {\n"
+" DrawHeaderSplitLeftRight(\'Thread\', \'Group\', NameWidth);\n"
+" DrawHeaderSplit(\'Average\');\n"
+" }\n"
+" else\n"
+" {\n"
+" DrawHeaderSplitLeftRight(\'Group\', \'Thread\', NameWidth);\n"
+" DrawHeaderSplit(\'Average\');\n"
+" DrawHeaderSplit(\'Max\');\n"
+" DrawHeaderSplit(\'Total\');\n"
+" }\n"
+" }\n"
+" else\n"
+" {\n"
+" X = NameWidth + XBase;\n" // scrolled columns first ...
+" DrawHeaderSplit(\'Average\');\n"
+" DrawHeaderSplit(\'Max\');\n"
+" DrawHeaderSplit(\'Total\');\n"
+" DrawHeaderSplit(\'Call Average\');\n"
+" DrawHeaderSplitSingle(\'Count\', CountWidth);\n"
+" DrawHeaderSplit(\'Excl Average\');\n"
+" DrawHeaderSplit(\'Excl Max\');\n"
+" if(TimersMeta)\n"
+" {\n"
+" for(var i = 0; i < nMetaLen; ++i)\n"
+" {\n"
+" DrawHeaderSplitSingle(MetaNames[i], MetaLengths[i] * FontWidth);\n"
+" DrawHeaderSplitSingle(MetaNames[i] + \" Avg\", MetaLengthsAvg[i] * FontWidth);\n"
+" DrawHeaderSplitSingle(MetaNames[i] + \" Max\", MetaLengthsMax[i] * FontWidth);\n"
+" }\n"
+" }\n"
+" X = 0;\n" // ... then the fixed name-column header is painted over them
+" context.fillStyle = nBackColorOffset;\n"
+" context.fillRect(0, 0, NameWidth, Height);\n"
+" context.fillStyle = \'white\';\n"
+" \n"
+" DrawHeaderSplitLeftRight(\'Group\', \'Timer\', NameWidth);\n"
+" \n"
+" }\n"
+"\n"
+" ProfileLeave();\n"
+"}\n"
+"\n"
+"\n"
+"//preprocess context switch data to contain array per thread\n"
+"function PreprocessContextSwitchCacheItem(ThreadId)\n" // PreprocessContextSwitchCacheItem: builds CSwitchCache[ThreadId] once - parallel arrays of switch-in time, switch-out time and cpu - by pairing each switch-in of the thread with its next switch-out.
+"{\n"
+" console.log(\'context switch preparing \' + ThreadId);\n" // logged on every call, including calls where the cache entry already exists
+" var CSObject = CSwitchCache[ThreadId];\n"
+" if(ThreadId > 0 && !CSObject)\n" // ids <= 0 are never cached; existing entries are left untouched
+" {\n"
+" CSArrayIn = new Array();\n" // NOTE(review): no var on these three, so they are assigned as implicit globals
+" CSArrayOut = new Array();\n"
+" CSArrayCpu = new Array();\n"
+" var nCount = CSwitchTime.length;\n"
+" var j = 0;\n" // index into CSwitchThreadInOutCpu, advanced by 3 per event
+" var TimeIn = -1.0;\n" // negative means the thread is currently not switched in
+" for(var i = 0; i < nCount; ++i)\n"
+" { \n"
+" var ThreadIn = CSwitchThreadInOutCpu[j];\n" // three entries per event: thread in, thread out, cpu
+" var ThreadOut = CSwitchThreadInOutCpu[j+1];\n"
+" var Cpu = CSwitchThreadInOutCpu[j+2];\n"
+" if(TimeIn < 0)\n"
+" {\n"
+" if(ThreadIn == ThreadId)\n"
+" {\n"
+" TimeIn = CSwitchTime[i];\n"
+" }\n"
+" }\n"
+" else\n"
+" {\n"
+" if(ThreadOut == ThreadId)\n"
+" {\n"
+" var TimeOut = CSwitchTime[i];\n"
+" CSArrayIn.push(TimeIn);\n"
+" CSArrayOut.push(TimeOut);\n"
+" CSArrayCpu.push(Cpu);\n" // cpu is taken from the switch-out event
+" TimeIn = -1;\n"
+" }\n"
+" }\n"
+" j += 3;\n"
+" }\n"
+" CSObject = new Object();\n" // a switch-in without a matching switch-out is dropped
+" CSObject.Size = CSArrayIn.length;\n"
+" CSObject.In = CSArrayIn;\n"
+" CSObject.Out = CSArrayOut;\n"
+" CSObject.Cpu = CSArrayCpu;\n"
+" CSwitchCache[ThreadId] = CSObject;\n"
+" }\n"
+"\n"
+"}\n"
+"function PreprocessContextSwitchCache()\n" // PreprocessContextSwitchCache: collects thread ids that occur as switch-in in the context-switch log but are missing from ThreadIds into CSwitchOnlyThreads, then builds the per-thread cache for all of them and for every id in ThreadIds.
+"{\n"
+" ProfileEnter(\"PreprocessContextSwitchCache\");\n"
+" var AllThreads = {};\n" // thread ids already examined during this call
+" var nCount = CSwitchTime.length;\n" // one CSwitchTime entry per context-switch event
+" for(var i = 0; i < nCount; ++i)\n"
+" { \n"
+" var nThreadIn = CSwitchThreadInOutCpu[i*3];\n" // the array stores (in, out, cpu) triples per event, as read by PreprocessContextSwitchCacheItem, so the switch-in id of event i sits at 3*i
+" if(!AllThreads[nThreadIn])\n"
+" {\n"
+" AllThreads[nThreadIn] = \'\' + nThreadIn;\n" // stored as a non-empty string, so even id 0 tests truthy afterwards
+" var FoundThread = false;\n"
+" for(var j = 0; j < ThreadIds.length; ++j)\n" // separate index j: var is function-scoped, so reusing i here would overwrite the outer loop counter
+" {\n"
+" if(ThreadIds[j] == nThreadIn)\n"
+" {\n"
+" FoundThread = true;\n"
+" }\n"
+" }\n"
+" if(!FoundThread)\n" // seen in the switch log only, i.e. not an instrumented thread
+" {\n"
+" CSwitchOnlyThreads.push(nThreadIn);\n"
+" }\n"
+" }\n"
+" }\n"
+" for(var i = 0; i < CSwitchOnlyThreads.length; ++i)\n" // sequential loops may share i; only nesting is a problem
+" {\n"
+" PreprocessContextSwitchCacheItem(CSwitchOnlyThreads[i]);\n"
+" }\n"
+" for(var i = 0; i < ThreadIds.length; ++i)\n"
+" {\n"
+" PreprocessContextSwitchCacheItem(ThreadIds[i]); \n"
+" }\n"
+" ProfileLeave();\n"
+"}\n"
+"\n"
+"function DrawContextSwitchBars(context, ThreadId, fScaleX, fOffsetY, fDetailedOffset, nHoverColor, MinWidth, bDrawEnabled)\n" // DrawContextSwitchBars: draws the cached switched-in intervals of ThreadId as bars at fOffsetY and records hover state (cpu and time range) for the bar under the mouse. Does nothing when the thread has no cache entry.
+"{\n"
+" ProfileEnter(\"DrawContextSwitchBars\");\n"
+" var CSObject = CSwitchCache[ThreadId];\n"
+" if(CSObject)\n"
+" {\n"
+" var Size = CSObject.Size; \n"
+" var In = CSObject.In;\n"
+" var Out = CSObject.Out;\n"
+" var Cpu = CSObject.Cpu;\n"
+" var nNumColors = CSwitchColors.length;\n"
+" for(var i = 0; i < Size; ++i)\n"
+" {\n"
+" var TimeIn = In[i];\n"
+" var TimeOut = Out[i];\n"
+" var ActiveCpu = Cpu[i];\n"
+"\n"
+" var X = (TimeIn - fDetailedOffset) * fScaleX;\n" // time to pixel: offset from the view start times pixels per time unit
+" if(X > nWidth)\n" // right of the view: stop; assumes In[] is in ascending time order - confirm
+" {\n"
+" break;\n"
+" }\n"
+" var W = (TimeOut - TimeIn) * fScaleX;\n"
+" if(W > MinWidth && X+W > 0)\n" // skip bars that are too narrow or end left of the view
+" {\n"
+" if(nHoverCSCpu == ActiveCpu || bDrawEnabled)\n" // with drawing disabled only bars on the hovered cpu are painted
+" {\n"
+" if(nHoverCSCpu == ActiveCpu)\n"
+" {\n"
+" context.fillStyle = nHoverColor;\n"
+" }\n"
+" else\n"
+" {\n"
+" context.fillStyle = CSwitchColors[ActiveCpu % nNumColors];\n" // one color per cpu, wrapping around the palette
+" }\n"
+" context.fillRect(X, fOffsetY, W, CSwitchHeight);\n"
+" }\n"
+" if(DetailedViewMouseX >= X && DetailedViewMouseX <= X+W && DetailedViewMouseY < fOffsetY+CSwitchHeight && DetailedViewMouseY >= fOffsetY)\n" // mouse hit test against the bar rectangle
+" {\n"
+" nHoverCSCpuNext = ActiveCpu;\n" // the ...Next globals are written here and copied into the current hover state by DrawDetailed
+" fRangeBeginNext = TimeIn;\n"
+" fRangeEndNext = TimeOut;\n"
+" fRangeBeginGpuNext = fRangeEndGpuNext = -1;\n" // no gpu range for a context-switch bar
+" }\n"
+" }\n"
+" }\n"
+" }\n"
+" ProfileLeave();\n"
+"}\n"
+"\n"
+"function DrawDetailedView(context, MinWidth, bDrawEnabled)\n" // DrawDetailedView: walks the enter/leave log of every active thread, batches one quad (and optional text) per timer scope wider than MinWidth, records the scope under the mouse in the ...Next hover globals, then flushes the batches to context. With bDrawEnabled false only the hovered timer is batched.
+"{\n"
+" if(bDrawEnabled)\n"
+" {\n"
+" DrawDetailedBackground(context);\n"
+" }\n"
+"\n"
+" var colors = [ \'#ff0000\', \'#ff00ff\', \'#ffff00\'];\n" // not referenced again in this function
+"\n"
+" var fScaleX = nWidth / fDetailedRange; \n" // pixels per time unit
+" var fOffsetY = -nOffsetY + BoxHeight;\n"
+" nHoverTokenNext = -1;\n"
+" nHoverTokenLogIndexNext = -1;\n"
+" nHoverTokenIndexNext = -1;\n"
+" nHoverCounter += nHoverCounterDelta;\n" // hover highlight brightness bounces between 128 and 255
+" if(nHoverCounter >= 255) \n"
+" {\n"
+" nHoverCounter = 255;\n"
+" nHoverCounterDelta = -nHoverCounterDelta;\n"
+" }\n"
+" if(nHoverCounter < 128) \n"
+" {\n"
+" nHoverCounter = 128;\n"
+" nHoverCounterDelta = -nHoverCounterDelta;\n"
+" }\n"
+" var nHoverHigh = nHoverCounter.toString(16);\n"
+" var nHoverLow = (127+255-nHoverCounter).toString(16);\n" // not referenced again in this function
+" var nHoverColor = \'#\' + nHoverHigh + nHoverHigh + nHoverHigh;\n" // grey built from the same hex component three times
+"\n"
+" context.fillStyle = \'black\';\n"
+" context.font = Font;\n"
+" var nNumLogs = Frames[0].ts.length;\n" // number of per-thread logs, taken from the first frame
+" var fTimeEnd = fDetailedOffset + fDetailedRange;\n"
+"\n"
+" var FirstFrame = 0;\n"
+" for(var i = 0; i < Frames.length ; i++)\n" // ends up as the last frame whose frameend is before the view start (0 if none)
+" {\n"
+" if(Frames[i].frameend < fDetailedOffset)\n"
+" {\n"
+" FirstFrame = i;\n"
+" }\n"
+" }\n"
+" var nMinTimeMs = MinWidth / fScaleX;\n" // duration that corresponds to MinWidth pixels
+" {\n"
+"\n"
+" var Batches = new Array(TimerInfo.length);\n" // per timer: flat list of X, Y, W triples
+" var BatchesTxt = Array();\n" // per text color index: strings to draw
+" var BatchesTxtPos = Array();\n" // per text color index: flat list of x, y pairs matching BatchesTxt
+" var BatchesTxtColor = [\'#ffffff\', \'#333333\'];\n"
+"\n"
+" for(var i = 0; i < 2; ++i)\n"
+" {\n"
+" BatchesTxt[i] = Array();\n"
+" BatchesTxtPos[i] = Array();\n"
+" }\n"
+" for(var i = 0; i < Batches.length; ++i)\n"
+" {\n"
+" Batches[i] = Array();\n"
+" }\n"
+" for(nLog = 0; nLog < nNumLogs; nLog++)\n" // NOTE(review): nLog has no var, so it is an implicit global
+" {\n"
+" var ThreadName = ThreadNames[nLog];\n"
+" if(ThreadsAllActive || ThreadsActive[ThreadName])\n"
+" {\n"
+"\n"
+" var LodIndex = 0;\n" // pick the highest consecutive lod whose MinDelta for this log is still below nMinTimeMs
+" var MinDelta = 0;\n"
+" var NextLod = 1;\n"
+" while(NextLod < LodData.length && LodData[NextLod].MinDelta[nLog] < nMinTimeMs)\n"
+" {\n"
+" LodIndex = NextLod;\n"
+" NextLod = NextLod + 1;\n"
+" MinDelta = LodData[LodIndex].MinDelta[nLog];\n"
+" }\n"
+" if(LodIndex == LodData.length)\n" // cannot be true after the loop above, which only assigns values below LodData.length
+" {\n"
+" LodIndex = LodData.length-1;\n"
+" }\n"
+" if(DisableLod)\n"
+" {\n"
+" LodIndex = 0;\n"
+" }\n"
+"\n"
+" context.fillStyle = \'white\';\n"
+" fOffsetY += BoxHeight;\n"
+" context.fillText(ThreadName, 0, fOffsetY);\n"
+" if(nContextSwitchEnabled)\n"
+" {\n"
+" DrawContextSwitchBars(context, ThreadIds[nLog], fScaleX, fOffsetY, fDetailedOffset, nHoverColor, MinWidth, bDrawEnabled);\n"
+" fOffsetY += CSwitchHeight+1;\n"
+" }\n"
+" var MaxDepth = 1;\n"
+" var StackPos = 0;\n"
+" var Stack = Array(20);\n" // stack of log indices of currently open scopes
+" var Lod = LodData[LodIndex];\n"
+"\n"
+" var TypeArray = Lod.TypeArray[nLog];\n"
+" var IndexArray = Lod.IndexArray[nLog];\n"
+" var TimeArray = Lod.TimeArray[nLog];\n"
+"\n"
+" var LocalFirstFrame = Frames[FirstFrame].FirstFrameIndex[nLog];\n"
+" var IndexStart = Lod.LogStart[LocalFirstFrame][nLog];\n"
+" var IndexEnd = TimeArray.length;\n"
+" IndexEnd = TimeArray.length;\n" // repeats the assignment on the previous line
+" var HasSetHover = 0;\n" // NOTE(review): never set to 1; the hit test below assigns bHasSetHover instead
+"\n"
+"\n"
+" for(var j = IndexStart; j < IndexEnd; ++j)\n"
+" {\n"
+" var type = TypeArray[j];\n"
+" var index = IndexArray[j];\n" // used below as an index into TimerInfo and Batches
+" var time = TimeArray[j];\n"
+" if(type == 1)\n" // type 1: scope enter
+" {\n"
+" //push\n"
+" Stack[StackPos] = j;\n"
+" StackPos++;\n"
+" if(StackPos > MaxDepth)\n"
+" {\n"
+" MaxDepth = StackPos;\n"
+" }\n"
+" }\n"
+" else if(type == 0)\n" // type 0: scope leave, paired with the enter on top of the stack
+" {\n"
+" if(StackPos>0)\n" // a leave without a recorded enter is ignored
+" {\n"
+" StackPos--;\n"
+"\n"
+" var StartIndex = Stack[StackPos];\n"
+" var timestart = TimeArray[StartIndex];\n"
+" var timeend = time;\n"
+" var X = (timestart - fDetailedOffset) * fScaleX;\n"
+" var Y = fOffsetY + StackPos * BoxHeight;\n" // one row per nesting depth
+" var W = (timeend-timestart)*fScaleX;\n"
+"\n"
+" if(W > MinWidth && X < nWidth && X+W > 0)\n" // wide enough and at least partly inside the view
+" {\n"
+" if(bDrawEnabled || index == nHoverToken)\n"
+" {\n"
+" Batches[index].push(X);\n"
+" Batches[index].push(Y);\n"
+" Batches[index].push(W);\n"
+" DebugDrawQuadCount++;\n"
+"\n"
+" var XText = X < 0 ? 0 : X;\n" // text area is the visible part of the quad
+" var WText = W - (XText-X);\n"
+" if(XText + WText > nWidth)\n"
+" {\n"
+" WText = nWidth - XText;\n"
+" }\n"
+" var Name = TimerInfo[index].name;\n"
+" var NameLen = TimerInfo[index].len;\n"
+" var BarTextLen = Math.floor((WText-2)/FontWidth);\n" // number of characters that fit
+" var TimeText = TimeToMsString(timeend-timestart);\n"
+" var TimeTextLen = TimeText.length;\n"
+"\n"
+" if(BarTextLen >= 2)\n"
+" {\n"
+" if(BarTextLen < NameLen)\n"
+" Name = Name.substr(0, BarTextLen);\n" // truncate the name to what fits
+" var txtidx = TimerInfo[index].textcolorindex;\n"
+" var YPos = Y+BoxHeight-FontAscent;\n"
+" BatchesTxt[txtidx].push(Name);\n"
+" BatchesTxtPos[txtidx].push(XText+2);\n"
+"\n"
+" BatchesTxtPos[txtidx].push(YPos);\n"
+" DebugDrawTextCount++;\n"
+" if(BarTextLen - NameLen > TimeTextLen)\n" // duration text is added at the right edge only when room remains after the name
+" {\n"
+" BatchesTxt[txtidx].push(TimeText);\n"
+" BatchesTxtPos[txtidx].push(XText+WText-2 - TimeTextLen * FontWidth);\n"
+" BatchesTxtPos[txtidx].push(YPos);\n"
+" DebugDrawTextCount++;\n"
+" }\n"
+"\n"
+" }\n"
+" }\n"
+"\n"
+" if(DetailedViewMouseX >= X && DetailedViewMouseX <= X+W && DetailedViewMouseY < Y+BoxHeight && DetailedViewMouseY >= Y)\n" // mouse hit test; runs even when the quad itself was not batched
+" {\n"
+" fRangeBeginNext = timestart;\n"
+" fRangeEndNext = timeend;\n"
+" if(TypeArray[StartIndex+1] == 3 && TypeArray[j+1] == 3)\n" // both enter and leave are followed by a type 3 entry
+" {\n"
+" fRangeBeginGpuNext = fRangeBeginNext;\n" // the scope times become the gpu range ...
+" fRangeEndGpuNext = fRangeEndNext;\n"
+" //cpu tick is stored following\n"
+" fRangeBeginNext = TimeArray[StartIndex+1];\n" // ... and the following entries supply the cpu range
+" fRangeEndNext = TimeArray[j+1];\n"
+" }\n"
+" else\n"
+" {\n"
+" fRangeBeginGpuNext = -1;\n"
+" fRangeEndGpuNext = -1;\n"
+" }\n"
+"\n"
+" nHoverTokenNext = index;\n"
+" nHoverTokenIndexNext = j;\n"
+" nHoverTokenLogIndexNext = nLog;\n"
+" bHasSetHover = 1;\n" // NOTE(review): assigns an undeclared name (implicit global); the local declared above is HasSetHover
+" }\n"
+" }\n"
+" if(StackPos == 0 && time > fTimeEnd)\n" // past the right edge with no open scope: done with this log
+" break; \n"
+" }\n"
+" }\n"
+" }\n"
+" fOffsetY += (1+g_MaxStack[nLog]) * BoxHeight;\n" // reserve rows for the deepest stack of this log
+"\n"
+" if(HasSetHover)\n" // NOTE(review): not entered as written, since HasSetHover stays 0
+" {\n"
+" for(var i = 0; i < Frames.length-1; ++i)\n"
+" {\n"
+" var IndexStart = Lod.LogStart[i][nLog];\n"
+" if(nHoverTokenNext >= IndexStart)\n" // NOTE(review): compares a timer index with a log start index; nHoverTokenIndexNext may have been intended - confirm
+" {\n"
+" nHoverFrame = i;\n"
+" }\n"
+" }\n"
+" }\n"
+" }\n"
+" }\n"
+"\n"
+" if(nContextSwitchEnabled) //non instrumented threads.\n"
+" {\n"
+" var ContextSwitchThreads = CSwitchOnlyThreads;\n"
+" for(var i = 0; i < ContextSwitchThreads.length; ++i)\n"
+" {\n"
+" var ThreadId = ContextSwitchThreads[i];\n"
+" var ThreadName = \'\' + ThreadId;\n" // these threads are labelled with their numeric id
+" DrawContextSwitchBars(context, ThreadId, fScaleX, fOffsetY, fDetailedOffset, nHoverColor, MinWidth, bDrawEnabled);\n"
+" context.fillStyle = \'white\';\n"
+" context.fillText(ThreadName, 0, fOffsetY+5);\n"
+" fOffsetY += BoxHeight + 1;\n"
+" }\n"
+" }\n"
+"\n"
+"\n"
+" {\n"
+" for(var i = 0; i < Batches.length; ++i)\n" // flush quads, one timer (one fill color) at a time
+" {\n"
+" var a = Batches[i];\n"
+" if(a.length)\n"
+" {\n"
+" context.fillStyle = TimerInfo[i].colordark;\n"
+" if(!DisableMerge)\n" // merge pass: runs of neighbouring sub-pixel quads on the same row are collapsed into the first one
+" {\n"
+" for(var j = 0; j < a.length; j += 3)\n"
+" { \n"
+" var X = a[j];\n"
+" var Y = a[j+1];\n"
+" var BaseWidth = j + 2;\n" // slot holding the width of the quad that absorbs its neighbours
+" var W = a[BaseWidth];\n"
+" while(j+1 < a.length && W < 1)\n"
+" {\n"
+" var jnext = j+3;\n"
+" var XNext = a[jnext];\n" // undefined past the last quad; Delta is then NaN and the else branch breaks out
+" var YNext = a[jnext+1];\n"
+" var WNext = a[jnext+2];\n"
+" var Delta = XNext - (X+W);\n"
+" var YDelta = Math.abs(Y - YNext); \n"
+" if(Delta < 0.3 && YDelta < 0.5 && WNext < 1)\n"
+" {\n"
+" W = (XNext+WNext) - X;\n"
+" a[BaseWidth] = W;\n"
+" a[jnext+2] = 0;\n" // absorbed quad gets width 0 so both draw passes skip it
+" j += 3;\n"
+" }\n"
+" else\n"
+" {\n"
+" break;\n"
+" }\n"
+"\n"
+" }\n"
+" }\n"
+" }\n"
+" var off = 0.7;\n" // inset of the inner fill
+" var off2 = 2*off;\n"
+" context.fillStyle = TimerInfo[i].colordark;\n"
+" for(var j = 0; j < a.length; j += 3)\n" // pass 1: full-size quad in the dark color, only for widths of at least 1
+" { \n"
+" var X = a[j];\n"
+" var Y = a[j+1];\n"
+" var W = a[j+2];\n"
+" if(W >= 1)\n"
+" {\n"
+" context.fillRect(X, Y, W, BoxHeight-1);\n"
+" }\n"
+" }\n"
+" \n"
+"\n"
+" if(i == nHoverToken)\n" // hovered timer is filled with the pulsing hover color
+" {\n"
+" context.fillStyle = nHoverColor;\n"
+" }\n"
+" else\n"
+" {\n"
+" context.fillStyle = TimerInfo[i].color;\n"
+" }\n"
+" for(var j = 0; j < a.length; j += 3)\n" // pass 2: inset quad in the timer color, leaving the dark color as a border
+" { \n"
+" var X = a[j];\n"
+" var Y = a[j+1];\n"
+" var W = a[j+2];\n"
+" if(W > 0)\n"
+" {\n"
+" context.fillRect(X+off, Y+off, W-off2, BoxHeight-1-off2);\n"
+" }\n"
+" }\n"
+" }\n"
+" } \n"
+" }\n"
+" for(var i = 0; i < BatchesTxt.length; ++i)\n" // flush text, one text color at a time
+" {\n"
+" context.fillStyle = BatchesTxtColor[i];\n"
+" var TxtArray = BatchesTxt[i];\n"
+" var PosArray = BatchesTxtPos[i];\n"
+" for(var j = 0; j < TxtArray.length; ++j)\n"
+" {\n"
+" var k = j * 2;\n" // two position entries (x, y) per string
+" context.fillText(TxtArray[j], PosArray[k],PosArray[k+1]);\n"
+" }\n"
+" }\n"
+"\n"
+" }\n"
+"}\n"
+"function DrawTextBox(context, text, x, y, align)\n" // DrawTextBox: draws text at (x, y) on top of a background rectangle sized from the measured text width. align (center, right, anything else = left) only positions the rectangle; the text follows the current context.textAlign, which the caller is expected to have set to match.
+"{\n"
+" var textsize = context.measureText(text).width;\n"
+" var offsetx = 0;\n"
+" var offsety = -FontHeight;\n" // rectangle starts one font height above y
+" if(align == \'center\')\n"
+" {\n"
+" offsetx = -textsize / 2.0;\n"
+" }\n"
+" else if(align == \'right\')\n"
+" {\n"
+" offsetx = -textsize;\n"
+" }\n"
+" context.fillStyle = nBackColors[0];\n"
+" context.fillRect(x + offsetx, y + offsety, textsize+2, FontHeight + 2);\n" // 2 units of padding in both directions
+" context.fillStyle = \'white\';\n"
+" context.fillText(text, x, y);\n"
+"\n"
+"}\n"
+"function DrawRange(context, fBegin, fEnd, ColorBack, ColorFront, Offset, Name)\n" // DrawRange: highlights the time range [fBegin, fEnd] in the detailed view (tinted band, two boundary lines, begin/end/duration labels and Name). Returns Offset+1 when the range was drawn and Offset unchanged when fBegin >= fEnd, so stacked ranges use separate label rows.
+"{\n"
+" if(fBegin < fEnd)\n" // empty or unset ranges (for example -1, -1) are skipped
+" {\n"
+" var fScaleX = nWidth / fDetailedRange; \n" // pixels per time unit
+" var X = (fBegin - fDetailedOffset) * fScaleX;\n"
+" var YSpace = (FontHeight+2);\n" // height of one label row
+" var Y = YSpace * (Offset);\n"
+" var W = (fEnd - fBegin) * fScaleX;\n"
+" context.globalAlpha = 0.1;\n" // band is drawn nearly transparent, then alpha is restored
+" context.fillStyle = ColorBack;\n"
+" context.fillRect(X, 0, W, nHeight);\n"
+" context.globalAlpha = 1;\n"
+" context.strokeStyle = ColorFront;\n"
+" context.beginPath();\n"
+" context.moveTo(X, 0);\n" // vertical line at the begin edge
+" context.lineTo(X, nHeight);\n"
+" context.moveTo(X+W, 0);\n" // vertical line at the end edge
+" context.lineTo(X+W, nHeight);\n"
+" context.stroke();\n"
+" var Duration = (fEnd - fBegin).toFixed(2) + \"ms\";\n"
+" var Center = ((fBegin + fEnd) / 2.0) - fDetailedOffset;\n" // midpoint in time units relative to the view start
+" var DurationWidth = context.measureText(Duration+ \" \").width;\n"
+"\n"
+" context.fillStyle = \'white\';\n"
+" context.textAlign = \'right\';\n"
+" var TextPosY = Y + YSpace;\n"
+" DrawTextBox(context, \'\' + fBegin.toFixed(2), X-3, TextPosY, \'right\');\n" // begin time, just left of the band
+" if(DurationWidth < W + 10)\n" // duration label only when the band is wide enough for it
+" {\n"
+" context.textAlign = \'center\';\n"
+" DrawTextBox(context,\'\' + Duration,Center * fScaleX, TextPosY, \'center\');\n"
+"\n"
+" var W0 = W - DurationWidth + FontWidth*1.5;\n" // space left beside the duration label
+" if(W0 > 6)\n"
+" {\n"
+" var Y0 = Y + FontHeight * 0.5;\n"
+" W0 = W0 / 2.0;\n"
+" var X0 = X + W0;\n"
+" var X1 = X + W - W0;\n"
+" context.strokeStyle = ColorFront;\n"
+" context.beginPath();\n" // dimension lines: edge to tick on each side of the label
+" context.moveTo(X, Y0);\n"
+" context.lineTo(X0, Y0);\n"
+" context.moveTo(X0, Y0-2);\n"
+" context.lineTo(X0, Y0+2);\n"
+" context.moveTo(X1, Y0-2);\n"
+" context.lineTo(X1, Y0+2);\n"
+" context.moveTo(X1, Y0);\n"
+" context.lineTo(X + W, Y0);\n"
+" context.stroke();\n"
+" }\n"
+" }\n"
+" context.textAlign = \'left\';\n"
+" DrawTextBox(context, \'\' + fEnd.toFixed(2), X + W + 2, TextPosY, \'left\');\n" // end time, just right of the band
+" DrawTextBox(context, Name, X + W + 2, nHeight - FontHeight - YSpace*Offset, \'left\');\n" // range name near the bottom, stacked upwards by Offset
+" Offset += 1;\n"
+" }\n"
+" return Offset;\n"
+"}\n"
+"\n"
+"function DrawDetailed(Animation)\n" // DrawDetailed: per-frame entry point of the detailed view. Chooses between a fast pan draw, a full draw into the offscreen canvas, or reuse of the stored offscreen image; then commits the hover state gathered while drawing and overlays the selection/cpu/gpu ranges.
+"{\n"
+" if(AnimationActive != Animation || !Initialized)\n" // skipped when the caller's Animation flag differs from AnimationActive, or before initialisation
+" {\n"
+" return;\n"
+" }\n"
+" ProfileEnter(\"DrawDetailed\");\n"
+" DebugDrawQuadCount = 0;\n"
+" DebugDrawTextCount = 0;\n"
+" nHoverCSCpuNext = -1;\n"
+"\n"
+" fRangeBeginNext = fRangeEndNext = -1;\n" // reset; DrawDetailedView / DrawContextSwitchBars fill these on a mouse hit
+" fRangeBeginGpuNext = fRangeEndGpuNext = -1;\n"
+" var fRangeBeginGpu = -1;\n"
+" var fRangeEndGpu = -1;\n"
+"\n"
+" var start = new Date();\n" // not referenced again in this function
+" nDrawCount++;\n"
+"\n"
+" var context = CanvasDetailedView.getContext(\'2d\');\n"
+" var offscreen = CanvasDetailedOffscreen.getContext(\'2d\');\n"
+" var fScaleX = nWidth / fDetailedRange; \n"
+" var fOffsetY = -nOffsetY + BoxHeight;\n"
+"\n"
+" if(DetailedRedrawState.fOffsetY == fOffsetY && DetailedRedrawState.fDetailedOffset == fDetailedOffset && DetailedRedrawState.fDetailedRange == fDetailedRange && !KeyCtrlDown && !KeyShiftDown && !MouseDragButton)\n" // view unchanged and no interaction: count another stable frame
+" {\n"
+" Invalidate++;\n"
+" }\n"
+" else\n"
+" {\n"
+" Invalidate = 0;\n" // view changed: restart the count and remember the new view
+" DetailedRedrawState.fOffsetY = fOffsetY;\n"
+" DetailedRedrawState.fDetailedOffset = fDetailedOffset;\n"
+" DetailedRedrawState.fDetailedRange = fDetailedRange;\n"
+" }\n"
+" if(Invalidate == 0) //when panning, only draw bars that are a certain width to keep decent framerate\n"
+" {\n"
+" context.clearRect(0, 0, CanvasDetailedView.width, CanvasDetailedView.height);\n"
+" DrawDetailedView(context, nMinWidthPan, true);\n"
+" }\n"
+" else if(Invalidate == 1) //draw full and store\n"
+" {\n"
+" offscreen.clearRect(0, 0, CanvasDetailedView.width, CanvasDetailedView.height);\n" // this branch draws only into the offscreen context; the visible canvas is neither cleared nor redrawn by it
+" DrawDetailedView(offscreen, nMinWidth, true);\n"
+" OffscreenData = offscreen.getImageData(0, 0, CanvasDetailedOffscreen.width, CanvasDetailedOffscreen.height);\n"
+" }\n"
+" else//reuse stored result untill next time viewport is changed.\n"
+" {\n"
+" context.clearRect(0, 0, CanvasDetailedView.width, CanvasDetailedView.height);\n"
+" context.putImageData(OffscreenData, 0, 0);\n"
+" DrawDetailedView(context, nMinWidth, false);\n" // drawing disabled: only the hovered timer is drawn on top, and hover hit testing still runs
+" }\n"
+"\n"
+" if(KeyShiftDown || KeyCtrlDown || MouseDragButton || MouseDragSelectRange())\n" // no hover while a modifier is held or a drag/selection is in progress
+" {\n"
+" nHoverToken = -1;\n"
+" nHoverTokenIndex = -1;\n"
+" nHoverTokenLogIndex = -1;\n"
+" fRangeBegin = fRangeEnd = -1;\n"
+" }\n"
+" else\n"
+" {\n"
+" nHoverToken = nHoverTokenNext;\n" // commit the hover state gathered during this draw
+" nHoverTokenIndex = nHoverTokenIndexNext;\n"
+" nHoverTokenLogIndex = nHoverTokenLogIndexNext;\n"
+" if(fRangeBeginHistory < fRangeEndHistory)\n" // a valid history range takes precedence over the hovered range
+" {\n"
+" fRangeBegin = fRangeBeginHistory;\n"
+" fRangeEnd = fRangeEndHistory;\n"
+" fRangeBeginGpu = fRangeBeginHistoryGpu;\n"
+" fRangeEndGpu = fRangeEndHistoryGpu;\n"
+" }\n"
+" else\n"
+" {\n"
+" fRangeBegin = fRangeBeginNext;\n"
+" fRangeEnd = fRangeEndNext;\n"
+" fRangeBeginGpu = fRangeBeginGpuNext;\n"
+" fRangeEndGpu = fRangeEndGpuNext;\n"
+" }\n"
+" }\n"
+"\n"
+" DrawTextBox(context, TimeToMsString(fDetailedOffset), 0, FontHeight, \'left\');\n" // view start time at the top left
+" context.textAlign = \'right\';\n"
+" DrawTextBox(context, TimeToMsString(fDetailedOffset + fDetailedRange), nWidth, FontHeight, \'right\');\n" // view end time at the top right
+" context.textAlign = \'left\';\n"
+"\n"
+" var Offset = 0;\n" // label row counter, advanced by each range that is actually drawn
+" Offset = DrawRange(context, fRangeBeginSelect, fRangeEndSelect, \'#59d0ff\', \'#00ddff\', Offset, \"Selection\");\n"
+" Offset = DrawRange(context, fRangeBegin, fRangeEnd, \'#009900\', \'#00ff00\', Offset, \"Cpu\");\n"
+" Offset = DrawRange(context, fRangeBeginGpu, fRangeEndGpu, \'#996600\', \'#775500\', Offset, \"Gpu\");\n"
+"\n"
+" nHoverCSCpu = nHoverCSCpuNext;\n" // committed regardless of the modifier/drag state checked above
+" ProfileLeave();\n"
+"}\n"
+"\n"
+"function ZoomTo(fZoomBegin, fZoomEnd)\n"
+"{\n"
+" if(fZoomBegin < fZoomEnd)\n"
+" {\n"
+" AnimationActive = true;\n"
+" var fDetailedOffsetOriginal = fDetailedOffset;\n"
+" var fDetailedRangeOriginal = fDetailedRange;\n"
+" var fDetailedOffsetTarget = fZoomBegin;\n"
+" var fDetailedRangeTarget = fZoomEnd - fZoomBegin;\n"
+" var TimestampStart = new Date();\n"
+" var count = 0;\n"
+" function ZoomFunc(Timestamp)\n"
+" {\n"
+" var fPrc = (new Date() - TimestampStart) / (ZOOM_TIME * 1000.0);\n"
+" if(fPrc > 1.0)\n"
+" {\n"
+" fPrc = 1.0;\n"
+" }\n"
+" fPrc = Math.pow(fPrc, 0.3);\n"
+" fDetailedOffset = fDetailedOffsetOriginal + (fDetailedOffsetTarget - fDetailedOffsetOriginal) * fPrc;\n"
+" fDetailedRange = fDetailedRangeOriginal + (fDetailedRangeTarget - fDetailedRangeOriginal) * fPrc;\n"
+" DrawDetailed(true);\n"
+" if(fPrc >= 1.0)\n"
+" {\n"
+" AnimationActive = false;\n"
+" fDetailedOffset = fDetailedOffsetTarget;\n"
+" fDetailedRange = fDetailedRangeTarget;\n"
+" }\n"
+" else\n"
+" {\n"
+" requestAnimationFrame(ZoomFunc);\n"
+" }\n"
+" }\n"
+" requestAnimationFrame(ZoomFunc);\n"
+" }\n"
+"}\n"
+"function RequestRedraw()\n"
+"{\n"
+" Invalidate = 0;\n"
+" Draw(1);\n"
+"}\n"
+"function Draw(RedrawMode)\n"
+"{\n"
+" if(ProfileMode)\n"
+" {\n"
+" ProfileModeClear();\n"
+" ProfileEnter(\"Total\");\n"
+" }\n"
+" if(RedrawMode == 1)\n"
+" {\n"
+" if(Mode == ModeTimers)\n"
+" {\n"
+" DrawBarView();\n"
+" DrawHoverToolTip();\n"
+" }\n"
+" else if(Mode == ModeDetailed)\n"
+" {\n"
+" DrawDetailed(false);\n"
+" DrawHoverToolTip();\n"
+" }\n"
+" }\n"
+" DrawDetailedFrameHistory();\n"
+"\n"
+" if(ProfileMode)\n"
+" {\n"
+" ProfileLeave();\n"
+" ProfileModeDraw(CanvasDetailedView);\n"
+" }\n"
+"}\n"
+"\n"
+"function AutoRedraw(Timestamp)\n"
+"{\n"
+" var RedrawMode = 0;\n"
+" if(Mode == ModeDetailed)\n"
+" {\n"
+" if(ProfileMode == 2 || ((nHoverCSCpu >= 0 || nHoverToken != -1) && !KeyCtrlDown && !KeyShiftDown && !MouseDragButton)||(Invalidate<2 && !KeyCtrlDown && !KeyShiftDown && !MouseDragButton))\n"
+" {\n"
+" RedrawMode = 1;\n"
+" }\n"
+" }\n"
+" else\n"
+" {\n"
+" if(Invalidate < 1)\n"
+" {\n"
+" RedrawMode = 1;\n"
+" }\n"
+" }\n"
+" if(RedrawMode)\n"
+" {\n"
+" Draw(RedrawMode);\n"
+" }\n"
+" else if(FlashFrameCounter>0)\n"
+" {\n"
+" Draw(0);\n"
+" }\n"
+" requestAnimationFrame(AutoRedraw);\n"
+"}\n"
+"\n"
+"\n"
+// ZoomGraph(nZoom): rescales the detailed view's visible time range.
+//  - nZoom > 0 multiplies fDetailedRange by base^nZoom (zoom out);
+//    otherwise it divides by base^-nZoom (zoom in), where base is 1.40 when
+//    nModDown is set and 1.03 otherwise.
+//  - When zooming in, the range is clamped to a minimum of 0.0001.
+//  - fDetailedOffset is then shifted by (oldRange - newRange) * mouseFraction
+//    so the time under the mouse cursor stays at the same screen position;
+//    a negative mouse fraction is treated as 0.
+"function ZoomGraph(nZoom)\n"
+"{\n"
+" var fOldRange = fDetailedRange;\n"
+" if(nZoom>0)\n"
+" {\n"
+" fDetailedRange *= Math.pow(nModDown ? 1.40 : 1.03, nZoom);\n"
+" }\n"
+" else\n"
+" {\n"
+" var fNewDetailedRange = fDetailedRange / Math.pow((nModDown ? 1.40 : 1.03), -nZoom);\n"
+" if(fNewDetailedRange < 0.0001) //100ns\n"
+" fNewDetailedRange = 0.0001;\n"
+" fDetailedRange = fNewDetailedRange;\n"
+" }\n"
+"\n"
+" var fDiff = fOldRange - fDetailedRange;\n"
+" var fMousePrc = DetailedViewMouseX / nWidth;\n"
+" if(fMousePrc < 0)\n"
+" {\n"
+" fMousePrc = 0;\n"
+" }\n"
+" fDetailedOffset += fDiff * fMousePrc;\n"
+"\n"
+"}\n"
+"\n"
+"function MeasureFont()\n"
+"{\n"
+" var context = CanvasDetailedView.getContext(\'2d\');\n"
+" context.font = Font;\n"
+" FontWidth = context.measureText(\'W\').width;\n"
+"\n"
+"}\n"
+"function ResizeCanvas() \n"
+"{\n"
+" nWidth = window.innerWidth;\n"
+" nHeight = window.innerHeight - CanvasHistory.height-2;\n"
+" DPR = window.devicePixelRatio;\n"
+"\n"
+" if(DPR)\n"
+" {\n"
+" CanvasDetailedView.style.width = nWidth + \'px\'; \n"
+" CanvasDetailedView.style.height = nHeight + \'px\';\n"
+" CanvasDetailedView.width = nWidth * DPR;\n"
+" CanvasDetailedView.height = nHeight * DPR;\n"
+" CanvasHistory.style.width = window.innerWidth + \'px\';\n"
+" CanvasHistory.style.height = 70 + \'px\';\n"
+" CanvasHistory.width = window.innerWidth * DPR;\n"
+" CanvasHistory.height = 70 * DPR;\n"
+" CanvasHistory.getContext(\'2d\').scale(DPR,DPR);\n"
+" CanvasDetailedView.getContext(\'2d\').scale(DPR,DPR);\n"
+"\n"
+" CanvasDetailedOffscreen.style.width = nWidth + \'px\';\n"
+" CanvasDetailedOffscreen.style.height = nHeight + \'px\';\n"
+" CanvasDetailedOffscreen.wid";
+
+const size_t g_MicroProfileHtml_end_1_size = sizeof(g_MicroProfileHtml_end_1);
+const char g_MicroProfileHtml_end_2[] =
+"th = nWidth * DPR;\n"
+" CanvasDetailedOffscreen.height = nHeight * DPR;\n"
+" CanvasDetailedOffscreen.getContext(\'2d\').scale(DPR,DPR);\n"
+"\n"
+" }\n"
+" else\n"
+" {\n"
+" DPR = 1;\n"
+" CanvasDetailedView.width = nWidth;\n"
+" CanvasDetailedView.height = nHeight;\n"
+" CanvasDetailedOffscreen.width = nWidth;\n"
+" CanvasDetailedOffscreen.height = nHeight;\n"
+" CanvasHistory.width = window.innerWidth;\n"
+" }\n"
+" RequestRedraw();\n"
+"}\n"
+"\n"
+"var MouseDragOff = 0;\n"
+"var MouseDragDown = 1;\n"
+"var MouseDragUp = 2;\n"
+"var MouseDragMove = 3;\n"
+"var MouseDragState = MouseDragOff;\n"
+"var MouseDragTarget = 0;\n"
+"var MouseDragButton = 0;\n"
+"var MouseDragKeyShift = 0;\n"
+"var MouseDragKeyCtrl = 0;\n"
+"var MouseDragX = 0;\n"
+"var MouseDragY = 0;\n"
+"var MouseDragXLast = 0;\n"
+"var MouseDragYLast = 0;\n"
+"var MouseDragXStart = 0;\n"
+"var MouseDragYStart = 0;\n"
+"\n"
+// clamp(number, min, max): returns number limited to the range [min, max].
+"function clamp(number, min, max)\n"
+"{\n"
+" return Math.max(min, Math.min(number, max));\n"
+"}\n"
+"\n"
+// MouseDragPan(): truthy when the current drag should pan the view, i.e. the
+// drag was started with button id 1 or with shift latched (MouseDragKeyShift).
+"function MouseDragPan()\n"
+"{\n"
+" return MouseDragButton == 1 || MouseDragKeyShift;\n"
+"}\n"
+// MouseDragSelectRange(): truthy while a drag is in the MouseDragMove state
+// and was started either with button id 3 or with both shift and ctrl latched.
+"function MouseDragSelectRange()\n"
+"{\n"
+" return MouseDragState == MouseDragMove && (MouseDragButton == 3 || (MouseDragKeyShift && MouseDragKeyCtrl));\n"
+"}\n"
+"function MouseHandleDrag()\n"
+"{\n"
+" if(MouseDragTarget == CanvasDetailedView)\n"
+" {\n"
+" if(Mode == ModeDetailed)\n"
+" {\n"
+" if(MouseDragSelectRange())\n"
+" {\n"
+" var xStart = MouseDragXStart;\n"
+" var xEnd = MouseDragX;\n"
+" if(xStart > xEnd)\n"
+" {\n"
+" var Temp = xStart;\n"
+" xStart = xEnd;\n"
+" xEnd = Temp;\n"
+" }\n"
+" if(xEnd - xStart > 1)\n"
+" {\n"
+" fRangeBegin = fDetailedOffset + fDetailedRange * (xStart / nWidth);\n"
+" fRangeEnd = fDetailedOffset + fDetailedRange * (xEnd / nWidth);\n"
+" fRangeBeginSelect = fDetailedOffset + fDetailedRange * (xStart / nWidth);\n"
+" fRangeEndSelect = fDetailedOffset + fDetailedRange * (xEnd / nWidth);\n"
+" }\n"
+" }\n"
+" else if(MouseDragPan())\n"
+" {\n"
+" var X = MouseDragX - MouseDragXLast;\n"
+" var Y = MouseDragY - MouseDragYLast;\n"
+" if(X)\n"
+" {\n"
+" fDetailedOffset += -X * fDetailedRange / nWidth;\n"
+" }\n"
+" nOffsetY -= Y;\n"
+" if(nOffsetY < 0)\n"
+" {\n"
+" nOffsetY = 0;\n"
+" }\n"
+" }\n"
+" else if(MouseDragKeyCtrl)\n"
+" {\n"
+" if(MouseDragY != MouseDragYLast)\n"
+" {\n"
+" ZoomGraph(MouseDragY - MouseDragYLast);\n"
+" }\n"
+" }\n"
+" }\n"
+" else if(Mode == ModeTimers)\n"
+" {\n"
+" if(MouseDragKeyShift || MouseDragButton == 1)\n"
+" {\n"
+" var X = MouseDragX - MouseDragXLast;\n"
+" var Y = MouseDragY - MouseDragYLast;\n"
+" nOffsetBarsY -= Y;\n"
+" nOffsetBarsX -= X;\n"
+" if(nOffsetBarsY < 0)\n"
+" {\n"
+" nOffsetBarsY = 0;\n"
+" }\n"
+" if(nOffsetBarsX < 0)\n"
+" {\n"
+" nOffsetBarsX = 0;\n"
+" }\n"
+" }\n"
+"\n"
+" }\n"
+"\n"
+" }\n"
+" else if(MouseDragTarget == CanvasHistory)\n"
+" {\n"
+" function HistoryFrameTime(x)\n"
+" {\n"
+" var NumFrames = Frames.length;\n"
+" var fBarWidth = nWidth / NumFrames;\n"
+" var Index = clamp(Math.floor(NumFrames * x / nWidth), 0, NumFrames-1);\n"
+" var Lerp = clamp((x/fBarWidth - Index) , 0, 1);\n"
+" var time = Frames[Index].framestart + (Frames[Index].frameend - Frames[Index].framestart) * Lerp;\n"
+" return time;\n"
+" }\n"
+" if(MouseDragSelectRange())\n"
+" {\n"
+" fRangeBegin = fRangeEnd = -1;\n"
+"\n"
+" var xStart = MouseDragXStart;\n"
+" var xEnd = MouseDragX;\n"
+" if(xStart > xEnd)\n"
+" {\n"
+" var Temp = xStart;\n"
+" xStart = xEnd;\n"
+" xEnd = Temp;\n"
+" }\n"
+" if(xEnd - xStart > 2)\n"
+" {\n"
+" var timestart = HistoryFrameTime(xStart);\n"
+" var timeend = HistoryFrameTime(xEnd);\n"
+" fDetailedOffset = timestart;\n"
+" fDetailedRange = timeend-timestart;\n"
+" }\n"
+" }\n"
+" else if(MouseDragPan())\n"
+" {\n"
+" var Time = HistoryFrameTime(MouseDragX);\n"
+" fDetailedOffset = Time - fDetailedRange / 2.0;\n"
+" }\n"
+" }\n"
+"}\n"
+"function MouseHandleDragEnd()\n"
+"{\n"
+" if(MouseDragTarget == CanvasDetailedView)\n"
+" {\n"
+"\n"
+" }\n"
+" else if(MouseDragTarget == CanvasHistory)\n"
+" {\n"
+" if(!MouseDragSelectRange() && !MouseDragPan())\n"
+" {\n"
+" ZoomTo(fRangeBegin, fRangeEnd);\n"
+" fRangeBegin = fRangeEnd = -1;\n"
+" }\n"
+"\n"
+"\n"
+" }\n"
+"\n"
+"}\n"
+"\n"
+"function MouseHandleDragClick()\n"
+"{\n"
+" if(MouseDragTarget == CanvasDetailedView)\n"
+" {\n"
+" ZoomTo(fRangeBegin, fRangeEnd);\n"
+" }\n"
+" else if(MouseDragTarget == CanvasHistory)\n"
+" {\n"
+" if(Mode == ModeDetailed)\n"
+" {\n"
+" ZoomTo(fRangeBegin, fRangeEnd);\n"
+" }\n"
+" }\n"
+"}\n"
+"\n"
+// MapMouseButton(Event): maps a DOM mouse event to the button ids used by the
+// drag state machine: 1 when button/which is 1, 3 when button/which is 3,
+// and 0 for anything else.
+// The event is read from the parameter rather than from the implicit global
+// `event` (window.event), which is not guaranteed to exist in every browser.
+"function MapMouseButton(Event)\n"
+"{\n"
+" if(Event.button == 1 || Event.which == 1)\n"
+" {\n"
+" return 1;\n"
+" }\n"
+" else if(Event.button == 3 || Event.which == 3)\n"
+" {\n"
+" return 3;\n"
+" }\n"
+" else\n"
+" {\n"
+" return 0;\n"
+" }\n"
+"}\n"
+"\n"
+"function MouseDragReset()\n"
+"{\n"
+" MouseDragState = MouseDragOff;\n"
+" MouseDragTarget = 0;\n"
+" MouseDragKeyShift = 0;\n"
+" MouseDragKeyCtrl = 0;\n"
+" MouseDragButton = 0;\n"
+"}\n"
+"function MouseDragKeyUp()\n"
+"{\n"
+" if((MouseDragKeyShift && !KeyShiftDown) || (MouseDragKeyCtrl && !KeyCtrlDown))\n"
+" {\n"
+" MouseHandleDragEnd();\n"
+" MouseDragReset();\n"
+" }\n"
+"}\n"
+"function MouseDrag(Source, Event)\n"
+"{\n"
+" if(Source == MouseDragOff || (MouseDragTarget && MouseDragTarget != Event.target))\n"
+" {\n"
+" MouseDragReset();\n"
+" return;\n"
+" }\n"
+"\n"
+" var LocalRect = Event.target.getBoundingClientRect();\n"
+" MouseDragX = Event.clientX - LocalRect.left;\n"
+" MouseDragY = Event.clientY - LocalRect.top;\n"
+" // console.log(\'cur drag state \' + MouseDragState + \' Source \' + Source);\n"
+" if(MouseDragState == MouseDragMove)\n"
+" {\n"
+" var dx = Math.abs(MouseDragX - MouseDragXStart);\n"
+" var dy = Math.abs(MouseDragY - MouseDragYStart);\n"
+" if((Source == MouseDragUp && MapMouseButton(Event) == MouseDragButton) ||\n"
+" (MouseDragKeyCtrl && !KeyCtrlDown) ||\n"
+" (MouseDragKeyShift && !KeyShiftDown))\n"
+" {\n"
+" MouseHandleDragEnd();\n"
+" MouseDragReset();\n"
+" return;\n"
+" }\n"
+" else\n"
+" {\n"
+" MouseHandleDrag();\n"
+" }\n"
+" }\n"
+" else if(MouseDragState == MouseDragOff)\n"
+" {\n"
+" if(Source == MouseDragDown || KeyShiftDown || KeyCtrlDown)\n"
+" {\n"
+" MouseDragTarget = Event.target;\n"
+" MouseDragButton = MapMouseButton(Event);\n"
+" MouseDragState = MouseDragDown;\n"
+" MouseDragXStart = MouseDragX;\n"
+" MouseDragYStart = MouseDragY;\n"
+" MouseDragKeyCtrl = 0;\n"
+" MouseDragKeyShift = 0;\n"
+"\n"
+" if(KeyShiftDown || KeyCtrlDown)\n"
+" {\n"
+" MouseDragKeyShift = KeyShiftDown;\n"
+" MouseDragKeyCtrl = KeyCtrlDown;\n"
+" MouseDragState = MouseDragMove;\n"
+" }\n"
+" }\n"
+" }\n"
+" else if(MouseDragState == MouseDragDown)\n"
+" {\n"
+" if(Source == MouseDragUp)\n"
+" {\n"
+" MouseHandleDragClick();\n"
+" MouseDragReset();\n"
+" }\n"
+" else if(Source == MouseDragMove)\n"
+" {\n"
+" var dx = Math.abs(MouseDragX - MouseDragXStart);\n"
+" var dy = Math.abs(MouseDragY - MouseDragYStart);\n"
+" if(dx+dy>1)\n"
+" {\n"
+" MouseDragState = MouseDragMove;\n"
+" }\n"
+" }\n"
+" }\n"
+" MouseDragXLast = MouseDragX;\n"
+" MouseDragYLast = MouseDragY;\n"
+"}\n"
+"\n"
+// MouseMove(evt): mousemove handler shared by CanvasDetailedView and
+// CanvasHistory. Feeds the drag state machine, resets the hover flags and the
+// history-view mouse position, stores the cursor position (relative to the
+// event target) for whichever canvas the event came from, and redraws.
+// The CanvasHistory branch compares the target with `==`; it previously used
+// `=`, an assignment whose value is always truthy.
+"function MouseMove(evt)\n"
+"{\n"
+" evt.preventDefault();\n"
+" MouseDrag(MouseDragMove, evt);\n"
+" MouseHistory = 0;\n"
+" MouseDetailed = 0;\n"
+" HistoryViewMouseX = HistoryViewMouseY = -1;\n"
+" var rect = evt.target.getBoundingClientRect();\n"
+" var x = evt.clientX - rect.left;\n"
+" var y = evt.clientY - rect.top;\n"
+" if(evt.target == CanvasDetailedView)\n"
+" {\n"
+" if(!MouseDragSelectRange())\n"
+" {\n"
+" fRangeBegin = fRangeEnd = -1;\n"
+" }\n"
+" DetailedViewMouseX = x;\n"
+" DetailedViewMouseY = y;\n"
+" }\n"
+" else if(evt.target == CanvasHistory)\n"
+" {\n"
+" HistoryViewMouseX = x;\n"
+" HistoryViewMouseY = y;\n"
+" }\n"
+" Draw(1);\n"
+"}\n"
+"\n"
+"function MouseButton(bPressed, evt)\n"
+"{\n"
+" evt.preventDefault();\n"
+" MouseDrag(bPressed ? MouseDragDown : MouseDragUp, evt);\n"
+"}\n"
+"\n"
+"function MouseOut(evt)\n"
+"{\n"
+" MouseDrag(MouseDragOff, evt);\n"
+" KeyCtrlDown = 0;\n"
+" KeyShiftDown = 0;\n"
+" MouseDragButton = 0;\n"
+" nHoverToken = -1;\n"
+" fRangeBegin = fRangeEnd = -1;\n"
+"}\n"
+"\n"
+"function MouseWheel(e)\n"
+"{\n"
+" var e = window.event || e;\n"
+" var delta = (e.wheelDelta || e.detail * (-120));\n"
+" ZoomGraph((-4 * delta / 120.0) | 0);\n"
+" Draw(1);\n"
+"}\n"
+"\n"
+"\n"
+"function KeyUp(evt)\n"
+"{\n"
+" if(evt.keyCode == 17)\n"
+" {\n"
+" KeyCtrlDown = 0;\n"
+" MouseDragKeyUp();\n"
+" }\n"
+" else if(evt.keyCode == 16)\n"
+" {\n"
+" KeyShiftDown = 0;\n"
+" MouseDragKeyUp();\n"
+" }\n"
+" if(evt.keyCode == 18)\n"
+" {\n"
+" FlipToolTip = 0;\n"
+" }\n"
+" if(evt.keyCode == 32)\n"
+" {\n"
+" if(fRangeBeginSelect < fRangeEndSelect)\n"
+" {\n"
+" ZoomTo(fRangeBeginSelect, fRangeEndSelect);\n"
+" fRangeBeginSelect = fRangeEndSelect = -1;\n"
+" MouseHandleDragEnd();\n"
+" }\n"
+" }\n"
+" if(evt.keyCode == 27)\n"
+" {\n"
+" fRangeBeginSelect = fRangeEndSelect = -1; \n"
+" }\n"
+" Invalidate = 0;\n"
+"}\n"
+"\n"
+"function KeyDown(evt)\n"
+"{\n"
+" if(evt.keyCode == 18)\n"
+" {\n"
+" FlipToolTip = 1;\n"
+" }\n"
+" if(evt.keyCode == 17)\n"
+" {\n"
+" KeyCtrlDown = 1;\n"
+" }\n"
+" else if(evt.keyCode == 16)\n"
+" {\n"
+" KeyShiftDown = 1;\n"
+" }\n"
+" Invalidate = 0;\n"
+"}\n"
+"\n"
+"function ReadCookie()\n"
+"{\n"
+" var result = document.cookie.match(/fisk=([^;]+)/);\n"
+" var NewMode = ModeDetailed;\n"
+" var ReferenceTimeString = \'33ms\';\n"
+" if(result && result.length > 0)\n"
+" {\n"
+" var Obj = JSON.parse(result[1]);\n"
+" if(Obj.Mode)\n"
+" {\n"
+" NewMode = Obj.Mode;\n"
+" }\n"
+" if(Obj.ReferenceTime)\n"
+" {\n"
+" ReferenceTimeString = Obj.ReferenceTime;\n"
+" }\n"
+" if(Obj.ThreadsAllActive || Obj.ThreadsAllActive == 0 || Obj.ThreadsAllActive == false)\n"
+" {\n"
+" ThreadsAllActive = Obj.ThreadsAllActive;\n"
+" }\n"
+" else\n"
+" {\n"
+" ThreadsAllActive = 1;\n"
+" }\n"
+" if(Obj.ThreadsActive)\n"
+" {\n"
+" ThreadsActive = Obj.ThreadsActive;\n"
+" }\n"
+" if(Obj.GroupsAllActive || Obj.GroupsAllActive == 0 || Obj.GroupsAllActive)\n"
+" {\n"
+" GroupsAllActive = Obj.GroupsAllActive;\n"
+" }\n"
+" else\n"
+" {\n"
+" GroupsAllActive = 1;\n"
+" }\n"
+" if(Obj.GroupsActive)\n"
+" {\n"
+" GroupsActive = Obj.GroupsActive;\n"
+" }\n"
+" if(Obj.nContextSwitchEnabled)\n"
+" {\n"
+" nContextSwitchEnabled = Obj.nContextSwitchEnabled; \n"
+" }\n"
+" else\n"
+" {\n"
+" nContextSwitchEnabled = 1;\n"
+" }\n"
+" if(Obj.GroupColors)\n"
+" {\n"
+" GroupColors = Obj.GroupColors;\n"
+" }\n"
+" else\n"
+" {\n"
+" GroupColors = 0;\n"
+" }\n"
+" if(Obj.nHideHelp)\n"
+" {\n"
+" nHideHelp = 1;\n"
+" }\n"
+" TimersGroups = Obj.TimersGroups?Obj.TimersGroups:0;\n"
+" TimersMeta = Obj.TimersMeta?0:1;\n"
+" }\n"
+" SetContextSwitch(nContextSwitchEnabled);\n"
+" SetMode(NewMode, TimersGroups);\n"
+" SetReferenceTime(ReferenceTimeString);\n"
+" UpdateOptionsMenu();\n"
+" UpdateGroupColors();\n"
+"}\n"
+"function WriteCookie()\n"
+"{\n"
+" var Obj = new Object();\n"
+" Obj.Mode = Mode;\n"
+" Obj.ReferenceTime = ReferenceTime + \'ms\';\n"
+" Obj.ThreadsActive = ThreadsActive;\n"
+" Obj.ThreadsAllActive = ThreadsAllActive;\n"
+" Obj.GroupsActive = GroupsActive;\n"
+" Obj.GroupsAllActive = GroupsAllActive;\n"
+" Obj.nContextSwitchEnabled = nContextSwitchEnabled;\n"
+" Obj.TimersGroups = TimersGroups?TimersGroups:0;\n"
+" Obj.TimersMeta = TimersMeta?0:1;\n"
+" Obj.GroupColors = GroupColors;\n"
+" if(nHideHelp)\n"
+" {\n"
+" Obj.nHideHelp = 1;\n"
+" }\n"
+" var date = new Date();\n"
+" date.setFullYear(2099);\n"
+" var cookie = \'fisk=\' + JSON.stringify(Obj) + \';expires=\' + date;\n"
+" document.cookie = cookie;\n"
+"}\n"
+"\n"
+"var mousewheelevt = (/Firefox/i.test(navigator.userAgent)) ? \"DOMMouseScroll\" : \"mousewheel\" //FF doesn\'t recognize mousewheel as of FF3.x\n"
+"\n"
+"CanvasDetailedView.addEventListener(\'mousemove\', MouseMove, false);\n"
+"CanvasDetailedView.addEventListener(\'mousedown\', function(evt) { MouseButton(true, evt); });\n"
+"CanvasDetailedView.addEventListener(\'mouseup\', function(evt) { MouseButton(false, evt); } );\n"
+"CanvasDetailedView.addEventListener(\'mouseout\', MouseOut);\n"
+"CanvasDetailedView.addEventListener(\"contextmenu\", function (e) { e.preventDefault(); }, false);\n"
+"CanvasDetailedView.addEventListener(mousewheelevt, MouseWheel, false);\n"
+"CanvasHistory.addEventListener(\'mousemove\', MouseMove);\n"
+"CanvasHistory.addEventListener(\'mousedown\', function(evt) { MouseButton(true, evt); });\n"
+"CanvasHistory.addEventListener(\'mouseup\', function(evt) { MouseButton(false, evt); } );\n"
+"CanvasHistory.addEventListener(\'mouseout\', MouseOut);\n"
+"CanvasHistory.addEventListener(\"contextmenu\", function (e) { e.preventDefault(); }, false);\n"
+"CanvasHistory.addEventListener(mousewheelevt, MouseWheel, false);\n"
+"window.addEventListener(\'keydown\', KeyDown);\n"
+"window.addEventListener(\'keyup\', KeyUp);\n"
+"window.addEventListener(\'resize\', ResizeCanvas, false);\n"
+"\n"
+"function CalcAverage()\n"
+"{\n"
+" var Sum = 0;\n"
+" var Count = 0;\n"
+" for(nLog = 0; nLog < nNumLogs; nLog++)\n"
+" {\n"
+" StackPos = 0;\n"
+" for(var i = 0; i < Frames.length; i++)\n"
+" {\n"
+" var Frame_ = Frames[i]; \n"
+" var tt = Frame_.tt[nLog];\n"
+" var ts = Frame_.ts[nLog];\n"
+"\n"
+" var count = tt.length;\n"
+" for(var j = 0; j < count; j++)\n"
+" {\n"
+" var type = tt[j];\n"
+" var time = ts[j];\n"
+" if(type == 1)\n"
+" {\n"
+" Stack[StackPos] = time;//store the frame which it comes from\n"
+" StackPos++;\n"
+" }\n"
+" else if(type == 0)\n"
+" {\n"
+" if(StackPos>0)\n"
+" {\n"
+"\n"
+" StackPos--;\n"
+" var localtime = time - Stack[StackPos];\n"
+" Count++;\n"
+" Sum += localtime;\n"
+" }\n"
+" }\n"
+" }\n"
+" }\n"
+" }\n"
+" return Sum / Count;\n"
+"\n"
+"}\n"
+"\n"
+"function MakeLod(index, MinDelta, TimeArray, TypeArray, IndexArray, LogStart)\n"
+"{\n"
+" if(LodData[index])\n"
+" {\n"
+" console.log(\"error!!\");\n"
+" }\n"
+" // debugger;\n"
+" var o = new Object();\n"
+" o.MinDelta = MinDelta;\n"
+" o.TimeArray = TimeArray;\n"
+" o.TypeArray = TypeArray;\n"
+" o.IndexArray = IndexArray;\n"
+" o.LogStart = LogStart;\n"
+" LodData[index] = o;\n"
+"}\n"
+"\n"
+"function PreprocessBuildSplitArray()\n"
+"{\n"
+" var nNumLogs = Frames[0].ts.length;\n"
+"\n"
+" ProfileEnter(\"PreprocessBuildSplitArray\");\n"
+" var SplitArrays = new Array(nNumLogs);\n"
+"\n"
+" for(nLog = 0; nLog < nNumLogs; ++nLog)\n"
+" {\n"
+" console.log(\"source log \" + nLog + \" size \" + LodData[0].TypeArray[nLog].length);\n"
+" }\n"
+"\n"
+"\n"
+" for(nLog = 0; nLog < nNumLogs; nLog++)\n"
+" {\n"
+" var MaxDepth = 1;\n"
+" var StackPos = 0;\n"
+" var Stack = Array(20);\n"
+" var TypeArray = LodData[0].TypeArray[nLog];\n"
+" var TimeArray = LodData[0].TimeArray[nLog];\n"
+" var DeltaTimes = new Array(TypeArray.length);\n"
+"\n"
+" for(var j = 0; j < TypeArray.length; ++j)\n"
+" {\n"
+" var type = TypeArray[j];\n"
+" var time = TimeArray[j];\n"
+" if(type == 1)\n"
+" {\n"
+" //push\n"
+" Stack[StackPos] = time;\n"
+" StackPos++;\n"
+" }\n"
+" else if(type == 0)\n"
+" {\n"
+" if(StackPos>0)\n"
+" {\n"
+" StackPos--;\n"
+" DeltaTimes[j] = time - Stack[StackPos];\n"
+" }\n"
+" else\n"
+" {\n"
+" DeltaTimes[j] = 0;\n"
+" }\n"
+" }\n"
+" }\n"
+" DeltaTimes.sort(function(a,b){return b-a;});\n"
+" var SplitArray = Array(NumLodSplits);\n"
+" var SplitIndex = DeltaTimes.length;\n"
+"\n"
+" var j = 0;\n"
+" for(j = 0; j < NumLodSplits; ++j)\n"
+" {\n"
+" SplitIndex = Math.floor(SplitIndex / 2);\n"
+" while(SplitIndex > 0 && !DeltaTimes[SplitIndex])\n"
+" {\n"
+" SplitIndex--;\n"
+" }\n"
+" if(SplitIndex < SplitMin)\n"
+" {\n"
+" break;\n"
+" }\n"
+" //search.. if 0\n"
+" var SplitTime = DeltaTimes[SplitIndex];\n"
+" if(SplitTime>=0)\n"
+" {\n"
+" SplitArray[j] = SplitTime;\n"
+" }\n"
+" else\n"
+" {\n"
+" SplitArray[j] = SPLIT_LIMIT;\n"
+" }\n"
+" if(j>0)\n"
+" {\n"
+" console.assert(SplitArray[j-1] <= SplitArray[j], \"must be less\");\n"
+" }\n"
+"\n"
+" }\n"
+" for(; j < NumLodSplits; ++j)\n"
+" {\n"
+" SplitArray[j] = SPLIT_LIMIT;\n"
+" // console.log(\"split skipping \" + j + \" \" + SPLIT_LIMIT);\n"
+" }\n"
+"\n"
+"\n"
+" SplitArrays[nLog] = SplitArray;\n"
+" }\n"
+" ProfileLeave();\n"
+" return SplitArrays;\n"
+"}\n"
+"\n"
+// PreprocessBuildDurationArray(): for every log, builds an array parallel to
+// LodData[0].TypeArray[nLog] / TimeArray[nLog] and returns the per-log arrays.
+//  - A type-1 entry pushes its time and index on a stack.
+//  - A type-0 entry pops the stack and stores (its time - pushed time) at both
+//    the popped entry's index and its own index; with an empty stack it
+//    stores 0 at its own index.
+//  - Entries still on the stack when the log ends get 0 at their own index
+//    (StackIndex[j]). The loop previously wrote to DurationArray[j], which
+//    overwrote the first StackPos entries of the log instead.
+"function PreprocessBuildDurationArray()\n"
+"{\n"
+" var nNumLogs = Frames[0].ts.length;\n"
+" ProfileEnter(\"PreprocessBuildDurationArray\");\n"
+" var DurationArrays = new Array(nNumLogs);\n"
+" for(nLog = 0; nLog < nNumLogs; ++nLog)\n"
+" {\n"
+" var MaxDepth = 1;\n"
+" var StackPos = 0;\n"
+" var Stack = Array(20);\n"
+" var StackIndex = Array(20);\n"
+" var TypeArray = LodData[0].TypeArray[nLog];\n"
+" var TimeArray = LodData[0].TimeArray[nLog];\n"
+" var DurationArray = Array(LodData[0].TypeArray[nLog].length);\n"
+" for(var j = 0; j < TypeArray.length; ++j)\n"
+" {\n"
+" var type = TypeArray[j];\n"
+" var time = TimeArray[j];\n"
+" if(type == 1)\n"
+" {\n"
+" //push\n"
+" Stack[StackPos] = time;\n"
+" StackIndex[StackPos] = j;\n"
+" StackPos++;\n"
+" }\n"
+" else if(type == 0)\n"
+" {\n"
+" if(StackPos>0)\n"
+" {\n"
+" StackPos--;\n"
+" var Duration = time - Stack[StackPos];\n"
+" DurationArray[StackIndex[StackPos]] = Duration;\n"
+" DurationArray[j] = Duration;\n"
+" }\n"
+" else\n"
+" {\n"
+" DurationArray[j] = 0;\n"
+" }\n"
+" }\n"
+" }\n"
+" for(var j = 0; j < StackPos; ++j)\n"
+" {\n"
+" DurationArray[StackIndex[j]] = 0;\n"
+" }\n"
+" DurationArrays[nLog] = DurationArray;\n"
+" }\n"
+" ProfileLeave();\n"
+" return DurationArrays;\n"
+"\n"
+"}\n"
+"function PreprocessLods()\n"
+"{\n"
+" ProfileEnter(\"PreprocessLods\");\n"
+" var nNumLogs = Frames[0].ts.length;\n"
+" var SplitArrays = PreprocessBuildSplitArray();\n"
+" var DurationArrays = PreprocessBuildDurationArray();\n"
+" var Source = LodData[0];\n"
+" var SourceLogStart = Source.LogStart;\n"
+" var NumFrames = SourceLogStart.length;\n"
+"\n"
+" for(var i = 0; i < NumLodSplits-1; ++i)\n"
+" {\n"
+" var DestLogStart = Array(SourceLogStart.length);\n"
+" for(var j = 0; j < DestLogStart.length; ++j)\n"
+" {\n"
+" DestLogStart[j] = Array(nNumLogs);\n"
+" }\n"
+" var MinDelta = Array(nNumLogs);\n"
+" var TimeArray = Array(nNumLogs);\n"
+" var IndexArray = Array(nNumLogs);\n"
+" var TypeArray = Array(nNumLogs);\n"
+"\n"
+"\n"
+"\n"
+" for(nLog = 0; nLog < nNumLogs; ++nLog)\n"
+" {\n"
+" var SourceTypeArray = Source.TypeArray[nLog];\n"
+" var SourceTimeArray = Source.TimeArray[nLog];\n"
+" var SourceIndexArray = Source.IndexArray[nLog];\n"
+" var Duration = DurationArrays[nLog];\n"
+" console.assert(Duration.length == SourceTypeArray.length, \"must be equal!\");\n"
+" var SplitTime = SplitArrays[nLog][i];\n"
+"\n"
+" MinDelta[nLog] = SplitTime;\n"
+" if(SplitTime < SPLIT_LIMIT)\n"
+" {\n"
+" var SourceCount = SourceTypeArray.length;\n"
+" var DestTypeArray = Array();\n"
+" var DestTimeArray = Array();\n"
+" var DestIndexArray = Array();\n"
+" var RemapArray = Array(SourceCount);\n"
+" var DiscardLast = 0;\n"
+"\n"
+" for(var j = 0; j < SourceCount; ++j)\n"
+" {\n"
+" RemapArray[j] = DestTypeArray.length;\n"
+" if(Duration[j] >= SplitTime || (SourceTypeArray[j] == 3 && 0 == DiscardLast))\n"
+" {\n"
+" DiscardLast = 0;\n"
+" DestTypeArray.push(SourceTypeArray[j]);\n"
+" DestTimeArray.push(SourceTimeArray[j]);\n"
+" DestIndexArray.push(SourceIndexArray[j]);\n"
+" }\n"
+" else\n"
+" {\n"
+" DiscardLast = 1;\n"
+" }\n"
+" }\n"
+" TimeArray[nLog] = DestTimeArray;\n"
+" IndexArray[nLog] = DestIndexArray;\n"
+" TypeArray[nLog] = DestTypeArray;\n"
+" for(var j = 0; j < NumFrames; ++j)\n"
+" {\n"
+" var OldStart = SourceLogStart[j][nLog];\n"
+" var NewStart = RemapArray[OldStart];\n"
+" var FrameArray = DestLogStart[j];\n"
+" FrameArray[nLog] = NewStart;\n"
+" }\n"
+" }\n"
+" else\n"
+" {\n"
+"\n"
+" for(var j = 0; j < NumFrames; ++j)\n"
+" {\n"
+" var FrameArray = DestLogStart[j];\n"
+" \n"
+" FrameArray[nLog] = 0;\n"
+" }\n"
+"\n"
+" }\n"
+"\n"
+" }\n"
+" MakeLod(i+1, MinDelta, TimeArray, TypeArray, IndexArray, DestLogStart);\n"
+" }\n"
+" ProfileLeave();\n"
+"}\n"
+// PreprocessGlobalArray(): flattens the per-frame tt/ts/ti arrays of every log
+// into one type/time/index array per log (g_TypeArray, g_TimeArray,
+// g_IndexArray), records each frame's start/end offset into those arrays in
+// Frames[i].LogStart / Frames[i].LogEnd, and registers the result as LOD 0
+// through MakeLod.
+// Entries whose timestamp is more than 33 past the frame's end are dropped;
+// a type-3 entry instead follows the keep/drop decision made for the entry
+// before it (the same convention PreprocessLods uses). The entry type is read
+// at the entry index xx; it was previously read at the frame index i.
+"function PreprocessGlobalArray()\n"
+"{\n"
+" ProfileEnter(\"PreprocessGlobalArray\");\n"
+" var nNumLogs = Frames[0].ts.length;\n"
+" var CaptureStart = Frames[0].framestart;\n"
+" var CaptureEnd = Frames[Frames.length-1].frameend;\n"
+" g_TypeArray = new Array(nNumLogs);\n"
+" g_TimeArray = new Array(nNumLogs);\n"
+" g_IndexArray = new Array(nNumLogs);\n"
+" var StackPos = 0;\n"
+" var Stack = Array(20);\n"
+" var LogStartArray = new Array(Frames.length);\n"
+" for(var i = 0; i < Frames.length; i++)\n"
+" {\n"
+" Frames[i].LogStart = new Array(nNumLogs); \n"
+" LogStartArray[i] = Frames[i].LogStart;\n"
+"\n"
+" Frames[i].LogEnd = new Array(nNumLogs);\n"
+" }\n"
+" var MinDelta = Array(nNumLogs);\n"
+" for(nLog = 0; nLog < nNumLogs; nLog++)\n"
+" {\n"
+" MinDelta[nLog] = 0;\n"
+" var Discard = 0;\n"
+" var TypeArray = new Array();\n"
+" var TimeArray = new Array();\n"
+" var IndexArray = new Array();\n"
+" for(var i = 0; i < Frames.length; i++)\n"
+" {\n"
+" var Frame_ = Frames[i]; \n"
+" Frame_.LogStart[nLog] = TimeArray.length;\n"
+" var FrameDiscard = Frame_.frameend + 33;//if timestamps are more than 33ms after current frame, we assume buffer has wrapped.\n"
+" var tt = Frame_.tt[nLog];\n"
+" var ts = Frame_.ts[nLog];\n"
+" var ti = Frame_.ti[nLog];\n"
+" var len = tt.length;\n"
+" var DiscardLast = 0;\n"
+" for(var xx = 0; xx < len; ++xx)\n"
+" {\n"
+" var Skip = (tt[xx] == 3) ? DiscardLast : ts[xx] > FrameDiscard;\n"
+" if(Skip)\n"
+" {\n"
+" Discard++;\n"
+" DiscardLast = 1;\n"
+" }\n"
+" else\n"
+" {\n"
+" DiscardLast = 0;\n"
+" TypeArray.push(tt[xx]);\n"
+" TimeArray.push(ts[xx]);\n"
+" IndexArray.push(ti[xx]);\n"
+" }\n"
+" }\n"
+" Frame_.LogEnd[nLog] = TimeArray.length;\n"
+" }\n"
+" g_TypeArray[nLog] = TypeArray;\n"
+" g_TimeArray[nLog] = TimeArray;\n"
+" g_IndexArray[nLog] = IndexArray;\n"
+" if(Discard)\n"
+" {\n"
+" console.log(\'discarded \' + Discard + \' markers from \' + ThreadNames[nLog]);\n"
+" }\n"
+" }\n"
+" MakeLod(0, MinDelta, g_TimeArray, g_TypeArray, g_IndexArray, LogStartArray);\n"
+" ProfileLeave();\n"
+"}\n"
+"\n"
+"function PreprocessFindFirstFrames()\n"
+"{\n"
+" ProfileEnter(\"PreprocesFindFirstFrames\");\n"
+" //create arrays that show how far back we need to start search in order to get all markers.\n"
+" var nNumLogs = Frames[0].ts.length;\n"
+" for(var i = 0; i < Frames.length; i++)\n"
+" {\n"
+" Frames[i].FirstFrameIndex = new Array(nNumLogs);\n"
+" }\n"
+"\n"
+" var StackPos = 0;\n"
+" var Stack = Array(20);\n"
+" g_MaxStack = Array(nNumLogs);\n"
+" \n"
+" for(nLog = 0; nLog < nNumLogs; nLog++)\n"
+" {\n"
+" var MaxStack = 0;\n"
+" StackPos = 0;\n"
+" for(var i = 0; i < Frames.length; i++)\n"
+" {\n"
+" var Frame_ = Frames[i]; \n"
+" var tt = Frame_.tt[nLog];\n"
+" var count = tt.length;\n"
+"\n"
+" var FirstFrame = i;\n"
+" if(StackPos>0)\n"
+" {\n"
+" FirstFrame = Stack[0];\n"
+" }\n"
+" Frames[i].FirstFrameIndex[nLog] = FirstFrame;\n"
+"\n"
+" for(var j = 0; j < count; j++)\n"
+" {\n"
+" var type = tt[j];\n"
+" if(type == 1)\n"
+" {\n"
+" Stack[StackPos] = i;//store the frame which it comes from\n"
+" StackPos++;\n"
+" if(StackPos > MaxStack)\n"
+" {\n"
+" MaxStack = StackPos;\n"
+" }\n"
+" }\n"
+" else if(type == 0)\n"
+" {\n"
+" if(StackPos>0)\n"
+" {\n"
+" StackPos--;\n"
+" }\n"
+" }\n"
+" }\n"
+" }\n"
+" g_MaxStack[nLog] = MaxStack;\n"
+" }\n"
+" ProfileLeave();\n"
+"}\n"
+"function PreprocessMeta()\n"
+"{\n"
+" MetaLengths = Array(MetaNames.length);\n"
+" MetaLengthsAvg = Array(MetaNames.length);\n"
+" MetaLengthsMax = Array(MetaNames.length);\n"
+" for(var i = 0; i < MetaNames.length; ++i)\n"
+" {\n"
+" MetaLengths[i] = MetaNames[i].length+1;\n"
+" MetaLengthsAvg[i] = MetaNames[i].length+5;\n"
+" MetaLengthsMax[i] = MetaNames[i].length+5;\n"
+" if(MetaLengths[i]<12)\n"
+" MetaLengths[i] = 12;\n"
+" if(MetaLengthsAvg[i]<12)\n"
+" MetaLengthsAvg[i] = 12;\n"
+" if(MetaLengthsMax[i]<12)\n"
+" MetaLengthsMax[i] = 12;\n"
+" }\n"
+" for(var i = 0; i < TimerInfo.length; ++i)\n"
+" {\n"
+" var Timer = TimerInfo[i];\n"
+" for(var j = 0; j < MetaNames.length; ++j)\n"
+" {\n"
+" var Len = FormatMeta(Timer.meta[j],0).length + 2;\n"
+" var LenAvg = FormatMeta(Timer.meta[j],2).length + 2;\n"
+" var LenMax = FormatMeta(Timer.meta[j],0).length + 2;\n"
+" if(Len > MetaLengths[j])\n"
+" {\n"
+" MetaLengths[j] = Len;\n"
+" }\n"
+" if(LenAvg > MetaLengthsAvg[j])\n"
+" {\n"
+" MetaLengthsAvg[j] = LenAvg;\n"
+" }\n"
+" if(LenMax > MetaLengthsMax[j])\n"
+" {\n"
+" MetaLengthsMax[j] = LenMax;\n"
+" }\n"
+" }\n"
+" }\n"
+"}\n"
+"\n"
+"function Preprocess()\n"
+"{\n"
+" var ProfileModeOld = ProfileMode;\n"
+" ProfileMode = 1;\n"
+" ProfileModeClear();\n"
+" ProfileEnter(\"Preprocess\");\n"
+" PreprocessCalculateAllTimers();\n"
+" PreprocessFindFirstFrames();\n"
+" PreprocessGlobalArray();\n"
+" PreprocessLods();\n"
+" PreprocessMeta();\n"
+" PreprocessContextSwitchCache();\n"
+" ProfileLeave();\n"
+" ProfileModeDump();\n"
+" ProfileMode = ProfileModeOld;\n"
+" Initialized = 1;\n"
+"}\n"
+"\n"
+"InitGroups();\n"
+"ReadCookie();\n"
+"MeasureFont()\n"
+"InitThreadMenu();\n"
+"InitGroupMenu();\n"
+"InitFrameInfo();\n"
+"UpdateThreadMenu();\n"
+"ResizeCanvas();\n"
+"Preprocess();\n"
+"OnPageReady();\n"
+"Draw(1);\n"
+"AutoRedraw();\n"
+"\n"
+"</script>\n"
+"</body>\n"
+"</html> ";
+
+// Size in bytes of the last chunk; sizeof on a char array counts the
+// terminating NUL of the string literal as well.
+const size_t g_MicroProfileHtml_end_2_size = sizeof(g_MicroProfileHtml_end_2);
+// Table of pointers to the first character of each "end" chunk, in order
+// (_0, _1, _2).
+const char* g_MicroProfileHtml_end[] = {
+&g_MicroProfileHtml_end_0[0],
+&g_MicroProfileHtml_end_1[0],
+&g_MicroProfileHtml_end_2[0],
+};
+// Byte size of each chunk, parallel to g_MicroProfileHtml_end; each value
+// includes that chunk's terminating NUL.
+size_t g_MicroProfileHtml_end_sizes[] = {
+sizeof(g_MicroProfileHtml_end_0),
+sizeof(g_MicroProfileHtml_end_1),
+sizeof(g_MicroProfileHtml_end_2),
+};
+// Number of entries in the two tables above.
+size_t g_MicroProfileHtml_end_count = 3;
+#endif //MICROPROFILE_EMBED_HTML
+
+///end file generated from microprofile.html
diff --git a/externals/microprofile/microprofileui.h b/externals/microprofile/microprofileui.h
new file mode 100644
index 000000000..eac1119a4
--- /dev/null
+++ b/externals/microprofile/microprofileui.h
@@ -0,0 +1,3348 @@
+#pragma once
+// This is free and unencumbered software released into the public domain.
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+// In jurisdictions that recognize copyright laws, the author or authors
+// of this software dedicate any and all copyright interest in the
+// software to the public domain. We make this dedication for the benefit
+// of the public at large and to the detriment of our heirs and
+// successors. We intend this dedication to be an overt act of
+// relinquishment in perpetuity of all present and future rights to this
+// software under copyright law.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+// For more information, please refer to <http://unlicense.org/>
+//
+// ***********************************************************************
+//
+//
+//
+
+
+#ifndef MICROPROFILE_ENABLED
+#error "microprofile.h must be included before including microprofileui.h"
+#endif
+
+#ifndef MICROPROFILEUI_ENABLED
+#define MICROPROFILEUI_ENABLED MICROPROFILE_ENABLED
+#endif
+
+#ifndef MICROPROFILEUI_API
+#define MICROPROFILEUI_API
+#endif
+
+
+// When the UI is disabled, the UI entry points are replaced by macros that expand
+// to an empty statement (or to the constant 0 for MicroProfileIsDrawing).
+#if 0 == MICROPROFILEUI_ENABLED
+#define MicroProfileMouseButton(foo, bar) do{}while(0)
+#define MicroProfileMousePosition(foo, bar, z) do{}while(0)
+#define MicroProfileModKey(key) do{}while(0)
+#define MicroProfileDraw(foo, bar) do{}while(0)
+#define MicroProfileIsDrawing() 0
+#define MicroProfileToggleDisplayMode() do{}while(0)
+#define MicroProfileSetDisplayMode(f) do{}while(0)
+#else
+
+// Compile-time configuration of the UI. Every value below is only a default:
+// defining the macro before this header is included overrides it.
+#ifndef MICROPROFILE_DRAWCURSOR
+#define MICROPROFILE_DRAWCURSOR 0
+#endif
+
+#ifndef MICROPROFILE_DETAILED_BAR_NAMES
+#define MICROPROFILE_DETAILED_BAR_NAMES 1
+#endif
+
+// Text metrics. The layout code advances (MICROPROFILE_TEXT_WIDTH+1) per character
+// and (MICROPROFILE_TEXT_HEIGHT+1) per text row.
+#ifndef MICROPROFILE_TEXT_WIDTH
+#define MICROPROFILE_TEXT_WIDTH 5
+#endif
+
+#ifndef MICROPROFILE_TEXT_HEIGHT
+#define MICROPROFILE_TEXT_HEIGHT 8
+#endif
+
+#ifndef MICROPROFILE_DETAILED_BAR_HEIGHT
+#define MICROPROFILE_DETAILED_BAR_HEIGHT 12
+#endif
+
+// Height of one context switch bar in the detailed view.
+#ifndef MICROPROFILE_DETAILED_CONTEXT_SWITCH_HEIGHT
+#define MICROPROFILE_DETAILED_CONTEXT_SWITCH_HEIGHT 7
+#endif
+
+#ifndef MICROPROFILE_GRAPH_WIDTH
+#define MICROPROFILE_GRAPH_WIDTH 256
+#endif
+
+#ifndef MICROPROFILE_GRAPH_HEIGHT
+#define MICROPROFILE_GRAPH_HEIGHT 256
+#endif
+
+// Border added on each side of floating windows / text boxes.
+#ifndef MICROPROFILE_BORDER_SIZE
+#define MICROPROFILE_BORDER_SIZE 1
+#endif
+
+// Names of the input actions as shown in help text.
+#ifndef MICROPROFILE_HELP_LEFT
+#define MICROPROFILE_HELP_LEFT "Left-Click"
+#endif
+
+#ifndef MICROPROFILE_HELP_ALT
+#define MICROPROFILE_HELP_ALT "Alt-Click"
+#endif
+
+#ifndef MICROPROFILE_HELP_MOD
+#define MICROPROFILE_HELP_MOD "Mod"
+#endif
+
+#ifndef MICROPROFILE_BAR_WIDTH
+#define MICROPROFILE_BAR_WIDTH 100
+#endif
+
+// Capacity of MicroProfileUI::Custom.
+#ifndef MICROPROFILE_CUSTOM_MAX
+#define MICROPROFILE_CUSTOM_MAX 8
+#endif
+
+// Capacity of MicroProfileUI::CustomTimer.
+#ifndef MICROPROFILE_CUSTOM_MAX_TIMERS
+#define MICROPROFILE_CUSTOM_MAX_TIMERS 64
+#endif
+
+#ifndef MICROPROFILE_CUSTOM_PADDING
+#define MICROPROFILE_CUSTOM_PADDING 12
+#endif
+
+
+// Fixed (non-overridable) layout and colour constants for the frame history view.
+// Colours are 32 bit values; the trailing comments give the decimal value of the
+// three low bytes.
+#define MICROPROFILE_FRAME_HISTORY_HEIGHT 50
+#define MICROPROFILE_FRAME_HISTORY_WIDTH 7
+#define MICROPROFILE_FRAME_HISTORY_COLOR_CPU 0xffff7f27 //255 127 39
+#define MICROPROFILE_FRAME_HISTORY_COLOR_GPU 0xff37a0ee //55 160 238
+#define MICROPROFILE_FRAME_HISTORY_COLOR_HIGHTLIGHT 0x7733bb44
+#define MICROPROFILE_FRAME_COLOR_HIGHTLIGHT 0x20009900
+#define MICROPROFILE_FRAME_COLOR_HIGHTLIGHT_GPU 0x20996600
+// Frame history length minus (GPU frame delay + 1).
+#define MICROPROFILE_NUM_FRAMES (MICROPROFILE_MAX_FRAME_HISTORY - (MICROPROFILE_GPU_FRAME_DELAY+1))
+
+// Capacities of MicroProfileStringArray (string slots / character storage) and
+// the number of locked tooltips kept in MicroProfileUI.
+#define MICROPROFILE_TOOLTIP_MAX_STRINGS (32 + MICROPROFILE_MAX_GROUPS*2)
+#define MICROPROFILE_TOOLTIP_STRING_BUFFER_SIZE (4*1024)
+#define MICROPROFILE_TOOLTIP_MAX_LOCKED 3
+
+
+// Bit flags for custom groups.
+// NOTE(review): presumably the values accepted by the nFlags argument of
+// MicroProfileCustomGroup - confirm against its implementation.
+// The *_SOURCE_AVG names are 0, i.e. they denote the absence of the matching
+// *_SOURCE_MAX bit rather than a bit of their own.
+enum
+{
+ MICROPROFILE_CUSTOM_BARS = 0x1,
+ MICROPROFILE_CUSTOM_BAR_SOURCE_MAX = 0x2,
+ MICROPROFILE_CUSTOM_BAR_SOURCE_AVG = 0,
+ MICROPROFILE_CUSTOM_STACK = 0x4,
+ MICROPROFILE_CUSTOM_STACK_SOURCE_MAX = 0x8,
+ MICROPROFILE_CUSTOM_STACK_SOURCE_AVG = 0,
+};
+
+
+// Public UI entry points.
+MICROPROFILEUI_API void MicroProfileDraw(uint32_t nWidth, uint32_t nHeight); //! call if drawing microprofilers
+MICROPROFILEUI_API bool MicroProfileIsDrawing();
+MICROPROFILEUI_API void MicroProfileToggleGraph(MicroProfileToken nToken);
+MICROPROFILEUI_API bool MicroProfileDrawGraph(uint32_t nScreenWidth, uint32_t nScreenHeight);
+MICROPROFILEUI_API void MicroProfileToggleDisplayMode(); //advance to the next of the four display modes, wrapping around
+MICROPROFILEUI_API void MicroProfileSetDisplayMode(int); //select a display mode directly; values outside 0..3 leave the mode unchanged
+MICROPROFILEUI_API void MicroProfileClearGraph();
+MICROPROFILEUI_API void MicroProfileMousePosition(uint32_t nX, uint32_t nY, int nWheelDelta);
+MICROPROFILEUI_API void MicroProfileModKey(uint32_t nKeyState);
+MICROPROFILEUI_API void MicroProfileMouseButton(uint32_t nLeft, uint32_t nRight);
+MICROPROFILEUI_API void MicroProfileDrawLineVertical(int nX, int nTop, int nBottom, uint32_t nColor);
+MICROPROFILEUI_API void MicroProfileDrawLineHorizontal(int nLeft, int nRight, int nY, uint32_t nColor);
+MICROPROFILEUI_API void MicroProfileLoadPreset(const char* pSuffix);
+MICROPROFILEUI_API void MicroProfileSavePreset(const char* pSuffix);
+
+// Drawing primitives called by the UI code.
+// NOTE(review): no definitions are visible in this part of the file - presumably
+// they are supplied by the application embedding the profiler; confirm.
+MICROPROFILEUI_API void MicroProfileDrawText(int nX, int nY, uint32_t nColor, const char* pText, uint32_t nNumCharacters);
+MICROPROFILEUI_API void MicroProfileDrawBox(int nX, int nY, int nX1, int nY1, uint32_t nColor, MicroProfileBoxType = MicroProfileBoxTypeFlat);
+MICROPROFILEUI_API void MicroProfileDrawLine2D(uint32_t nVertices, float* pVertices, uint32_t nColor);
+MICROPROFILEUI_API void MicroProfileDumpTimers();
+
+// One-time initialisation of the global UI state; repeated calls do nothing.
+MICROPROFILEUI_API void MicroProfileInitUI();
+
+// Custom groups, addressed either by name or by index.
+MICROPROFILEUI_API void MicroProfileCustomGroupToggle(const char* pCustomName);
+MICROPROFILEUI_API void MicroProfileCustomGroupEnable(const char* pCustomName);
+MICROPROFILEUI_API void MicroProfileCustomGroupEnable(uint32_t nIndex);
+MICROPROFILEUI_API void MicroProfileCustomGroupDisable();
+MICROPROFILEUI_API void MicroProfileCustomGroup(const char* pCustomName, uint32_t nMaxTimers, uint32_t nAggregateFlip, float fReferenceTime, uint32_t nFlags);
+MICROPROFILEUI_API void MicroProfileCustomGroupAddTimer(const char* pCustomName, const char* pGroup, const char* pTimer);
+
+#ifdef MICROPROFILEUI_IMPL
+#ifdef _WIN32
+#define snprintf _snprintf
+#endif
+#include <stdlib.h>
+#include <stdarg.h>
+#include <math.h>
+#include <algorithm>
+
+// Timers in the "MicroProfile" group for the UI's own drawing code
+// (arguments: variable, group name, timer name, colour).
+// NOTE(review): 0x8888000 has seven hex digits while the two colours below it
+// have six - possibly a typo for 0x888800; confirm before changing.
+MICROPROFILE_DEFINE(g_MicroProfileDetailed, "MicroProfile", "Detailed View", 0x8888000);
+MICROPROFILE_DEFINE(g_MicroProfileDrawGraph, "MicroProfile", "Draw Graph", 0xff44ee00);
+MICROPROFILE_DEFINE(g_MicroProfileDrawBarView, "MicroProfile", "DrawBarView", 0x00dd77);
+MICROPROFILE_DEFINE(g_MicroProfileDraw,"MicroProfile", "Draw", 0x737373);
+
+
+// Fixed-capacity list of C strings used to build tooltips.
+// An entry of ppStrings either points at a string owned elsewhere (added with
+// MicroProfileStringArrayAddLiteral) or into Buffer (added with
+// MicroProfileStringArrayFormat). Call MicroProfileStringArrayClear before first use.
+struct MicroProfileStringArray
+{
+ const char* ppStrings[MICROPROFILE_TOOLTIP_MAX_STRINGS]; // string slots; the first nNumStrings are valid
+ char Buffer[MICROPROFILE_TOOLTIP_STRING_BUFFER_SIZE]; // storage for formatted strings, each NUL terminated
+ char* pBufferPos; // next free position in Buffer
+ uint32_t nNumStrings; // number of used slots in ppStrings
+};
+
+// One entry of MicroProfileUI::GroupMenu, which has room for every group plus
+// every category.
+// NOTE(review): field meanings below are inferred from the names only - confirm
+// against the code that fills the menu.
+struct MicroProfileGroupMenuItem
+{
+ uint32_t nIsCategory; // presumably non-zero when the entry is a category rather than a group
+ uint32_t nCategoryIndex;
+ uint32_t nIndex;
+ const char* pName;
+};
+
+// Description of one custom group; MicroProfileUI holds MICROPROFILE_CUSTOM_MAX of them.
+// NOTE(review): most fields mirror the parameters of MicroProfileCustomGroup
+// (name, nMaxTimers, nAggregateFlip, fReferenceTime, nFlags) - confirm the mapping
+// in its implementation.
+struct MicroProfileCustom
+{
+ char pName[MICROPROFILE_NAME_MAX_LEN];
+ uint32_t nFlags; // presumably a combination of the MICROPROFILE_CUSTOM_* flags
+ uint32_t nAggregateFlip;
+ uint32_t nNumTimers;
+ uint32_t nMaxTimers;
+ uint64_t nGroupMask;
+ float fReference;
+ uint64_t* pTimers; // presumably points into MicroProfileUI::CustomTimer - TODO confirm
+};
+
+// One line of the options menu: a printf-formatted label plus the option
+// sub type / index it stands for. MicroProfileInitUI builds the table of these.
+struct SOptionDesc
+{
+    // Deliberately leaves all members untouched: instances live inside
+    // g_MicroProfileUI, which MicroProfileInitUI clears with memset and then
+    // fills by assignment.
+    SOptionDesc(){}
+
+    // Formats the label from fmt and the variadic arguments. Labels longer than
+    // the Text buffer are truncated instead of overflowing it.
+    SOptionDesc(uint8_t nSubType, uint8_t nIndex, const char* fmt, ...):nSubType(nSubType), nIndex(nIndex), bSelected(false)
+    {
+        va_list args;
+        va_start (args, fmt);
+        vsnprintf(Text, sizeof(Text), fmt, args);
+        va_end(args);
+        // some C runtimes do not write the terminator when the output is truncated
+        Text[sizeof(Text) - 1] = '\0';
+    }
+    char Text[32];     // label, always NUL terminated after the formatting constructor
+    uint8_t nSubType;  // option category; MicroProfileInitUI uses 0xff for header lines
+    uint8_t nIndex;    // index of the choice within its category
+    bool bSelected;    // false after the formatting constructor
+};
+// Preset value tables offered by the UI.
+// NOTE(review): the aggregate presets are presumably frame counts - confirm at the use site.
+static uint32_t g_MicroProfileAggregatePresets[] = {0, 10, 20, 30, 60, 120};
+// Reference times; MicroProfileInitUI prints these with an "ms" suffix.
+static float g_MicroProfileReferenceTimePresets[] = {5.f, 10.f, 15.f,20.f, 33.33f, 66.66f, 100.f, 250.f, 500.f, 1000.f};
+// Opacity values; MicroProfileInitUI labels entry i as (i+1)*25 percent.
+static uint32_t g_MicroProfileOpacityPresets[] = {0x40, 0x80, 0xc0, 0xff};
+// Names of the presets; the first one is the default preset.
+static const char* g_MicroProfilePresetNames[] =
+{
+ MICROPROFILE_DEFAULT_PRESET,
+ "Render",
+ "GPU",
+ "Lighting",
+ "AI",
+ "Visibility",
+ "Sound",
+};
+
+// Element counts of the preset tables and the total number of option menu lines.
+// MICROPROFILE_OPTION_SIZE must match what MicroProfileInitUI writes (it asserts
+// this): one header line per section, the reference presets, the opacity presets
+// twice (background and foreground), one spike display line, and - with context
+// switch tracing - one more header plus three lines.
+enum
+{
+ MICROPROFILE_NUM_REFERENCE_PRESETS = sizeof(g_MicroProfileReferenceTimePresets)/sizeof(g_MicroProfileReferenceTimePresets[0]),
+ MICROPROFILE_NUM_OPACITY_PRESETS = sizeof(g_MicroProfileOpacityPresets)/sizeof(g_MicroProfileOpacityPresets[0]),
+#if MICROPROFILE_CONTEXT_SWITCH_TRACE
+ MICROPROFILE_OPTION_SIZE = MICROPROFILE_NUM_REFERENCE_PRESETS + MICROPROFILE_NUM_OPACITY_PRESETS * 2 + 2 + 7,
+#else
+ MICROPROFILE_OPTION_SIZE = MICROPROFILE_NUM_REFERENCE_PRESETS + MICROPROFILE_NUM_OPACITY_PRESETS * 2 + 2 + 3,
+#endif
+};
+
+// All mutable state of the on-screen UI. A single global instance
+// (g_MicroProfileUI) exists; MicroProfileInitUI zeroes it and applies defaults.
+struct MicroProfileUI
+{
+ //menu/mouse over stuff
+ uint64_t nHoverToken;
+ int64_t nHoverTime;
+ int nHoverFrame;
+#if MICROPROFILE_DEBUG
+ uint64_t nHoverAddressEnter;
+ uint64_t nHoverAddressLeave;
+#endif
+
+ // Drawing area size; floating windows are moved so they end inside it.
+ uint32_t nWidth;
+ uint32_t nHeight;
+
+
+ int nOffsetX;
+ int nOffsetY; // reset to 0 whenever the display mode changes
+ float fDetailedOffset; //display offset relative to start of latest displayable frame.
+ float fDetailedRange; //no. of ms to display
+ float fDetailedOffsetTarget; // set by MicroProfileZoomTo / MicroProfileCenter
+ float fDetailedRangeTarget; // set by MicroProfileZoomTo
+ uint32_t nOpacityBackground; // stored already shifted into the top byte, OR-ed into colours
+ uint32_t nOpacityForeground; // stored already shifted into the top byte, OR-ed into colours
+ bool bShowSpikes;
+
+
+
+ // Input state.
+ uint32_t nMouseX;
+ uint32_t nMouseY;
+ uint32_t nMouseDownX;
+ uint32_t nMouseDownY;
+ int nMouseWheelDelta;
+ uint32_t nMouseDownLeft;
+ uint32_t nMouseDownRight;
+ uint32_t nMouseLeft;
+ uint32_t nMouseRight;
+ uint32_t nMouseLeftMod; // when set, the drawn timer tooltip is also stored as a locked tooltip
+ uint32_t nMouseRightMod;
+ uint32_t nModDown;
+ uint32_t nActiveMenu; // initialised to (uint32_t)-1
+
+ MicroProfileLogEntry* pDisplayMouseOver;
+
+ // Hovered range, reset at the start of each detailed view draw.
+ int64_t nRangeBegin;
+ int64_t nRangeEnd;
+ int64_t nRangeBeginGpu;
+ int64_t nRangeEndGpu;
+ // Log index range within pRangeLog; read by MicroProfileToolTipMeta.
+ uint32_t nRangeBeginIndex;
+ uint32_t nRangeEndIndex;
+ MicroProfileThreadLog* pRangeLog;
+ // Highlight colours, recomputed on every detailed view draw.
+ uint32_t nHoverColor;
+ uint32_t nHoverColorShared;
+
+ // Ring of locked tooltips; LockedToolTipFront is the slot written most recently.
+ MicroProfileStringArray LockedToolTips[MICROPROFILE_TOOLTIP_MAX_LOCKED];
+ uint32_t nLockedToolTipColor[MICROPROFILE_TOOLTIP_MAX_LOCKED];
+ int LockedToolTipFront;
+
+ MicroProfileGroupMenuItem GroupMenu[MICROPROFILE_MAX_GROUPS + MICROPROFILE_MAX_CATEGORIES];
+ uint32_t GroupMenuCount;
+
+
+ // Custom groups.
+ uint32_t nCustomActive; // initialised to (uint32_t)-1
+ uint32_t nCustomTimerCount;
+ uint32_t nCustomCount;
+ MicroProfileCustom Custom[MICROPROFILE_CUSTOM_MAX];
+ uint64_t CustomTimer[MICROPROFILE_CUSTOM_MAX_TIMERS];
+
+ // Option menu lines, filled by MicroProfileInitUI.
+ SOptionDesc Options[MICROPROFILE_OPTION_SIZE];
+
+
+};
+
+// The single UI state instance; "UI" is shorthand for it throughout this file.
+MicroProfileUI g_MicroProfileUI;
+#define UI g_MicroProfileUI
+// The two background colours that alternate between time steps in the detailed view.
+static uint32_t g_nMicroProfileBackColors[2] = { 0x474747, 0x313131 };
+// Colours of context switch bars, selected by CPU index modulo the table size.
+#define MICROPROFILE_NUM_CONTEXT_SWITCH_COLORS 16
+static uint32_t g_nMicroProfileContextSwitchThreadColors[MICROPROFILE_NUM_CONTEXT_SWITCH_COLORS] = //palette generated by http://tools.medialab.sciences-po.fr/iwanthue/index.php
+{
+ 0x63607B,
+ 0x755E2B,
+ 0x326A55,
+ 0x523135,
+ 0x904F42,
+ 0x87536B,
+ 0x346875,
+ 0x5E6046,
+ 0x35404C,
+ 0x224038,
+ 0x413D1E,
+ 0x5E3A26,
+ 0x5D6161,
+ 0x4C6234,
+ 0x7D564F,
+ 0x5C4352,
+};
+
+
+// One-time setup of the global UI state: clears g_MicroProfileUI, applies the
+// default view, opacity and size values, and fills the option menu table.
+// Every call after the first returns immediately.
+void MicroProfileInitUI()
+{
+    static bool bAlreadyDone = false;
+    if(bAlreadyDone)
+    {
+        return;
+    }
+    bAlreadyDone = true;
+
+    memset(&g_MicroProfileUI, 0, sizeof(g_MicroProfileUI));
+
+    // menu and custom group selection start at (uint32_t)-1
+    UI.nActiveMenu = (uint32_t)-1;
+    UI.nCustomActive = (uint32_t)-1;
+    UI.nCustomTimerCount = 0;
+    UI.nCustomCount = 0;
+
+    // detailed view: start at offset 0, showing 50ms
+    UI.fDetailedOffset = 0.f;
+    UI.fDetailedOffsetTarget = 0.f;
+    UI.fDetailedRange = 50.f;
+    UI.fDetailedRangeTarget = 50.f;
+
+    // fully opaque, stored in the top byte
+    UI.nOpacityBackground = 0xff<<24;
+    UI.nOpacityForeground = 0xff<<24;
+
+    UI.bShowSpikes = false;
+
+    UI.nWidth = 100;
+    UI.nHeight = 100;
+
+    // Option table: a header line (sub type 0xff) followed by its choices.
+    int nSlot = 0;
+    UI.Options[nSlot++] = SOptionDesc(0xff, 0, "%s", "Reference");
+    for(int nPreset = 0; nPreset < MICROPROFILE_NUM_REFERENCE_PRESETS; ++nPreset)
+    {
+        UI.Options[nSlot++] = SOptionDesc(0, nPreset, " %6.2fms", g_MicroProfileReferenceTimePresets[nPreset]);
+    }
+
+    // background opacity is sub type 1, foreground opacity is sub type 2
+    static const char* const pOpacityTitles[2] = { "BG Opacity", "FG Opacity" };
+    for(int nKind = 0; nKind < 2; ++nKind)
+    {
+        UI.Options[nSlot++] = SOptionDesc(0xff, 0, "%s", pOpacityTitles[nKind]);
+        for(int nPreset = 0; nPreset < MICROPROFILE_NUM_OPACITY_PRESETS; ++nPreset)
+        {
+            UI.Options[nSlot++] = SOptionDesc(1 + nKind, nPreset, " %7d%%", (nPreset+1)*25);
+        }
+    }
+
+    UI.Options[nSlot++] = SOptionDesc(0xff, 0, "%s", "Spike Display");
+    UI.Options[nSlot++] = SOptionDesc(3, 0, "%s", " Enable");
+
+#if MICROPROFILE_CONTEXT_SWITCH_TRACE
+    UI.Options[nSlot++] = SOptionDesc(0xff, 0, "%s", "CSwitch Trace");
+    UI.Options[nSlot++] = SOptionDesc(4, 0, "%s", " Enable");
+    UI.Options[nSlot++] = SOptionDesc(4, 1, "%s", " All Threads");
+    UI.Options[nSlot++] = SOptionDesc(4, 2, "%s", " No Bars");
+#endif
+    // the table size is computed separately; make sure both stay in sync
+    MP_ASSERT(nSlot == MICROPROFILE_OPTION_SIZE);
+}
+
+// Selects display mode nValue when it is in the range 0..3; any other value
+// leaves the current mode as it is. The vertical scroll offset is reset either way.
+void MicroProfileSetDisplayMode(int nValue)
+{
+    MicroProfile& S = *MicroProfileGet();
+    const bool bValidMode = nValue >= 0 && nValue < 4;
+    if(bValidMode)
+    {
+        S.nDisplay = nValue;
+    }
+    UI.nOffsetY = 0;
+}
+
+// Advances to the next of the four display modes, wrapping back to the first,
+// and resets the vertical scroll offset.
+void MicroProfileToggleDisplayMode()
+{
+    MicroProfile& S = *MicroProfileGet();
+    const auto nNextMode = S.nDisplay + 1;
+    S.nDisplay = nNextMode % 4;
+    UI.nOffsetY = 0;
+}
+
+
+// Empties the array: no strings, and the write position back at the start of Buffer.
+void MicroProfileStringArrayClear(MicroProfileStringArray* pArray)
+{
+    pArray->pBufferPos = pArray->Buffer;
+    pArray->nNumStrings = 0;
+}
+
+// Appends pLiteral to the array without copying it; only the pointer is stored,
+// so the string must outlive the array's use.
+void MicroProfileStringArrayAddLiteral(MicroProfileStringArray* pArray, const char* pLiteral)
+{
+    MP_ASSERT(pArray->nNumStrings < MICROPROFILE_TOOLTIP_MAX_STRINGS);
+    const uint32_t nSlot = pArray->nNumStrings;
+    pArray->ppStrings[nSlot] = pLiteral;
+    pArray->nNumStrings = nSlot + 1;
+}
+
+// Appends a printf-formatted string to the array. The text is written into the
+// array's own Buffer at pBufferPos, which is then advanced past the terminator.
+// Output that does not fit into the remaining space is truncated; when no space
+// is left at all an empty string is recorded instead.
+void MicroProfileStringArrayFormat(MicroProfileStringArray* pArray, const char* fmt, ...)
+{
+    MP_ASSERT(pArray->nNumStrings < MICROPROFILE_TOOLTIP_MAX_STRINGS);
+    char* const pBufferEnd = pArray->Buffer + MICROPROFILE_TOOLTIP_STRING_BUFFER_SIZE;
+    if(pArray->pBufferPos >= pBufferEnd)
+    {
+        // nothing can be written any more, not even a terminator
+        pArray->ppStrings[pArray->nNumStrings++] = "";
+        return;
+    }
+    const size_t nRemaining = (size_t)(pBufferEnd - pArray->pBufferPos);
+
+    pArray->ppStrings[pArray->nNumStrings++] = pArray->pBufferPos;
+    va_list args;
+    va_start (args, fmt);
+    int nWritten = vsnprintf(pArray->pBufferPos, nRemaining, fmt, args);
+    va_end(args);
+    if(nWritten < 0 || (size_t)nWritten >= nRemaining)
+    {
+        // truncated (or failed): keep what fits and terminate explicitly, since
+        // some C runtimes omit the terminator in this case
+        nWritten = (int)nRemaining - 1;
+        pArray->pBufferPos[nWritten] = '\0';
+    }
+    pArray->pBufferPos += 1 + nWritten;
+    MP_ASSERT(pArray->pBufferPos <= pBufferEnd);
+}
+// Copies pSrc into pDest. Strings that live inside pSrc's Buffer are re-pointed
+// into pDest's copy of the buffer; pointers to external strings are copied as is.
+// The write position is carried over as well, so pDest can be appended to.
+void MicroProfileStringArrayCopy(MicroProfileStringArray* pDest, MicroProfileStringArray* pSrc)
+{
+    if(pDest == pSrc)
+    {
+        return; // nothing to do, and memcpy must not be given overlapping ranges
+    }
+    const char* const pSrcBegin = &pSrc->Buffer[0];
+    const char* const pSrcEnd = pSrcBegin + MICROPROFILE_TOOLTIP_STRING_BUFFER_SIZE;
+    char* const pDestBegin = &pDest->Buffer[0];
+
+    memcpy(&pDest->ppStrings[0], &pSrc->ppStrings[0], sizeof(pDest->ppStrings));
+    memcpy(pDestBegin, pSrcBegin, sizeof(pDest->Buffer));
+
+    // only the used slots hold meaningful pointers
+    for(uint32_t i = 0; i < pSrc->nNumStrings && i < MICROPROFILE_TOOLTIP_MAX_STRINGS; ++i)
+    {
+        const char* pString = pSrc->ppStrings[i];
+        if(pString >= pSrcBegin && pString < pSrcEnd)
+        {
+            // same offset, but inside the destination buffer
+            pDest->ppStrings[i] = pDestBegin + (pString - pSrcBegin);
+        }
+    }
+
+    const char* pSrcPos = pSrc->pBufferPos;
+    if(pSrcPos >= pSrcBegin && pSrcPos <= pSrcEnd)
+    {
+        pDest->pBufferPos = pDestBegin + (pSrcPos - pSrcBegin);
+    }
+    else
+    {
+        pDest->pBufferPos = pDestBegin;
+    }
+    pDest->nNumStrings = pSrc->nNumStrings;
+}
+
+// Computes the pixel size of a two-column floating window.
+// ppStrings holds nNumStrings entries read as (left, right) pairs, one pair per row.
+// nWidth is in/out: on entry a minimum width in characters, on return the width
+// in pixels. nHeight receives the height in pixels. When pColors is non-null one
+// extra character cell is reserved for a colour swatch. If pStringLengths is given
+// it receives the length of every string that belongs to a pair.
+void MicroProfileFloatWindowSize(const char** ppStrings, uint32_t nNumStrings, uint32_t* pColors, uint32_t& nWidth, uint32_t& nHeight, uint32_t* pStringLengths = 0)
+{
+    uint32_t* pLengths = pStringLengths;
+    if(!pLengths)
+    {
+        // caller is not interested in the lengths; use scratch space
+        pLengths = (uint32_t*)alloca(nNumStrings * sizeof(uint32_t));
+    }
+    const uint32_t nRows = nNumStrings/2;
+    for(uint32_t nRow = 0; nRow < nRows; ++nRow)
+    {
+        const uint32_t nLeft = nRow * 2;
+        const uint32_t nRight = nLeft + 1;
+        const uint32_t nLenLeft = (uint32_t)strlen(ppStrings[nLeft]);
+        pLengths[nLeft] = nLenLeft;
+        const uint32_t nLenRight = (uint32_t)strlen(ppStrings[nRight]);
+        pLengths[nRight] = nLenRight;
+        nWidth = MicroProfileMax(nLenLeft+nLenRight, nWidth);
+    }
+    // characters -> pixels: two characters of padding plus the border on both sides
+    nWidth = (MICROPROFILE_TEXT_WIDTH+1) * (2+nWidth) + 2 * MICROPROFILE_BORDER_SIZE;
+    nWidth += pColors ? MICROPROFILE_TEXT_WIDTH + 1 : 0;
+    nHeight = (MICROPROFILE_TEXT_HEIGHT+1) * nRows + 2 * MICROPROFILE_BORDER_SIZE;
+}
+
+// Draws a two-column floating window at (nX, nY): left strings left-aligned,
+// right strings right-aligned, one (left, right) pair of ppStrings per row.
+// The window is shifted so that it ends at the right / bottom edge of the UI area
+// when it would otherwise extend past it. nColor is the colour of the outline;
+// when pColors is given, row i gets a swatch in colour pColors[i].
+void MicroProfileDrawFloatWindow(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0)
+{
+    uint32_t nWidth = 0, nHeight = 0;
+    uint32_t* pLengths = (uint32_t*)alloca(nNumStrings * sizeof(uint32_t));
+    MicroProfileFloatWindowSize(ppStrings, nNumStrings, pColors, nWidth, nHeight, pLengths);
+
+    nX = (nX + nWidth > UI.nWidth) ? UI.nWidth - nWidth : nX;
+    nY = (nY + nHeight > UI.nHeight) ? UI.nHeight - nHeight : nY;
+
+    // outline in the caller's colour, black interior on top of it
+    MicroProfileDrawBox(nX-1, nY-1, nX + nWidth+1, nY + nHeight+1, 0xff000000|nColor);
+    MicroProfileDrawBox(nX, nY, nX + nWidth, nY + nHeight, 0xff000000);
+
+    const bool bSwatches = pColors != 0;
+    if(bSwatches)
+    {
+        // reserve one character cell on the left for the swatch
+        nX += MICROPROFILE_TEXT_WIDTH+1;
+        nWidth -= MICROPROFILE_TEXT_WIDTH+1;
+    }
+
+    const uint32_t nRows = nNumStrings/2;
+    for(uint32_t nRow = 0; nRow < nRows; ++nRow)
+    {
+        const uint32_t nLeft = nRow * 2;
+        const uint32_t nRight = nLeft + 1;
+        if(bSwatches)
+        {
+            MicroProfileDrawBox(nX-MICROPROFILE_TEXT_WIDTH, nY, nX, nY + MICROPROFILE_TEXT_WIDTH, pColors[nRow]|0xff000000);
+        }
+        // lengths were measured by MicroProfileFloatWindowSize above
+        MicroProfileDrawText(nX + 1, nY + 1, (uint32_t)-1, ppStrings[nLeft], pLengths[nLeft]);
+        MicroProfileDrawText(nX + nWidth - pLengths[nRight] * (MICROPROFILE_TEXT_WIDTH+1), nY + 1, (uint32_t)-1, ppStrings[nRight], pLengths[nRight]);
+        nY += (MICROPROFILE_TEXT_HEIGHT+1);
+    }
+}
+// Draws a single-column text box at (nX, nY) with one row per string, shifted so
+// that it ends at the right / bottom edge of the UI area when it would otherwise
+// extend past it. nColor and pColors are accepted but not used.
+void MicroProfileDrawTextBox(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0)
+{
+    uint32_t* pLengths = (uint32_t*)alloca(nNumStrings * sizeof(uint32_t));
+    uint32_t nMaxChars = 0;
+    for(uint32_t nRow = 0; nRow < nNumStrings; ++nRow)
+    {
+        pLengths[nRow] = (uint32_t)strlen(ppStrings[nRow]);
+        nMaxChars = MicroProfileMax(nMaxChars, pLengths[nRow]);
+    }
+    // characters / rows -> pixels
+    const uint32_t nWidth = (MICROPROFILE_TEXT_WIDTH+1) * (2+nMaxChars) + 2 * MICROPROFILE_BORDER_SIZE;
+    const uint32_t nHeight = (MICROPROFILE_TEXT_HEIGHT+1) * nNumStrings + 2 * MICROPROFILE_BORDER_SIZE;
+
+    nX = (nX + nWidth > UI.nWidth) ? UI.nWidth - nWidth : nX;
+    nY = (nY + nHeight > UI.nHeight) ? UI.nHeight - nHeight : nY;
+
+    MicroProfileDrawBox(nX, nY, nX + nWidth, nY + nHeight, 0xff000000);
+    for(uint32_t nRow = 0; nRow < nNumStrings; ++nRow)
+    {
+        MicroProfileDrawText(nX + 1, nY + 1, (uint32_t)-1, ppStrings[nRow], pLengths[nRow]);
+        nY += (MICROPROFILE_TEXT_HEIGHT+1);
+    }
+}
+
+
+
+// Appends meta counter totals for the currently selected log range
+// (UI.nRangeBeginIndex..UI.nRangeEndIndex of UI.pRangeLog) to pToolTip.
+// For every named meta counter with a non-zero total two rows are added:
+// "<name> excl" and "<name> incl". Does nothing when no range is selected.
+void MicroProfileToolTipMeta(MicroProfileStringArray* pToolTip)
+{
+    MicroProfile& S = *MicroProfileGet();
+    if(UI.nRangeBeginIndex == UI.nRangeEndIndex || !UI.pRangeLog)
+    {
+        return;
+    }
+
+    uint64_t nMetaSum[MICROPROFILE_META_MAX] = {0};
+    uint64_t nMetaSumInclusive[MICROPROFILE_META_MAX] = {0};
+    int nStackDepth = 0;
+    uint32_t nRange[2][2];
+    MicroProfileThreadLog* pLog = UI.pRangeLog;
+
+    // the range is delivered as up to two [start, end) index segments
+    MicroProfileGetRange(UI.nRangeEndIndex, UI.nRangeBeginIndex, nRange);
+    for(uint32_t nSegment = 0; nSegment < 2; ++nSegment)
+    {
+        const uint32_t nStart = nRange[nSegment][0];
+        const uint32_t nEnd = nRange[nSegment][1];
+        for(uint32_t j = nStart; j < nEnd; ++j)
+        {
+            MicroProfileLogEntry LE = pLog->Log[j];
+            int nType = MicroProfileLogType(LE);
+            switch(nType)
+            {
+            case MP_LOG_META:
+                {
+                    int64_t nMetaIndex = MicroProfileLogTimerIndex(LE);
+                    int64_t nMetaCount = MicroProfileLogGetTick(LE);
+                    MP_ASSERT(nMetaIndex < MICROPROFILE_META_MAX);
+                    // never index the local arrays out of bounds, even if asserts are compiled out
+                    if(nMetaIndex >= 0 && nMetaIndex < MICROPROFILE_META_MAX)
+                    {
+                        if(nStackDepth>1)
+                        {
+                            nMetaSumInclusive[nMetaIndex] += nMetaCount;
+                        }
+                        else
+                        {
+                            nMetaSum[nMetaIndex] += nMetaCount;
+                        }
+                    }
+                }
+                break;
+            case MP_LOG_LEAVE:
+                if(nStackDepth)
+                {
+                    nStackDepth--;
+                }
+                else
+                {
+                    // leaving a scope that was entered before the range started:
+                    // what was counted so far moves to the inclusive totals
+                    for(int nMeta = 0; nMeta < MICROPROFILE_META_MAX; ++nMeta)
+                    {
+                        nMetaSumInclusive[nMeta] += nMetaSum[nMeta];
+                        nMetaSum[nMeta] = 0;
+                    }
+                }
+                break;
+            case MP_LOG_ENTER:
+                nStackDepth++;
+                break;
+            }
+        }
+    }
+
+    bool bSpaced = false;
+    for(int i = 0; i < MICROPROFILE_META_MAX; ++i)
+    {
+        if(S.MetaCounters[i].pName && (nMetaSum[i]||nMetaSumInclusive[i]))
+        {
+            if(!bSpaced)
+            {
+                // one empty row between what is already in the tooltip and the counters
+                bSpaced = true;
+                MicroProfileStringArrayAddLiteral(pToolTip, "");
+                MicroProfileStringArrayAddLiteral(pToolTip, "");
+            }
+            // the sums are 64 bit: format them as such instead of as int
+            MicroProfileStringArrayFormat(pToolTip, "%s excl", S.MetaCounters[i].pName);
+            MicroProfileStringArrayFormat(pToolTip, "%5llu", (unsigned long long)nMetaSum[i]);
+            MicroProfileStringArrayFormat(pToolTip, "%s incl", S.MetaCounters[i].pName);
+            MicroProfileStringArrayFormat(pToolTip, "%5llu", (unsigned long long)(nMetaSum[i] + nMetaSumInclusive[i]));
+        }
+    }
+}
+
+// Builds and draws the tooltip for the timer identified by nToken, 20 pixels
+// below (nX, nY). Rows: timer name, the hovered time (only when nTime is non-zero),
+// frame / average / max times, per-call statistics, exclusive times, the group's
+// times, and finally meta counters for the selected range.
+// When UI.nMouseLeftMod is set the tooltip is also stored as a locked tooltip.
+void MicroProfileDrawFloatTooltip(uint32_t nX, uint32_t nY, uint32_t nToken, uint64_t nTime)
+{
+    MicroProfile& S = *MicroProfileGet();
+
+    uint32_t nGroupId = MicroProfileGetGroupIndex(nToken);
+    uint32_t nTimerId = MicroProfileGetTimerIndex(nToken);
+    // both divisors are forced to at least 1
+    uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
+    uint32_t nAggregateCount = S.Aggregate[nTimerId].nCount ? S.Aggregate[nTimerId].nCount : 1;
+
+    bool bGpu = S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu;
+
+    float fToMs = MicroProfileTickToMsMultiplier(bGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
+
+    float fMs = fToMs * (nTime);
+    float fFrameMs = fToMs * (S.Frame[nTimerId].nTicks);
+    float fAverage = fToMs * (S.Aggregate[nTimerId].nTicks/nAggregateFrames);
+    float fCallAverage = fToMs * (S.Aggregate[nTimerId].nTicks / nAggregateCount);
+    float fMax = fToMs * (S.AggregateMax[nTimerId]);
+
+    float fFrameMsExclusive = fToMs * (S.FrameExclusive[nTimerId]);
+    float fAverageExclusive = fToMs * (S.AggregateExclusive[nTimerId]/nAggregateFrames);
+    float fMaxExclusive = fToMs * (S.AggregateMaxExclusive[nTimerId]);
+
+    float fGroupAverage = fToMs * (S.AggregateGroup[nGroupId] / nAggregateFrames);
+    float fGroupMax = fToMs * (S.AggregateGroupMax[nGroupId]);
+    float fGroup = fToMs * (S.FrameGroup[nGroupId]);
+
+    MicroProfileStringArray ToolTip;
+    MicroProfileStringArrayClear(&ToolTip);
+    const char* pGroupName = S.GroupInfo[nGroupId].pName;
+    const char* pTimerName = S.TimerInfo[nTimerId].pName;
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Timer:");
+    MicroProfileStringArrayFormat(&ToolTip, "%s", pTimerName);
+
+#if MICROPROFILE_DEBUG
+    // the addresses are kept as 64 bit integers; %p needs a pointer argument
+    MicroProfileStringArrayFormat(&ToolTip,"0x%p", (void*)(uintptr_t)UI.nHoverAddressEnter);
+    MicroProfileStringArrayFormat(&ToolTip,"0x%p", (void*)(uintptr_t)UI.nHoverAddressLeave);
+#endif
+
+    if(nTime != (uint64_t)0)
+    {
+        MicroProfileStringArrayAddLiteral(&ToolTip, "Time:");
+        MicroProfileStringArrayFormat(&ToolTip,"%6.3fms", fMs);
+        MicroProfileStringArrayAddLiteral(&ToolTip, "");
+        MicroProfileStringArrayAddLiteral(&ToolTip, "");
+    }
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Time:");
+    MicroProfileStringArrayFormat(&ToolTip,"%6.3fms", fFrameMs);
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Average:");
+    MicroProfileStringArrayFormat(&ToolTip,"%6.3fms", fAverage);
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Max:");
+    MicroProfileStringArrayFormat(&ToolTip,"%6.3fms", fMax);
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "");
+    MicroProfileStringArrayAddLiteral(&ToolTip, "");
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Call Average:");
+    MicroProfileStringArrayFormat(&ToolTip,"%6.3fms", fCallAverage);
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Call Count:");
+    // the count is unsigned
+    MicroProfileStringArrayFormat(&ToolTip, "%6u", nAggregateCount / nAggregateFrames);
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "");
+    MicroProfileStringArrayAddLiteral(&ToolTip, "");
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Exclusive Frame Time:");
+    MicroProfileStringArrayFormat(&ToolTip, "%6.3fms", fFrameMsExclusive);
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Exclusive Average:");
+    MicroProfileStringArrayFormat(&ToolTip, "%6.3fms", fAverageExclusive);
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Exclusive Max:");
+    MicroProfileStringArrayFormat(&ToolTip, "%6.3fms", fMaxExclusive);
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "");
+    MicroProfileStringArrayAddLiteral(&ToolTip, "");
+
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Group:");
+    MicroProfileStringArrayFormat(&ToolTip, "%s", pGroupName);
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Time:");
+    MicroProfileStringArrayFormat(&ToolTip, "%6.3f", fGroup);
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Average:");
+    MicroProfileStringArrayFormat(&ToolTip, "%6.3f", fGroupAverage);
+    MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Max:");
+    MicroProfileStringArrayFormat(&ToolTip, "%6.3f", fGroupMax);
+
+    MicroProfileToolTipMeta(&ToolTip);
+
+    MicroProfileDrawFloatWindow(nX, nY+20, &ToolTip.ppStrings[0], ToolTip.nNumStrings, S.TimerInfo[nTimerId].nColor);
+
+    if(UI.nMouseLeftMod)
+    {
+        // store in the slot before the current front of the ring and make it the new front
+        int nLockedSlot = (g_MicroProfileUI.LockedToolTipFront + MICROPROFILE_TOOLTIP_MAX_LOCKED - 1) % MICROPROFILE_TOOLTIP_MAX_LOCKED;
+        g_MicroProfileUI.nLockedToolTipColor[nLockedSlot] = S.TimerInfo[nTimerId].nColor;
+        MicroProfileStringArrayCopy(&g_MicroProfileUI.LockedToolTips[nLockedSlot], &ToolTip);
+        g_MicroProfileUI.LockedToolTipFront = nLockedSlot;
+    }
+}
+
+
+// Sets the detailed view's target offset and range so that it spans the CPU tick
+// interval [nTickStart, nTickEnd]. Only the targets are written here, not the
+// current offset / range.
+void MicroProfileZoomTo(int64_t nTickStart, int64_t nTickEnd)
+{
+    MicroProfile& S = *MicroProfileGet();
+
+    const int64_t nFrameStart = S.Frames[S.nFrameCurrent].nFrameStartCpu;
+    const float fTicksToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
+    // offset: frame start -> range start; range: range start -> range end
+    const auto nOffsetTicks = MicroProfileLogTickDifference(nFrameStart, nTickStart);
+    const auto nRangeTicks = MicroProfileLogTickDifference(nTickStart, nTickEnd);
+    UI.fDetailedOffsetTarget = nOffsetTicks * fTicksToMs;
+    UI.fDetailedRangeTarget = nRangeTicks * fTicksToMs;
+}
+
+// Moves the detailed view so that CPU tick nTickCenter lies in the middle of the
+// currently displayed range. Both the current offset and its target are set.
+void MicroProfileCenter(int64_t nTickCenter)
+{
+    MicroProfile& S = *MicroProfileGet();
+    const int64_t nFrameStart = S.Frames[S.nFrameCurrent].nFrameStartCpu;
+    const float fTicksToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
+    const float fCenterMs = MicroProfileLogTickDifference(nFrameStart, nTickCenter) * fTicksToMs;
+    const float fNewOffset = fCenterMs - 0.5f * UI.fDetailedRange;
+    UI.fDetailedOffset = fNewOffset;
+    UI.fDetailedOffsetTarget = fNewOffset;
+}
+// NOTE(review): this section is guarded with #ifdef while other debug-only code in
+// this file uses "#if MICROPROFILE_DEBUG"; if the macro can be defined as 0 the two
+// disagree - confirm which is intended before changing the guard.
+#ifdef MICROPROFILE_DEBUG
+uint64_t* g_pMicroProfileDumpStart = 0;
+uint64_t* g_pMicroProfileDumpEnd = 0;
+// Prints one line per log entry in [g_pMicroProfileDumpStart, g_pMicroProfileDumpEnd)
+// to the debugger output (Windows) or stdout, then marks the range as consumed.
+void MicroProfileDebugDumpRange()
+{
+    MicroProfile& S = *MicroProfileGet();
+    if(g_pMicroProfileDumpStart != g_pMicroProfileDumpEnd)
+    {
+        uint64_t* pStart = g_pMicroProfileDumpStart;
+        uint64_t* pEnd = g_pMicroProfileDumpEnd;
+        while(pStart != pEnd)
+        {
+            uint64_t nTick = MicroProfileLogGetTick(*pStart);
+            uint64_t nToken = MicroProfileLogTimerIndex(*pStart);
+            uint32_t nTimerId = MicroProfileGetTimerIndex(nToken);
+
+            const char* pTimerName = S.TimerInfo[nTimerId].pName;
+            char buffer[256];
+            int type = MicroProfileLogType(*pStart);
+
+            const char* pBegin = type == MP_LOG_LEAVE ? "END" :
+                (type == MP_LOG_ENTER ? "BEGIN" : "META");
+            // argument types are made to match the conversions: %p takes void*, %llx unsigned long long
+            snprintf(buffer, sizeof(buffer) - 1, "DUMP 0x%p: %s :: %llx: %s\n", (void*)pStart, pBegin, (unsigned long long)nTick, pTimerName);
+            // on Windows snprintf is mapped to _snprintf, which does not terminate truncated output
+            buffer[sizeof(buffer) - 1] = '\0';
+#ifdef _WIN32
+            OutputDebugString(buffer);
+#else
+            printf("%s", buffer);
+#endif
+            pStart++;
+        }
+
+        g_pMicroProfileDumpStart = g_pMicroProfileDumpEnd;
+    }
+}
+#define MP_DEBUG_DUMP_RANGE() MicroProfileDebugDumpRange();
+#else
+#define MP_DEBUG_DUMP_RANGE() do{} while(0)
+#endif
+
+// Maximum horizontal distance between the mouse and a bar for the bar to count as hovered.
+#define MICROPROFILE_HOVER_DIST 0.5f
+
+// Draws the context switch bars of thread nThreadId at vertical position nY.
+// Walks the context switch ring buffer from nContextSwitchStart up to (excluding)
+// nContextSwitchEnd and pairs each "switched in" record of the thread with the
+// next "switched out" record; each pair becomes one box, positioned relative to
+// nBaseTicks. A hovered bar updates the hover range and the context switch hover
+// state in the profiler.
+void MicroProfileDrawDetailedContextSwitchBars(uint32_t nY, uint32_t nThreadId, uint32_t nContextSwitchStart, uint32_t nContextSwitchEnd, int64_t nBaseTicks, uint32_t nBaseY)
+{
+    MicroProfile& S = *MicroProfileGet();
+    const float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
+    const float fMsToScreen = UI.nWidth / UI.fDetailedRange;
+    const float fMouseX = (float)UI.nMouseX;
+    const float fMouseY = (float)UI.nMouseY;
+
+    int64_t nTickIn = -1; // -1: the thread is not known to be running at this point
+    uint32_t nThreadBefore = -1;
+
+    for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
+    {
+        MP_ASSERT(j < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE);
+        MicroProfileContextSwitch CS = S.ContextSwitch[j];
+
+        if(nTickIn == -1)
+        {
+            // waiting for the thread to be switched in
+            if(CS.nThreadIn == nThreadId)
+            {
+                nTickIn = CS.nTicks;
+                nThreadBefore = CS.nThreadOut;
+            }
+            continue;
+        }
+        if(CS.nThreadOut != nThreadId)
+        {
+            continue;
+        }
+
+        // the thread is switched out again: [nTickIn, nTickOut] is one bar
+        int64_t nTickOut = CS.nTicks;
+        float fMsStart = fToMs * MicroProfileLogTickDifference(nBaseTicks, nTickIn);
+        float fMsEnd = fToMs * MicroProfileLogTickDifference(nBaseTicks, nTickOut);
+        if(fMsStart <= fMsEnd)
+        {
+            float fXStart = fMsStart * fMsToScreen;
+            float fXEnd = fMsEnd * fMsToScreen;
+            float fYStart = (float)nY;
+            float fYEnd = fYStart + (MICROPROFILE_DETAILED_CONTEXT_SWITCH_HEIGHT);
+            uint32_t nColor = g_nMicroProfileContextSwitchThreadColors[CS.nCpu%MICROPROFILE_NUM_CONTEXT_SWITCH_COLORS];
+            float fXDist = MicroProfileMax(fXStart - fMouseX, fMouseX - fXEnd);
+            bool bHover = fXDist < MICROPROFILE_HOVER_DIST && fYStart <= fMouseY && fMouseY <= fYEnd && nBaseY < fMouseY;
+            if(bHover)
+            {
+                UI.nRangeBegin = nTickIn;
+                UI.nRangeEnd = nTickOut;
+                S.nContextSwitchHoverTickIn = nTickIn;
+                S.nContextSwitchHoverTickOut = nTickOut;
+                S.nContextSwitchHoverThread = CS.nThreadOut;
+                S.nContextSwitchHoverThreadBefore = nThreadBefore;
+                S.nContextSwitchHoverThreadAfter = CS.nThreadIn;
+                S.nContextSwitchHoverCpuNext = CS.nCpu;
+                nColor = UI.nHoverColor;
+            }
+            // checked after the hover test, so the shared colour wins when both apply
+            if(CS.nCpu == S.nContextSwitchHoverCpu)
+            {
+                nColor = UI.nHoverColorShared;
+            }
+            MicroProfileDrawBox(fXStart, fYStart, fXEnd, fYEnd, nColor|UI.nOpacityForeground, MicroProfileBoxTypeFlat);
+        }
+        nTickIn = -1;
+    }
+}
+
+void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY, int nSelectedFrame)
+{
+ MicroProfile& S = *MicroProfileGet();
+ MP_DEBUG_DUMP_RANGE();
+ int nY = nBaseY - UI.nOffsetY;
+ int64_t nNumBoxes = 0;
+ int64_t nNumLines = 0;
+
+ uint32_t nFrameNext = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY;
+ MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent];
+ MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext];
+
+ UI.nRangeBegin = 0;
+ UI.nRangeEnd = 0;
+ UI.nRangeBeginGpu = 0;
+ UI.nRangeEndGpu = 0;
+ UI.nRangeBeginIndex = UI.nRangeEndIndex = 0;
+ UI.pRangeLog = 0;
+ int64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu;
+ int64_t nFrameStartGpu = pFrameCurrent->nFrameStartGpu;
+ int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
+ int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
+ float fToMsCpu = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
+ float fToMsGpu = MicroProfileTickToMsMultiplier(nTicksPerSecondGpu);
+
+ float fDetailedOffset = UI.fDetailedOffset;
+ float fDetailedRange = UI.fDetailedRange;
+
+
+ int64_t nDetailedOffsetTicksCpu = MicroProfileMsToTick(fDetailedOffset, MicroProfileTicksPerSecondCpu());
+ int64_t nDetailedOffsetTicksGpu = MicroProfileMsToTick(fDetailedOffset, MicroProfileTicksPerSecondGpu());
+ int64_t nBaseTicksCpu = nDetailedOffsetTicksCpu + nFrameStartCpu;
+ int64_t nBaseTicksGpu = nDetailedOffsetTicksGpu + nFrameStartGpu;
+ int64_t nBaseTicksEndCpu = nBaseTicksCpu + MicroProfileMsToTick(fDetailedRange, MicroProfileTicksPerSecondCpu());
+
+ int64_t nTickReferenceCpu = 0, nTickReferenceGpu = 0;
+ static int64_t nRefCpu = 0, nRefGpu = 0;
+ if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu))
+ {
+ if(0 == nRefCpu || abs(nRefCpu-nBaseTicksCpu) > abs(nTickReferenceCpu-nBaseTicksCpu))
+ {
+ nRefCpu = nTickReferenceCpu;
+ nRefGpu = nTickReferenceGpu;
+ }
+ else
+ {
+ nTickReferenceCpu = nRefCpu;
+ nTickReferenceGpu = nRefGpu;
+ }
+ nBaseTicksGpu = (nBaseTicksCpu - nTickReferenceCpu) * nTicksPerSecondGpu / nTicksPerSecondCpu + nTickReferenceGpu;
+ }
+ int64_t nBaseTicksEndGpu = nBaseTicksCpu + MicroProfileMsToTick(fDetailedRange, MicroProfileTicksPerSecondCpu());
+
+ MicroProfileFrameState* pFrameFirst = pFrameCurrent;
+ int64_t nGapTime = MicroProfileTicksPerSecondCpu() * MICROPROFILE_GAP_TIME / 1000;
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY; ++i)
+ {
+ uint32_t nNextIndex = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
+ pFrameFirst = &S.Frames[nNextIndex];
+ if(pFrameFirst->nFrameStartCpu <= nBaseTicksCpu-nGapTime)
+ break;
+ }
+
+ float fMsBase = fToMsCpu * nDetailedOffsetTicksCpu;
+ float fMs = fDetailedRange;
+ float fMsEnd = fMs + fMsBase;
+ float fWidth = (float)nWidth;
+ float fMsToScreen = fWidth / fMs;
+
+ {
+ float fRate = floor(2*(log10(fMs)-1))/2;
+ float fStep = powf(10.f, fRate);
+ float fRcpStep = 1.f / fStep;
+ int nColorIndex = (int)(floor(fMsBase*fRcpStep));
+ float fStart = floor(fMsBase*fRcpStep) * fStep;
+ for(float f = fStart; f < fMsEnd; )
+ {
+ float fStart = f;
+ float fNext = f + fStep;
+ MicroProfileDrawBox(((fStart-fMsBase) * fMsToScreen), nBaseY, (fNext-fMsBase) * fMsToScreen+1, nBaseY + nHeight, UI.nOpacityBackground | g_nMicroProfileBackColors[nColorIndex++ & 1]);
+ f = fNext;
+ }
+ }
+
+ nY += MICROPROFILE_TEXT_HEIGHT+1;
+ MicroProfileLogEntry* pMouseOver = UI.pDisplayMouseOver;
+ MicroProfileLogEntry* pMouseOverNext = 0;
+ uint64_t nMouseOverToken = pMouseOver ? MicroProfileLogTimerIndex(*pMouseOver) : MICROPROFILE_INVALID_TOKEN;
+ float fMouseX = (float)UI.nMouseX;
+ float fMouseY = (float)UI.nMouseY;
+ uint64_t nHoverToken = MICROPROFILE_INVALID_TOKEN;
+ int64_t nHoverTime = 0;
+
+ static int nHoverCounter = 155;
+ static int nHoverCounterDelta = 10;
+ nHoverCounter += nHoverCounterDelta;
+ if(nHoverCounter >= 245)
+ nHoverCounterDelta = -10;
+ else if(nHoverCounter < 100)
+ nHoverCounterDelta = 10;
+ UI.nHoverColor = (nHoverCounter<<24)|(nHoverCounter<<16)|(nHoverCounter<<8)|nHoverCounter;
+ uint32_t nHoverCounterShared = nHoverCounter>>2;
+ UI.nHoverColorShared = (nHoverCounterShared<<24)|(nHoverCounterShared<<16)|(nHoverCounterShared<<8)|nHoverCounterShared;
+
+ uint32_t nLinesDrawn[MICROPROFILE_STACK_MAX]={0};
+
+ uint32_t nContextSwitchHoverThreadAfter = S.nContextSwitchHoverThreadAfter;
+ uint32_t nContextSwitchHoverThreadBefore = S.nContextSwitchHoverThreadBefore;
+ S.nContextSwitchHoverThread = S.nContextSwitchHoverThreadAfter = S.nContextSwitchHoverThreadBefore = -1;
+
+ uint32_t nContextSwitchStart = -1;
+ uint32_t nContextSwitchEnd = -1;
+ S.nContextSwitchHoverCpuNext = 0xff;
+ S.nContextSwitchHoverTickIn = -1;
+ S.nContextSwitchHoverTickOut = -1;
+ if(S.bContextSwitchRunning)
+ {
+ MicroProfileContextSwitchSearch(&nContextSwitchStart, &nContextSwitchEnd, nBaseTicksCpu, nBaseTicksEndCpu);
+ }
+
+ bool bSkipBarView = S.bContextSwitchRunning && S.bContextSwitchNoBars;
+
+ if(!bSkipBarView)
+ {
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
+ {
+ MicroProfileThreadLog* pLog = S.Pool[i];
+ if(!pLog)
+ continue;
+
+ uint32_t nPut = pFrameNext->nLogStart[i];
+ ///note: this may display new samples as old data, but this will only happen when
+ // unpaused, where the detailed view is hardly perceptible
+ uint32_t nFront = S.Pool[i]->nPut.load(std::memory_order_relaxed);
+ MicroProfileFrameState* pFrameLogFirst = pFrameCurrent;
+ MicroProfileFrameState* pFrameLogLast = pFrameNext;
+ uint32_t nGet = pFrameLogFirst->nLogStart[i];
+ do
+ {
+ MP_ASSERT(pFrameLogFirst >= &S.Frames[0] && pFrameLogFirst < &S.Frames[MICROPROFILE_MAX_FRAME_HISTORY]);
+ uint32_t nNewGet = pFrameLogFirst->nLogStart[i];
+ bool bIsValid = false;
+ if(nPut < nFront)
+ {
+ bIsValid = nNewGet <= nPut || nNewGet >= nFront;
+ }
+ else
+ {
+ bIsValid = nNewGet <= nPut && nNewGet >= nFront;
+ }
+ if(bIsValid)
+ {
+ nGet = nNewGet;
+ pFrameLogFirst--;
+ if(pFrameLogFirst < &S.Frames[0])
+ pFrameLogFirst = &S.Frames[MICROPROFILE_MAX_FRAME_HISTORY-1];
+ }
+ else
+ {
+ break;
+ }
+ }while(pFrameLogFirst != pFrameFirst);
+
+
+ if(nGet == (uint32_t)-1)
+ continue;
+ MP_ASSERT(nGet != (uint32_t)-1);
+
+ nPut = pFrameLogLast->nLogStart[i];
+
+ uint32_t nRange[2][2] = { {0, 0}, {0, 0}, };
+
+ MicroProfileGetRange(nPut, nGet, nRange);
+ if(nPut == nGet)
+ continue;
+ uint32_t nMaxStackDepth = 0;
+
+ bool bGpu = pLog->nGpu != 0;
+ float fToMs = bGpu ? fToMsGpu : fToMsCpu;
+ int64_t nBaseTicks = bGpu ? nBaseTicksGpu : nBaseTicksCpu;
+ char ThreadName[MicroProfileThreadLog::THREAD_MAX_LEN + 16];
+ uint64_t nThreadId = pLog->nThreadId;
+ snprintf(ThreadName, sizeof(ThreadName)-1, "%04llx: %s", nThreadId, &pLog->ThreadName[0] );
+ nY += 3;
+ uint32_t nThreadColor = -1;
+ if(pLog->nThreadId == nContextSwitchHoverThreadAfter || pLog->nThreadId == nContextSwitchHoverThreadBefore)
+ nThreadColor = UI.nHoverColorShared|0x906060;
+ MicroProfileDrawText(0, nY, nThreadColor, &ThreadName[0], (uint32_t)strlen(&ThreadName[0]));
+ nY += 3;
+ nY += MICROPROFILE_TEXT_HEIGHT + 1;
+
+ if(S.bContextSwitchRunning)
+ {
+ MicroProfileDrawDetailedContextSwitchBars(nY, pLog->nThreadId, nContextSwitchStart, nContextSwitchEnd, nBaseTicks, nBaseY);
+ nY -= MICROPROFILE_DETAILED_BAR_HEIGHT;
+ nY += MICROPROFILE_DETAILED_CONTEXT_SWITCH_HEIGHT+1;
+ }
+
+ uint32_t nYDelta = MICROPROFILE_DETAILED_BAR_HEIGHT;
+ uint32_t nStack[MICROPROFILE_STACK_MAX];
+ uint32_t nStackPos = 0;
+ for(uint32_t j = 0; j < 2; ++j)
+ {
+ uint32_t nStart = nRange[j][0];
+ uint32_t nEnd = nRange[j][1];
+ for(uint32_t k = nStart; k < nEnd; ++k)
+ {
+ MicroProfileLogEntry* pEntry = pLog->Log + k;
+ int nType = MicroProfileLogType(*pEntry);
+ if(MP_LOG_ENTER == nType)
+ {
+ MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX);
+ nStack[nStackPos++] = k;
+ }
+ else if(MP_LOG_META == nType)
+ {
+
+ }
+ else if(MP_LOG_LEAVE == nType)
+ {
+ if(0 == nStackPos)
+ {
+ continue;
+ }
+
+ MicroProfileLogEntry* pEntryEnter = pLog->Log + nStack[nStackPos-1];
+ if(MicroProfileLogTimerIndex(*pEntryEnter) != MicroProfileLogTimerIndex(*pEntry))
+ {
+ //uprintf("mismatch %llx %llx\n", pEntryEnter->nToken, pEntry->nToken);
+ continue;
+ }
+ int64_t nTickStart = MicroProfileLogGetTick(*pEntryEnter);
+ int64_t nTickEnd = MicroProfileLogGetTick(*pEntry);
+ uint64_t nTimerIndex = MicroProfileLogTimerIndex(*pEntry);
+ uint32_t nColor = S.TimerInfo[nTimerIndex].nColor;
+ if(nMouseOverToken == nTimerIndex)
+ {
+ if(pEntry == pMouseOver)
+ {
+ nColor = UI.nHoverColor;
+ if(bGpu)
+ {
+ UI.nRangeBeginGpu = *pEntryEnter;
+ UI.nRangeEndGpu = *pEntry;
+ uint32_t nCpuBegin = (nStack[nStackPos-1] + 1) % MICROPROFILE_BUFFER_SIZE;
+ uint32_t nCpuEnd = (k + 1) % MICROPROFILE_BUFFER_SIZE;
+ MicroProfileLogEntry LogCpuBegin = pLog->Log[nCpuBegin];
+ MicroProfileLogEntry LogCpuEnd = pLog->Log[nCpuEnd];
+ if(MicroProfileLogType(LogCpuBegin)==3 && MicroProfileLogType(LogCpuEnd) == 3)
+ {
+ UI.nRangeBegin = LogCpuBegin;
+ UI.nRangeEnd = LogCpuEnd;
+ }
+ UI.nRangeBeginIndex = nStack[nStackPos-1];
+ UI.nRangeEndIndex = k;
+ UI.pRangeLog = pLog;
+ }
+ else
+ {
+ UI.nRangeBegin = *pEntryEnter;
+ UI.nRangeEnd = *pEntry;
+ UI.nRangeBeginIndex = nStack[nStackPos-1];
+ UI.nRangeEndIndex = k;
+ UI.pRangeLog = pLog;
+
+ }
+ }
+ else
+ {
+ nColor = UI.nHoverColorShared;
+ }
+ }
+
+ nMaxStackDepth = MicroProfileMax(nMaxStackDepth, nStackPos);
+ float fMsStart = fToMs * MicroProfileLogTickDifference(nBaseTicks, nTickStart);
+ float fMsEnd = fToMs * MicroProfileLogTickDifference(nBaseTicks, nTickEnd);
+ float fXStart = fMsStart * fMsToScreen;
+ float fXEnd = fMsEnd * fMsToScreen;
+ float fYStart = (float)(nY + nStackPos * nYDelta);
+ float fYEnd = fYStart + (MICROPROFILE_DETAILED_BAR_HEIGHT);
+ float fXDist = MicroProfileMax(fXStart - fMouseX, fMouseX - fXEnd);
+ bool bHover = fXDist < MICROPROFILE_HOVER_DIST && fYStart <= fMouseY && fMouseY <= fYEnd && nBaseY < fMouseY;
+ uint32_t nIntegerWidth = (uint32_t)(fXEnd - fXStart);
+ if(nIntegerWidth)
+ {
+ if(bHover && UI.nActiveMenu == -1)
+ {
+ nHoverToken = MicroProfileLogTimerIndex(*pEntry);
+ #if MICROPROFILE_DEBUG
+ UI.nHoverAddressEnter = (uint64_t)pEntryEnter;
+ UI.nHoverAddressLeave = (uint64_t)pEntry;
+ #endif
+ nHoverTime = MicroProfileLogTickDifference(nTickStart, nTickEnd);
+ pMouseOverNext = pEntry;
+ }
+
+ MicroProfileDrawBox(fXStart, fYStart, fXEnd, fYEnd, nColor|UI.nOpacityForeground, MicroProfileBoxTypeBar);
+#if MICROPROFILE_DETAILED_BAR_NAMES
+ if(nIntegerWidth>3*MICROPROFILE_TEXT_WIDTH)
+ {
+ float fXStartText = MicroProfileMax(fXStart, 0.f);
+ int nTextWidth = (int)(fXEnd - fXStartText);
+ int nCharacters = (nTextWidth - 2*MICROPROFILE_TEXT_WIDTH) / MICROPROFILE_TEXT_WIDTH;
+ if(nCharacters>0)
+ {
+ MicroProfileDrawText(fXStartText+1, fYStart+1, -1, S.TimerInfo[nTimerIndex].pName, MicroProfileMin<uint32_t>(S.TimerInfo[nTimerIndex].nNameLen, nCharacters));
+ }
+ }
+#endif
+ ++nNumBoxes;
+ }
+ else
+ {
+ float fXAvg = 0.5f * (fXStart + fXEnd);
+ int nLineX = (int)floor(fXAvg+0.5f);
+ if(nLineX != (int)nLinesDrawn[nStackPos])
+ {
+ if(bHover && UI.nActiveMenu == -1)
+ {
+ nHoverToken = (uint32_t)MicroProfileLogTimerIndex(*pEntry);
+ nHoverTime = MicroProfileLogTickDifference(nTickStart, nTickEnd);
+ pMouseOverNext = pEntry;
+ }
+ nLinesDrawn[nStackPos] = nLineX;
+ MicroProfileDrawLineVertical(nLineX, fYStart + 0.5f, fYEnd + 0.5f, nColor|UI.nOpacityForeground);
+ ++nNumLines;
+ }
+ }
+ nStackPos--;
+ if(0 == nStackPos)
+ {
+ if(bGpu ? (nTickStart > nBaseTicksEndGpu) : (nTickStart > nBaseTicksEndCpu))
+ {
+ break;
+ }
+ }
+ }
+ }
+ }
+ nY += nMaxStackDepth * nYDelta + MICROPROFILE_DETAILED_BAR_HEIGHT+1;
+ }
+ }
+ if(S.bContextSwitchRunning && (S.bContextSwitchAllThreads||S.bContextSwitchNoBars))
+ {
+ uint32_t nNumThreads = 0;
+ uint32_t nThreads[MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS];
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS && S.Pool[i]; ++i)
+ nThreads[nNumThreads++] = S.Pool[i]->nThreadId;
+ uint32_t nNumThreadsBase = nNumThreads;
+ if(S.bContextSwitchAllThreads)
+ {
+ for(uint32_t i = nContextSwitchStart; i != nContextSwitchEnd; i = (i+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
+ {
+ MicroProfileContextSwitch CS = S.ContextSwitch[i];
+ ThreadIdType nThreadId = CS.nThreadIn;
+ if(nThreadId)
+ {
+ bool bSeen = false;
+ for(uint32_t j = 0; j < nNumThreads; ++j)
+ {
+ if(nThreads[j] == nThreadId)
+ {
+ bSeen = true;
+ break;
+ }
+ }
+ if(!bSeen)
+ {
+ nThreads[nNumThreads++] = nThreadId;
+ }
+ }
+ if(nNumThreads == MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS)
+ {
+ S.nOverflow = 10;
+ break;
+ }
+ }
+ std::sort(&nThreads[nNumThreadsBase], &nThreads[nNumThreads]);
+ }
+ uint32_t nStart = nNumThreadsBase;
+ if(S.bContextSwitchNoBars)
+ nStart = 0;
+ for(uint32_t i = nStart; i < nNumThreads; ++i)
+ {
+ ThreadIdType nThreadId = nThreads[i];
+ if(nThreadId)
+ {
+ char ThreadName[MicroProfileThreadLog::THREAD_MAX_LEN + 16];
+ const char* cLocal = MicroProfileIsLocalThread(nThreadId) ? "*": " ";
+
+ int nStrLen = snprintf(ThreadName, sizeof(ThreadName)-1, "%04x: %s%s", nThreadId, cLocal, i < nNumThreadsBase ? &S.Pool[i]->ThreadName[0] : MICROPROFILE_THREAD_NAME_FROM_ID(nThreadId) );
+ uint32_t nThreadColor = -1;
+ if(nThreadId == nContextSwitchHoverThreadAfter || nThreadId == nContextSwitchHoverThreadBefore)
+ nThreadColor = UI.nHoverColorShared|0x906060;
+ MicroProfileDrawDetailedContextSwitchBars(nY+2, nThreadId, nContextSwitchStart, nContextSwitchEnd, nBaseTicksCpu, nBaseY);
+ MicroProfileDrawText(0, nY, nThreadColor, &ThreadName[0], nStrLen);
+ nY += MICROPROFILE_TEXT_HEIGHT+1;
+ }
+ }
+ }
+
+ S.nContextSwitchHoverCpu = S.nContextSwitchHoverCpuNext;
+
+
+
+
+ UI.pDisplayMouseOver = pMouseOverNext;
+
+ if(!S.nRunning)
+ {
+ if(nHoverToken != MICROPROFILE_INVALID_TOKEN && nHoverTime)
+ {
+ UI.nHoverToken = nHoverToken;
+ UI.nHoverTime = nHoverTime;
+ }
+
+ if(nSelectedFrame != -1)
+ {
+ UI.nRangeBegin = S.Frames[nSelectedFrame].nFrameStartCpu;
+ UI.nRangeEnd = S.Frames[(nSelectedFrame+1)%MICROPROFILE_MAX_FRAME_HISTORY].nFrameStartCpu;
+ UI.nRangeBeginGpu = S.Frames[nSelectedFrame].nFrameStartGpu;
+ UI.nRangeEndGpu = S.Frames[(nSelectedFrame+1)%MICROPROFILE_MAX_FRAME_HISTORY].nFrameStartGpu;
+ }
+ if(UI.nRangeBegin != UI.nRangeEnd)
+ {
+ float fMsStart = fToMsCpu * MicroProfileLogTickDifference(nBaseTicksCpu, UI.nRangeBegin);
+ float fMsEnd = fToMsCpu * MicroProfileLogTickDifference(nBaseTicksCpu, UI.nRangeEnd);
+ float fXStart = fMsStart * fMsToScreen;
+ float fXEnd = fMsEnd * fMsToScreen;
+ MicroProfileDrawBox(fXStart, nBaseY, fXEnd, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT, MicroProfileBoxTypeFlat);
+ MicroProfileDrawLineVertical(fXStart, nBaseY, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT | 0x44000000);
+ MicroProfileDrawLineVertical(fXEnd, nBaseY, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT | 0x44000000);
+
+ fMsStart += fDetailedOffset;
+ fMsEnd += fDetailedOffset;
+ char sBuffer[32];
+ uint32_t nLenStart = snprintf(sBuffer, sizeof(sBuffer)-1, "%.2fms", fMsStart);
+ float fStartTextWidth = (float)((1+MICROPROFILE_TEXT_WIDTH) * nLenStart);
+ float fStartTextX = fXStart - fStartTextWidth - 2;
+ MicroProfileDrawBox(fStartTextX, nBaseY, fStartTextX + fStartTextWidth + 2, MICROPROFILE_TEXT_HEIGHT + 2 + nBaseY, 0x33000000, MicroProfileBoxTypeFlat);
+ MicroProfileDrawText(fStartTextX+1, nBaseY, (uint32_t)-1, sBuffer, nLenStart);
+ uint32_t nLenEnd = snprintf(sBuffer, sizeof(sBuffer)-1, "%.2fms", fMsEnd);
+ MicroProfileDrawBox(fXEnd+1, nBaseY, fXEnd+1+(1+MICROPROFILE_TEXT_WIDTH) * nLenEnd + 3, MICROPROFILE_TEXT_HEIGHT + 2 + nBaseY, 0x33000000, MicroProfileBoxTypeFlat);
+ MicroProfileDrawText(fXEnd+2, nBaseY+1, (uint32_t)-1, sBuffer, nLenEnd);
+
+ if(UI.nMouseRight)
+ {
+ MicroProfileZoomTo(UI.nRangeBegin, UI.nRangeEnd);
+ }
+ }
+
+ if(UI.nRangeBeginGpu != UI.nRangeEndGpu)
+ {
+ float fMsStart = fToMsGpu * MicroProfileLogTickDifference(nBaseTicksGpu, UI.nRangeBeginGpu);
+ float fMsEnd = fToMsGpu * MicroProfileLogTickDifference(nBaseTicksGpu, UI.nRangeEndGpu);
+ float fXStart = fMsStart * fMsToScreen;
+ float fXEnd = fMsEnd * fMsToScreen;
+ MicroProfileDrawBox(fXStart, nBaseY, fXEnd, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT_GPU, MicroProfileBoxTypeFlat);
+ MicroProfileDrawLineVertical(fXStart, nBaseY, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT_GPU | 0x44000000);
+ MicroProfileDrawLineVertical(fXEnd, nBaseY, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT_GPU | 0x44000000);
+
+ nBaseY += MICROPROFILE_TEXT_HEIGHT+1;
+
+ fMsStart += fDetailedOffset;
+ fMsEnd += fDetailedOffset;
+ char sBuffer[32];
+ uint32_t nLenStart = snprintf(sBuffer, sizeof(sBuffer)-1, "%.2fms", fMsStart);
+ float fStartTextWidth = (float)((1+MICROPROFILE_TEXT_WIDTH) * nLenStart);
+ float fStartTextX = fXStart - fStartTextWidth - 2;
+ MicroProfileDrawBox(fStartTextX, nBaseY, fStartTextX + fStartTextWidth + 2, MICROPROFILE_TEXT_HEIGHT + 2 + nBaseY, 0x33000000, MicroProfileBoxTypeFlat);
+ MicroProfileDrawText(fStartTextX+1, nBaseY, (uint32_t)-1, sBuffer, nLenStart);
+ uint32_t nLenEnd = snprintf(sBuffer, sizeof(sBuffer)-1, "%.2fms", fMsEnd);
+ MicroProfileDrawBox(fXEnd+1, nBaseY, fXEnd+1+(1+MICROPROFILE_TEXT_WIDTH) * nLenEnd + 3, MICROPROFILE_TEXT_HEIGHT + 2 + nBaseY, 0x33000000, MicroProfileBoxTypeFlat);
+ MicroProfileDrawText(fXEnd+2, nBaseY+1, (uint32_t)-1, sBuffer, nLenEnd);
+ }
+ }
+}
+
+
+// Draws the frame-history strip: a flat background box, one bar per recorded frame
+// (the newest frame occupies the right-most slot, older frames follow to the left),
+// and finally a highlight box spanning the bars selected by the detailed view's
+// current offset/range (UI.fDetailedOffset / UI.fDetailedRange).
+//   nWidth         - strip width; split evenly between MICROPROFILE_NUM_FRAMES bars
+//   nHeight        - not used by this function
+//   nBaseY         - y coordinate of the top of the strip
+//   nSelectedFrame - index into S.Frames of the bar to draw with colour 0xffffffff
+void MicroProfileDrawDetailedFrameHistory(uint32_t nWidth, uint32_t nHeight, uint32_t nBaseY, uint32_t nSelectedFrame)
+{
+    MicroProfile& S = *MicroProfileGet();
+
+    const uint32_t nStripHeight = MICROPROFILE_FRAME_HISTORY_HEIGHT;
+    const float fBarWidth = (float)nWidth / MICROPROFILE_NUM_FRAMES;
+
+    MicroProfileDrawBox(0, nBaseY, nWidth, nBaseY+MICROPROFILE_FRAME_HISTORY_HEIGHT, 0xff000000 | g_nMicroProfileBackColors[0], MicroProfileBoxTypeFlat);
+
+    // Tick -> fraction-of-reference-time multipliers.
+    const float fCpuTickToFraction = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * S.fRcpReferenceTime;
+    const float fGpuTickToFraction = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu()) * S.fRcpReferenceTime;
+
+    // Tick range currently shown by the detailed view, relative to the newest frame.
+    const uint64_t nNewestFrameStart = S.Frames[S.nFrameCurrent].nFrameStartCpu;
+    const int64_t nOffsetTicks = MicroProfileMsToTick(UI.fDetailedOffset, MicroProfileTicksPerSecondCpu());
+    const int64_t nVisibleBegin = nOffsetTicks + nNewestFrameStart;
+    const int64_t nVisibleEnd = nVisibleBegin + MicroProfileMsToTick(UI.fDetailedRange, MicroProfileTicksPerSecondCpu());
+
+    float fHighlightLeft = (float)nWidth;
+    float fHighlightRight = 0.f;
+    float fCursorX = (float)nWidth;
+    uint32_t nNewerIndex = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY;
+    for(uint32_t nAge = 0; nAge < MICROPROFILE_NUM_FRAMES; ++nAge)
+    {
+        const uint32_t nFrameIndex = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nAge) % MICROPROFILE_MAX_FRAME_HISTORY;
+        MicroProfileFrameState* pFrame = &S.Frames[nFrameIndex];
+        MicroProfileFrameState* pNewer = &S.Frames[nNewerIndex];
+
+        // A frame's duration is the distance to the start of the frame after it.
+        const int64_t nCpuTicks = pNewer->nFrameStartCpu - pFrame->nFrameStartCpu;
+        const int64_t nGpuTicks = pNewer->nFrameStartGpu - pFrame->nFrameStartGpu;
+
+        // Inset of the bar top from the strip top: 0 for frames longer than the
+        // reference time, otherwise the unused fraction of the reference time.
+        float fCpuInset = fCpuTickToFraction * nCpuTicks;
+        if(fCpuInset > 1.f)
+            fCpuInset = 0.f;
+        else
+            fCpuInset = 1.f - fCpuInset;
+
+        // The GPU inset is computed the same way but is not drawn.
+        float fGpuInset = fGpuTickToFraction * nGpuTicks;
+        if(fGpuInset > 1.f)
+            fGpuInset = 0.f;
+        else
+            fGpuInset = 1.f - fGpuInset;
+
+        const float fBarRight = fCursorX;
+        const float fBarLeft = fCursorX - fBarWidth;
+        fCursorX = fBarLeft;
+
+        const uint32_t nBarColor = (nFrameIndex == nSelectedFrame) ? (uint32_t)-1 : MICROPROFILE_FRAME_HISTORY_COLOR_CPU;
+        MicroProfileDrawBox(fBarLeft, nBaseY + fCpuInset * nStripHeight, fBarRight, nBaseY+MICROPROFILE_FRAME_HISTORY_HEIGHT, nBarColor, MicroProfileBoxTypeBar);
+
+        // The left edge keeps moving left while the following frame still starts
+        // after the visible range begins; the right edge is latched once, at the
+        // first (right-most) bar whose frame starts before the visible range ends.
+        if(pNewer->nFrameStartCpu > nVisibleBegin)
+        {
+            fHighlightLeft = fBarLeft;
+        }
+        if(fHighlightRight == 0.f && pFrame->nFrameStartCpu < nVisibleEnd)
+        {
+            fHighlightRight = fBarRight;
+        }
+        nNewerIndex = nFrameIndex;
+    }
+    MicroProfileDrawBox(fHighlightLeft, nBaseY, fHighlightRight, nBaseY+MICROPROFILE_FRAME_HISTORY_HEIGHT, MICROPROFILE_FRAME_HISTORY_COLOR_HIGHTLIGHT, MicroProfileBoxTypeFlat);
+}
+// Entry point of the detailed view. Works out which frame-history bar (if any) the
+// mouse is over, applies the mouse actions bound to that bar, then draws the detailed
+// bars followed by the frame-history strip.
+//   - right mouse button: MicroProfileZoomTo the hovered frame's tick range
+//   - left mouse button down: MicroProfileCenter on the tick inside the hovered frame
+//     that corresponds to the mouse's horizontal position within the bar
+void MicroProfileDrawDetailedView(uint32_t nWidth, uint32_t nHeight)
+{
+    MicroProfile& S = *MicroProfileGet();
+
+    MICROPROFILE_SCOPE(g_MicroProfileDetailed);
+    uint32_t nHistoryTop = MICROPROFILE_TEXT_HEIGHT + 1;
+
+    int nSelectedFrame = -1;
+    const bool bOverHistory = UI.nMouseY > nHistoryTop && UI.nMouseY <= nHistoryTop + MICROPROFILE_FRAME_HISTORY_HEIGHT && UI.nActiveMenu == -1;
+    if(!bOverHistory)
+    {
+        UI.nHoverFrame = -1;
+    }
+    else
+    {
+        // Distance of the mouse from the right edge, in bars, is the number of
+        // frames to step back from the current frame.
+        const int nFramesBack = ((MICROPROFILE_NUM_FRAMES) * (UI.nWidth-UI.nMouseX) / UI.nWidth);
+        nSelectedFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nFramesBack) % MICROPROFILE_MAX_FRAME_HISTORY;
+        UI.nHoverFrame = nSelectedFrame;
+
+        if(UI.nMouseRight)
+        {
+            const int64_t nFrameBegin = S.Frames[nSelectedFrame].nFrameStartCpu;
+            const int64_t nFrameEnd = S.Frames[(nSelectedFrame+1)%MICROPROFILE_MAX_FRAME_HISTORY].nFrameStartCpu;
+            MicroProfileZoomTo(nFrameBegin, nFrameEnd);
+        }
+
+        if(UI.nMouseDownLeft)
+        {
+            // Position within the hovered bar as a 10-bit fixed-point fraction.
+            const uint64_t nFraction = (1024 * (MICROPROFILE_NUM_FRAMES) * (UI.nMouseX) / UI.nWidth) % 1024;
+            const int64_t nFrameBegin = S.Frames[nSelectedFrame].nFrameStartCpu;
+            const int64_t nFrameEnd = S.Frames[(nSelectedFrame+1)%MICROPROFILE_MAX_FRAME_HISTORY].nFrameStartCpu;
+            MicroProfileCenter(nFrameBegin + (nFrameEnd-nFrameBegin) * nFraction / 1024);
+        }
+    }
+
+    MicroProfileDrawDetailedBars(nWidth, nHeight, nHistoryTop + MICROPROFILE_FRAME_HISTORY_HEIGHT, nSelectedFrame);
+    MicroProfileDrawDetailedFrameHistory(nWidth, nHeight, nHistoryTop, nSelectedFrame);
+}
+
+// Draws pStr so that it ends at nX: the start position is shifted left by nStrLen
+// character cells, each (MICROPROFILE_TEXT_WIDTH+1) wide.
+void MicroProfileDrawTextRight(uint32_t nX, uint32_t nY, uint32_t nColor, const char* pStr, uint32_t nStrLen)
+{
+    const auto nTextExtent = nStrLen * (MICROPROFILE_TEXT_WIDTH+1);
+    const auto nLeftX = nX - nTextExtent;
+    MicroProfileDrawText(nLeftX, nY, nColor, pStr, nStrLen);
+}
+// Draws a column header on the row at y = MICROPROFILE_TEXT_HEIGHT + 2: a background
+// box reaching from nX-8 to nX+nWidth+5, with pName drawn at nX on top of it.
+// Does nothing when pName is null.
+void MicroProfileDrawHeader(int32_t nX, uint32_t nWidth, const char* pName)
+{
+    if(!pName)
+        return;
+
+    const uint32_t nNameLen = (uint32_t)strlen(pName);
+    MicroProfileDrawBox(nX-8, MICROPROFILE_TEXT_HEIGHT + 2, nX + nWidth+5, MICROPROFILE_TEXT_HEIGHT + 2 + (MICROPROFILE_TEXT_HEIGHT+1), 0xff000000|g_nMicroProfileBackColors[1]);
+    MicroProfileDrawText(nX, MICROPROFILE_TEXT_HEIGHT + 2, (uint32_t)-1, pName, nNameLen);
+}
+
+
+typedef void (*MicroProfileLoopGroupCallback)(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pData); // Row callback for MicroProfileLoopActiveGroupsDraw: timer index, running slot index (advances by 2 per timer), the bit of the group being walked, row x/y, and the caller's pData.
+
+// Walks the timers of every wanted group, group by group, and calls CB once per timer
+// with the screen position of that timer's row.
+//   nX, nY - position forwarded to CB; the first row starts MICROPROFILE_TEXT_HEIGHT + 2
+//            below nY, and every group and every timer advances y by
+//            MICROPROFILE_TEXT_HEIGHT + 1
+//   pName  - not used by this function
+//   CB     - row callback; its nIdx argument starts at 0 and grows by 2 per timer
+//   pData  - forwarded unchanged to CB
+// Rows whose y is negative are not passed to CB but still consume a slot index.
+// The walk stops as soon as y moves past UI.nHeight.
+void MicroProfileLoopActiveGroupsDraw(int32_t nX, int32_t nY, const char* pName, MicroProfileLoopGroupCallback CB, void* pData)
+{
+    MicroProfile& S = *MicroProfileGet();
+    nY += MICROPROFILE_TEXT_HEIGHT + 2;
+    const uint64_t nWantedGroups = S.nAllGroupsWanted ? S.nGroupMask : S.nActiveGroupWanted;
+    uint32_t nSlot = 0;
+    for(uint32_t nGroupIndex = 0; nGroupIndex < MICROPROFILE_MAX_GROUPS; ++nGroupIndex)
+    {
+        const uint64_t nGroupBit = 1ll << nGroupIndex;
+        if(!(nGroupBit & nWantedGroups))
+            continue;
+
+        // One row is left free for the group itself.
+        nY += MICROPROFILE_TEXT_HEIGHT + 1;
+        for(uint32_t nTimer = 0; nTimer < S.nTotalTimers; ++nTimer)
+        {
+            const uint64_t nTimerGroups = MicroProfileGetGroupMask(S.TimerInfo[nTimer].nToken);
+            if(!(nTimerGroups & nGroupBit))
+                continue;
+
+            if(nY >= 0)
+                CB(nTimer, nSlot, nGroupBit, nX, nY, pData);
+
+            nSlot += 2;
+            nY += MICROPROFILE_TEXT_HEIGHT + 1;
+
+            if(nY > (int)UI.nHeight)
+                return;
+        }
+    }
+}
+
+
+// Fills seven parallel output arrays with statistics for every timer whose group bit is
+// set in nGroup. Each timer receives two consecutive floats per array: [slot] is a tick
+// count scaled by MicroProfileTickToMsMultiplier (CPU or GPU rate, chosen per group) and
+// [slot+1] is that value multiplied by S.fRcpReferenceTime, capped at 1. Slots are handed
+// out in iteration order (group by group, then by timer index), two per timer.
+//   pTimers / pExclusive         - current frame: S.Frame[].nTicks / S.FrameExclusive[]
+//   pAverage / pAverageExclusive - aggregate ticks divided by S.nAggregateFrames (or 1 if zero)
+//   pMax / pMaxExclusive         - S.AggregateMax[] / S.AggregateMaxExclusive[]
+//   pCallAverage                 - aggregate ticks divided by the aggregate call count (or 1 if zero)
+//   nSize                        - not read here; no bounds check is performed on the arrays
+void MicroProfileCalcTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, uint64_t nGroup, uint32_t nSize)
+{
+    MicroProfile& S = *MicroProfileGet();
+
+    uint32_t nSlot = 0;
+    uint64_t nGroupBit = 1;
+
+    for(uint32_t nGroupIndex = 0; nGroupIndex < MICROPROFILE_MAX_GROUPS; ++nGroupIndex, nGroupBit <<= 1ll)
+    {
+        if(!(nGroupBit & nGroup))
+            continue;
+
+        const float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupIndex].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
+        for(uint32_t nTimer = 0; nTimer < S.nTotalTimers; ++nTimer)
+        {
+            const uint64_t nTimerGroups = MicroProfileGetGroupMask(S.TimerInfo[nTimer].nToken);
+            if(!(nTimerGroups & nGroupBit))
+                continue;
+
+            // Divisors fall back to 1 so an empty aggregate cannot divide by zero.
+            const uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
+            const uint32_t nAggregateCount = S.Aggregate[nTimer].nCount ? S.Aggregate[nTimer].nCount : 1;
+            const float fToPrc = S.fRcpReferenceTime;
+
+            const float fFrameMs = fToMs * (S.Frame[nTimer].nTicks);
+            pTimers[nSlot] = fFrameMs;
+            pTimers[nSlot+1] = MicroProfileMin(fFrameMs * fToPrc, 1.f);
+
+            const float fAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateFrames);
+            pAverage[nSlot] = fAverageMs;
+            pAverage[nSlot+1] = MicroProfileMin(fAverageMs * fToPrc, 1.f);
+
+            const float fMaxMs = fToMs * (S.AggregateMax[nTimer]);
+            pMax[nSlot] = fMaxMs;
+            pMax[nSlot+1] = MicroProfileMin(fMaxMs * fToPrc, 1.f);
+
+            const float fCallAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateCount);
+            pCallAverage[nSlot] = fCallAverageMs;
+            pCallAverage[nSlot+1] = MicroProfileMin(fCallAverageMs * fToPrc, 1.f);
+
+            const float fExclusiveMs = fToMs * (S.FrameExclusive[nTimer]);
+            pExclusive[nSlot] = fExclusiveMs;
+            pExclusive[nSlot+1] = MicroProfileMin(fExclusiveMs * fToPrc, 1.f);
+
+            const float fAverageExclusiveMs = fToMs * (S.AggregateExclusive[nTimer] / nAggregateFrames);
+            pAverageExclusive[nSlot] = fAverageExclusiveMs;
+            pAverageExclusive[nSlot+1] = MicroProfileMin(fAverageExclusiveMs * fToPrc, 1.f);
+
+            const float fMaxExclusiveMs = fToMs * (S.AggregateMaxExclusive[nTimer]);
+            pMaxExclusive[nSlot] = fMaxExclusiveMs;
+            pMaxExclusive[nSlot+1] = MicroProfileMin(fMaxExclusiveMs * fToPrc, 1.f);
+
+            nSlot += 2;
+        }
+    }
+}
+
+#define SBUF_MAX 32 // Size in bytes of the stack text buffers (sBuffer) that the bar-view callbacks below format into.
+
+// MicroProfileLoopActiveGroupsDraw callback that prints one timer value and, when no
+// comparison array was supplied, draws a bar next to it.
+//   pExtra - float*[2]: [0] is the array of values to show, [1] an optional comparison
+//            array (may be null). Element nIdx is the number printed; element nIdx+1
+//            of array [0] scales the bar width (multiplied by MICROPROFILE_BAR_WIDTH).
+// When the comparison value at nIdx is above 0.1, the ratio value/comparison is
+// appended to the text as "<ratio>x".
+void MicroProfileDrawBarArrayCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
+{
+    float** ppArrays = (float**)pExtra;
+    float* pValues = ppArrays[0];
+    float* pCompare = ppArrays[1];
+    MicroProfile& S = *MicroProfileGet();
+
+    char sText[SBUF_MAX];
+    const bool bShowRatio = pCompare && pCompare[nIdx] > 0.1f;
+    if(bShowRatio)
+    {
+        snprintf(sText, SBUF_MAX-1, "%5.2f %3.1fx", pValues[nIdx], pValues[nIdx] / pCompare[nIdx]);
+    }
+    else
+    {
+        snprintf(sText, SBUF_MAX-1, "%5.2f", pValues[nIdx]);
+    }
+
+    if(!pCompare)
+    {
+        // The bar starts right of a fixed-width, six-character text column.
+        const uint32_t nRowHeight = MICROPROFILE_TEXT_HEIGHT;
+        const uint32_t nTextWidth = 6 * (1+MICROPROFILE_TEXT_WIDTH);
+        const float fFullBarWidth = (float)MICROPROFILE_BAR_WIDTH;
+        MicroProfileDrawBox(nX + nTextWidth, nY, nX + nTextWidth + fFullBarWidth * pValues[nIdx+1], nY + nRowHeight, UI.nOpacityForeground|S.TimerInfo[nTimer].nColor, MicroProfileBoxTypeBar);
+    }
+    MicroProfileDrawText(nX, nY, (uint32_t)-1, sText, (uint32_t)strlen(sText));
+}
+
+
+// Draws one value column of the bar view: a vertical separator line at nX-5, one row
+// per active timer (via MicroProfileDrawBarArrayCallback) and the column header pName.
+//   pTimers  - values to display, two floats per timer slot
+//   pTimers2 - optional comparison values forwarded to the row callback; may be NULL
+// Returns the horizontal space taken by the column: text width + bar width + 5.
+uint32_t MicroProfileDrawBarArray(int32_t nX, int32_t nY, float* pTimers, const char* pName, uint32_t nTotalHeight, float* pTimers2 = NULL)
+{
+    const uint32_t nTextWidth = 6 * (1+MICROPROFILE_TEXT_WIDTH);
+    const uint32_t nBarWidth = MICROPROFILE_BAR_WIDTH;
+    const uint32_t nColumnWidth = nTextWidth + nBarWidth;
+
+    MicroProfileDrawLineVertical(nX-5, 0, nTotalHeight+nY, UI.nOpacityBackground|g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]);
+
+    float* ppArrays[2];
+    ppArrays[0] = pTimers;
+    ppArrays[1] = pTimers2;
+    MicroProfileLoopActiveGroupsDraw(nX, nY, pName, MicroProfileDrawBarArrayCallback, ppArrays);
+    MicroProfileDrawHeader(nX, nColumnWidth, pName);
+    return nColumnWidth + 5;
+}
+// MicroProfileLoopActiveGroupsDraw callback that prints the timer's call count for
+// the current frame (S.Frame[nTimer].nCount) at (nX, nY).
+void MicroProfileDrawBarCallCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
+{
+    MicroProfile* pState = MicroProfileGet();
+    char sText[SBUF_MAX];
+    const int nChars = snprintf(sText, SBUF_MAX-1, "%5d", pState->Frame[nTimer].nCount);
+    MicroProfileDrawText(nX, nY, (uint32_t)-1, sText, nChars);
+}
+
+// Draws the call-count column: one row per active timer followed by the header pName.
+// Returns the column width, 5 plus six characters of MICROPROFILE_TEXT_WIDTH.
+uint32_t MicroProfileDrawBarCallCount(int32_t nX, int32_t nY, const char* pName)
+{
+    MicroProfileLoopActiveGroupsDraw(nX, nY, pName, MicroProfileDrawBarCallCountCallback, 0);
+
+    const uint32_t nColumnWidth = 5 + 6 * MICROPROFILE_TEXT_WIDTH;
+    MicroProfileDrawHeader(nX, nColumnWidth, pName);
+    return nColumnWidth;
+}
+
+struct MicroProfileMetaAverageArgs // User data handed (as void*) to MicroProfileDrawBarMetaAverageCallback.
+{
+ uint64_t* pCounters; // Counters, indexed by timer index in the callback.
+ float fRcpFrames; // Multiplier applied to each counter; MicroProfileDrawBarMetaAverage sets it to 1 / aggregate frame count.
+};
+
+// MicroProfileLoopActiveGroupsDraw callback that prints a timer's counter multiplied by
+// the supplied reciprocal frame count, right-aligned so the text ends at nX.
+//   pExtra - points to a MicroProfileMetaAverageArgs
+void MicroProfileDrawBarMetaAverageCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
+{
+    const MicroProfileMetaAverageArgs* pArgs = (const MicroProfileMetaAverageArgs*)pExtra;
+
+    char sText[SBUF_MAX];
+    const int nChars = snprintf(sText, SBUF_MAX-1, "%5.2f", pArgs->pCounters[nTimer] * pArgs->fRcpFrames);
+    MicroProfileDrawText(nX - nChars * (MICROPROFILE_TEXT_WIDTH+1), nY, (uint32_t)-1, sText, nChars);
+}
+
+// Draws a meta-counter column showing each counter multiplied by 1 / aggregate frame
+// count: separator line at nX-5, one right-aligned row per active timer, then the
+// header pName. The column is as wide as the longer of six characters and pName.
+// Returns 0 without drawing when pName is null, otherwise the column width (text width + 5).
+uint32_t MicroProfileDrawBarMetaAverage(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight)
+{
+    if(!pName)
+        return 0;
+
+    const uint32_t nNameLen = (uint32_t)strlen(pName);
+    const uint32_t nTextWidth = (1+MICROPROFILE_TEXT_WIDTH) * MicroProfileMax<uint32_t>(6, nNameLen);
+    const uint32_t nColumnWidth = 5 + nTextWidth;
+
+    MicroProfileDrawLineVertical(nX-5, 0, nTotalHeight+nY, UI.nOpacityBackground|g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]);
+
+    MicroProfileMetaAverageArgs Args;
+    Args.pCounters = pCounters;
+    // A zero frame count falls back to 1 to avoid dividing by zero.
+    Args.fRcpFrames = 1.f / (MicroProfileGet()->nAggregateFrames ? MicroProfileGet()->nAggregateFrames : 1);
+
+    // Rows are right-aligned, so the callback receives the column's right edge.
+    MicroProfileLoopActiveGroupsDraw(nX + nTextWidth, nY, pName, MicroProfileDrawBarMetaAverageCallback, &Args);
+    MicroProfileDrawHeader(nX, nColumnWidth, pName);
+    return nColumnWidth;
+}
+
+
+// MicroProfileLoopActiveGroupsDraw callback that prints a timer's raw counter value,
+// right-aligned so the text ends at nX.
+//   pExtra - points to an array of uint64_t counters indexed by timer index
+// The counter is cast to unsigned long long so the argument type matches the %llu
+// conversion on platforms where uint64_t is a different type (e.g. unsigned long).
+void MicroProfileDrawBarMetaCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
+{
+    uint64_t* pCounters = (uint64_t*)pExtra;
+    char sBuffer[SBUF_MAX];
+    int nLen = snprintf(sBuffer, SBUF_MAX-1, "%5llu", (unsigned long long)pCounters[nTimer]);
+    MicroProfileDrawText(nX - nLen * (MICROPROFILE_TEXT_WIDTH+1), nY, (uint32_t)-1, sBuffer, nLen);
+}
+
+// Draws a meta-counter column showing raw counter values: separator line at nX-5, one
+// right-aligned row per active timer, then the header pName. The column is as wide as
+// the longer of six characters and pName.
+// Returns 0 without drawing when pName is null, otherwise the column width (text width + 5).
+uint32_t MicroProfileDrawBarMetaCount(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight)
+{
+    if(!pName)
+        return 0;
+
+    const uint32_t nNameLen = (uint32_t)strlen(pName);
+    const uint32_t nTextWidth = (1+MICROPROFILE_TEXT_WIDTH) * MicroProfileMax<uint32_t>(6, nNameLen);
+    const uint32_t nColumnWidth = 5 + nTextWidth;
+
+    MicroProfileDrawLineVertical(nX-5, 0, nTotalHeight+nY, UI.nOpacityBackground|g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]);
+    // Rows are right-aligned, so the callback receives the column's right edge.
+    MicroProfileLoopActiveGroupsDraw(nX + nTextWidth, nY, pName, MicroProfileDrawBarMetaCountCallback, pCounters);
+    MicroProfileDrawHeader(nX, nColumnWidth, pName);
+    return nColumnWidth;
+}
+
+// MicroProfileLoopActiveGroupsDraw callback for the legend column. Draws the timer's
+// name in the timer's colour so that it ends at nX, draws a ">" marker at nX when the
+// timer's bGraph flag is set, and records the timer as the hovered one (with a hover
+// time of 0) when the mouse y lies within this row.
+void MicroProfileDrawBarLegendCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
+{
+    MicroProfile& S = *MicroProfileGet();
+    const auto& Timer = S.TimerInfo[nTimer];
+
+    if(Timer.bGraph)
+    {
+        MicroProfileDrawText(nX, nY, Timer.nColor, ">", 1);
+    }
+    MicroProfileDrawTextRight(nX, nY, Timer.nColor, Timer.pName, (uint32_t)strlen(Timer.pName));
+
+    const bool bMouseOnRow = UI.nMouseY >= nY && UI.nMouseY < nY + MICROPROFILE_TEXT_HEIGHT+1;
+    if(bMouseOnRow)
+    {
+        UI.nHoverToken = nTimer;
+        UI.nHoverTime = 0;
+    }
+}
+
+// Draws the legend column of the bar view: a vertical separator line at nX-5 from nY to
+// nTotalHeight, then one legend row per active timer with nMaxWidth as the x position
+// handed to the row callback. Returns nX unchanged.
+uint32_t MicroProfileDrawBarLegend(int32_t nX, int32_t nY, uint32_t nTotalHeight, uint32_t nMaxWidth)
+{
+    const int32_t nSeparatorX = nX-5;
+    MicroProfileDrawLineVertical(nSeparatorX, nY, nTotalHeight, UI.nOpacityBackground | g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]);
+
+    MicroProfileLoopActiveGroupsDraw(nMaxWidth, nY, 0, MicroProfileDrawBarLegendCallback, 0);
+    return nX;
+}
+
+// Draws the timer graph panel, MICROPROFILE_GRAPH_WIDTH x MICROPROFILE_GRAPH_HEIGHT, in
+// the bottom-right corner of the screen: a background box, one poly-line per graph slot
+// holding a valid token, three horizontal guide lines and a label with S.fReferenceTime.
+// Curve heights are scaled so that the reference time lands on the upper guide line
+// (3/4 of the panel height).
+// While the mouse is over the panel a vertical cursor line and a float window listing
+// each graphed timer's value at the hovered sample are drawn; a right click clears
+// every graph slot.
+// Returns false without drawing when no graph slot is in use; otherwise returns whether
+// the mouse position is at or beyond the panel's top-left corner.
+bool MicroProfileDrawGraph(uint32_t nScreenWidth, uint32_t nScreenHeight)
+{
+    MicroProfile& S = *MicroProfileGet();
+
+    MICROPROFILE_SCOPE(g_MicroProfileDrawGraph);
+    bool bAnyGraph = false;
+    for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS && !bAnyGraph; ++i)
+        bAnyGraph = S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN;
+    if(!bAnyGraph)
+        return false;
+
+    const uint32_t nPanelX = nScreenWidth - MICROPROFILE_GRAPH_WIDTH;
+    const uint32_t nPanelY = nScreenHeight - MICROPROFILE_GRAPH_HEIGHT;
+    MicroProfileDrawBox(nPanelX, nPanelY, nPanelX + MICROPROFILE_GRAPH_WIDTH, nPanelY + MICROPROFILE_GRAPH_HEIGHT, 0x88000000 | g_nMicroProfileBackColors[0]);
+
+    const bool bMouseOver = UI.nMouseX >= nPanelX && UI.nMouseY >= nPanelY;
+    // Horizontal mouse position as a fraction of the panel width; only meaningful
+    // (and only used) while bMouseOver is true.
+    const float fMouseXPrc = (float(UI.nMouseX - nPanelX)) / MICROPROFILE_GRAPH_WIDTH;
+    if(bMouseOver)
+    {
+        const float fCursorX = fMouseXPrc * MICROPROFILE_GRAPH_WIDTH + nPanelX;
+        MicroProfileDrawLineVertical(fCursorX, nPanelY, nPanelY + MICROPROFILE_GRAPH_HEIGHT, (uint32_t)-1);
+    }
+
+    const float fBottomY = (float)nScreenHeight;
+    const float fStepX = MICROPROFILE_GRAPH_WIDTH * 1.f / MICROPROFILE_GRAPH_HISTORY;
+    const float fPanelHeight = MICROPROFILE_GRAPH_HEIGHT;
+    const uint32_t nRingStart = S.nGraphPut;
+    // Scratch buffer of (x, y) pairs, reused for every curve.
+    float* pPoints = (float*)alloca(sizeof(float)* MICROPROFILE_GRAPH_HISTORY*2);
+    for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
+    {
+        if(S.Graph[i].nToken == MICROPROFILE_INVALID_TOKEN)
+            continue;
+
+        const uint32_t nGroupId = MicroProfileGetGroupIndex(S.Graph[i].nToken);
+        const bool bGpu = S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu;
+        const float fToMs = MicroProfileTickToMsMultiplier(bGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
+        const float fToPrc = fToMs * S.fRcpReferenceTime * 3 / 4;
+
+        float fX = (float)nPanelX;
+        for(uint32_t j = 0; j < MICROPROFILE_GRAPH_HISTORY; ++j)
+        {
+            const float fWeight = MicroProfileMin(fToPrc * (S.Graph[i].nHistory[(j+nRingStart)%MICROPROFILE_GRAPH_HISTORY]), 1.f);
+            float* pPoint = pPoints + (j*2);
+            pPoint[0] = fX;
+            pPoint[1] = fBottomY - fPanelHeight * fWeight;
+            fX += fStepX;
+        }
+        MicroProfileDrawLine2D(MICROPROFILE_GRAPH_HISTORY, pPoints, S.TimerInfo[MicroProfileGetTimerIndex(S.Graph[i].nToken)].nColor);
+    }
+
+    {
+        const float fUpperY = 0.25f * MICROPROFILE_GRAPH_HEIGHT + nPanelY;
+        const float fMiddleY = 0.50f * MICROPROFILE_GRAPH_HEIGHT + nPanelY;
+        const float fLowerY = 0.75f * MICROPROFILE_GRAPH_HEIGHT + nPanelY;
+        MicroProfileDrawLineHorizontal(nPanelX, nPanelX + MICROPROFILE_GRAPH_WIDTH, fUpperY, 0xffdd4444);
+        MicroProfileDrawLineHorizontal(nPanelX, nPanelX + MICROPROFILE_GRAPH_WIDTH, fMiddleY, 0xff000000| g_nMicroProfileBackColors[0]);
+        MicroProfileDrawLineHorizontal(nPanelX, nPanelX + MICROPROFILE_GRAPH_WIDTH, fLowerY, 0xff000000|g_nMicroProfileBackColors[0]);
+
+        // Label the upper guide line with the reference time it stands for.
+        char sLabel[32];
+        const int nLabelLen = snprintf(sLabel, sizeof(sLabel)-1, "%5.2fms", S.fReferenceTime);
+        MicroProfileDrawText(nPanelX+1, fUpperY - (2+MICROPROFILE_TEXT_HEIGHT), (uint32_t)-1, sLabel, nLabelLen);
+    }
+
+    if(bMouseOver)
+    {
+        uint32_t pColors[MICROPROFILE_MAX_GRAPHS];
+        MicroProfileStringArray Strings;
+        MicroProfileStringArrayClear(&Strings);
+        uint32_t nRows = 0;
+        // History sample under the mouse cursor.
+        const uint32_t nSample = (S.nGraphPut + MICROPROFILE_GRAPH_HISTORY - int(MICROPROFILE_GRAPH_HISTORY*(1.f - fMouseXPrc))) % MICROPROFILE_GRAPH_HISTORY;
+
+        const uint32_t nTipX = UI.nMouseX;
+        const uint32_t nTipY = UI.nMouseY + 20;
+
+        for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
+        {
+            if(S.Graph[i].nToken == MICROPROFILE_INVALID_TOKEN)
+                continue;
+
+            const uint32_t nGroupId = MicroProfileGetGroupIndex(S.Graph[i].nToken);
+            const bool bGpu = S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu;
+            const float fToMs = MicroProfileTickToMsMultiplier(bGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
+            const uint32_t nTimerIndex = MicroProfileGetTimerIndex(S.Graph[i].nToken);
+            const uint32_t nTimerColor = S.TimerInfo[nTimerIndex].nColor;
+            const char* pTimerName = S.TimerInfo[nTimerIndex].pName;
+            pColors[nRows++] = nTimerColor;
+            MicroProfileStringArrayAddLiteral(&Strings, pTimerName);
+            MicroProfileStringArrayFormat(&Strings, "%5.2fms", fToMs * (S.Graph[i].nHistory[nSample]));
+        }
+        if(nRows)
+        {
+            MicroProfileDrawFloatWindow(nTipX, nTipY, Strings.ppStrings, Strings.nNumStrings, 0, pColors);
+        }
+
+        if(UI.nMouseRight)
+        {
+            // Right click over the panel removes every graphed timer.
+            for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
+            {
+                S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN;
+            }
+        }
+    }
+
+    return bMouseOver;
+}
+
+// Prints a table of timer statistics for every group in S.nGroupMask through
+// MICROPROFILE_PRINTF: a header row, then for each group its name followed by one row
+// per timer (frame time, average, max, call average, call count, and the exclusive
+// frame/average/max times, ending with the timer name).
+//
+// MicroProfileCalcTimers packs its results in iteration order - two floats per timer,
+// advancing group by group and then by timer index - so the rows are read back with a
+// running slot counter that follows exactly the same order. Indexing by timer index
+// instead would pick up another timer's values whenever timers were not registered
+// already sorted by group.
+void MicroProfileDumpTimers()
+{
+    MicroProfile& S = *MicroProfileGet();
+
+    uint64_t nActiveGroup = S.nGroupMask;
+
+    uint32_t nNumTimers = S.nTotalTimers;
+    // Seven result arrays of two floats per timer, carved out of one stack block.
+    uint32_t nBlockSize = 2 * nNumTimers;
+    float* pTimers = (float*)alloca(nBlockSize * 7 * sizeof(float));
+    float* pAverage = pTimers + nBlockSize;
+    float* pMax = pTimers + 2 * nBlockSize;
+    float* pCallAverage = pTimers + 3 * nBlockSize;
+    float* pTimersExclusive = pTimers + 4 * nBlockSize;
+    float* pAverageExclusive = pTimers + 5 * nBlockSize;
+    float* pMaxExclusive = pTimers + 6 * nBlockSize;
+    MicroProfileCalcTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, nActiveGroup, nNumTimers);
+
+    MICROPROFILE_PRINTF("%11s, ", "Time");
+    MICROPROFILE_PRINTF("%11s, ", "Average");
+    MICROPROFILE_PRINTF("%11s, ", "Max");
+    MICROPROFILE_PRINTF("%11s, ", "Call Avg");
+    MICROPROFILE_PRINTF("%9s, ", "Count");
+    MICROPROFILE_PRINTF("%11s, ", "Excl");
+    MICROPROFILE_PRINTF("%11s, ", "Avg Excl");
+    MICROPROFILE_PRINTF("%11s, \n", "Max Excl");
+
+    // Next slot to read; advances by 2 per printed timer, mirroring MicroProfileCalcTimers.
+    uint32_t nSlot = 0;
+    for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
+    {
+        uint64_t nMask = 1ll << j;
+        if(nMask & nActiveGroup)
+        {
+            MICROPROFILE_PRINTF("%s\n", S.GroupInfo[j].pName);
+            for(uint32_t i = 0; i < S.nTotalTimers;++i)
+            {
+                uint64_t nTokenMask = MicroProfileGetGroupMask(S.TimerInfo[i].nToken);
+                if(nTokenMask & nMask)
+                {
+                    const uint32_t nIdx = nSlot;
+                    nSlot += 2;
+                    MICROPROFILE_PRINTF("%9.2fms, ", pTimers[nIdx]);
+                    MICROPROFILE_PRINTF("%9.2fms, ", pAverage[nIdx]);
+                    MICROPROFILE_PRINTF("%9.2fms, ", pMax[nIdx]);
+                    MICROPROFILE_PRINTF("%9.2fms, ", pCallAverage[nIdx]);
+                    MICROPROFILE_PRINTF("%9d, ", S.Frame[i].nCount);
+                    MICROPROFILE_PRINTF("%9.2fms, ", pTimersExclusive[nIdx]);
+                    MICROPROFILE_PRINTF("%9.2fms, ", pAverageExclusive[nIdx]);
+                    MICROPROFILE_PRINTF("%9.2fms, ", pMaxExclusive[nIdx]);
+                    MICROPROFILE_PRINTF("%s\n", S.TimerInfo[i].pName);
+                }
+            }
+        }
+    }
+}
+
+// Draws the bar ("Timers") view: a table with one row per timer of every
+// active group, one column per statistic enabled in S.nBars (plus enabled
+// meta counters), and a legend column on the left holding group and timer
+// names. Returns immediately when no group is active.
+//
+// nScreenWidth / nScreenHeight are not read by this function; layout is
+// derived from the UI state (offsets, mouse position) and text metrics.
+void MicroProfileDrawBarView(uint32_t nScreenWidth, uint32_t nScreenHeight)
+{
+    MicroProfile& S = *MicroProfileGet();
+
+    uint64_t nActiveGroup = S.nAllGroupsWanted ? S.nGroupMask : S.nActiveGroupWanted;
+    if(!nActiveGroup)
+        return;
+    MICROPROFILE_SCOPE(g_MicroProfileDrawBarView);
+
+    const uint32_t nHeight = MICROPROFILE_TEXT_HEIGHT;
+    int nColorIndex = 0;
+    uint32_t nMaxTimerNameLen = 1;
+    uint32_t nNumTimers = 0;
+    uint32_t nNumGroups = 0;
+    // Count the rows to draw and find the widest timer name among active groups.
+    for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
+    {
+        if(nActiveGroup & (1ll << j))
+        {
+            nNumTimers += S.GroupInfo[j].nNumTimers;
+            nNumGroups += 1;
+            nMaxTimerNameLen = MicroProfileMax(nMaxTimerNameLen, S.GroupInfo[j].nMaxTimerNameLen);
+        }
+    }
+    uint32_t nTimerWidth = 2+(4+nMaxTimerNameLen) * (MICROPROFILE_TEXT_WIDTH+1);
+    uint32_t nX = nTimerWidth + UI.nOffsetX;
+    uint32_t nY = nHeight + 3 - UI.nOffsetY;
+    // One stack allocation carved into seven arrays of 2 * nNumTimers floats.
+    uint32_t nBlockSize = 2 * nNumTimers;
+    float* pTimers = (float*)alloca(nBlockSize * 7 * sizeof(float));
+    float* pAverage = pTimers + nBlockSize;
+    float* pMax = pTimers + 2 * nBlockSize;
+    float* pCallAverage = pTimers + 3 * nBlockSize;
+    float* pTimersExclusive = pTimers + 4 * nBlockSize;
+    float* pAverageExclusive = pTimers + 5 * nBlockSize;
+    float* pMaxExclusive = pTimers + 6 * nBlockSize;
+    MicroProfileCalcTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, nActiveGroup, nNumTimers);
+    uint32_t nWidth = 0;
+    {
+        // Accumulate the total width of all enabled columns, walking every
+        // bit of S.nBars; bits at or above MP_DRAW_META_FIRST are meta counters.
+        uint32_t nMetaIndex = 0;
+        for(uint32_t i = 1; i ; i <<= 1)
+        {
+            if(S.nBars & i)
+            {
+                if(i >= MP_DRAW_META_FIRST)
+                {
+                    if(nMetaIndex < MICROPROFILE_META_MAX && S.MetaCounters[nMetaIndex].pName)
+                    {
+                        uint32_t nStrWidth = strlen(S.MetaCounters[nMetaIndex].pName);
+                        if(S.nBars & MP_DRAW_TIMERS)
+                            nWidth += 6 + (1+MICROPROFILE_TEXT_WIDTH) * (nStrWidth);
+                        if(S.nBars & MP_DRAW_AVERAGE)
+                            nWidth += 6 + (1+MICROPROFILE_TEXT_WIDTH) * (nStrWidth + 4);
+                        if(S.nBars & MP_DRAW_MAX)
+                            nWidth += 6 + (1+MICROPROFILE_TEXT_WIDTH) * (nStrWidth + 4);
+                    }
+                }
+                else
+                {
+                    nWidth += MICROPROFILE_BAR_WIDTH + 6 + 6 * (1+MICROPROFILE_TEXT_WIDTH);
+                    if(i & MP_DRAW_CALL_COUNT)
+                        nWidth += 6 + 6 * MICROPROFILE_TEXT_WIDTH;
+                }
+            }
+            if(i >= MP_DRAW_META_FIRST)
+            {
+                ++nMetaIndex;
+            }
+        }
+        nWidth += (1+nMaxTimerNameLen) * (MICROPROFILE_TEXT_WIDTH+1);
+        // Striped row backgrounds for the data columns; the row under the
+        // mouse is brightened when no menu is open.
+        for(uint32_t i = 0; i < nNumTimers+nNumGroups+1; ++i)
+        {
+            uint32_t nY0 = nY + i * (nHeight + 1);
+            bool bInside = (UI.nActiveMenu == -1) && ((UI.nMouseY >= nY0) && (UI.nMouseY < (nY0 + nHeight + 1)));
+            MicroProfileDrawBox(nX, nY0, nWidth+nX, nY0 + (nHeight+1)+1, UI.nOpacityBackground | (g_nMicroProfileBackColors[nColorIndex++ & 1] + ((bInside) ? 0x002c2c2c : 0)));
+        }
+        nX += 10;
+    }
+    int nTotalHeight = (nNumTimers+nNumGroups+1) * (nHeight+1);
+    uint32_t nLegendOffset = 1;
+    // Each enabled column is drawn left to right; every draw helper returns
+    // the width it consumed.
+    if(S.nBars & MP_DRAW_TIMERS)
+        nX += MicroProfileDrawBarArray(nX, nY, pTimers, "Time", nTotalHeight) + 1;
+    if(S.nBars & MP_DRAW_AVERAGE)
+        nX += MicroProfileDrawBarArray(nX, nY, pAverage, "Average", nTotalHeight) + 1;
+    if(S.nBars & MP_DRAW_MAX)
+        nX += MicroProfileDrawBarArray(nX, nY, pMax, (!UI.bShowSpikes) ? "Max Time" : "Max Time, Spike", nTotalHeight, UI.bShowSpikes ? pAverage : NULL) + 1;
+    if(S.nBars & MP_DRAW_CALL_COUNT)
+    {
+        nX += MicroProfileDrawBarArray(nX, nY, pCallAverage, "Call Average", nTotalHeight) + 1;
+        nX += MicroProfileDrawBarCallCount(nX, nY, "Count") + 1;
+    }
+    if(S.nBars & MP_DRAW_TIMERS_EXCLUSIVE)
+        nX += MicroProfileDrawBarArray(nX, nY, pTimersExclusive, "Exclusive Time", nTotalHeight) + 1;
+    if(S.nBars & MP_DRAW_AVERAGE_EXCLUSIVE)
+        nX += MicroProfileDrawBarArray(nX, nY, pAverageExclusive, "Exclusive Average", nTotalHeight) + 1;
+    if(S.nBars & MP_DRAW_MAX_EXCLUSIVE)
+        nX += MicroProfileDrawBarArray(nX, nY, pMaxExclusive, (!UI.bShowSpikes) ? "Exclusive Max Time" :"Excl Max Time, Spike", nTotalHeight, UI.bShowSpikes ? pAverageExclusive : NULL) + 1;
+
+    for(int i = 0; i < MICROPROFILE_META_MAX; ++i)
+    {
+        if(0 != (S.nBars & (MP_DRAW_META_FIRST<<i)) && S.MetaCounters[i].pName)
+        {
+            // Scratch buffer for the "<name> Avg" / "<name> Max" column titles.
+            uint32_t nBufferSize = strlen(S.MetaCounters[i].pName) + 32;
+            char* buffer = (char*)alloca(nBufferSize);
+            if(S.nBars & MP_DRAW_TIMERS)
+                nX += MicroProfileDrawBarMetaCount(nX, nY, &S.MetaCounters[i].nCounters[0], S.MetaCounters[i].pName, nTotalHeight) + 1;
+            if(S.nBars & MP_DRAW_AVERAGE)
+            {
+                snprintf(buffer, nBufferSize-1, "%s Avg", S.MetaCounters[i].pName);
+                nX += MicroProfileDrawBarMetaAverage(nX, nY, &S.MetaCounters[i].nAggregate[0], buffer, nTotalHeight) + 1;
+            }
+            if(S.nBars & MP_DRAW_MAX)
+            {
+                snprintf(buffer, nBufferSize-1, "%s Max", S.MetaCounters[i].pName);
+                nX += MicroProfileDrawBarMetaCount(nX, nY, &S.MetaCounters[i].nAggregateMax[0], buffer, nTotalHeight) + 1;
+            }
+        }
+    }
+    // Legend column: drawn last, starting at x = 0, with opaque row
+    // backgrounds so it covers whatever the data columns drew underneath.
+    nX = 0;
+    nY = nHeight + 3 - UI.nOffsetY;
+    for(uint32_t i = 0; i < nNumTimers+nNumGroups+1; ++i)
+    {
+        uint32_t nY0 = nY + i * (nHeight + 1);
+        bool bInside = (UI.nActiveMenu == -1) && ((UI.nMouseY >= nY0) && (UI.nMouseY < (nY0 + nHeight + 1)));
+        // 0xff000000 is the fully opaque alpha mask. The previous literal had
+        // nine hex digits (0xff0000000) and truncated to alpha 0xf0 when
+        // narrowed to the 32-bit colour argument.
+        MicroProfileDrawBox(nX, nY0, nTimerWidth, nY0 + (nHeight+1)+1, 0xff000000 | (g_nMicroProfileBackColors[nColorIndex++ & 1] + ((bInside) ? 0x002c2c2c : 0)));
+    }
+    nX += MicroProfileDrawBarLegend(nX, nY, nTotalHeight, nTimerWidth-5) + 1;
+
+    // Group names: each group occupies one row followed by its timers' rows.
+    for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
+    {
+        if(nActiveGroup & (1ll << j))
+        {
+            MicroProfileDrawText(nX, nY + (1+nHeight) * nLegendOffset, (uint32_t)-1, S.GroupInfo[j].pName, S.GroupInfo[j].nNameLen);
+            nLegendOffset += S.GroupInfo[j].nNumTimers+1;
+        }
+    }
+    MicroProfileDrawHeader(nX, nTimerWidth-5, "Group");
+    MicroProfileDrawTextRight(nTimerWidth-3, MICROPROFILE_TEXT_HEIGHT + 2, (uint32_t)-1, "Timer", 5);
+    MicroProfileDrawLineVertical(nTimerWidth, 0, nTotalHeight+nY, UI.nOpacityBackground|g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]);
+    MicroProfileDrawLineHorizontal(0, nWidth, 2*MICROPROFILE_TEXT_HEIGHT + 3, UI.nOpacityBackground|g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]);
+}
+
+// Submenu item provider. Called with a zero-based row index; returns the row's
+// label, or null once the index is past the last row. May write whether the
+// row is currently selected through bSelected.
+typedef const char* (*MicroProfileSubmenuCallback)(int, bool* bSelected);
+// Submenu click handler. Called with the zero-based index of the clicked row.
+typedef void (*MicroProfileClickCallback)(int);
+
+
+// Row provider for the "Mode" submenu. Returns the label of row nIndex (null
+// past the last row) and reports through bSelected whether that row matches
+// the current display mode. The "Off" row and the separator always report
+// selected.
+const char* MicroProfileUIMenuMode(int nIndex, bool* bSelected)
+{
+    MicroProfile& State = *MicroProfileGet();
+    if(0 == nIndex)
+    {
+        *bSelected = (MP_DRAW_DETAILED == State.nDisplay);
+        return "Detailed";
+    }
+    if(1 == nIndex)
+    {
+        *bSelected = (MP_DRAW_BARS == State.nDisplay);
+        return "Timers";
+    }
+    if(2 == nIndex)
+    {
+        *bSelected = (MP_DRAW_HIDDEN == State.nDisplay);
+        return "Hidden";
+    }
+    if(3 == nIndex)
+    {
+        *bSelected = true;
+        return "Off";
+    }
+    if(4 == nIndex)
+    {
+        *bSelected = true;
+        return "------";
+    }
+    if(5 == nIndex)
+    {
+        *bSelected = (0 != State.nForceEnable);
+        return "Force Enable";
+    }
+    // Past the last row: bSelected is left untouched.
+    return 0;
+}
+
+// Row provider for the "Groups" submenu. Row 0 is "[ALL]"; the remaining rows
+// come from UI.GroupMenu, with category entries shown in brackets. The label
+// for those rows is formatted into a function-local static buffer, so the
+// returned pointer is only valid until the next call.
+const char* MicroProfileUIMenuGroups(int nIndex, bool* bSelected)
+{
+    MicroProfile& State = *MicroProfileGet();
+    *bSelected = false;
+    if(0 == nIndex)
+    {
+        *bSelected = State.nAllGroupsWanted != 0;
+        return "[ALL]";
+    }
+
+    const int nItem = nIndex-1;
+    if(!(nItem < UI.GroupMenuCount))
+    {
+        return 0;
+    }
+
+    static char buffer[MICROPROFILE_NAME_MAX_LEN+32];
+    MicroProfileGroupMenuItem& Item = UI.GroupMenu[nItem];
+    if(!Item.nIsCategory)
+    {
+        // Plain group: selected when its bit is set in the wanted mask.
+        *bSelected = 0 != (State.nActiveGroupWanted & (1ll << Item.nIndex));
+        snprintf(buffer, sizeof(buffer)-1, " %s", Item.pName);
+        return buffer;
+    }
+
+    // Category: selected only when every group of the category is wanted.
+    uint64_t nCategoryGroups = State.CategoryInfo[Item.nIndex].nGroupMask;
+    *bSelected = nCategoryGroups == (nCategoryGroups & State.nActiveGroupWanted);
+    snprintf(buffer, sizeof(buffer)-1, "[%s]", Item.pName);
+    return buffer;
+}
+
+// Row provider for the "Aggregate" submenu: one row per entry of
+// g_MicroProfileAggregatePresets. A preset of 0 is labelled "Infinite"; other
+// presets are formatted into a function-local static buffer.
+const char* MicroProfileUIMenuAggregate(int nIndex, bool* bSelected)
+{
+    MicroProfile& State = *MicroProfileGet();
+    if(!(nIndex < sizeof(g_MicroProfileAggregatePresets)/sizeof(g_MicroProfileAggregatePresets[0])))
+    {
+        return 0;
+    }
+
+    const int nPreset = g_MicroProfileAggregatePresets[nIndex];
+    *bSelected = ((int)State.nAggregateFlip == nPreset);
+    if(0 != nPreset)
+    {
+        static char buf[128];
+        snprintf(buf, sizeof(buf)-1, "%7d", nPreset);
+        return buf;
+    }
+    return "Infinite";
+}
+
+// Row provider for the "Timers" submenu. Rows 0..6 are the built-in columns;
+// later rows map to S.MetaCounters, whose pName is returned as-is.
+// bSelected reports whether bit nIndex of S.nBars is set.
+const char* MicroProfileUIMenuTimers(int nIndex, bool* bSelected)
+{
+    static const char* const s_pBuiltinNames[] =
+    {
+        "Time",
+        "Average",
+        "Max",
+        "Call Count",
+        "Exclusive Timers",
+        "Exclusive Average",
+        "Exclusive Max",
+    };
+
+    MicroProfile& State = *MicroProfileGet();
+    *bSelected = (State.nBars & (1 << nIndex)) != 0;
+    if(nIndex >= 0 && nIndex < 7)
+    {
+        return s_pBuiltinNames[nIndex];
+    }
+
+    const int nMeta = nIndex - 7;
+    if(nMeta < MICROPROFILE_META_MAX)
+    {
+        return State.MetaCounters[nMeta].pName;
+    }
+    return 0;
+}
+
+// Row provider for the "Options" submenu. Each row is an entry of UI.Options;
+// its nSubType picks which setting the row belongs to and its nIndex picks the
+// preset (or context-switch flag) within that setting. Returns the entry's
+// Text, or null once nIndex reaches MICROPROFILE_OPTION_SIZE.
+const char* MicroProfileUIMenuOptions(int nIndex, bool* bSelected)
+{
+    MicroProfile& State = *MicroProfileGet();
+    if(nIndex >= MICROPROFILE_OPTION_SIZE)
+    {
+        return 0;
+    }
+
+    const auto& Option = UI.Options[nIndex];
+    if(0 == Option.nSubType)
+    {
+        // Reference time preset.
+        *bSelected = (State.fReferenceTime == g_MicroProfileReferenceTimePresets[Option.nIndex]);
+    }
+    else if(1 == Option.nSubType)
+    {
+        // Background opacity preset, compared against the alpha byte.
+        *bSelected = ((UI.nOpacityBackground>>24) == g_MicroProfileOpacityPresets[Option.nIndex]);
+    }
+    else if(2 == Option.nSubType)
+    {
+        // Foreground opacity preset, compared against the alpha byte.
+        *bSelected = ((UI.nOpacityForeground>>24) == g_MicroProfileOpacityPresets[Option.nIndex]);
+    }
+    else if(3 == Option.nSubType)
+    {
+        *bSelected = UI.bShowSpikes;
+    }
+#if MICROPROFILE_CONTEXT_SWITCH_TRACE
+    else if(4 == Option.nSubType)
+    {
+        if(0 == Option.nIndex)
+        {
+            *bSelected = State.bContextSwitchRunning;
+        }
+        else if(1 == Option.nIndex)
+        {
+            *bSelected = State.bContextSwitchAllThreads;
+        }
+        else if(2 == Option.nIndex)
+        {
+            *bSelected = State.bContextSwitchNoBars;
+        }
+    }
+#endif
+    // Unknown sub-types leave bSelected untouched.
+    return Option.Text;
+}
+
+// Row provider for the "Preset" submenu. With N = number of preset names the
+// rows are: N "Load '<name>'" rows, a "--" separator at index N, then N
+// "Save '<name>'" rows. Labels are formatted into a function-local static
+// buffer. bSelected is always cleared.
+const char* MicroProfileUIMenuPreset(int nIndex, bool* bSelected)
+{
+    static char buf[128];
+    *bSelected = false;
+    const int nPresetCount = sizeof(g_MicroProfilePresetNames) / sizeof(g_MicroProfilePresetNames[0]);
+    const int nSaveSlot = nIndex - nPresetCount - 1;
+
+    if(nIndex < nPresetCount)
+    {
+        snprintf(buf, sizeof(buf)-1, "Load '%s'", g_MicroProfilePresetNames[nIndex]);
+        return buf;
+    }
+    if(nIndex == nPresetCount)
+    {
+        return "--";
+    }
+    if(nSaveSlot >= 0 && nSaveSlot < nPresetCount)
+    {
+        snprintf(buf, sizeof(buf)-1, "Save '%s'", g_MicroProfilePresetNames[nSaveSlot]);
+        return buf;
+    }
+    return 0;
+}
+
+// Row provider for the "Custom" submenu. Row 0 is "Disable", row 1 a "--"
+// separator, and rows 2.. map to UI.Custom[nIndex-2]. When no custom view is
+// active (UI.nCustomActive == (uint32_t)-1) the "Disable" row is the selected
+// one; otherwise the row of the active custom view is.
+const char* MicroProfileUIMenuCustom(int nIndex, bool* bSelected)
+{
+    if((uint32_t)-1 != UI.nCustomActive)
+    {
+        *bSelected = (nIndex-2 == UI.nCustomActive);
+    }
+    else
+    {
+        *bSelected = (0 == nIndex);
+    }
+
+    if(0 == nIndex)
+    {
+        return "Disable";
+    }
+    if(1 == nIndex)
+    {
+        return "--";
+    }
+
+    const int nCustom = nIndex - 2;
+    if(nCustom < UI.nCustomCount)
+    {
+        return UI.Custom[nCustom].pName;
+    }
+    return 0;
+}
+
+// Row provider for menus without a submenu: always returns null and never
+// touches bSelected.
+const char* MicroProfileUIMenuEmpty(int nIndex, bool* bSelected)
+{
+ return 0;
+}
+
+
+// Click handler for the "Mode" submenu: rows 0-3 set the display mode (row 3
+// sets it to 0), row 4 is the separator and does nothing, row 5 toggles
+// nForceEnable.
+void MicroProfileUIClickMode(int nIndex)
+{
+    MicroProfile& State = *MicroProfileGet();
+    if(0 == nIndex)
+    {
+        State.nDisplay = MP_DRAW_DETAILED;
+    }
+    else if(1 == nIndex)
+    {
+        State.nDisplay = MP_DRAW_BARS;
+    }
+    else if(2 == nIndex)
+    {
+        State.nDisplay = MP_DRAW_HIDDEN;
+    }
+    else if(3 == nIndex)
+    {
+        State.nDisplay = 0;
+    }
+    else if(5 == nIndex)
+    {
+        State.nForceEnable = !State.nForceEnable;
+    }
+}
+
+// Click handler for the "Groups" submenu. Row 0 toggles "all groups"; other
+// rows map to UI.GroupMenu[nIndex-1]. Clicking a category enables all of its
+// groups unless they are all enabled already, in which case it disables them;
+// clicking a group toggles that group's bit.
+void MicroProfileUIClickGroups(int nIndex)
+{
+    MicroProfile& State = *MicroProfileGet();
+    if(0 == nIndex)
+    {
+        State.nAllGroupsWanted = 1-State.nAllGroupsWanted;
+        return;
+    }
+
+    const int nItem = nIndex - 1;
+    if(!(nItem < UI.GroupMenuCount))
+    {
+        return;
+    }
+
+    MicroProfileGroupMenuItem& Item = UI.GroupMenu[nItem];
+    if(!Item.nIsCategory)
+    {
+        MP_ASSERT(Item.nIndex < State.nGroupCount);
+        State.nActiveGroupWanted ^= (1ll << Item.nIndex);
+        return;
+    }
+
+    uint64_t nCategoryGroups = State.CategoryInfo[Item.nIndex].nGroupMask;
+    const bool bAllWanted = (nCategoryGroups == (nCategoryGroups & State.nActiveGroupWanted));
+    if(bAllWanted)
+    {
+        State.nActiveGroupWanted &= ~nCategoryGroups;
+    }
+    else
+    {
+        State.nActiveGroupWanted |= nCategoryGroups;
+    }
+}
+
+// Click handler for the "Aggregate" submenu: stores preset nIndex as the
+// aggregate flip count and, when the stored value is 0, requests an aggregate
+// clear. nIndex is not range-checked here.
+void MicroProfileUIClickAggregate(int nIndex)
+{
+    MicroProfile& State = *MicroProfileGet();
+    State.nAggregateFlip = g_MicroProfileAggregatePresets[nIndex];
+    if(0 != State.nAggregateFlip)
+    {
+        return;
+    }
+    State.nAggregateClear = 1;
+}
+
+// Click handler for the "Timers" submenu: toggles bit nIndex of S.nBars.
+void MicroProfileUIClickTimers(int nIndex)
+{
+    MicroProfile& State = *MicroProfileGet();
+    const int nBarBit = (1 << nIndex);
+    State.nBars ^= nBarBit;
+}
+
+// Click handler for the "Options" submenu. The clicked UI.Options entry's
+// nSubType selects the setting to change and its nIndex selects the preset (or
+// context-switch flag) to apply. nIndex is not range-checked here.
+void MicroProfileUIClickOptions(int nIndex)
+{
+    MicroProfile& State = *MicroProfileGet();
+    const auto& Option = UI.Options[nIndex];
+    if(0 == Option.nSubType)
+    {
+        // Reference time and its cached reciprocal are updated together.
+        State.fReferenceTime = g_MicroProfileReferenceTimePresets[Option.nIndex];
+        State.fRcpReferenceTime = 1.f / State.fReferenceTime;
+    }
+    else if(1 == Option.nSubType)
+    {
+        UI.nOpacityBackground = g_MicroProfileOpacityPresets[Option.nIndex]<<24;
+    }
+    else if(2 == Option.nSubType)
+    {
+        UI.nOpacityForeground = g_MicroProfileOpacityPresets[Option.nIndex]<<24;
+    }
+    else if(3 == Option.nSubType)
+    {
+        UI.bShowSpikes = !UI.bShowSpikes;
+    }
+#if MICROPROFILE_CONTEXT_SWITCH_TRACE
+    else if(4 == Option.nSubType)
+    {
+        if(0 == Option.nIndex)
+        {
+            // Start or stop the trace depending on whether it is running.
+            if(State.bContextSwitchRunning)
+            {
+                MicroProfileStopContextSwitchTrace();
+            }
+            else
+            {
+                MicroProfileStartContextSwitchTrace();
+            }
+        }
+        else if(1 == Option.nIndex)
+        {
+            State.bContextSwitchAllThreads = !State.bContextSwitchAllThreads;
+        }
+        else if(2 == Option.nIndex)
+        {
+            State.bContextSwitchNoBars= !State.bContextSwitchNoBars;
+        }
+    }
+#endif
+}
+
+// Click handler for the "Preset" submenu. With N preset names, rows 0..N-1
+// load the matching preset and rows N+1..2N save it; row N (the separator)
+// and any other index do nothing.
+void MicroProfileUIClickPreset(int nIndex)
+{
+    const int nPresetCount = sizeof(g_MicroProfilePresetNames) / sizeof(g_MicroProfilePresetNames[0]);
+    const int nSaveSlot = nIndex - nPresetCount - 1;
+    const bool bSave = (nSaveSlot >= 0) && (nSaveSlot < nPresetCount);
+    const bool bLoad = !bSave && (nIndex >= 0) && (nIndex < nPresetCount);
+
+    if(bSave)
+    {
+        MicroProfileSavePreset(g_MicroProfilePresetNames[nSaveSlot]);
+    }
+    if(bLoad)
+    {
+        MicroProfileLoadPreset(g_MicroProfilePresetNames[nIndex]);
+    }
+}
+
+// Click handler for the "Custom" submenu. Row 0 disables the custom view and
+// rows 2.. enable custom view (nIndex - 2), matching the row layout produced
+// by MicroProfileUIMenuCustom.
+void MicroProfileUIClickCustom(int nIndex)
+{
+    if(nIndex == 0)
+    {
+        MicroProfileCustomGroupDisable();
+    }
+    else if(nIndex >= 2)
+    {
+        MicroProfileCustomGroupEnable(nIndex-2);
+    }
+    // Row 1 is the "--" separator: clicking it (or a negative index) is
+    // ignored rather than passing -1 on as a custom-group index.
+}
+
+// Click handler for menus without a submenu: intentionally does nothing.
+void MicroProfileUIClickEmpty(int nIndex)
+{
+
+}
+
+
+// Draws the menu bar along the top edge, handles hover/click on its entries,
+// draws the submenu of the hovered or active entry and dispatches clicks on
+// its rows, and finally draws the frame time summary right-aligned at nWidth.
+//
+// nWidth is the width used for the bar and for right-aligning the summary.
+// The nHeight parameter is not read; inside the submenu branch both names are
+// shadowed by locals holding the submenu's size.
+void MicroProfileDrawMenu(uint32_t nWidth, uint32_t nHeight)
+{
+ MicroProfile& S = *MicroProfileGet();
+
+ uint32_t nX = 0;
+ uint32_t nY = 0;
+ bool bMouseOver = UI.nMouseY < MICROPROFILE_TEXT_HEIGHT + 1;
+ // NOTE: SBUF_SIZE is defined inside the function body and never undefined;
+ // it must stay equal to the size of `buffer` below.
+#define SBUF_SIZE 256
+ char buffer[256];
+ MicroProfileDrawBox(nX, nY, nX + nWidth, nY + (MICROPROFILE_TEXT_HEIGHT+1)+1, 0xff000000|g_nMicroProfileBackColors[1]);
+
+#define MICROPROFILE_MENU_MAX 16
+ const char* pMenuText[MICROPROFILE_MENU_MAX] = {0};
+ uint32_t nMenuX[MICROPROFILE_MENU_MAX] = {0};
+ uint32_t nNumMenuItems = 0;
+
+ int nLen = snprintf(buffer, 127, "MicroProfile");
+ MicroProfileDrawText(nX, nY, (uint32_t)-1, buffer, nLen);
+ nX += (sizeof("MicroProfile")+2) * (MICROPROFILE_TEXT_WIDTH+1);
+ // The order of the entries below must match the order of the callback
+ // tables GroupCallback / CBClick further down.
+ pMenuText[nNumMenuItems++] = "Mode";
+ pMenuText[nNumMenuItems++] = "Groups";
+ char AggregateText[64];
+ snprintf(AggregateText, sizeof(AggregateText)-1, "Aggregate[%d]", S.nAggregateFlip ? S.nAggregateFlip : S.nAggregateFlipCount);
+ pMenuText[nNumMenuItems++] = &AggregateText[0];
+ pMenuText[nNumMenuItems++] = "Timers";
+ pMenuText[nNumMenuItems++] = "Options";
+ pMenuText[nNumMenuItems++] = "Preset";
+ pMenuText[nNumMenuItems++] = "Custom";
+ const int nPauseIndex = nNumMenuItems;
+ pMenuText[nNumMenuItems++] = S.nRunning ? "Pause" : "Unpause";
+ pMenuText[nNumMenuItems++] = "Help";
+
+ if(S.nOverflow)
+ {
+ pMenuText[nNumMenuItems++] = "!BUFFERSFULL!";
+ }
+
+
+ // Rebuild the flattened category + group list used by the "Groups"
+ // submenu whenever the total number of categories and groups has changed.
+ if(UI.GroupMenuCount != S.nGroupCount + S.nCategoryCount)
+ {
+ UI.GroupMenuCount = S.nGroupCount + S.nCategoryCount;
+ for(uint32_t i = 0; i < S.nCategoryCount; ++i)
+ {
+ UI.GroupMenu[i].nIsCategory = 1;
+ UI.GroupMenu[i].nCategoryIndex = i;
+ UI.GroupMenu[i].nIndex = i;
+ UI.GroupMenu[i].pName = S.CategoryInfo[i].pName;
+ }
+ for(uint32_t i = 0; i < S.nGroupCount; ++i)
+ {
+ uint32_t idx = i + S.nCategoryCount;
+ UI.GroupMenu[idx].nIsCategory = 0;
+ UI.GroupMenu[idx].nCategoryIndex = S.GroupInfo[i].nCategory;
+ UI.GroupMenu[idx].nIndex = i;
+ UI.GroupMenu[idx].pName = S.GroupInfo[i].pName;
+ }
+ // Sort by category index; within one category the category entry comes
+ // first, followed by its groups ordered by MP_STRCASECMP on their names
+ // (presumably a case-insensitive compare - confirm against its definition).
+ std::sort(&UI.GroupMenu[0], &UI.GroupMenu[UI.GroupMenuCount],
+ [] (const MicroProfileGroupMenuItem& l, const MicroProfileGroupMenuItem& r) -> bool
+ {
+ if(l.nCategoryIndex < r.nCategoryIndex)
+ {
+ return true;
+ }
+ else if(r.nCategoryIndex < l.nCategoryIndex)
+ {
+ return false;
+ }
+ if(r.nIsCategory || l.nIsCategory)
+ {
+ return l.nIsCategory > r.nIsCategory;
+ }
+ return MP_STRCASECMP(l.pName, r.pName)<0;
+ }
+ );
+ }
+
+ // Submenu row providers, indexed by menu entry. Only the first ten slots
+ // are listed; the remaining slots are zero-initialized.
+ MicroProfileSubmenuCallback GroupCallback[MICROPROFILE_MENU_MAX] =
+ {
+ MicroProfileUIMenuMode,
+ MicroProfileUIMenuGroups,
+ MicroProfileUIMenuAggregate,
+ MicroProfileUIMenuTimers,
+ MicroProfileUIMenuOptions,
+ MicroProfileUIMenuPreset,
+ MicroProfileUIMenuCustom,
+ MicroProfileUIMenuEmpty,
+ MicroProfileUIMenuEmpty,
+ MicroProfileUIMenuEmpty,
+ };
+
+ // Submenu click handlers, indexed the same way as GroupCallback.
+ MicroProfileClickCallback CBClick[MICROPROFILE_MENU_MAX] =
+ {
+ MicroProfileUIClickMode,
+ MicroProfileUIClickGroups,
+ MicroProfileUIClickAggregate,
+ MicroProfileUIClickTimers,
+ MicroProfileUIClickOptions,
+ MicroProfileUIClickPreset,
+ MicroProfileUIClickCustom,
+ MicroProfileUIClickEmpty,
+ MicroProfileUIClickEmpty,
+ MicroProfileUIClickEmpty,
+ };
+
+
+ // Draw the menu bar entries and find the one under the mouse, if any.
+ uint32_t nSelectMenu = (uint32_t)-1;
+ for(uint32_t i = 0; i < nNumMenuItems; ++i)
+ {
+ nMenuX[i] = nX;
+ uint32_t nLen = (uint32_t)strlen(pMenuText[i]);
+ uint32_t nEnd = nX + nLen * (MICROPROFILE_TEXT_WIDTH+1);
+ if(UI.nMouseY <= MICROPROFILE_TEXT_HEIGHT && UI.nMouseX <= nEnd && UI.nMouseX >= nX)
+ {
+ MicroProfileDrawBox(nX-1, nY, nX + nLen * (MICROPROFILE_TEXT_WIDTH+1), nY +(MICROPROFILE_TEXT_HEIGHT+1)+1, 0xff888888);
+ nSelectMenu = i;
+ // The Pause/Unpause entry has no submenu: a click on it requests a toggle.
+ if((UI.nMouseLeft || UI.nMouseRight) && i == (int)nPauseIndex)
+ {
+ S.nToggleRunning = 1;
+ }
+ }
+ MicroProfileDrawText(nX, nY, (uint32_t)-1, pMenuText[i], (uint32_t)strlen(pMenuText[i]));
+ nX += (nLen+1) * (MICROPROFILE_TEXT_WIDTH+1);
+ }
+ // The hovered entry wins; otherwise the previously active menu stays open.
+ uint32_t nMenu = nSelectMenu != (uint32_t)-1 ? nSelectMenu : UI.nActiveMenu;
+ UI.nActiveMenu = nMenu;
+ if((uint32_t)-1 != nMenu)
+ {
+ nX = nMenuX[nMenu];
+ nY += MICROPROFILE_TEXT_HEIGHT+1;
+ MicroProfileSubmenuCallback CB = GroupCallback[nMenu];
+ int nNumLines = 0;
+ bool bSelected = false;
+ const char* pString = CB(nNumLines, &bSelected);
+ // These locals shadow the function parameters for the rest of this block.
+ uint32_t nWidth = 0, nHeight = 0;
+ // First pass: count the rows and find the longest label.
+ while(pString)
+ {
+ nWidth = MicroProfileMax<int>(nWidth, (int)strlen(pString));
+ nNumLines++;
+ pString = CB(nNumLines, &bSelected);
+ }
+ nWidth = (2+nWidth) * (MICROPROFILE_TEXT_WIDTH+1);
+ nHeight = nNumLines * (MICROPROFILE_TEXT_HEIGHT+1);
+ // Keep the menu open while the mouse is inside the submenu rectangle;
+ // close it when the mouse is neither there nor on a menu bar entry.
+ if(UI.nMouseY <= nY + nHeight+0 && UI.nMouseY >= nY-0 && UI.nMouseX <= nX + nWidth + 0 && UI.nMouseX >= nX - 0)
+ {
+ UI.nActiveMenu = nMenu;
+ }
+ else if(nSelectMenu == (uint32_t)-1)
+ {
+ UI.nActiveMenu = (uint32_t)-1;
+ }
+ MicroProfileDrawBox(nX, nY, nX + nWidth, nY + nHeight, 0xff000000|g_nMicroProfileBackColors[1]);
+ // Second pass: draw each row and dispatch clicks.
+ for(int i = 0; i < nNumLines; ++i)
+ {
+ bool bSelected = false;
+ const char* pString = CB(i, &bSelected);
+ // NOTE(review): the row hit test only compares the mouse Y coordinate.
+ if(UI.nMouseY >= nY && UI.nMouseY < nY + MICROPROFILE_TEXT_HEIGHT + 1)
+ {
+ bMouseOver = true;
+ if(UI.nMouseLeft || UI.nMouseRight)
+ {
+ CBClick[nMenu](i);
+ }
+ MicroProfileDrawBox(nX, nY, nX + nWidth, nY + MICROPROFILE_TEXT_HEIGHT + 1, 0xff888888);
+ }
+ // Selected rows are prefixed with '*'.
+ int nLen = snprintf(buffer, SBUF_SIZE-1, "%c %s", bSelected ? '*' : ' ' ,pString);
+ MicroProfileDrawText(nX, nY, (uint32_t)-1, buffer, nLen);
+ nY += MICROPROFILE_TEXT_HEIGHT+1;
+ }
+ }
+
+
+ {
+ // Frame time summary, drawn right-aligned against nWidth.
+ static char FrameTimeMessage[64];
+ float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
+ // Avoid dividing by zero when no frames have been aggregated.
+ uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
+ float fMs = fToMs * (S.nFlipTicks);
+ float fAverageMs = fToMs * (S.nFlipAggregateDisplay / nAggregateFrames);
+ float fMaxMs = fToMs * S.nFlipMaxDisplay;
+ int nLen = snprintf(FrameTimeMessage, sizeof(FrameTimeMessage)-1, "Time[%6.2f] Avg[%6.2f] Max[%6.2f]", fMs, fAverageMs, fMaxMs);
+ // The message is appended to pMenuText after the menu bar loop has run,
+ // so it is not drawn or hit-tested as a menu entry.
+ pMenuText[nNumMenuItems++] = &FrameTimeMessage[0];
+ MicroProfileDrawText(nWidth - nLen * (MICROPROFILE_TEXT_WIDTH+1), 0, -1, FrameTimeMessage, nLen);
+ }
+}
+
+
+// Applies this frame's mouse input to the view: the mouse wheel zooms the
+// detailed range around the mouse position, and dragging with the left button
+// (without the modifier held) pans the detailed offset and the UI offsets.
+// The previous mouse position is kept in function-local statics.
+void MicroProfileMoveGraph()
+{
+    static int s_nLastMouseX = 0, s_nLastMouseY = 0;
+
+    const int nWheel = UI.nMouseWheelDelta;
+    int nDragX = 0;
+    int nDragY = 0;
+    if(UI.nMouseDownLeft && !UI.nModDown)
+    {
+        nDragX = UI.nMouseX - s_nLastMouseX;
+        nDragY = UI.nMouseY - s_nLastMouseY;
+    }
+    s_nLastMouseX = UI.nMouseX;
+    s_nLastMouseY = UI.nMouseY;
+
+    if(nWheel)
+    {
+        const float fRangeBefore = UI.fDetailedRange;
+        // Holding the modifier zooms in larger steps.
+        const float fStep = UI.nModDown ? 1.40f : 1.05f;
+        if(nWheel > 0)
+        {
+            UI.fDetailedRange *= fStep;
+            UI.fDetailedRangeTarget = UI.fDetailedRange;
+        }
+        else
+        {
+            float fShrunk = UI.fDetailedRange / fStep;
+            if(fShrunk < 1e-4f) //100ns
+            {
+                fShrunk = 1e-4f;
+            }
+            UI.fDetailedRange = fShrunk;
+            UI.fDetailedRangeTarget = UI.fDetailedRange;
+        }
+
+        // Shift the offset by the range change scaled by the mouse's
+        // horizontal position, so the zoom is centred on the mouse.
+        const float fRangeDelta = fRangeBefore - UI.fDetailedRange;
+        const float fMouseFraction = MicroProfileMax((float)UI.nMouseX / UI.nWidth ,0.f);
+        UI.fDetailedOffset += fRangeDelta * fMouseFraction;
+        UI.fDetailedOffsetTarget = UI.fDetailedOffset;
+    }
+    if(nDragX)
+    {
+        UI.fDetailedOffset += -nDragX * UI.fDetailedRange / UI.nWidth;
+        UI.fDetailedOffsetTarget = UI.fDetailedOffset;
+    }
+    UI.nOffsetY -= nDragY;
+    UI.nOffsetX += nDragX;
+    // X offset may not become positive, Y offset may not become negative.
+    if(UI.nOffsetX > 0)
+    {
+        UI.nOffsetX = 0;
+    }
+    if(UI.nOffsetY < 0)
+    {
+        UI.nOffsetY = 0;
+    }
+}
+
+// Draws the active custom view (UI.Custom[UI.nCustomActive]) anchored to the
+// bottom of the screen: a text table with average and max time per timer,
+// optionally a bar per timer (MICROPROFILE_CUSTOM_BARS) and optionally a single
+// stacked bar (MICROPROFILE_CUSTOM_STACK). Does nothing when no custom view is
+// active, i.e. UI.nCustomActive == (uint32_t)-1.
+//
+// nHeight positions the block vertically. The nWidth parameter is not read;
+// horizontal extents come from UI.nWidth.
+void MicroProfileDrawCustom(uint32_t nWidth, uint32_t nHeight)
+{
+ if((uint32_t)-1 != UI.nCustomActive)
+ {
+ MicroProfile& S = *MicroProfileGet();
+ MP_ASSERT(UI.nCustomActive < MICROPROFILE_CUSTOM_MAX);
+ MicroProfileCustom* pCustom = &UI.Custom[UI.nCustomActive];
+ uint32_t nCount = pCustom->nNumTimers;
+ // Avoid dividing by zero when no frames have been aggregated.
+ uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
+ // One header row, plus three more rows when the stacked bar is shown.
+ uint32_t nExtraOffset = 1 + ((pCustom->nFlags & MICROPROFILE_CUSTOM_STACK) != 0 ? 3 : 0);
+ uint32_t nOffsetYBase = nHeight - (nExtraOffset+nCount)* (1+MICROPROFILE_TEXT_HEIGHT) - MICROPROFILE_CUSTOM_PADDING;
+ uint32_t nOffsetY = nOffsetYBase;
+ float fReference = pCustom->fReference;
+ float fRcpReference = 1.f / fReference;
+ uint32_t nReducedWidth = UI.nWidth - 2*MICROPROFILE_CUSTOM_PADDING - MICROPROFILE_GRAPH_WIDTH;
+
+ char Buffer[MICROPROFILE_NAME_MAX_LEN*2+1];
+ float* pTime = (float*)alloca(sizeof(float)*nCount);
+ float* pTimeAvg = (float*)alloca(sizeof(float)*nCount);
+ float* pTimeMax = (float*)alloca(sizeof(float)*nCount);
+ uint32_t* pColors = (uint32_t*)alloca(sizeof(uint32_t)*nCount);
+ uint32_t nMaxOffsetX = 0;
+ MicroProfileDrawBox(MICROPROFILE_CUSTOM_PADDING-1, nOffsetY-1, MICROPROFILE_CUSTOM_PADDING+nReducedWidth+1, UI.nHeight - MICROPROFILE_CUSTOM_PADDING+1, 0x88000000|g_nMicroProfileBackColors[0]);
+
+ // Gather current, average and max time in milliseconds and the colour of
+ // every timer in the custom view; GPU groups use the GPU tick rate.
+ for(uint32_t i = 0; i < nCount; ++i)
+ {
+ uint16_t nTimerIndex = MicroProfileGetTimerIndex(pCustom->pTimers[i]);
+ uint16_t nGroupIndex = MicroProfileGetGroupIndex(pCustom->pTimers[i]);
+ float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupIndex].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
+ pTime[i] = S.Frame[nTimerIndex].nTicks * fToMs;
+ pTimeAvg[i] = fToMs * (S.Aggregate[nTimerIndex].nTicks / nAggregateFrames);
+ pTimeMax[i] = fToMs * (S.AggregateMax[nTimerIndex]);
+ pColors[i] = S.TimerInfo[nTimerIndex].nColor;
+ }
+
+ // Text table: header row, then "avg  max  group:timer" for every timer.
+ MicroProfileDrawText(MICROPROFILE_CUSTOM_PADDING + 3*MICROPROFILE_TEXT_WIDTH, nOffsetY, (uint32_t)-1, "Avg", sizeof("Avg")-1);
+ MicroProfileDrawText(MICROPROFILE_CUSTOM_PADDING + 13*MICROPROFILE_TEXT_WIDTH, nOffsetY, (uint32_t)-1, "Max", sizeof("Max")-1);
+ for(uint32_t i = 0; i < nCount; ++i)
+ {
+ nOffsetY += (1+MICROPROFILE_TEXT_HEIGHT);
+ uint16_t nTimerIndex = MicroProfileGetTimerIndex(pCustom->pTimers[i]);
+ uint16_t nGroupIndex = MicroProfileGetGroupIndex(pCustom->pTimers[i]);
+ MicroProfileTimerInfo* pTimerInfo = &S.TimerInfo[nTimerIndex];
+ int nSize;
+ uint32_t nOffsetX = MICROPROFILE_CUSTOM_PADDING;
+ nSize = snprintf(Buffer, sizeof(Buffer)-1, "%6.2f", pTimeAvg[i]);
+ MicroProfileDrawText(nOffsetX, nOffsetY, (uint32_t)-1, Buffer, nSize);
+ nOffsetX += (nSize+2) * (MICROPROFILE_TEXT_WIDTH+1);
+ nSize = snprintf(Buffer, sizeof(Buffer)-1, "%6.2f", pTimeMax[i]);
+ MicroProfileDrawText(nOffsetX, nOffsetY, (uint32_t)-1, Buffer, nSize);
+ nOffsetX += (nSize+2) * (MICROPROFILE_TEXT_WIDTH+1);
+ nSize = snprintf(Buffer, sizeof(Buffer)-1, "%s:%s", S.GroupInfo[nGroupIndex].pName, pTimerInfo->pName);
+ MicroProfileDrawText(nOffsetX, nOffsetY, pTimerInfo->nColor, Buffer, nSize);
+ nOffsetX += (nSize+2) * (MICROPROFILE_TEXT_WIDTH+1);
+ // Track the right edge of the widest text row; the bars start there.
+ nMaxOffsetX = MicroProfileMax(nMaxOffsetX, nOffsetX);
+ }
+ // NOTE(review): unsigned subtraction - wraps if the text is wider than
+ // nReducedWidth.
+ uint32_t nMaxWidth = nReducedWidth- nMaxOffsetX;
+
+ if(pCustom->nFlags & MICROPROFILE_CUSTOM_BARS)
+ {
+ // One bar per timer to the right of the text, scaled against fReference
+ // and clamped to nMaxWidth.
+ nOffsetY = nOffsetYBase;
+ float* pMs = pCustom->nFlags & MICROPROFILE_CUSTOM_BAR_SOURCE_MAX ? pTimeMax : pTimeAvg;
+ const char* pString = pCustom->nFlags & MICROPROFILE_CUSTOM_BAR_SOURCE_MAX ? "Max" : "Avg";
+ MicroProfileDrawText(nMaxOffsetX, nOffsetY, (uint32_t)-1, pString, strlen(pString));
+ int nSize = snprintf(Buffer, sizeof(Buffer)-1, "%6.2fms", fReference);
+ MicroProfileDrawText(nReducedWidth - (1+nSize) * (MICROPROFILE_TEXT_WIDTH+1), nOffsetY, (uint32_t)-1, Buffer, nSize);
+ for(uint32_t i = 0; i < nCount; ++i)
+ {
+ nOffsetY += (1+MICROPROFILE_TEXT_HEIGHT);
+ uint32_t nWidth = MicroProfileMin(nMaxWidth, (uint32_t)(nMaxWidth * pMs[i] * fRcpReference));
+ MicroProfileDrawBox(nMaxOffsetX, nOffsetY, nMaxOffsetX+nWidth, nOffsetY+MICROPROFILE_TEXT_HEIGHT, pColors[i]|0xff000000);
+ }
+ }
+ if(pCustom->nFlags & MICROPROFILE_CUSTOM_STACK)
+ {
+ // Stacked bar below the table: the timers' segments are laid end to end,
+ // each with a width proportional to its time relative to fReference.
+ nOffsetY += 2*(1+MICROPROFILE_TEXT_HEIGHT);
+ const char* pString = pCustom->nFlags & MICROPROFILE_CUSTOM_STACK_SOURCE_MAX ? "Max" : "Avg";
+ MicroProfileDrawText(MICROPROFILE_CUSTOM_PADDING, nOffsetY, (uint32_t)-1, pString, strlen(pString));
+ int nSize = snprintf(Buffer, sizeof(Buffer)-1, "%6.2fms", fReference);
+ MicroProfileDrawText(nReducedWidth - (1+nSize) * (MICROPROFILE_TEXT_WIDTH+1), nOffsetY, (uint32_t)-1, Buffer, nSize);
+ nOffsetY += (1+MICROPROFILE_TEXT_HEIGHT);
+ float fPosX = MICROPROFILE_CUSTOM_PADDING;
+ float* pMs = pCustom->nFlags & MICROPROFILE_CUSTOM_STACK_SOURCE_MAX ? pTimeMax : pTimeAvg;
+ for(uint32_t i = 0; i < nCount; ++i)
+ {
+ float fWidth = pMs[i] * fRcpReference * nReducedWidth;
+ uint32_t nX = fPosX;
+ fPosX += fWidth;
+ uint32_t nXEnd = fPosX;
+ // Segments narrower than one pixel after truncation are skipped.
+ if(nX < nXEnd)
+ {
+ MicroProfileDrawBox(nX, nOffsetY, nXEnd, nOffsetY+MICROPROFILE_TEXT_HEIGHT, pColors[i]|0xff000000);
+ }
+ }
+ }
+ }
+}
+void MicroProfileDraw(uint32_t nWidth, uint32_t nHeight)
+{
+ MICROPROFILE_SCOPE(g_MicroProfileDraw);
+ MicroProfile& S = *MicroProfileGet();
+
+ {
+ static int once = 0;
+ if(0 == once)
+ {
+ std::recursive_mutex& m = MicroProfileGetMutex();
+ m.lock();
+ MicroProfileInitUI();
+
+
+
+ uint32_t nDisplay = S.nDisplay;
+ MicroProfileLoadPreset(MICROPROFILE_DEFAULT_PRESET);
+ once++;
+ S.nDisplay = nDisplay;// dont load display, just state
+ m.unlock();
+
+ }
+ }
+
+
+ if(S.nDisplay)
+ {
+ std::recursive_mutex& m = MicroProfileGetMutex();
+ m.lock();
+ UI.nWidth = nWidth;
+ UI.nHeight = nHeight;
+ UI.nHoverToken = MICROPROFILE_INVALID_TOKEN;
+ UI.nHoverTime = 0;
+ UI.nHoverFrame = -1;
+ if(S.nDisplay != MP_DRAW_DETAILED)
+ S.nContextSwitchHoverThread = S.nContextSwitchHoverThreadAfter = S.nContextSwitchHoverThreadBefore = -1;
+ MicroProfileMoveGraph();
+
+
+ if(S.nDisplay == MP_DRAW_DETAILED)
+ {
+ MicroProfileDrawDetailedView(nWidth, nHeight);
+ }
+ else if(S.nDisplay == MP_DRAW_BARS && S.nBars)
+ {
+ MicroProfileDrawBarView(nWidth, nHeight);
+ }
+
+ MicroProfileDrawMenu(nWidth, nHeight);
+ bool bMouseOverGraph = MicroProfileDrawGraph(nWidth, nHeight);
+ MicroProfileDrawCustom(nWidth, nHeight);
+ bool bHidden = S.nDisplay == MP_DRAW_HIDDEN;
+ if(!bHidden)
+ {
+ uint32_t nLockedToolTipX = 3;
+ bool bDeleted = false;
+ for(int i = 0; i < MICROPROFILE_TOOLTIP_MAX_LOCKED; ++i)
+ {
+ int nIndex = (g_MicroProfileUI.LockedToolTipFront + i) % MICROPROFILE_TOOLTIP_MAX_LOCKED;
+ if(g_MicroProfileUI.LockedToolTips[nIndex].ppStrings[0])
+ {
+ uint32_t nToolTipWidth = 0, nToolTipHeight = 0;
+ MicroProfileFloatWindowSize(g_MicroProfileUI.LockedToolTips[nIndex].ppStrings, g_MicroProfileUI.LockedToolTips[nIndex].nNumStrings, 0, nToolTipWidth, nToolTipHeight, 0);
+ uint32_t nStartY = nHeight - nToolTipHeight - 2;
+ if(!bDeleted && UI.nMouseY > nStartY && UI.nMouseX > nLockedToolTipX && UI.nMouseX <= nLockedToolTipX + nToolTipWidth && (UI.nMouseLeft || UI.nMouseRight) )
+ {
+ bDeleted = true;
+ int j = i;
+ for(; j < MICROPROFILE_TOOLTIP_MAX_LOCKED-1; ++j)
+ {
+ int nIndex0 = (g_MicroProfileUI.LockedToolTipFront + j) % MICROPROFILE_TOOLTIP_MAX_LOCKED;
+ int nIndex1 = (g_MicroProfileUI.LockedToolTipFront + j+1) % MICROPROFILE_TOOLTIP_MAX_LOCKED;
+ MicroProfileStringArrayCopy(&g_MicroProfileUI.LockedToolTips[nIndex0], &g_MicroProfileUI.LockedToolTips[nIndex1]);
+ }
+ MicroProfileStringArrayClear(&g_MicroProfileUI.LockedToolTips[(g_MicroProfileUI.LockedToolTipFront + j) % MICROPROFILE_TOOLTIP_MAX_LOCKED]);
+ }
+ else
+ {
+ MicroProfileDrawFloatWindow(nLockedToolTipX, nHeight-nToolTipHeight-2, &g_MicroProfileUI.LockedToolTips[nIndex].ppStrings[0], g_MicroProfileUI.LockedToolTips[nIndex].nNumStrings, g_MicroProfileUI.nLockedToolTipColor[nIndex]);
+ nLockedToolTipX += nToolTipWidth + 4;
+ }
+ }
+ }
+
+ if(UI.nActiveMenu == 8)
+ {
+ if(S.nDisplay & MP_DRAW_DETAILED)
+ {
+ MicroProfileStringArray DetailedHelp;
+ MicroProfileStringArrayClear(&DetailedHelp);
+ MicroProfileStringArrayFormat(&DetailedHelp, "%s", MICROPROFILE_HELP_LEFT);
+ MicroProfileStringArrayAddLiteral(&DetailedHelp, "Toggle Graph");
+ MicroProfileStringArrayFormat(&DetailedHelp, "%s", MICROPROFILE_HELP_ALT);
+ MicroProfileStringArrayAddLiteral(&DetailedHelp, "Zoom");
+ MicroProfileStringArrayFormat(&DetailedHelp, "%s + %s", MICROPROFILE_HELP_MOD, MICROPROFILE_HELP_LEFT);
+ MicroProfileStringArrayAddLiteral(&DetailedHelp, "Lock Tooltip");
+ MicroProfileStringArrayAddLiteral(&DetailedHelp, "Drag");
+ MicroProfileStringArrayAddLiteral(&DetailedHelp, "Pan View");
+ MicroProfileStringArrayAddLiteral(&DetailedHelp, "Mouse Wheel");
+ MicroProfileStringArrayAddLiteral(&DetailedHelp, "Zoom");
+ MicroProfileDrawFloatWindow(nWidth, MICROPROFILE_FRAME_HISTORY_HEIGHT+20, DetailedHelp.ppStrings, DetailedHelp.nNumStrings, 0xff777777);
+
+ MicroProfileStringArray DetailedHistoryHelp;
+ MicroProfileStringArrayClear(&DetailedHistoryHelp);
+ MicroProfileStringArrayFormat(&DetailedHistoryHelp, "%s", MICROPROFILE_HELP_LEFT);
+ MicroProfileStringArrayAddLiteral(&DetailedHistoryHelp, "Center View");
+ MicroProfileStringArrayFormat(&DetailedHistoryHelp, "%s", MICROPROFILE_HELP_ALT);
+ MicroProfileStringArrayAddLiteral(&DetailedHistoryHelp, "Zoom to frame");
+ MicroProfileDrawFloatWindow(nWidth, 20, DetailedHistoryHelp.ppStrings, DetailedHistoryHelp.nNumStrings, 0xff777777);
+
+
+
+ }
+ else if(0 != (S.nDisplay & MP_DRAW_BARS) && S.nBars)
+ {
+ MicroProfileStringArray BarHelp;
+ MicroProfileStringArrayClear(&BarHelp);
+ MicroProfileStringArrayFormat(&BarHelp, "%s", MICROPROFILE_HELP_LEFT);
+ MicroProfileStringArrayAddLiteral(&BarHelp, "Toggle Graph");
+ MicroProfileStringArrayFormat(&BarHelp, "%s + %s", MICROPROFILE_HELP_MOD, MICROPROFILE_HELP_LEFT);
+ MicroProfileStringArrayAddLiteral(&BarHelp, "Lock Tooltip");
+ MicroProfileStringArrayAddLiteral(&BarHelp, "Drag");
+ MicroProfileStringArrayAddLiteral(&BarHelp, "Pan View");
+ MicroProfileDrawFloatWindow(nWidth, MICROPROFILE_FRAME_HISTORY_HEIGHT+20, BarHelp.ppStrings, BarHelp.nNumStrings, 0xff777777);
+
+ }
+ MicroProfileStringArray Debug;
+ MicroProfileStringArrayClear(&Debug);
+ MicroProfileStringArrayAddLiteral(&Debug, "Memory Usage");
+ MicroProfileStringArrayFormat(&Debug, "%4.2fmb", S.nMemUsage / (1024.f * 1024.f));
+ MicroProfileStringArrayAddLiteral(&Debug, "Web Server Port");
+ MicroProfileStringArrayFormat(&Debug, "%d", MicroProfileWebServerPort());
+ uint32_t nFrameNext = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY;
+ MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent];
+ MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext];
+
+
+ MicroProfileStringArrayAddLiteral(&Debug, "");
+ MicroProfileStringArrayAddLiteral(&Debug, "");
+ MicroProfileStringArrayAddLiteral(&Debug, "Usage");
+ MicroProfileStringArrayAddLiteral(&Debug, "markers [frames] ");
+
+#if MICROPROFILE_CONTEXT_SWITCH_TRACE
+ MicroProfileStringArrayAddLiteral(&Debug, "Context Switch");
+ MicroProfileStringArrayFormat(&Debug, "%9d [%7d]", S.nContextSwitchUsage, MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE / S.nContextSwitchUsage );
+#endif
+
+ for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
+ {
+ if(pFrameCurrent->nLogStart[i] && S.Pool[i])
+ {
+ uint32_t nEnd = pFrameNext->nLogStart[i];
+ uint32_t nStart = pFrameCurrent->nLogStart[i];
+ uint32_t nUsage = nStart < nEnd ? (nEnd - nStart) : (nEnd + MICROPROFILE_BUFFER_SIZE - nStart);
+ uint32_t nFrameSupport = MICROPROFILE_BUFFER_SIZE / nUsage;
+ MicroProfileStringArrayFormat(&Debug, "%s", &S.Pool[i]->ThreadName[0]);
+ MicroProfileStringArrayFormat(&Debug, "%9d [%7d]", nUsage, nFrameSupport);
+ }
+ }
+
+ MicroProfileDrawFloatWindow(0, nHeight-10, Debug.ppStrings, Debug.nNumStrings, 0xff777777);
+ }
+
+
+
+ if(UI.nActiveMenu == -1 && !bMouseOverGraph)
+ {
+ if(UI.nHoverToken != MICROPROFILE_INVALID_TOKEN)
+ {
+ MicroProfileDrawFloatTooltip(UI.nMouseX, UI.nMouseY, UI.nHoverToken, UI.nHoverTime);
+ }
+ else if(S.nContextSwitchHoverThreadAfter != -1 && S.nContextSwitchHoverThreadBefore != -1)
+ {
+ float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
+ MicroProfileStringArray ToolTip;
+ MicroProfileStringArrayClear(&ToolTip);
+ MicroProfileStringArrayAddLiteral(&ToolTip, "Context Switch");
+ MicroProfileStringArrayFormat(&ToolTip, "%04x", S.nContextSwitchHoverThread);
+ MicroProfileStringArrayAddLiteral(&ToolTip, "Before");
+ MicroProfileStringArrayFormat(&ToolTip, "%04x", S.nContextSwitchHoverThreadBefore);
+ MicroProfileStringArrayAddLiteral(&ToolTip, "After");
+ MicroProfileStringArrayFormat(&ToolTip, "%04x", S.nContextSwitchHoverThreadAfter);
+ MicroProfileStringArrayAddLiteral(&ToolTip, "Duration");
+ int64_t nDifference = MicroProfileLogTickDifference(S.nContextSwitchHoverTickIn, S.nContextSwitchHoverTickOut);
+ MicroProfileStringArrayFormat(&ToolTip, "%6.2fms", fToMs * nDifference );
+ MicroProfileStringArrayAddLiteral(&ToolTip, "CPU");
+ MicroProfileStringArrayFormat(&ToolTip, "%d", S.nContextSwitchHoverCpu);
+ MicroProfileDrawFloatWindow(UI.nMouseX, UI.nMouseY+20, &ToolTip.ppStrings[0], ToolTip.nNumStrings, -1);
+
+
+ }
+ else if(UI.nHoverFrame != -1)
+ {
+ uint32_t nNextFrame = (UI.nHoverFrame+1)%MICROPROFILE_MAX_FRAME_HISTORY;
+ int64_t nTick = S.Frames[UI.nHoverFrame].nFrameStartCpu;
+ int64_t nTickNext = S.Frames[nNextFrame].nFrameStartCpu;
+ int64_t nTickGpu = S.Frames[UI.nHoverFrame].nFrameStartGpu;
+ int64_t nTickNextGpu = S.Frames[nNextFrame].nFrameStartGpu;
+
+ float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
+ float fToMsGpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
+ float fMs = fToMs * (nTickNext - nTick);
+ float fMsGpu = fToMsGpu * (nTickNextGpu - nTickGpu);
+ MicroProfileStringArray ToolTip;
+ MicroProfileStringArrayClear(&ToolTip);
+ MicroProfileStringArrayFormat(&ToolTip, "Frame %d", UI.nHoverFrame);
+ #if MICROPROFILE_DEBUG
+ MicroProfileStringArrayFormat(&ToolTip, "%p", &S.Frames[UI.nHoverFrame]);
+ #else
+ MicroProfileStringArrayAddLiteral(&ToolTip, "");
+ #endif
+ MicroProfileStringArrayAddLiteral(&ToolTip, "CPU Time");
+ MicroProfileStringArrayFormat(&ToolTip, "%6.2fms", fMs);
+ MicroProfileStringArrayAddLiteral(&ToolTip, "GPU Time");
+ MicroProfileStringArrayFormat(&ToolTip, "%6.2fms", fMsGpu);
+ #if MICROPROFILE_DEBUG
+ for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
+ {
+ if(S.Frames[UI.nHoverFrame].nLogStart[i])
+ {
+ MicroProfileStringArrayFormat(&ToolTip, "%d", i);
+ MicroProfileStringArrayFormat(&ToolTip, "%d", S.Frames[UI.nHoverFrame].nLogStart[i]);
+ }
+ }
+ #endif
+ MicroProfileDrawFloatWindow(UI.nMouseX, UI.nMouseY+20, &ToolTip.ppStrings[0], ToolTip.nNumStrings, -1);
+ }
+ if(UI.nMouseLeft)
+ {
+ if(UI.nHoverToken != MICROPROFILE_INVALID_TOKEN)
+ MicroProfileToggleGraph(UI.nHoverToken);
+ }
+ }
+ }
+
+#if MICROPROFILE_DRAWCURSOR
+ {
+ float fCursor[8] =
+ {
+ MicroProfileMax(0, (int)UI.nMouseX-3), UI.nMouseY,
+ MicroProfileMin(nWidth, UI.nMouseX+3), UI.nMouseY,
+ UI.nMouseX, MicroProfileMax((int)UI.nMouseY-3, 0),
+ UI.nMouseX, MicroProfileMin(nHeight, UI.nMouseY+3),
+ };
+ MicroProfileDrawLine2D(2, &fCursor[0], 0xff00ff00);
+ MicroProfileDrawLine2D(2, &fCursor[4], 0xff00ff00);
+ }
+#endif
+ m.unlock();
+ }
+ else if(UI.nCustomActive != (uint32_t)-1)
+ {
+ std::recursive_mutex& m = MicroProfileGetMutex();
+ m.lock();
+ MicroProfileDrawGraph(nWidth, nHeight);
+ MicroProfileDrawCustom(nWidth, nHeight);
+ m.unlock();
+
+ }
+ UI.nMouseLeft = UI.nMouseRight = 0;
+ UI.nMouseLeftMod = UI.nMouseRightMod = 0;
+ UI.nMouseWheelDelta = 0;
+ if(S.nOverflow)
+ S.nOverflow--;
+
+ UI.fDetailedOffset = UI.fDetailedOffset + (UI.fDetailedOffsetTarget - UI.fDetailedOffset) * MICROPROFILE_ANIM_DELAY_PRC;
+ UI.fDetailedRange = UI.fDetailedRange + (UI.fDetailedRangeTarget - UI.fDetailedRange) * MICROPROFILE_ANIM_DELAY_PRC;
+
+
+}
+
+bool MicroProfileIsDrawing() // Reports whether any display mode is active, i.e. S.nDisplay is non-zero.
+{
+ MicroProfile& S = *MicroProfileGet();
+ return S.nDisplay != 0;
+}
+
+void MicroProfileToggleGraph(MicroProfileToken nToken) // Toggles the graph for nToken's timer: removes it if already graphed, otherwise assigns it a graph slot.
+{
+ MicroProfile& S = *MicroProfileGet();
+ uint32_t nTimerId = MicroProfileGetTimerIndex(nToken);
+ nToken &= 0xffff; // from here on only the low 16 bits are compared against and stored in graph slots
+ int32_t nMinSort = 0x7fffffff;
+ int32_t nFreeIndex = -1; // index of a free slot, or -1 if the scan finds none
+ int32_t nMinIndex = 0; // slot with the smallest nKey; replaced when no slot is free
+ int32_t nMaxSort = 0x80000000; // NOTE(review): presumably meant as INT32_MIN; relies on the unsigned-to-int32_t conversion - confirm
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
+ {
+ if(S.Graph[i].nToken == MICROPROFILE_INVALID_TOKEN)
+ nFreeIndex = i; // keeps the last free slot encountered
+ if(S.Graph[i].nToken == nToken)
+ { // already graphed: release the slot, clear the timer's flag and stop
+ S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN;
+ S.TimerInfo[nTimerId].bGraph = false;
+ return;
+ }
+ if(S.Graph[i].nKey < nMinSort)
+ {
+ nMinSort = S.Graph[i].nKey;
+ nMinIndex = i;
+ }
+ if(S.Graph[i].nKey > nMaxSort)
+ {
+ nMaxSort = S.Graph[i].nKey;
+ }
+ }
+ int nIndex = nFreeIndex > -1 ? nFreeIndex : nMinIndex;
+ if (nFreeIndex == -1)
+ { // no free slot: the occupant of the lowest-key slot is replaced, so clear its bGraph flag first
+ uint32_t idx = MicroProfileGetTimerIndex(S.Graph[nIndex].nToken);
+ S.TimerInfo[idx].bGraph = false;
+ }
+ S.Graph[nIndex].nToken = nToken;
+ S.Graph[nIndex].nKey = nMaxSort+1; // one above the largest key seen, so this slot has the highest key
+ memset(&S.Graph[nIndex].nHistory[0], 0, sizeof(S.Graph[nIndex].nHistory)); // start the new graph with an empty history
+ S.TimerInfo[nTimerId].bGraph = true;
+}
+
+
+void MicroProfileMousePosition(uint32_t nX, uint32_t nY, int nWheelDelta) // Stores the given mouse position and wheel delta in the UI state.
+{
+ UI.nMouseX = nX;
+ UI.nMouseY = nY;
+ UI.nMouseWheelDelta = nWheelDelta;
+}
+
+void MicroProfileModKey(uint32_t nKeyState) // Records the modifier-key state; any non-zero nKeyState is stored as 1.
+{
+ UI.nModDown = nKeyState ? 1 : 0;
+}
+
+void MicroProfileClearGraph() // Resets graph slots by setting their token to MICROPROFILE_INVALID_TOKEN.
+{
+ MicroProfile& S = *MicroProfileGet();
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
+ {
+ if(S.Graph[i].nToken != 0) // NOTE(review): tests against 0 rather than MICROPROFILE_INVALID_TOKEN, and TimerInfo[].bGraph is not cleared here - confirm intended
+ {
+ S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN;
+ }
+ }
+}
+
+void MicroProfileMouseButton(uint32_t nLeft, uint32_t nRight) // Updates button state; a release is reported as a click only if the cursor stayed near the press position.
+{
+ bool bCanRelease = abs((int)(UI.nMouseDownX - UI.nMouseX)) + abs((int)(UI.nMouseDownY - UI.nMouseY)) < 3; // |dx| + |dy| between press position and current position is below 3
+
+ if(0 == nLeft && UI.nMouseDownLeft && bCanRelease) // left button was held and is now released
+ {
+ if(UI.nModDown)
+ UI.nMouseLeftMod = 1; // click with the modifier key held
+ else
+ UI.nMouseLeft = 1;
+ }
+
+ if(0 == nRight && UI.nMouseDownRight && bCanRelease) // right button was held and is now released
+ {
+ if(UI.nModDown)
+ UI.nMouseRightMod = 1;
+ else
+ UI.nMouseRight = 1;
+ }
+ if((nLeft || nRight) && !(UI.nMouseDownLeft || UI.nMouseDownRight)) // a button went down while none was held: remember where the press started
+ {
+ UI.nMouseDownX = UI.nMouseX;
+ UI.nMouseDownY = UI.nMouseY;
+ }
+
+ UI.nMouseDownLeft = nLeft; // remember the held state for the next call
+ UI.nMouseDownRight = nRight;
+
+}
+
+void MicroProfileDrawLineVertical(int nX, int nTop, int nBottom, uint32_t nColor) // Draws a vertical line as a box spanning nX..nX+1 horizontally and nTop..nBottom vertically.
+{
+ MicroProfileDrawBox(nX, nTop, nX + 1, nBottom, nColor);
+}
+
+void MicroProfileDrawLineHorizontal(int nLeft, int nRight, int nY, uint32_t nColor) // Draws a horizontal line as a box spanning nLeft..nRight horizontally and nY..nY+1 vertically.
+{
+ MicroProfileDrawBox(nLeft, nY, nRight, nY + 1, nColor);
+}
+
+
+
+#include <stdio.h>
+
+#define MICROPROFILE_PRESET_HEADER_MAGIC 0x28586813 // stored in nMagic when saving; loading rejects files whose nMagic differs
+#define MICROPROFILE_PRESET_HEADER_VERSION 0x00000102 // stored in nVersion when saving; loading rejects files whose nVersion differs
+struct MicroProfilePresetHeader // Header at the start of a preset file; the save/load functions write and read it as raw bytes.
+{
+ uint32_t nMagic;
+ uint32_t nVersion;
+ //groups, threads, aggregate, reference frame, graphs timers
+ uint32_t nGroups[MICROPROFILE_MAX_GROUPS]; // per group: file offset of its saved name, 0 if none was saved
+ uint32_t nThreads[MICROPROFILE_MAX_THREADS]; // per thread: file offset of its saved name, 0 if none was saved
+ uint32_t nGraphName[MICROPROFILE_MAX_GRAPHS]; // per graph slot: file offset of the timer name, 0 if none was saved
+ uint32_t nGraphGroupName[MICROPROFILE_MAX_GRAPHS]; // per graph slot: file offset of the timer's group name, 0 if none was saved
+ uint32_t nAllGroupsWanted;
+ uint32_t nAllThreadsWanted;
+ uint32_t nAggregateFlip;
+ float fReferenceTime;
+ uint32_t nBars;
+ uint32_t nDisplay;
+ uint32_t nOpacityBackground;
+ uint32_t nOpacityForeground;
+ uint32_t nShowSpikes; // saved as 1 or 0; loading treats exactly 1 as enabled
+};
+
+#ifndef MICROPROFILE_PRESET_FILENAME_FUNC // default below is used only when the including code has not defined its own filename function
+#define MICROPROFILE_PRESET_FILENAME_FUNC MicroProfilePresetFilename
+static const char* MicroProfilePresetFilename(const char* pSuffix) // Builds the preset filename ".microprofilepreset.<pSuffix>".
+{
+ static char filename[512]; // static buffer: the returned pointer refers to storage that the next call overwrites
+ snprintf(filename, sizeof(filename)-1, ".microprofilepreset.%s", pSuffix);
+ return filename;
+}
+#endif
+
+void MicroProfileSavePreset(const char* pPresetName) // Writes the current display settings and the names of wanted groups, active threads and graphed timers to the preset file.
+{
+ std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex());
+ FILE* F = fopen(MICROPROFILE_PRESET_FILENAME_FUNC(pPresetName), "wb");
+ if(!F) return; // nothing is saved if the file cannot be opened
+
+ MicroProfile& S = *MicroProfileGet();
+
+ MicroProfilePresetHeader Header;
+ memset(&Header, 0, sizeof(Header)); // offsets left at 0 mean "no name saved" for that entry
+ Header.nAggregateFlip = S.nAggregateFlip;
+ Header.nBars = S.nBars;
+ Header.fReferenceTime = S.fReferenceTime;
+ Header.nAllGroupsWanted = S.nAllGroupsWanted;
+ Header.nAllThreadsWanted = S.nAllThreadsWanted;
+ Header.nMagic = MICROPROFILE_PRESET_HEADER_MAGIC;
+ Header.nVersion = MICROPROFILE_PRESET_HEADER_VERSION;
+ Header.nDisplay = S.nDisplay;
+ Header.nOpacityBackground = UI.nOpacityBackground;
+ Header.nOpacityForeground = UI.nOpacityForeground;
+ Header.nShowSpikes = UI.bShowSpikes ? 1 : 0;
+ fwrite(&Header, sizeof(Header), 1, F); // first pass reserves the header space; it is rewritten below once the name offsets are known
+ uint64_t nMask = 1; // bit i selects group i in S.nActiveGroupWanted
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
+ {
+ if(S.nActiveGroupWanted & nMask)
+ {
+ uint32_t offset = ftell(F); // file position where this group's name starts
+ const char* pName = S.GroupInfo[i].pName;
+ int nLen = (int)strlen(pName)+1; // +1 so the terminating NUL is written too
+ fwrite(pName, nLen, 1, F);
+ Header.nGroups[i] = offset;
+ }
+ nMask <<= 1;
+ }
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
+ {
+ MicroProfileThreadLog* pLog = S.Pool[i];
+ if(pLog && S.nThreadActive[i]) // only threads that exist and are marked active are saved
+ {
+ uint32_t nOffset = ftell(F);
+ const char* pName = &pLog->ThreadName[0];
+ int nLen = (int)strlen(pName)+1;
+ fwrite(pName, nLen, 1, F);
+ Header.nThreads[i] = nOffset;
+ }
+ }
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
+ {
+ MicroProfileToken nToken = S.Graph[i].nToken;
+ if(nToken != MICROPROFILE_INVALID_TOKEN)
+ {
+ uint32_t nGroupIndex = MicroProfileGetGroupIndex(nToken);
+ uint32_t nTimerIndex = MicroProfileGetTimerIndex(nToken);
+ const char* pGroupName = S.GroupInfo[nGroupIndex].pName;
+ const char* pTimerName = S.TimerInfo[nTimerIndex].pName;
+ MP_ASSERT(pGroupName);
+ MP_ASSERT(pTimerName);
+ int nGroupLen = (int)strlen(pGroupName)+1;
+ int nTimerLen = (int)strlen(pTimerName)+1;
+
+ uint32_t nOffsetGroup = ftell(F); // group name is written first, immediately followed by the timer name
+ fwrite(pGroupName, nGroupLen, 1, F);
+ uint32_t nOffsetTimer = ftell(F);
+ fwrite(pTimerName, nTimerLen, 1, F);
+ Header.nGraphName[i] = nOffsetTimer;
+ Header.nGraphGroupName[i] = nOffsetGroup;
+ }
+ }
+ fseek(F, 0, SEEK_SET); // rewind and overwrite the header, now with the name offsets filled in
+ fwrite(&Header, sizeof(Header), 1, F);
+
+ fclose(F);
+
+}
+
+
+
+void MicroProfileLoadPreset(const char* pSuffix) // Reads a preset file and applies its settings; returns without changes if the file cannot be opened or read, or its magic/version do not match.
+{
+ std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex());
+ FILE* F = fopen(MICROPROFILE_PRESET_FILENAME_FUNC(pSuffix), "rb");
+ if(!F)
+ {
+ return;
+ }
+ fseek(F, 0, SEEK_END);
+ int nSize = ftell(F); // file size; NOTE(review): not checked for ftell failure (-1) or for being smaller than the header
+ char* const pBuffer = (char*)alloca(nSize); // whole file is read into a stack allocation; NOTE(review): size comes from the file and is unbounded
+ fseek(F, 0, SEEK_SET);
+ int nRead = (int)fread(pBuffer, nSize, 1, F); // one item of nSize bytes, so nRead is 1 only if the entire file was read
+ fclose(F);
+ if(1 != nRead)
+ return;
+
+ MicroProfile& S = *MicroProfileGet();
+
+ MicroProfilePresetHeader& Header = *(MicroProfilePresetHeader*)pBuffer; // the start of the buffer is interpreted as the header
+
+ if(Header.nMagic != MICROPROFILE_PRESET_HEADER_MAGIC || Header.nVersion != MICROPROFILE_PRESET_HEADER_VERSION)
+ {
+ return;
+ }
+
+ S.nAggregateFlip = Header.nAggregateFlip;
+ S.nBars = Header.nBars;
+ S.fReferenceTime = Header.fReferenceTime;
+ S.fRcpReferenceTime = 1.f / Header.fReferenceTime; // NOTE(review): no guard against a stored reference time of 0
+ S.nAllGroupsWanted = Header.nAllGroupsWanted;
+ S.nAllThreadsWanted = Header.nAllThreadsWanted;
+ S.nDisplay = Header.nDisplay;
+ S.nActiveGroupWanted = 0; // rebuilt below from the saved group names
+ UI.nOpacityBackground = Header.nOpacityBackground;
+ UI.nOpacityForeground = Header.nOpacityForeground;
+ UI.bShowSpikes = Header.nShowSpikes == 1;
+
+ memset(&S.nThreadActive[0], 0, sizeof(S.nThreadActive)); // rebuilt below from the saved thread names
+
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
+ {
+ if(Header.nGroups[i]) // non-zero offset: a group name was saved for this entry
+ {
+ const char* pGroupName = pBuffer + Header.nGroups[i]; // NOTE(review): offset read from the file is not checked against nSize
+ for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
+ {
+ if(0 == MP_STRCASECMP(pGroupName, S.GroupInfo[j].pName)) // groups are matched by name, so the saved index i need not equal j
+ {
+ S.nActiveGroupWanted |= (1ll << j);
+ }
+ }
+ }
+ }
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
+ {
+ if(Header.nThreads[i])
+ {
+ const char* pThreadName = pBuffer + Header.nThreads[i];
+ for(uint32_t j = 0; j < MICROPROFILE_MAX_THREADS; ++j)
+ {
+ MicroProfileThreadLog* pLog = S.Pool[j];
+ if(pLog && 0 == MP_STRCASECMP(pThreadName, &pLog->ThreadName[0])) // every existing thread log with a matching name is activated
+ {
+ S.nThreadActive[j] = 1;
+ }
+ }
+ }
+ }
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
+ {
+ MicroProfileToken nPrevToken = S.Graph[i].nToken;
+ S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN; // slot stays empty unless a matching timer is found below
+ if(Header.nGraphName[i] && Header.nGraphGroupName[i])
+ {
+ const char* pGraphName = pBuffer + Header.nGraphName[i];
+ const char* pGraphGroupName = pBuffer + Header.nGraphGroupName[i];
+ for(uint32_t j = 0; j < S.nTotalTimers; ++j)
+ {
+ uint64_t nGroupIndex = S.TimerInfo[j].nGroupIndex;
+ if(0 == MP_STRCASECMP(pGraphName, S.TimerInfo[j].pName) && 0 == MP_STRCASECMP(pGraphGroupName, S.GroupInfo[nGroupIndex].pName)) // both timer name and group name must match
+ {
+ MicroProfileToken nToken = MicroProfileMakeToken(1ll << nGroupIndex, (uint16_t)j);
+ S.Graph[i].nToken = nToken; // note: group index is stored here but is checked without in MicroProfileToggleGraph()!
+ S.TimerInfo[j].bGraph = true;
+ if(nToken != nPrevToken) // history is kept only when the slot ends up showing the same token as before
+ {
+ memset(&S.Graph[i].nHistory, 0, sizeof(S.Graph[i].nHistory));
+ }
+ break;
+ }
+ }
+ }
+ }
+}
+
+uint32_t MicroProfileCustomGroupFind(const char* pCustomName) // Returns the index of the custom group whose name matches pCustomName, or (uint32_t)-1 if there is none.
+{
+ for(uint32_t i = 0; i < UI.nCustomCount; ++i)
+ {
+ if(!MP_STRCASECMP(pCustomName, UI.Custom[i].pName)) // a zero result from MP_STRCASECMP is treated as a match
+ {
+ return i;
+ }
+ }
+ return (uint32_t)-1;
+}
+
+uint32_t MicroProfileCustomGroup(const char* pCustomName) // Returns the index of the custom group named pCustomName, creating a zero-initialised entry if none exists yet.
+{
+ for(uint32_t i = 0; i < UI.nCustomCount; ++i)
+ {
+ if(!MP_STRCASECMP(pCustomName, UI.Custom[i].pName))
+ {
+ return i;
+ }
+ }
+ MP_ASSERT(UI.nCustomCount < MICROPROFILE_CUSTOM_MAX); // capacity is only checked through this assert
+ uint32_t nIndex = UI.nCustomCount;
+ UI.nCustomCount++;
+ memset(&UI.Custom[nIndex], 0, sizeof(UI.Custom[nIndex]));
+ uint32_t nLen = (uint32_t)strlen(pCustomName);
+ if(nLen > MICROPROFILE_NAME_MAX_LEN-1) // longer names are truncated so that the terminator still fits
+ nLen = MICROPROFILE_NAME_MAX_LEN-1;
+ memcpy(&UI.Custom[nIndex].pName[0], pCustomName, nLen);
+ UI.Custom[nIndex].pName[nLen] = '\0';
+ return nIndex;
+}
+void MicroProfileCustomGroup(const char* pCustomName, uint32_t nMaxTimers, uint32_t nAggregateFlip, float fReferenceTime, uint32_t nFlags) // Finds or creates the named custom group, then reserves nMaxTimers timer slots for it and stores its settings.
+{
+ uint32_t nIndex = MicroProfileCustomGroup(pCustomName);
+ MP_ASSERT(UI.Custom[nIndex].pTimers == 0);//only call once!
+ UI.Custom[nIndex].pTimers = &UI.CustomTimer[UI.nCustomTimerCount]; // this group's timers start at the current end of the CustomTimer array
+ UI.Custom[nIndex].nMaxTimers = nMaxTimers;
+ UI.Custom[nIndex].fReference = fReferenceTime;
+ UI.nCustomTimerCount += nMaxTimers; // advances the used count past the slots reserved for this group
+ MP_ASSERT(UI.nCustomTimerCount <= MICROPROFILE_CUSTOM_MAX_TIMERS); //bump MICROPROFILE_CUSTOM_MAX_TIMERS
+ UI.Custom[nIndex].nFlags = nFlags;
+ UI.Custom[nIndex].nAggregateFlip = nAggregateFlip;
+}
+
+void MicroProfileCustomGroupEnable(uint32_t nIndex) // Makes custom group nIndex active and replaces the current graphs with that group's timers; does nothing if nIndex is out of range.
+{
+ if(nIndex < UI.nCustomCount) // also rejects (uint32_t)-1, the "not found" value
+ {
+ MicroProfile& S = *MicroProfileGet();
+ S.nForceGroupUI = UI.Custom[nIndex].nGroupMask;
+ MicroProfileSetAggregateFrames(UI.Custom[nIndex].nAggregateFlip);
+ S.fReferenceTime = UI.Custom[nIndex].fReference;
+ S.fRcpReferenceTime = 1.f / UI.Custom[nIndex].fReference;
+ UI.nCustomActive = nIndex;
+
+ for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) // first pass: release every graph slot currently in use
+ {
+ if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN)
+ {
+ uint32_t nTimerId = MicroProfileGetTimerIndex(S.Graph[i].nToken);
+ S.TimerInfo[nTimerId].bGraph = false;
+ S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN;
+ }
+ }
+
+ for(uint32_t i = 0; i < UI.Custom[nIndex].nNumTimers; ++i) // second pass: one graph slot per timer of the group, in order
+ {
+ if(i == MICROPROFILE_MAX_GRAPHS) // timers beyond the number of graph slots are not graphed
+ {
+ break;
+ }
+ S.Graph[i].nToken = UI.Custom[nIndex].pTimers[i];
+ S.Graph[i].nKey = i;
+ uint32_t nTimerId = MicroProfileGetTimerIndex(S.Graph[i].nToken);
+ S.TimerInfo[nTimerId].bGraph = true;
+ }
+ }
+}
+
+void MicroProfileCustomGroupToggle(const char* pCustomName) // Enables the named custom group, or disables custom groups if the name is unknown or that group is already active.
+{
+ uint32_t nIndex = MicroProfileCustomGroupFind(pCustomName);
+ if(nIndex == (uint32_t)-1 || nIndex == UI.nCustomActive) // not found, or already the active group
+ {
+ MicroProfileCustomGroupDisable();
+ }
+ else
+ {
+ MicroProfileCustomGroupEnable(nIndex);
+ }
+}
+
+void MicroProfileCustomGroupEnable(const char* pCustomName) // Looks up the named custom group and enables it through the index overload.
+{
+ uint32_t nIndex = MicroProfileCustomGroupFind(pCustomName); // (uint32_t)-1 when the name is unknown; the index overload ignores out-of-range indices
+ MicroProfileCustomGroupEnable(nIndex);
+}
+void MicroProfileCustomGroupDisable() // Clears the forced group mask and marks no custom group as active.
+{
+ MicroProfile& S = *MicroProfileGet();
+ S.nForceGroupUI = 0;
+ UI.nCustomActive = (uint32_t)-1; // (uint32_t)-1 is the "no custom group active" value
+}
+
+void MicroProfileCustomGroupAddTimer(const char* pCustomName, const char* pGroup, const char* pTimer) // Appends the timer identified by pGroup/pTimer to the named custom group; does nothing if the custom group does not exist.
+{
+ uint32_t nIndex = MicroProfileCustomGroupFind(pCustomName);
+ if((uint32_t)-1 == nIndex)
+ {
+ return;
+ }
+ uint32_t nTimerIndex = UI.Custom[nIndex].nNumTimers; // next unused position in this group's timer list
+ MP_ASSERT(nTimerIndex < UI.Custom[nIndex].nMaxTimers); // capacity is only checked through this assert
+ uint64_t nToken = MicroProfileFindToken(pGroup, pTimer);
+ MP_ASSERT(nToken != MICROPROFILE_INVALID_TOKEN); //Timer must be registered first.
+ UI.Custom[nIndex].pTimers[nTimerIndex] = nToken;
+ uint16_t nGroup = MicroProfileGetGroupIndex(nToken);
+ UI.Custom[nIndex].nGroupMask |= (1ll << nGroup); // add the timer's group to the custom group's mask
+ UI.Custom[nIndex].nNumTimers++;
+}
+
+#undef UI
+
+#endif
+#endif
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index d6fcb66a5..46f4a07c9 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -6,6 +6,9 @@
#include <thread>
#include <iostream>
+// This needs to be included before getopt.h because the latter #defines symbols used by it
+#include "common/microprofile.h"
+
#ifdef _MSC_VER
#include <getopt.h>
#else
@@ -59,6 +62,8 @@ int main(int argc, char **argv) {
Log::Filter log_filter(Log::Level::Debug);
Log::SetFilter(&log_filter);
+ MicroProfileOnThreadCreate("EmuThread");
+
if (boot_filename.empty()) {
LOG_CRITICAL(Frontend, "Failed to load ROM: No ROM specified");
return -1;
@@ -89,5 +94,7 @@ int main(int argc, char **argv) {
delete emu_window;
+ MicroProfileShutdown();
+
return 0;
}
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 0c0515054..a82e8a85b 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -18,6 +18,7 @@ set(SRCS
debugger/ramview.cpp
debugger/registers.cpp
util/spinbox.cpp
+ util/util.cpp
bootmanager.cpp
hotkeys.cpp
main.cpp
@@ -42,6 +43,7 @@ set(HEADERS
debugger/ramview.h
debugger/registers.h
util/spinbox.h
+ util/util.h
bootmanager.h
hotkeys.h
main.h
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index a96fbea5f..f8aacb527 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -14,6 +14,7 @@
#include "common/string_util.h"
#include "common/scm_rev.h"
#include "common/key_map.h"
+#include "common/microprofile.h"
#include "core/core.h"
#include "core/settings.h"
@@ -37,6 +38,8 @@ EmuThread::EmuThread(GRenderWindow* render_window) :
void EmuThread::run() {
render_window->MakeCurrent();
+ MicroProfileOnThreadCreate("EmuThread");
+
stop_run = false;
// holds whether the cpu was running during the last iteration,
@@ -69,6 +72,8 @@ void EmuThread::run() {
}
}
+ MicroProfileOnThreadExit();
+
render_window->moveContext();
}
diff --git a/src/citra_qt/debugger/graphics.cpp b/src/citra_qt/debugger/graphics.cpp
index 7424671f1..7d15028f0 100644
--- a/src/citra_qt/debugger/graphics.cpp
+++ b/src/citra_qt/debugger/graphics.cpp
@@ -7,6 +7,8 @@
#include <QVBoxLayout>
#include <QDebug>
+#include "citra_qt/util/util.h"
+
extern GraphicsDebugger g_debugger;
GPUCommandStreamItemModel::GPUCommandStreamItemModel(QObject* parent) : QAbstractListModel(parent), command_count(0)
@@ -79,7 +81,7 @@ GPUCommandStreamWidget::GPUCommandStreamWidget(QWidget* parent) : QDockWidget(tr
QListView* command_list = new QListView;
command_list->setModel(command_model);
- command_list->setFont(QFont("monospace"));
+ command_list->setFont(GetMonospaceFont());
setWidget(command_list);
}
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp
index 35a3140b2..025434687 100644
--- a/src/citra_qt/debugger/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics_cmdlists.cpp
@@ -14,6 +14,8 @@
#include <QSpinBox>
#include <QComboBox>
+#include "citra_qt/util/util.h"
+
#include "common/vector_math.h"
#include "video_core/debug_utils/debug_utils.h"
@@ -303,9 +305,7 @@ GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pi
list_widget = new QTreeView;
list_widget->setModel(model);
- QFont font("monospace");
- font.setStyleHint(QFont::Monospace); // Automatic fallback to a monospace font on on platforms without a font called "monospace"
- list_widget->setFont(font);
+ list_widget->setFont(GetMonospaceFont());
list_widget->setRootIsDecorated(false);
list_widget->setUniformRowHeights(true);
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index 0c17edee0..1d9a00e89 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -15,6 +15,8 @@
#include <QSpinBox>
#include <QTreeView>
+#include "citra_qt/util/util.h"
+
#include "video_core/shader/shader.h"
#include "graphics_vertex_shader.h"
@@ -245,7 +247,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
}
case Qt::FontRole:
- return QFont("monospace");
+ return GetMonospaceFont();
case Qt::BackgroundRole:
// Highlight instructions which have no debug data associated to them
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
index 89b28c2f4..5261d4836 100644
--- a/src/citra_qt/debugger/profiler.cpp
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -2,9 +2,21 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <QMouseEvent>
+#include <QPainter>
+#include <QString>
+
#include "profiler.h"
+#include "citra_qt/util/util.h"
+
#include "common/profiler_reporting.h"
+#include "common/microprofile.h"
+
+// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the
+// non-Qt frontends don't need it (and don't implement the UI drawing hooks either).
+#define MICROPROFILEUI_IMPL 1
+#include "common/microprofileui.h"
using namespace Common::Profiling;
@@ -136,3 +148,193 @@ void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable)
update_timer.stop();
}
}
+
+class MicroProfileWidget : public QWidget { // Widget that draws the MicroProfile UI and forwards mouse/keyboard input to it.
+public:
+ MicroProfileWidget(QWidget* parent = 0);
+
+protected:
+ void paintEvent(QPaintEvent* ev) override; // draws the MicroProfile UI with a QPainter on this widget
+ void showEvent(QShowEvent* ev) override; // starts update_timer
+ void hideEvent(QHideEvent* ev) override; // stops update_timer
+
+ void mouseMoveEvent(QMouseEvent* ev) override; // the mouse and wheel handlers forward position/button/wheel state to MicroProfile
+ void mousePressEvent(QMouseEvent* ev) override;
+ void mouseReleaseEvent(QMouseEvent* ev) override;
+ void wheelEvent(QWheelEvent* ev) override;
+
+ void keyPressEvent(QKeyEvent* ev) override; // the key handlers report the Ctrl key state to MicroProfile
+ void keyReleaseEvent(QKeyEvent* ev) override;
+
+private:
+ /// This timer is used to redraw the widget's contents continuously. To save resources, it only
+ /// runs while the widget is visible.
+ QTimer update_timer;
+};
+
+MicroProfileDialog::MicroProfileDialog(QWidget* parent) // Builds the dialog window and fills it with a single MicroProfileWidget.
+ : QWidget(parent, Qt::Dialog)
+{
+ setObjectName("MicroProfile");
+ setWindowTitle(tr("MicroProfile"));
+ resize(1000, 600);
+ // Remove the "?" button from the titlebar and enable the maximize button
+ setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint); // & binds tighter than |, so this is (flags & ~help) | maximize
+
+ MicroProfileWidget* widget = new MicroProfileWidget(this);
+
+ QLayout* layout = new QVBoxLayout(this);
+ layout->setContentsMargins(0, 0, 0, 0); // no margins, so the widget fills the whole dialog
+ layout->addWidget(widget);
+ setLayout(layout);
+
+ // Configure focus so that widget is focusable and the dialog automatically forwards focus to it.
+ setFocusProxy(widget);
+ widget->setFocusPolicy(Qt::StrongFocus);
+ widget->setFocus();
+}
+
+QAction* MicroProfileDialog::toggleViewAction() { // Returns the checkable show/hide action, creating it on the first call.
+ if (toggle_view_action == nullptr) {
+ toggle_view_action = new QAction(windowTitle(), this); // the dialog is passed as the action's parent
+ toggle_view_action->setCheckable(true);
+ toggle_view_action->setChecked(isVisible());
+ connect(toggle_view_action, SIGNAL(toggled(bool)), SLOT(setVisible(bool))); // checking/unchecking the action shows/hides the dialog
+ }
+
+ return toggle_view_action;
+}
+
+void MicroProfileDialog::showEvent(QShowEvent* ev) { // Syncs the toggle action's checked state with the dialog's visibility, then defers to QWidget.
+ if (toggle_view_action) { // the action exists only after toggleViewAction() has been called
+ toggle_view_action->setChecked(isVisible());
+ }
+ QWidget::showEvent(ev);
+}
+
+void MicroProfileDialog::hideEvent(QHideEvent* ev) { // Syncs the toggle action's checked state with the dialog's visibility, then defers to QWidget.
+ if (toggle_view_action) { // the action exists only after toggleViewAction() has been called
+ toggle_view_action->setChecked(isVisible());
+ }
+ QWidget::hideEvent(ev);
+}
+
+/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the
+/// QPainter available inside the drawing callbacks.
+static QPainter* mp_painter = nullptr;
+
+MicroProfileWidget::MicroProfileWidget(QWidget* parent) : QWidget(parent) { // Enables mouse tracking, initialises the MicroProfile UI and wires up the redraw timer.
+ // Send mouse motion events even when not dragging.
+ setMouseTracking(true);
+
+ MicroProfileSetDisplayMode(1); // Timers screen
+ MicroProfileInitUI();
+
+ connect(&update_timer, SIGNAL(timeout()), SLOT(update())); // each timer timeout invokes this widget's update() slot
+}
+
+void MicroProfileWidget::paintEvent(QPaintEvent* ev) { // Clears the widget to black and lets MicroProfile draw its UI through the drawing callbacks.
+ QPainter painter(this);
+
+ painter.setBackground(Qt::black);
+ painter.eraseRect(rect());
+
+ QFont font = GetMonospaceFont();
+ font.setPixelSize(MICROPROFILE_TEXT_HEIGHT);
+ painter.setFont(font);
+
+ mp_painter = &painter; // expose the painter to the drawing callbacks for the duration of the draw call only
+ MicroProfileDraw(rect().width(), rect().height());
+ mp_painter = nullptr;
+}
+
+void MicroProfileWidget::showEvent(QShowEvent* ev) { // Starts the periodic redraw timer when the widget is shown.
+ update_timer.start(15); // ~60 Hz
+ QWidget::showEvent(ev);
+}
+
+void MicroProfileWidget::hideEvent(QHideEvent* ev) { // Stops the periodic redraw timer when the widget is hidden.
+ update_timer.stop();
+ QWidget::hideEvent(ev);
+}
+
+void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) { // Forwards the cursor position to MicroProfile with a wheel delta of 0.
+ MicroProfileMousePosition(ev->x(), ev->y(), 0);
+ ev->accept();
+}
+
+void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) { // Forwards the cursor position and the left/right button state to MicroProfile.
+ MicroProfileMousePosition(ev->x(), ev->y(), 0); // position is sent before the button state
+ MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); // each argument is non-zero when that button is in the event's button state
+ ev->accept();
+}
+
+void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { // Forwards the cursor position and the left/right button state to MicroProfile.
+ MicroProfileMousePosition(ev->x(), ev->y(), 0);
+ MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); // per the Qt docs, buttons() on a release event excludes the released button, so it is passed as 0 here
+ ev->accept();
+}
+
+void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { // Forwards the cursor position and a scaled wheel delta to MicroProfile.
+ MicroProfileMousePosition(ev->x(), ev->y(), ev->delta() / 120); // presumably converts Qt's delta (multiples of 120 for most mice, per the Qt docs) to whole wheel steps; integer division
+ ev->accept();
+}
+
+void MicroProfileWidget::keyPressEvent(QKeyEvent* ev) { // Reports a Ctrl key press to MicroProfile, then lets QWidget handle the event.
+ if (ev->key() == Qt::Key_Control) {
+ // Inform MicroProfile that the user is holding Ctrl.
+ MicroProfileModKey(1);
+ }
+ QWidget::keyPressEvent(ev);
+}
+
+void MicroProfileWidget::keyReleaseEvent(QKeyEvent* ev) { // Reports a Ctrl key release to MicroProfile, then lets QWidget handle the event.
+ if (ev->key() == Qt::Key_Control) {
+ MicroProfileModKey(0); // modifier no longer held
+ }
+ QWidget::keyReleaseEvent(ev);
+}
+
+// These functions are called by MicroProfileDraw to draw the interface elements on the screen.
+
+void MicroProfileDrawText(int x, int y, u32 hex_color, const char* text, u32 text_length) { // Draws text_length characters of text one by one starting at (x, y), using the painter set in mp_painter.
+ // hex_color does not include an alpha, so it must be assumed to be 255
+ mp_painter->setPen(QColor::fromRgb(hex_color));
+
+ // It's impossible to draw a string using a monospaced font with a fixed width per cell in a
+ // way that's reliable across different platforms and fonts as far as I (yuriks) can tell, so
+ // draw each character individually in order to precisely control the text advance.
+ for (u32 i = 0; i < text_length; ++i) {
+ // Position the text baseline 1 pixel above the bottom of the text cell, this gives nice
+ // vertical alignment of text for a wide range of tested fonts.
+ mp_painter->drawText(x, y + MICROPROFILE_TEXT_HEIGHT - 2, QChar(text[i]));
+ x += MICROPROFILE_TEXT_WIDTH + 1; // fixed advance per character: cell width plus 1
+ }
+}
+
+void MicroProfileDrawBox(int left, int top, int right, int bottom, u32 hex_color, MicroProfileBoxType type) { // Fills the rectangle left..right by top..bottom, with a vertical gradient for bar-type boxes and a flat color otherwise.
+ QColor color = QColor::fromRgba(hex_color); // unlike the text and line callbacks, the alpha in hex_color is used here
+ QBrush brush = color;
+ if (type == MicroProfileBoxTypeBar) {
+ QLinearGradient gradient(left, top, left, bottom); // runs from the top edge to the bottom edge
+ gradient.setColorAt(0.f, color.lighter(125)); // lighter shade at the top
+ gradient.setColorAt(1.f, color.darker(125)); // darker shade at the bottom
+ brush = gradient;
+ }
+ mp_painter->fillRect(left, top, right - left, bottom - top, brush); // fillRect takes width/height, hence the subtractions
+}
+
+void MicroProfileDrawLine2D(u32 vertices_length, float* vertices, u32 hex_color) { // Draws a polyline through vertices_length points; vertices holds interleaved x,y pairs.
+ // Temporary vector used to convert between the float array and QPointF. Marked static to reuse
+ // the allocation across calls.
+ static std::vector<QPointF> point_buf;
+
+ for (u32 i = 0; i < vertices_length; ++i) {
+ point_buf.emplace_back(vertices[i*2 + 0], vertices[i*2 + 1]); // x at even index, y at the following odd index
+ }
+
+ // hex_color does not include an alpha, so it must be assumed to be 255
+ mp_painter->setPen(QColor::fromRgb(hex_color));
+ mp_painter->drawPolyline(point_buf.data(), vertices_length);
+ point_buf.clear(); // leave the static buffer empty for the next call
+}
diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h
index fabf279b8..2199eaef1 100644
--- a/src/citra_qt/debugger/profiler.h
+++ b/src/citra_qt/debugger/profiler.h
@@ -48,3 +48,20 @@ private:
QTimer update_timer;
};
+
+class MicroProfileDialog : public QWidget { // Window for the MicroProfile UI, with an action for toggling its visibility.
+ Q_OBJECT
+
+public:
+ MicroProfileDialog(QWidget* parent = 0);
+
+ /// Returns a QAction that can be used to toggle visibility of this dialog.
+ QAction* toggleViewAction();
+
+protected:
+ void showEvent(QShowEvent* ev) override;
+ void hideEvent(QHideEvent* ev) override;
+
+private:
+ QAction* toggle_view_action = nullptr; // null until toggleViewAction() is called
+};
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index a1a4865bd..7fb1b0dcb 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -17,6 +17,7 @@
#include "common/logging/backend.h"
#include "common/logging/filter.h"
#include "common/make_unique.h"
+#include "common/microprofile.h"
#include "common/platform.h"
#include "common/scm_rev.h"
#include "common/scope_exit.h"
@@ -64,6 +65,9 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
addDockWidget(Qt::BottomDockWidgetArea, profilerWidget);
profilerWidget->hide();
+ microProfileDialog = new MicroProfileDialog(this);
+ microProfileDialog->hide();
+
disasmWidget = new DisassemblerWidget(this, emu_thread.get());
addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
disasmWidget->hide();
@@ -102,6 +106,7 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
debug_menu->addAction(profilerWidget->toggleViewAction());
+ debug_menu->addAction(microProfileDialog->toggleViewAction());
debug_menu->addAction(disasmWidget->toggleViewAction());
debug_menu->addAction(registersWidget->toggleViewAction());
debug_menu->addAction(callstackWidget->toggleViewAction());
@@ -128,6 +133,8 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
restoreGeometry(settings.value("geometry").toByteArray());
restoreState(settings.value("state").toByteArray());
render_window->restoreGeometry(settings.value("geometryRenderWindow").toByteArray());
+ microProfileDialog->restoreGeometry(settings.value("microProfileDialogGeometry").toByteArray());
+ microProfileDialog->setVisible(settings.value("microProfileDialogVisible").toBool());
ui.action_Use_Hardware_Renderer->setChecked(Settings::values.use_hw_renderer);
SetHardwareRendererEnabled(ui.action_Use_Hardware_Renderer->isChecked());
@@ -287,6 +294,17 @@ void GMainWindow::ShutdownGame() {
render_window->hide();
}
+void GMainWindow::StoreRecentFile(const QString& filename) // Puts filename at the top of the stored "recentFiles" list, then refreshes the menu.
+{
+ QSettings settings;
+ QStringList recent_files = settings.value("recentFiles").toStringList(); // list as currently stored in the settings
+ recent_files.prepend(filename); // newest entry goes first
+ recent_files.removeDuplicates(); // if the file was already listed, its older entry is dropped
+ settings.setValue("recentFiles", recent_files); // write the updated list back
+
+ UpdateRecentFiles(); // update the recent files menu
+}
+
void GMainWindow::UpdateRecentFiles() {
QSettings settings;
QStringList recent_files = settings.value("recentFiles").toStringList();
@@ -297,6 +315,7 @@ void GMainWindow::UpdateRecentFiles() {
QString text = QString("&%1. %2").arg(i + 1).arg(QFileInfo(recent_files[i]).fileName());
actions_recent_files[i]->setText(text);
actions_recent_files[i]->setData(recent_files[i]);
+ actions_recent_files[i]->setToolTip(recent_files[i]);
actions_recent_files[i]->setVisible(true);
}
@@ -319,11 +338,7 @@ void GMainWindow::OnMenuLoadFile() {
QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), rom_path, tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)"));
if (filename.size()) {
settings.setValue("romsPath", QFileInfo(filename).path());
- // Update recent files list
- QStringList recent_files = settings.value("recentFiles").toStringList();
- recent_files.prepend(filename);
- settings.setValue("recentFiles", recent_files);
- UpdateRecentFiles(); // Update UI
+ StoreRecentFile(filename);
BootGame(filename.toLatin1().data());
}
@@ -349,6 +364,7 @@ void GMainWindow::OnMenuRecentFile() {
QFileInfo file_info(filename);
if (file_info.exists()) {
BootGame(filename.toLatin1().data());
+ StoreRecentFile(filename); // Put the filename on top of the list
} else {
// Display an error message and remove the file from the list.
QMessageBox::information(this, tr("File not found"), tr("File \"%1\" not found").arg(filename));
@@ -357,12 +373,7 @@ void GMainWindow::OnMenuRecentFile() {
QStringList recent_files = settings.value("recentFiles").toStringList();
recent_files.removeOne(filename);
settings.setValue("recentFiles", recent_files);
-
- action->setVisible(false);
- // Grey out the recent files menu if the list is empty
- if (ui.menu_recent_files->isEmpty()) {
- ui.menu_recent_files->setEnabled(false);
- }
+ UpdateRecentFiles();
}
}
@@ -430,6 +441,8 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
settings.setValue("geometry", saveGeometry());
settings.setValue("state", saveState());
settings.setValue("geometryRenderWindow", render_window->saveGeometry());
+ settings.setValue("microProfileDialogGeometry", microProfileDialog->saveGeometry());
+ settings.setValue("microProfileDialogVisible", microProfileDialog->isVisible());
settings.setValue("singleWindowMode", ui.action_Single_Window_Mode->isChecked());
settings.setValue("displayTitleBars", ui.actionDisplay_widget_title_bars->isChecked());
settings.setValue("firstStart", false);
@@ -452,6 +465,11 @@ int main(int argc, char* argv[]) {
Log::Filter log_filter(Log::Level::Info);
Log::SetFilter(&log_filter);
+ MicroProfileOnThreadCreate("Frontend");
+ SCOPE_EXIT({
+ MicroProfileShutdown();
+ });
+
// Init settings params
QSettings::setDefaultFormat(QSettings::IniFormat);
QCoreApplication::setOrganizationName("Citra team");
diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h
index 4b260ae8b..32523fded 100644
--- a/src/citra_qt/main.h
+++ b/src/citra_qt/main.h
@@ -14,6 +14,7 @@ class GImageInfo;
class GRenderWindow;
class EmuThread;
class ProfilerWidget;
+class MicroProfileDialog;
class DisassemblerWidget;
class RegistersWidget;
class CallstackWidget;
@@ -60,6 +61,24 @@ private:
void BootGame(const std::string& filename);
void ShutdownGame();
+ /**
+ * Stores the filename in the recently loaded files list.
+ * The new filename is stored at the beginning of the recently loaded files list.
+ * After inserting the new entry, duplicates are removed meaning that if
+ * this was inserted from \a OnMenuRecentFile(), the entry will be put on top
+ * and removed from its previous position.
+ *
+ * Finally, this function calls \a UpdateRecentFiles() to update the UI.
+ *
+ * @param filename the filename to store
+ */
+ void StoreRecentFile(const QString& filename);
+
+ /**
+ * Updates the recent files menu.
+ * Menu entries are rebuilt from the configuration file.
+ * If there is no entry in the menu, the menu is greyed out.
+ */
void UpdateRecentFiles();
void closeEvent(QCloseEvent* event) override;
@@ -86,6 +105,7 @@ private:
std::unique_ptr<EmuThread> emu_thread;
ProfilerWidget* profilerWidget;
+ MicroProfileDialog* microProfileDialog;
DisassemblerWidget* disasmWidget;
RegistersWidget* registersWidget;
CallstackWidget* callstackWidget;
diff --git a/src/citra_qt/util/util.cpp b/src/citra_qt/util/util.cpp
new file mode 100644
index 000000000..2cb939af1
--- /dev/null
+++ b/src/citra_qt/util/util.cpp
@@ -0,0 +1,13 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "util.h"
+
+QFont GetMonospaceFont() { // Builds a QFont requesting the "monospace" family with a monospace style hint and fixed pitch.
+ QFont font("monospace");
+ // Automatic fallback to a monospace font on platforms without a font called "monospace"
+ font.setStyleHint(QFont::Monospace);
+ font.setFixedPitch(true); // additionally request a fixed-pitch face
+ return font;
+}
diff --git a/src/citra_qt/util/util.h b/src/citra_qt/util/util.h
new file mode 100644
index 000000000..98a944047
--- /dev/null
+++ b/src/citra_qt/util/util.h
@@ -0,0 +1,10 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <QFont>
+
+/// Returns a QFont object appropriate to use as a monospace font for debugging widgets, etc.
+QFont GetMonospaceFont();
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e743a026d..7f3712efa 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -11,6 +11,7 @@ set(SRCS
logging/text_formatter.cpp
logging/backend.cpp
memory_util.cpp
+ microprofile.cpp
misc.cpp
profiler.cpp
scm_rev.cpp
@@ -43,6 +44,8 @@ set(HEADERS
make_unique.h
math_util.h
memory_util.h
+ microprofile.h
+ microprofileui.h
platform.h
profiler.h
profiler_reporting.h
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 88e452a16..ed20c3629 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -45,14 +45,20 @@
// GCC 4.8 defines all the rotate functions now
// Small issue with GCC's lrotl/lrotr intrinsics is they are still 32bit while we require 64bit
-#ifndef _rotl
-inline u32 _rotl(u32 x, int shift) {
+#ifdef _rotl
+#define rotl _rotl
+#else
+inline u32 rotl(u32 x, int shift) {
shift &= 31;
if (!shift) return x;
return (x << shift) | (x >> (32 - shift));
}
+#endif
-inline u32 _rotr(u32 x, int shift) {
+#ifdef _rotr
+#define rotr _rotr
+#else
+inline u32 rotr(u32 x, int shift) {
shift &= 31;
if (!shift) return x;
return (x >> shift) | (x << (32 - shift));
diff --git a/src/common/file_util.h b/src/common/file_util.h
index d0dccdf69..e71a9b2fa 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -244,7 +244,7 @@ private:
template <typename T>
void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode)
{
-#ifdef _WIN32
+#ifdef _MSC_VER
fstream.open(Common::UTF8ToTStr(filename).c_str(), openmode);
#else
fstream.open(filename.c_str(), openmode);
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index e16dde7fc..5fd3bd7f5 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -91,17 +91,16 @@ void LogMessage(Class log_class, Level log_level,
} // namespace Log
#define LOG_GENERIC(log_class, log_level, ...) \
- ::Log::LogMessage(::Log::Class::log_class, ::Log::Level::log_level, \
- __FILE__, __LINE__, __func__, __VA_ARGS__)
+ ::Log::LogMessage(log_class, log_level, __FILE__, __LINE__, __func__, __VA_ARGS__)
#ifdef _DEBUG
-#define LOG_TRACE( log_class, ...) LOG_GENERIC(log_class, Trace, __VA_ARGS__)
+#define LOG_TRACE( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Trace, __VA_ARGS__)
#else
#define LOG_TRACE( log_class, ...) (void(0))
#endif
-#define LOG_DEBUG( log_class, ...) LOG_GENERIC(log_class, Debug, __VA_ARGS__)
-#define LOG_INFO( log_class, ...) LOG_GENERIC(log_class, Info, __VA_ARGS__)
-#define LOG_WARNING( log_class, ...) LOG_GENERIC(log_class, Warning, __VA_ARGS__)
-#define LOG_ERROR( log_class, ...) LOG_GENERIC(log_class, Error, __VA_ARGS__)
-#define LOG_CRITICAL(log_class, ...) LOG_GENERIC(log_class, Critical, __VA_ARGS__)
+#define LOG_DEBUG( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Debug, __VA_ARGS__)
+#define LOG_INFO( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Info, __VA_ARGS__)
+#define LOG_WARNING( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Warning, __VA_ARGS__)
+#define LOG_ERROR( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Error, __VA_ARGS__)
+#define LOG_CRITICAL(log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Critical, __VA_ARGS__)
diff --git a/src/common/microprofile.cpp b/src/common/microprofile.cpp
new file mode 100644
index 000000000..ee25dd37f
--- /dev/null
+++ b/src/common/microprofile.cpp
@@ -0,0 +1,7 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// Includes the MicroProfile implementation in this file for compilation
+#define MICROPROFILE_IMPL 1
+#include "common/microprofile.h"
diff --git a/src/common/microprofile.h b/src/common/microprofile.h
new file mode 100644
index 000000000..9eb6016a8
--- /dev/null
+++ b/src/common/microprofile.h
@@ -0,0 +1,25 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+// Customized Citra settings.
+// This file wraps the MicroProfile header so that these are consistent everywhere.
+#define MICROPROFILE_WEBSERVER 0
+#define MICROPROFILE_GPU_TIMERS 0 // TODO: Implement timer queries when we upgrade to OpenGL 3.3
+#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0
+#define MICROPROFILE_PER_THREAD_BUFFER_SIZE (2048<<12) // 8 MB
+
+#include <microprofile.h>
+
+#define MP_RGB(r, g, b) ((r) << 16 | (g) << 8 | (b) << 0)
+
+// On OS X, some Mach header included by MicroProfile defines these as macros, conflicting with
+// identifiers we use.
+#ifdef PAGE_SIZE
+#undef PAGE_SIZE
+#endif
+#ifdef PAGE_MASK
+#undef PAGE_MASK
+#endif
diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h
new file mode 100644
index 000000000..97c369bd9
--- /dev/null
+++ b/src/common/microprofileui.h
@@ -0,0 +1,16 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/microprofile.h"
+
+// Customized Citra settings.
+// This file wraps the MicroProfile UI header so that these are consistent everywhere.
+#define MICROPROFILE_TEXT_WIDTH 6
+#define MICROPROFILE_TEXT_HEIGHT 12
+#define MICROPROFILE_HELP_ALT "Right-Click"
+#define MICROPROFILE_HELP_MOD "Ctrl"
+
+#include <microprofileui.h>
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 4b79acd1f..939df210e 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -15,6 +15,7 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
+#include <cinttypes>
#include <cstring>
#include "common/assert.h"
@@ -25,11 +26,6 @@
#include "cpu_detect.h"
#include "emitter.h"
-#define PRIx64 "llx"
-
-// Minimize the diff against Dolphin
-#define DYNA_REC JIT
-
namespace Gen
{
@@ -113,6 +109,29 @@ u8 *XEmitter::GetWritableCodePtr()
return code;
}
+void XEmitter::Write8(u8 value) // Emits one byte at the current code pointer.
+{
+ *code++ = value; // store the byte, then advance the code pointer by one
+}
+
+void XEmitter::Write16(u16 value) // Emits a 16-bit value at the current code pointer.
+{
+ std::memcpy(code, &value, sizeof(u16)); // byte-wise copy of the value as it is laid out in host memory
+ code += sizeof(u16); // advance the code pointer past the bytes just written
+}
+
+void XEmitter::Write32(u32 value) // Emits a 32-bit value at the current code pointer.
+{
+ std::memcpy(code, &value, sizeof(u32)); // byte-wise copy of the value as it is laid out in host memory
+ code += sizeof(u32); // advance the code pointer past the bytes just written
+}
+
+void XEmitter::Write64(u64 value) // Emits a 64-bit value at the current code pointer.
+{
+ std::memcpy(code, &value, sizeof(u64)); // byte-wise copy of the value as it is laid out in host memory
+ code += sizeof(u64); // advance the code pointer past the bytes just written
+}
+
void XEmitter::ReserveCodeSpace(int bytes)
{
for (int i = 0; i < bytes; i++)
@@ -374,7 +393,7 @@ void XEmitter::Rex(int w, int r, int x, int b)
Write8(rx);
}
-void XEmitter::JMP(const u8 *addr, bool force5Bytes)
+void XEmitter::JMP(const u8* addr, bool force5Bytes)
{
u64 fn = (u64)addr;
if (!force5Bytes)
@@ -398,7 +417,7 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes)
}
}
-void XEmitter::JMPptr(const OpArg &arg2)
+void XEmitter::JMPptr(const OpArg& arg2)
{
OpArg arg = arg2;
if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument");
@@ -425,7 +444,7 @@ void XEmitter::CALLptr(OpArg arg)
arg.WriteRest(this);
}
-void XEmitter::CALL(const void *fnptr)
+void XEmitter::CALL(const void* fnptr)
{
u64 distance = u64(fnptr) - (u64(code) + 5);
ASSERT_MSG(
@@ -496,7 +515,7 @@ void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes)
}
}
-void XEmitter::SetJumpTarget(const FixupBranch &branch)
+void XEmitter::SetJumpTarget(const FixupBranch& branch)
{
if (branch.type == 0)
{
@@ -512,30 +531,6 @@ void XEmitter::SetJumpTarget(const FixupBranch &branch)
}
}
-// INC/DEC considered harmful on newer CPUs due to partial flag set.
-// Use ADD, SUB instead.
-
-/*
-void XEmitter::INC(int bits, OpArg arg)
-{
- if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument");
- arg.operandReg = 0;
- if (bits == 16) {Write8(0x66);}
- arg.WriteRex(this, bits, bits);
- Write8(bits == 8 ? 0xFE : 0xFF);
- arg.WriteRest(this);
-}
-void XEmitter::DEC(int bits, OpArg arg)
-{
- if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument");
- arg.operandReg = 1;
- if (bits == 16) {Write8(0x66);}
- arg.WriteRex(this, bits, bits);
- Write8(bits == 8 ? 0xFE : 0xFF);
- arg.WriteRest(this);
-}
-*/
-
//Single byte opcodes
//There is no PUSHAD/POPAD in 64-bit mode.
void XEmitter::INT3() {Write8(0xCC);}
@@ -667,7 +662,7 @@ void XEmitter::CBW(int bits)
void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);}
void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);}
-void XEmitter::PUSH(int bits, const OpArg &reg)
+void XEmitter::PUSH(int bits, const OpArg& reg)
{
if (reg.IsSimpleReg())
PUSH(reg.GetSimpleReg());
@@ -703,7 +698,7 @@ void XEmitter::PUSH(int bits, const OpArg &reg)
}
}
-void XEmitter::POP(int /*bits*/, const OpArg &reg)
+void XEmitter::POP(int /*bits*/, const OpArg& reg)
{
if (reg.IsSimpleReg())
POP(reg.GetSimpleReg());
@@ -791,12 +786,12 @@ void XEmitter::WriteMulDivType(int bits, OpArg src, int ext)
src.WriteRest(this);
}
-void XEmitter::MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);}
-void XEmitter::DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);}
-void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);}
-void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);}
-void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);}
-void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);}
+void XEmitter::MUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 4);}
+void XEmitter::DIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 6);}
+void XEmitter::IMUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 5);}
+void XEmitter::IDIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 7);}
+void XEmitter::NEG(int bits, const OpArg& src) {WriteMulDivType(bits, src, 3);}
+void XEmitter::NOT(int bits, const OpArg& src) {WriteMulDivType(bits, src, 2);}
void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
{
@@ -813,24 +808,24 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bo
src.WriteRest(this);
}
-void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src)
+void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src)
{
if (bits <= 16)
ASSERT_MSG(0, "MOVNTI - bits<=16");
WriteBitSearchType(bits, src, dest, 0xC3);
}
-void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit
-void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit
+void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBC);} // Bottom bit to top bit
+void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBD);} // Top bit to bottom bit
-void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src)
+void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src)
{
CheckFlags();
if (!Common::GetCPUCaps().bmi1)
ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
WriteBitSearchType(bits, dest, src, 0xBC, true);
}
-void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src)
+void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src)
{
CheckFlags();
if (!Common::GetCPUCaps().lzcnt)
@@ -950,7 +945,7 @@ void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
}
//shift can be either imm8 or cl
-void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)
+void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext)
{
CheckFlags();
bool writeImm = false;
@@ -991,16 +986,16 @@ void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)
// large rotates and shift are slower on intel than amd
// intel likes to rotate by 1, and the op is smaller too
-void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);}
-void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);}
-void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);}
-void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);}
-void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);}
-void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);}
-void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);}
+void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 0);}
+void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 1);}
+void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 2);}
+void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 3);}
+void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 4);}
+void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 5);}
+void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 7);}
// index can be either imm8 or register, don't use memory destination because it's slow
-void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)
+void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext)
{
CheckFlags();
if (dest.IsImm())
@@ -1029,13 +1024,13 @@ void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)
}
}
-void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);}
-void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);}
-void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);}
-void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);}
+void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 4);}
+void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 5);}
+void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 6);}
+void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 7);}
//shift can be either imm8 or cl
-void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)
+void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift)
{
CheckFlags();
if (dest.IsImm())
@@ -1067,7 +1062,7 @@ void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)
}
}
-void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift)
+void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift)
{
CheckFlags();
if (dest.IsImm())
@@ -1111,7 +1106,7 @@ void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bit
}
//operand can either be immediate or register
-void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const
+void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const
{
X64Reg _operandReg;
if (IsImm())
@@ -1257,7 +1252,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o
}
}
-void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2)
+void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2)
{
if (a1.IsImm())
{
@@ -1283,24 +1278,24 @@ void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg
}
}
-void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);}
-void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);}
-void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);}
-void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);}
-void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);}
-void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);}
-void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);}
-void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2)
+void XEmitter::ADD (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);}
+void XEmitter::ADC (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);}
+void XEmitter::SUB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);}
+void XEmitter::SBB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);}
+void XEmitter::AND (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);}
+void XEmitter::OR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);}
+void XEmitter::XOR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);}
+void XEmitter::MOV (int bits, const OpArg& a1, const OpArg& a2)
{
if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
WriteNormalOp(this, bits, nrmMOV, a1, a2);
}
-void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);}
-void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);}
-void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);}
+void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);}
+void XEmitter::CMP (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);}
+void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);}
-void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)
+void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2)
{
CheckFlags();
if (bits == 8)
@@ -1353,7 +1348,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)
}
}
-void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a)
+void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a)
{
CheckFlags();
if (bits == 8)
@@ -1390,7 +1385,7 @@ void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extr
arg.WriteRest(this, extrabytes);
}
-void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
+void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
{
WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
}
@@ -1400,25 +1395,25 @@ static int GetVEXmmmmm(u16 op)
// Currently, only 0x38 and 0x3A are used as secondary escape byte.
if ((op >> 8) == 0x3A)
return 3;
- else if ((op >> 8) == 0x38)
+ if ((op >> 8) == 0x38)
return 2;
- else
- return 1;
+
+ return 1;
}
static int GetVEXpp(u8 opPrefix)
{
if (opPrefix == 0x66)
return 1;
- else if (opPrefix == 0xF3)
+ if (opPrefix == 0xF3)
return 2;
- else if (opPrefix == 0xF2)
+ if (opPrefix == 0xF2)
return 3;
- else
- return 0;
+
+ return 0;
}
-void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
{
if (!Common::GetCPUCaps().avx)
ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
@@ -1431,7 +1426,7 @@ void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpA
}
// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
-void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
{
if (size != 32 && size != 64)
ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
@@ -1442,7 +1437,7 @@ void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg r
arg.WriteRest(this, extrabytes, regOp1);
}
-void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
{
CheckFlags();
if (!Common::GetCPUCaps().bmi1)
@@ -1450,7 +1445,7 @@ void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg
WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
}
-void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
{
CheckFlags();
if (!Common::GetCPUCaps().bmi2)
@@ -1517,135 +1512,136 @@ void XEmitter::WriteMXCSR(OpArg arg, int ext)
arg.WriteRest(this);
}
-void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);}
-void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);}
-
-void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);}
-void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);}
-void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);}
-
-void XEmitter::ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);}
-void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);}
-void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);}
-void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);}
-void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);}
-void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);}
-void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);}
-void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);}
-void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);}
-void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);}
-void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);}
-void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);}
-void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);}
-void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);}
-void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);}
-
-void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseADD, regOp, arg);}
-void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseADD, regOp, arg);}
-void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);}
-void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);}
-void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseAND, regOp, arg);}
-void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseAND, regOp, arg);}
-void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);}
-void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);}
-void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseOR, regOp, arg);}
-void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseOR, regOp, arg);}
-void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);}
-void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);}
-void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);}
-void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);}
-void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);}
-void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);}
-void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);}
-void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);}
-void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);}
-void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);}
-void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);}
-void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);}
-void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); }
-void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);}
-void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);}
-void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);}
-
-void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);}
-
-void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed
-void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered
-void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered
-void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);}
-
-void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);}
-void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);}
-void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);}
-void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);}
-
-void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);}
-void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);}
-
-void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);}
-void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);}
-void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);}
-void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);}
-
-void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);}
-void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);}
-
-void XEmitter::MOVLPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); }
-void XEmitter::MOVLPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); }
-void XEmitter::MOVLPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); }
-void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); }
-
-void XEmitter::MOVHPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); }
-void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); }
-void XEmitter::MOVHPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); }
-void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }
+void XEmitter::STMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 3);}
+void XEmitter::LDMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 2);}
+
+void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);}
+void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);}
+void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);}
+
+void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);}
+void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);}
+void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);}
+void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);}
+void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);}
+void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);}
+void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);}
+void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);}
+void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);}
+void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);}
+void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);}
+void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);}
+void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);}
+void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);}
+void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRCP, regOp, arg);}
+void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);}
+
+void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseADD, regOp, arg);}
+void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseADD, regOp, arg);}
+void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);}
+void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);}
+void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseAND, regOp, arg);}
+void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseAND, regOp, arg);}
+void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);}
+void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);}
+void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseOR, regOp, arg);}
+void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseOR, regOp, arg);}
+void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);}
+void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);}
+void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);}
+void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);}
+void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);}
+void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);}
+void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);}
+void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);}
+void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);}
+void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);}
+void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);}
+void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);}
+void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); }
+void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);}
+void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);}
+void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);}
+
+void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);}
+
+void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed
+void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered
+void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered
+void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);}
+
+void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);}
+void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);}
+void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);}
+void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);}
+
+void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);}
+void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);}
+
+void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);}
+void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);}
+void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);}
+void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);}
+
+void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);}
+void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);}
+
+void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); }
+void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); }
+void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); }
+void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); }
+
+void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); }
+void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); }
+void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); }
+void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }
void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));}
void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));}
-void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);}
-void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);}
+void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);}
+void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);}
-void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);}
-void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);}
-void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);}
-void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);}
-void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);}
-void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);}
+void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);}
+void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);}
+void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);}
+void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);}
+void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);}
+void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);}
-void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);}
-void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);}
-void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);}
-void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);}
+void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);}
+void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);}
+void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);}
+void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);}
-void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);}
-void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);}
-void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);}
-void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}
+void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);}
+void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);}
+void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);}
+void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}
void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));}
-void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
-void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);}
+void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
+void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, dest, arg);}
-void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
+void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
// THESE TWO ARE UNTESTED.
-void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
-void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
+void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
+void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
-void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
-void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
+void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
+void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
-void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)
+void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg)
{
if (Common::GetCPUCaps().sse3)
{
@@ -1663,9 +1659,9 @@ void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)
//There are a few more left
// Also some integer instructions are missing
-void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);}
-void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);}
-void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);}
+void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x6B, dest, arg);}
+void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x63, dest, arg);}
+void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x67, dest, arg);}
void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);}
void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);}
@@ -1690,7 +1686,7 @@ void XEmitter::PSRLQ(X64Reg reg, int shift)
Write8(shift);
}
-void XEmitter::PSRLQ(X64Reg reg, OpArg arg)
+void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg)
{
WriteSSEOp(0x66, 0xd3, reg, arg);
}
@@ -1735,212 +1731,212 @@ void XEmitter::PSRAD(X64Reg reg, int shift)
Write8(shift);
}
-void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
+void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
{
if (!Common::GetCPUCaps().ssse3)
ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
}
-void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
+void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
{
if (!Common::GetCPUCaps().sse4_1)
ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
}
-void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);}
-void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);}
-void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
-void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}
-
-void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);}
-void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);}
-void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);}
-void XEmitter::PMINUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);}
-void XEmitter::PMAXSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);}
-void XEmitter::PMAXSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);}
-void XEmitter::PMAXUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);}
-void XEmitter::PMAXUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);}
-
-void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);}
-void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);}
-void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);}
-void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);}
-void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);}
-void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);}
-void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);}
-void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);}
-void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);}
-void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);}
-void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);}
-void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);}
-
-void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);}
-void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);}
-void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}
+void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);}
+void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);}
+void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
+void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}
+
+void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);}
+void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);}
+void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);}
+void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);}
+void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);}
+void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);}
+void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);}
+void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);}
+
+void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);}
+void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);}
+void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);}
+void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);}
+void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);}
+void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);}
+void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);}
+void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);}
+void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);}
+void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);}
+void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);}
+void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);}
+
+void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);}
+void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);}
+void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}
void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); }
void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); }
-void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);}
-void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);}
-void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);}
-void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);}
-void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);}
-void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);}
-void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);}
-void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);}
+void XEmitter::PAND(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDB, dest, arg);}
+void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDF, dest, arg);}
+void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEF, dest, arg);}
+void XEmitter::POR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEB, dest, arg);}
-void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);}
-void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);}
-void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);}
-void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);}
+void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFC, dest, arg);}
+void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFD, dest, arg);}
+void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFE, dest, arg);}
+void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD4, dest, arg);}
-void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);}
-void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);}
-void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);}
-void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);}
+void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEC, dest, arg);}
+void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xED, dest, arg);}
+void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDC, dest, arg);}
+void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDD, dest, arg);}
-void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);}
-void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);}
-void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);}
-void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);}
+void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF8, dest, arg);}
+void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF9, dest, arg);}
+void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFA, dest, arg);}
+void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFB, dest, arg);}
-void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);}
-void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);}
-void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);}
-void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);}
+void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE8, dest, arg);}
+void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE9, dest, arg);}
+void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD8, dest, arg);}
+void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD9, dest, arg);}
-void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);}
-void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, arg);}
+void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE0, dest, arg);}
+void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE3, dest, arg);}
-void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);}
-void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);}
-void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);}
+void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x74, dest, arg);}
+void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x75, dest, arg);}
+void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x76, dest, arg);}
-void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);}
-void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);}
-void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);}
+void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x64, dest, arg);}
+void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x65, dest, arg);}
+void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x66, dest, arg);}
-void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);}
-void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);}
+void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);}
+void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);}
-void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); }
-void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);}
+void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF5, dest, arg); }
+void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF6, dest, arg);}
-void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); }
-void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); }
-void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); }
-void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); }
+void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEE, dest, arg); }
+void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDE, dest, arg); }
+void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEA, dest, arg); }
+void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDA, dest, arg); }
-void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); }
-void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);}
-void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);}
-void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}
+void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD7, dest, arg); }
+void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);}
+void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);}
+void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}
// VEX
-void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);}
-void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);}
-void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);}
-void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);}
-void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);}
-void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);}
-void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);}
-void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);}
-void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);}
-void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);}
-void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);}
-void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);}
-
-void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); }
-void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); }
-void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); }
-void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); }
-void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); }
-void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); }
-void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); }
-void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); }
-
-void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); }
-void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); }
-void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); }
-void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); }
-
-void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); }
-void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); }
-void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); }
-void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); }
-void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); }
-void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); }
-void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); }
-void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); }
-void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); }
-void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); }
-void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); }
-void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); }
-void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); }
-
-void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);}
-void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);}
-void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);}
-void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);}
-void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);}
-void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);}
-void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);}
-void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);}
-void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);}
+void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} // Three-operand forms: regOp1, regOp2 and arg are forwarded to WriteAVXOp in that order; *SD variants use prefix 0xF2
+void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);}
+void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);}
+void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);}
+void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} // *PD variants reuse the same sse* opcode constants with prefix 0x66
+void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);}
+void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);}
+void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);}
+void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);}
+void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} // extrabytes=1 matches the single shuffle byte written right after
+void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} // Uses a raw opcode byte (0x14) rather than an sse* constant
+void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} // Raw opcode byte 0x15
+
+void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } // Each PS/PD pair shares one sse* opcode constant: PS passes prefix 0x00, PD passes 0x66
+void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); }
+void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); }
+void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); }
+void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); }
+void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); }
+void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); }
+void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); }
+
+void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } // VP* logic ops: prefix 0x66 with a raw opcode byte, no trailing immediate
+void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } // opcode 0xDF
+void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } // opcode 0xEB
+void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } // opcode 0xEF
+
+void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } // FMA family: every wrapper passes prefix 0x66 and a 16-bit opcode of the form 0x38xx
+void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); }
+void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); }
+void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } // NOTE(review): the only difference from the PS form is the trailing 1, which binds to WriteAVXOp's `extrabytes` parameter (see its declaration in emitter.h). No extra byte is written here, so this looks like it was meant to select VEX.W=1 instead - confirm against WriteAVXOp's definition
+void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); }
+void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); }
+void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); }
+void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } // NOTE(review): trailing 1 - same question as VFMADD132PD
+void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } // NOTE(review): trailing 1 - same question as VFMADD132PD
+void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } // NOTE(review): trailing 1 - same question as VFMADD132PD
+void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } // NOTE(review): trailing 1 - same question as VFMADD132PD
+void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } // NOTE(review): trailing 1 - same question as VFMADD132PD
+void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } // NOTE(review): trailing 1 - same question as VFMADD132PD
+void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } // NOTE(review): trailing 1 - same question as VFMADD132PD
+void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); }
+void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); }
+void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); }
+void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } // NOTE(review): trailing 1 - same question as VFMADD132PD
+void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); }
+void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); }
+void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); }
+void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } // NOTE(review): trailing 1 - same question as VFMADD132PD
+void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); }
+
+void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} // SARX/SHLX/SHRX share opcode 0x38F7 and are told apart by the prefix (0xF3 / 0x66 / 0xF2)
+void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);}
+void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);}
+void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} // No second register (INVALID_REG); extrabytes=1 matches the rotate byte written afterwards
+void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} // PEXT/PDEP/BZHI share opcode 0x38F5 with different prefixes
+void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);}
+void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} // NOTE(review): regOp2 is passed before regOp1, unlike the neighbouring wrappers - confirm this matches the intended operand encoding
+void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);}
+void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} // BLSR/BLSMSK/BLSI share opcode 0x38F3; the constant 1/2/3 fills the first register slot - presumably an opcode-extension digit, TODO confirm
+void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);}
+void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);}
+void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} // Goes through WriteBMI1Op with prefix 0x00
+void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} // Goes through WriteBMI1Op with prefix 0x00
// Prefixes
@@ -1956,7 +1952,7 @@ void XEmitter::FWAIT()
}
// TODO: make this more generic
-void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg)
+void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg)
{
int mf = 0;
ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction");
@@ -1974,9 +1970,9 @@ void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg a
arg.WriteRest(this, 0, (X64Reg) op);
}
-void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);}
-void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);}
-void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);}
+void XEmitter::FLD(int bits, const OpArg& src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} // floatLD80 is supplied as the 80-bit operation
+void XEmitter::FST(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} // No 80-bit form: floatINVALID makes WriteFloatLoadStore's assertion fire when bits == 80
+void XEmitter::FSTP(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} // floatSTP80 is supplied as the 80-bit operation
void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); }
void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); }
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index e9c924126..a49cd2cf1 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -328,8 +328,6 @@ enum SSECompare
ORD,
};
-typedef const u8* JumpTarget;
-
class XEmitter
{
friend struct OpArg; // for Write8 etc
@@ -344,27 +342,27 @@ private:
void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
void WriteMulDivType(int bits, OpArg src, int ext);
void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
- void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
- void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
+ void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext); // NOTE(review): dest is still taken by value while shift is a const reference - confirm this is deliberate
+ void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext); // Both operands are read-only references
void WriteMXCSR(OpArg arg, int ext);
void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
- void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
- void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
- void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
- void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
- void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
- void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
- void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
- void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
- void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
+ void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
+ void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
+ void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); // Single-register overload
+ void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); // Two-register overload; callers such as VSHUFPD pass extrabytes=1 when they write one more byte after the operand
+ void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+ void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+ void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+ void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); // op_80b is the operation used for the 80-bit case
+ void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
protected:
- inline void Write8(u8 value) {*code++ = value;}
- inline void Write16(u16 value) {*(u16*)code = (value); code += 2;}
- inline void Write32(u32 value) {*(u32*)code = (value); code += 4;}
- inline void Write64(u64 value) {*(u64*)code = (value); code += 8;}
+ void Write8(u8 value); // Declaration only (no inline body in this header); same for Write16/32/64 below
+ void Write16(u16 value);
+ void Write32(u32 value);
+ void Write64(u64 value);
public:
XEmitter() { code = nullptr; flags_locked = false; }
@@ -413,8 +411,8 @@ public:
// Stack control
void PUSH(X64Reg reg);
void POP(X64Reg reg);
- void PUSH(int bits, const OpArg &reg);
- void POP(int bits, const OpArg &reg);
+ void PUSH(int bits, const OpArg& reg);
+ void POP(int bits, const OpArg& reg);
void PUSHF();
void POPF();
@@ -424,21 +422,19 @@ public:
void UD2();
FixupBranch J(bool force5bytes = false);
- void JMP(const u8 * addr, bool force5Bytes = false);
- void JMP(OpArg arg);
- void JMPptr(const OpArg &arg);
+ void JMP(const u8* addr, bool force5Bytes = false);
+ void JMPptr(const OpArg& arg);
void JMPself(); //infinite loop!
#ifdef CALL
#undef CALL
#endif
- void CALL(const void *fnptr);
+ void CALL(const void* fnptr);
void CALLptr(OpArg arg);
FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
- //void J_CC(CCFlags conditionCode, JumpTarget target);
- void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false);
+ void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
- void SetJumpTarget(const FixupBranch &branch);
+ void SetJumpTarget(const FixupBranch& branch);
void SETcc(CCFlags flag, OpArg dest);
// Note: CMOV brings little, if any, benefit on current CPUs.
@@ -450,8 +446,8 @@ public:
void SFENCE();
// Bit scan
- void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit
- void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit
+ void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit
+ void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit
// Cache control
enum PrefetchLevel
@@ -462,67 +458,67 @@ public:
PF_T2, //Levels 3+ (aliased to T0 on AMD)
};
void PREFETCH(PrefetchLevel level, OpArg arg);
- void MOVNTI(int bits, OpArg dest, X64Reg src);
- void MOVNTDQ(OpArg arg, X64Reg regOp);
- void MOVNTPS(OpArg arg, X64Reg regOp);
- void MOVNTPD(OpArg arg, X64Reg regOp);
+ void MOVNTI(int bits, const OpArg& dest, X64Reg src);
+ void MOVNTDQ(const OpArg& arg, X64Reg regOp);
+ void MOVNTPS(const OpArg& arg, X64Reg regOp);
+ void MOVNTPD(const OpArg& arg, X64Reg regOp);
// Multiplication / division
- void MUL(int bits, OpArg src); //UNSIGNED
- void IMUL(int bits, OpArg src); //SIGNED
- void IMUL(int bits, X64Reg regOp, OpArg src);
- void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm);
- void DIV(int bits, OpArg src);
- void IDIV(int bits, OpArg src);
+ void MUL(int bits, const OpArg& src); // Unsigned
+ void IMUL(int bits, const OpArg& src); // Signed
+ void IMUL(int bits, X64Reg regOp, const OpArg& src); // Overload with an explicit register operand
+ void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm); // Overload with a register operand and an extra imm operand
+ void DIV(int bits, const OpArg& src);
+ void IDIV(int bits, const OpArg& src);
// Shift
- void ROL(int bits, OpArg dest, OpArg shift);
- void ROR(int bits, OpArg dest, OpArg shift);
- void RCL(int bits, OpArg dest, OpArg shift);
- void RCR(int bits, OpArg dest, OpArg shift);
- void SHL(int bits, OpArg dest, OpArg shift);
- void SHR(int bits, OpArg dest, OpArg shift);
- void SAR(int bits, OpArg dest, OpArg shift);
+ void ROL(int bits, const OpArg& dest, const OpArg& shift);
+ void ROR(int bits, const OpArg& dest, const OpArg& shift);
+ void RCL(int bits, const OpArg& dest, const OpArg& shift);
+ void RCR(int bits, const OpArg& dest, const OpArg& shift);
+ void SHL(int bits, const OpArg& dest, const OpArg& shift);
+ void SHR(int bits, const OpArg& dest, const OpArg& shift);
+ void SAR(int bits, const OpArg& dest, const OpArg& shift);
// Bit Test
- void BT(int bits, OpArg dest, OpArg index);
- void BTS(int bits, OpArg dest, OpArg index);
- void BTR(int bits, OpArg dest, OpArg index);
- void BTC(int bits, OpArg dest, OpArg index);
+ void BT(int bits, const OpArg& dest, const OpArg& index);
+ void BTS(int bits, const OpArg& dest, const OpArg& index);
+ void BTR(int bits, const OpArg& dest, const OpArg& index);
+ void BTC(int bits, const OpArg& dest, const OpArg& index);
// Double-Precision Shift
- void SHRD(int bits, OpArg dest, OpArg src, OpArg shift);
- void SHLD(int bits, OpArg dest, OpArg src, OpArg shift);
+ void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
+ void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
// Extend EAX into EDX in various ways
void CWD(int bits = 16);
- inline void CDQ() {CWD(32);}
- inline void CQO() {CWD(64);}
+ void CDQ() {CWD(32);}
+ void CQO() {CWD(64);}
void CBW(int bits = 8);
- inline void CWDE() {CBW(16);}
- inline void CDQE() {CBW(32);}
+ void CWDE() {CBW(16);}
+ void CDQE() {CBW(32);}
// Load effective address
void LEA(int bits, X64Reg dest, OpArg src);
// Integer arithmetic
- void NEG (int bits, OpArg src);
- void ADD (int bits, const OpArg &a1, const OpArg &a2);
- void ADC (int bits, const OpArg &a1, const OpArg &a2);
- void SUB (int bits, const OpArg &a1, const OpArg &a2);
- void SBB (int bits, const OpArg &a1, const OpArg &a2);
- void AND (int bits, const OpArg &a1, const OpArg &a2);
- void CMP (int bits, const OpArg &a1, const OpArg &a2);
+ void NEG(int bits, const OpArg& src);
+ void ADD(int bits, const OpArg& a1, const OpArg& a2);
+ void ADC(int bits, const OpArg& a1, const OpArg& a2);
+ void SUB(int bits, const OpArg& a1, const OpArg& a2);
+ void SBB(int bits, const OpArg& a1, const OpArg& a2);
+ void AND(int bits, const OpArg& a1, const OpArg& a2);
+ void CMP(int bits, const OpArg& a1, const OpArg& a2);
// Bit operations
- void NOT (int bits, OpArg src);
- void OR (int bits, const OpArg &a1, const OpArg &a2);
- void XOR (int bits, const OpArg &a1, const OpArg &a2);
- void MOV (int bits, const OpArg &a1, const OpArg &a2);
- void TEST(int bits, const OpArg &a1, const OpArg &a2);
+ void NOT (int bits, const OpArg& src);
+ void OR(int bits, const OpArg& a1, const OpArg& a2);
+ void XOR(int bits, const OpArg& a1, const OpArg& a2);
+ void MOV(int bits, const OpArg& a1, const OpArg& a2);
+ void TEST(int bits, const OpArg& a1, const OpArg& a2);
// Are these useful at all? Consider removing.
- void XCHG(int bits, const OpArg &a1, const OpArg &a2);
+ void XCHG(int bits, const OpArg& a1, const OpArg& a2);
void XCHG_AHAL();
// Byte swapping (32 and 64-bit only).
@@ -536,13 +532,13 @@ public:
void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
// Available only on AMD >= Phenom or Intel >= Haswell
- void LZCNT(int bits, X64Reg dest, OpArg src);
+ void LZCNT(int bits, X64Reg dest, const OpArg& src);
// Note: this one is actually part of BMI1
- void TZCNT(int bits, X64Reg dest, OpArg src);
+ void TZCNT(int bits, X64Reg dest, const OpArg& src);
// WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
- void STMXCSR(OpArg memloc);
- void LDMXCSR(OpArg memloc);
+ void STMXCSR(const OpArg& memloc);
+ void LDMXCSR(const OpArg& memloc);
// Prefixes
void LOCK();
@@ -569,259 +565,243 @@ public:
x87_FPUBusy = 0x8000,
};
- void FLD(int bits, OpArg src);
- void FST(int bits, OpArg dest);
- void FSTP(int bits, OpArg dest);
+ void FLD(int bits, const OpArg& src);
+ void FST(int bits, const OpArg& dest);
+ void FSTP(int bits, const OpArg& dest);
void FNSTSW_AX();
void FWAIT();
// SSE/SSE2: Floating point arithmetic
- void ADDSS(X64Reg regOp, OpArg arg);
- void ADDSD(X64Reg regOp, OpArg arg);
- void SUBSS(X64Reg regOp, OpArg arg);
- void SUBSD(X64Reg regOp, OpArg arg);
- void MULSS(X64Reg regOp, OpArg arg);
- void MULSD(X64Reg regOp, OpArg arg);
- void DIVSS(X64Reg regOp, OpArg arg);
- void DIVSD(X64Reg regOp, OpArg arg);
- void MINSS(X64Reg regOp, OpArg arg);
- void MINSD(X64Reg regOp, OpArg arg);
- void MAXSS(X64Reg regOp, OpArg arg);
- void MAXSD(X64Reg regOp, OpArg arg);
- void SQRTSS(X64Reg regOp, OpArg arg);
- void SQRTSD(X64Reg regOp, OpArg arg);
- void RSQRTSS(X64Reg regOp, OpArg arg);
+ void ADDSS(X64Reg regOp, const OpArg& arg);
+ void ADDSD(X64Reg regOp, const OpArg& arg);
+ void SUBSS(X64Reg regOp, const OpArg& arg);
+ void SUBSD(X64Reg regOp, const OpArg& arg);
+ void MULSS(X64Reg regOp, const OpArg& arg);
+ void MULSD(X64Reg regOp, const OpArg& arg);
+ void DIVSS(X64Reg regOp, const OpArg& arg);
+ void DIVSD(X64Reg regOp, const OpArg& arg);
+ void MINSS(X64Reg regOp, const OpArg& arg);
+ void MINSD(X64Reg regOp, const OpArg& arg);
+ void MAXSS(X64Reg regOp, const OpArg& arg);
+ void MAXSD(X64Reg regOp, const OpArg& arg);
+ void SQRTSS(X64Reg regOp, const OpArg& arg);
+ void SQRTSD(X64Reg regOp, const OpArg& arg);
+ void RCPSS(X64Reg regOp, const OpArg& arg);
+ void RSQRTSS(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Floating point bitwise (yes)
- void CMPSS(X64Reg regOp, OpArg arg, u8 compare);
- void CMPSD(X64Reg regOp, OpArg arg, u8 compare);
+ void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare);
+ void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare);
- inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); }
- inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); }
- inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); }
- inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); }
- inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); }
- inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); }
- inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); }
+ void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); }
+ void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); }
+ void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); }
+ void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); }
+ void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); }
+ void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); }
+ void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); }
// SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
- void ADDPS(X64Reg regOp, OpArg arg);
- void ADDPD(X64Reg regOp, OpArg arg);
- void SUBPS(X64Reg regOp, OpArg arg);
- void SUBPD(X64Reg regOp, OpArg arg);
- void CMPPS(X64Reg regOp, OpArg arg, u8 compare);
- void CMPPD(X64Reg regOp, OpArg arg, u8 compare);
- void MULPS(X64Reg regOp, OpArg arg);
- void MULPD(X64Reg regOp, OpArg arg);
- void DIVPS(X64Reg regOp, OpArg arg);
- void DIVPD(X64Reg regOp, OpArg arg);
- void MINPS(X64Reg regOp, OpArg arg);
- void MINPD(X64Reg regOp, OpArg arg);
- void MAXPS(X64Reg regOp, OpArg arg);
- void MAXPD(X64Reg regOp, OpArg arg);
- void SQRTPS(X64Reg regOp, OpArg arg);
- void SQRTPD(X64Reg regOp, OpArg arg);
- void RCPPS(X64Reg regOp, OpArg arg);
- void RSQRTPS(X64Reg regOp, OpArg arg);
+ void ADDPS(X64Reg regOp, const OpArg& arg);
+ void ADDPD(X64Reg regOp, const OpArg& arg);
+ void SUBPS(X64Reg regOp, const OpArg& arg);
+ void SUBPD(X64Reg regOp, const OpArg& arg);
+ void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare);
+ void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare);
+ void MULPS(X64Reg regOp, const OpArg& arg);
+ void MULPD(X64Reg regOp, const OpArg& arg);
+ void DIVPS(X64Reg regOp, const OpArg& arg);
+ void DIVPD(X64Reg regOp, const OpArg& arg);
+ void MINPS(X64Reg regOp, const OpArg& arg);
+ void MINPD(X64Reg regOp, const OpArg& arg);
+ void MAXPS(X64Reg regOp, const OpArg& arg);
+ void MAXPD(X64Reg regOp, const OpArg& arg);
+ void SQRTPS(X64Reg regOp, const OpArg& arg);
+ void SQRTPD(X64Reg regOp, const OpArg& arg);
+ void RCPPS(X64Reg regOp, const OpArg& arg);
+ void RSQRTPS(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double)
- void ANDPS(X64Reg regOp, OpArg arg);
- void ANDPD(X64Reg regOp, OpArg arg);
- void ANDNPS(X64Reg regOp, OpArg arg);
- void ANDNPD(X64Reg regOp, OpArg arg);
- void ORPS(X64Reg regOp, OpArg arg);
- void ORPD(X64Reg regOp, OpArg arg);
- void XORPS(X64Reg regOp, OpArg arg);
- void XORPD(X64Reg regOp, OpArg arg);
+ void ANDPS(X64Reg regOp, const OpArg& arg);
+ void ANDPD(X64Reg regOp, const OpArg& arg);
+ void ANDNPS(X64Reg regOp, const OpArg& arg);
+ void ANDNPD(X64Reg regOp, const OpArg& arg);
+ void ORPS(X64Reg regOp, const OpArg& arg);
+ void ORPD(X64Reg regOp, const OpArg& arg);
+ void XORPS(X64Reg regOp, const OpArg& arg);
+ void XORPD(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Shuffle components. These are tricky - see Intel documentation.
- void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle);
- void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle);
+ void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle);
+ void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle);
// SSE/SSE2: Useful alternative to shuffle in some cases.
- void MOVDDUP(X64Reg regOp, OpArg arg);
-
- // TODO: Actually implement
-#if 0
- // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
- void ADDSUBPS(X64Reg dest, OpArg src);
- void ADDSUBPD(X64Reg dest, OpArg src);
- void HADDPD(X64Reg dest, OpArg src);
- void HSUBPS(X64Reg dest, OpArg src);
- void HSUBPD(X64Reg dest, OpArg src);
-
- // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
- void DPPD(X64Reg dest, OpArg src, u8 arg);
-
- // These are probably useful for VFPU emulation.
- void INSERTPS(X64Reg dest, OpArg src, u8 arg);
- void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
-#endif
+ void MOVDDUP(X64Reg regOp, const OpArg& arg);
// SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
- void HADDPS(X64Reg dest, OpArg src);
+ void HADDPS(X64Reg dest, const OpArg& src);
// SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
- void DPPS(X64Reg dest, OpArg src, u8 arg);
+ void DPPS(X64Reg dest, const OpArg& src, u8 arg);
- void UNPCKLPS(X64Reg dest, OpArg src);
- void UNPCKHPS(X64Reg dest, OpArg src);
- void UNPCKLPD(X64Reg dest, OpArg src);
- void UNPCKHPD(X64Reg dest, OpArg src);
+ void UNPCKLPS(X64Reg dest, const OpArg& src);
+ void UNPCKHPS(X64Reg dest, const OpArg& src);
+ void UNPCKLPD(X64Reg dest, const OpArg& src);
+ void UNPCKHPD(X64Reg dest, const OpArg& src);
// SSE/SSE2: Compares.
- void COMISS(X64Reg regOp, OpArg arg);
- void COMISD(X64Reg regOp, OpArg arg);
- void UCOMISS(X64Reg regOp, OpArg arg);
- void UCOMISD(X64Reg regOp, OpArg arg);
+ void COMISS(X64Reg regOp, const OpArg& arg);
+ void COMISD(X64Reg regOp, const OpArg& arg);
+ void UCOMISS(X64Reg regOp, const OpArg& arg);
+ void UCOMISD(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Moves. Use the right data type for your data, in most cases.
- void MOVAPS(X64Reg regOp, OpArg arg);
- void MOVAPD(X64Reg regOp, OpArg arg);
- void MOVAPS(OpArg arg, X64Reg regOp);
- void MOVAPD(OpArg arg, X64Reg regOp);
-
- void MOVUPS(X64Reg regOp, OpArg arg);
- void MOVUPD(X64Reg regOp, OpArg arg);
- void MOVUPS(OpArg arg, X64Reg regOp);
- void MOVUPD(OpArg arg, X64Reg regOp);
-
- void MOVDQA(X64Reg regOp, OpArg arg);
- void MOVDQA(OpArg arg, X64Reg regOp);
- void MOVDQU(X64Reg regOp, OpArg arg);
- void MOVDQU(OpArg arg, X64Reg regOp);
-
- void MOVSS(X64Reg regOp, OpArg arg);
- void MOVSD(X64Reg regOp, OpArg arg);
- void MOVSS(OpArg arg, X64Reg regOp);
- void MOVSD(OpArg arg, X64Reg regOp);
-
- void MOVLPS(X64Reg regOp, OpArg arg);
- void MOVLPD(X64Reg regOp, OpArg arg);
- void MOVLPS(OpArg arg, X64Reg regOp);
- void MOVLPD(OpArg arg, X64Reg regOp);
-
- void MOVHPS(X64Reg regOp, OpArg arg);
- void MOVHPD(X64Reg regOp, OpArg arg);
- void MOVHPS(OpArg arg, X64Reg regOp);
- void MOVHPD(OpArg arg, X64Reg regOp);
+ void MOVAPS(X64Reg regOp, const OpArg& arg);
+ void MOVAPD(X64Reg regOp, const OpArg& arg);
+ void MOVAPS(const OpArg& arg, X64Reg regOp);
+ void MOVAPD(const OpArg& arg, X64Reg regOp);
+
+ void MOVUPS(X64Reg regOp, const OpArg& arg);
+ void MOVUPD(X64Reg regOp, const OpArg& arg);
+ void MOVUPS(const OpArg& arg, X64Reg regOp);
+ void MOVUPD(const OpArg& arg, X64Reg regOp);
+
+ void MOVDQA(X64Reg regOp, const OpArg& arg);
+ void MOVDQA(const OpArg& arg, X64Reg regOp);
+ void MOVDQU(X64Reg regOp, const OpArg& arg);
+ void MOVDQU(const OpArg& arg, X64Reg regOp);
+
+ void MOVSS(X64Reg regOp, const OpArg& arg);
+ void MOVSD(X64Reg regOp, const OpArg& arg);
+ void MOVSS(const OpArg& arg, X64Reg regOp);
+ void MOVSD(const OpArg& arg, X64Reg regOp);
+
+ void MOVLPS(X64Reg regOp, const OpArg& arg);
+ void MOVLPD(X64Reg regOp, const OpArg& arg);
+ void MOVLPS(const OpArg& arg, X64Reg regOp);
+ void MOVLPD(const OpArg& arg, X64Reg regOp);
+
+ void MOVHPS(X64Reg regOp, const OpArg& arg);
+ void MOVHPD(X64Reg regOp, const OpArg& arg);
+ void MOVHPS(const OpArg& arg, X64Reg regOp);
+ void MOVHPD(const OpArg& arg, X64Reg regOp);
void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
void MOVLHPS(X64Reg regOp1, X64Reg regOp2);
- void MOVD_xmm(X64Reg dest, const OpArg &arg);
+ void MOVD_xmm(X64Reg dest, const OpArg& arg);
void MOVQ_xmm(X64Reg dest, OpArg arg);
- void MOVD_xmm(const OpArg &arg, X64Reg src);
+ void MOVD_xmm(const OpArg& arg, X64Reg src);
void MOVQ_xmm(OpArg arg, X64Reg src);
// SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
- void MOVMSKPS(X64Reg dest, OpArg arg);
- void MOVMSKPD(X64Reg dest, OpArg arg);
+ void MOVMSKPS(X64Reg dest, const OpArg& arg);
+ void MOVMSKPD(X64Reg dest, const OpArg& arg);
// SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one.
void MASKMOVDQU(X64Reg dest, X64Reg src);
- void LDDQU(X64Reg dest, OpArg src);
+ void LDDQU(X64Reg dest, const OpArg& src);
// SSE/SSE2: Data type conversions.
- void CVTPS2PD(X64Reg dest, OpArg src);
- void CVTPD2PS(X64Reg dest, OpArg src);
- void CVTSS2SD(X64Reg dest, OpArg src);
- void CVTSI2SS(X64Reg dest, OpArg src);
- void CVTSD2SS(X64Reg dest, OpArg src);
- void CVTSI2SD(X64Reg dest, OpArg src);
- void CVTDQ2PD(X64Reg regOp, OpArg arg);
- void CVTPD2DQ(X64Reg regOp, OpArg arg);
- void CVTDQ2PS(X64Reg regOp, OpArg arg);
- void CVTPS2DQ(X64Reg regOp, OpArg arg);
-
- void CVTTPS2DQ(X64Reg regOp, OpArg arg);
- void CVTTPD2DQ(X64Reg regOp, OpArg arg);
+ void CVTPS2PD(X64Reg dest, const OpArg& src);
+ void CVTPD2PS(X64Reg dest, const OpArg& src);
+ void CVTSS2SD(X64Reg dest, const OpArg& src);
+ void CVTSI2SS(X64Reg dest, const OpArg& src);
+ void CVTSD2SS(X64Reg dest, const OpArg& src);
+ void CVTSI2SD(X64Reg dest, const OpArg& src);
+ void CVTDQ2PD(X64Reg regOp, const OpArg& arg);
+ void CVTPD2DQ(X64Reg regOp, const OpArg& arg);
+ void CVTDQ2PS(X64Reg regOp, const OpArg& arg);
+ void CVTPS2DQ(X64Reg regOp, const OpArg& arg);
+
+ void CVTTPS2DQ(X64Reg regOp, const OpArg& arg);
+ void CVTTPD2DQ(X64Reg regOp, const OpArg& arg);
// Destinations are X64 regs (rax, rbx, ...) for these instructions.
- void CVTSS2SI(X64Reg xregdest, OpArg src);
- void CVTSD2SI(X64Reg xregdest, OpArg src);
- void CVTTSS2SI(X64Reg xregdest, OpArg arg);
- void CVTTSD2SI(X64Reg xregdest, OpArg arg);
+ void CVTSS2SI(X64Reg xregdest, const OpArg& src);
+ void CVTSD2SI(X64Reg xregdest, const OpArg& src);
+ void CVTTSS2SI(X64Reg xregdest, const OpArg& arg);
+ void CVTTSD2SI(X64Reg xregdest, const OpArg& arg);
// SSE2: Packed integer instructions
- void PACKSSDW(X64Reg dest, OpArg arg);
- void PACKSSWB(X64Reg dest, OpArg arg);
- void PACKUSDW(X64Reg dest, OpArg arg);
- void PACKUSWB(X64Reg dest, OpArg arg);
+ void PACKSSDW(X64Reg dest, const OpArg& arg);
+ void PACKSSWB(X64Reg dest, const OpArg& arg);
+ void PACKUSDW(X64Reg dest, const OpArg& arg);
+ void PACKUSWB(X64Reg dest, const OpArg& arg);
void PUNPCKLBW(X64Reg dest, const OpArg &arg);
void PUNPCKLWD(X64Reg dest, const OpArg &arg);
void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
void PUNPCKLQDQ(X64Reg dest, const OpArg &arg);
- void PTEST(X64Reg dest, OpArg arg);
- void PAND(X64Reg dest, OpArg arg);
- void PANDN(X64Reg dest, OpArg arg);
- void PXOR(X64Reg dest, OpArg arg);
- void POR(X64Reg dest, OpArg arg);
-
- void PADDB(X64Reg dest, OpArg arg);
- void PADDW(X64Reg dest, OpArg arg);
- void PADDD(X64Reg dest, OpArg arg);
- void PADDQ(X64Reg dest, OpArg arg);
-
- void PADDSB(X64Reg dest, OpArg arg);
- void PADDSW(X64Reg dest, OpArg arg);
- void PADDUSB(X64Reg dest, OpArg arg);
- void PADDUSW(X64Reg dest, OpArg arg);
-
- void PSUBB(X64Reg dest, OpArg arg);
- void PSUBW(X64Reg dest, OpArg arg);
- void PSUBD(X64Reg dest, OpArg arg);
- void PSUBQ(X64Reg dest, OpArg arg);
-
- void PSUBSB(X64Reg dest, OpArg arg);
- void PSUBSW(X64Reg dest, OpArg arg);
- void PSUBUSB(X64Reg dest, OpArg arg);
- void PSUBUSW(X64Reg dest, OpArg arg);
-
- void PAVGB(X64Reg dest, OpArg arg);
- void PAVGW(X64Reg dest, OpArg arg);
-
- void PCMPEQB(X64Reg dest, OpArg arg);
- void PCMPEQW(X64Reg dest, OpArg arg);
- void PCMPEQD(X64Reg dest, OpArg arg);
-
- void PCMPGTB(X64Reg dest, OpArg arg);
- void PCMPGTW(X64Reg dest, OpArg arg);
- void PCMPGTD(X64Reg dest, OpArg arg);
-
- void PEXTRW(X64Reg dest, OpArg arg, u8 subreg);
- void PINSRW(X64Reg dest, OpArg arg, u8 subreg);
-
- void PMADDWD(X64Reg dest, OpArg arg);
- void PSADBW(X64Reg dest, OpArg arg);
-
- void PMAXSW(X64Reg dest, OpArg arg);
- void PMAXUB(X64Reg dest, OpArg arg);
- void PMINSW(X64Reg dest, OpArg arg);
- void PMINUB(X64Reg dest, OpArg arg);
+ void PTEST(X64Reg dest, const OpArg& arg);
+ void PAND(X64Reg dest, const OpArg& arg);
+ void PANDN(X64Reg dest, const OpArg& arg);
+ void PXOR(X64Reg dest, const OpArg& arg);
+ void POR(X64Reg dest, const OpArg& arg);
+
+ void PADDB(X64Reg dest, const OpArg& arg);
+ void PADDW(X64Reg dest, const OpArg& arg);
+ void PADDD(X64Reg dest, const OpArg& arg);
+ void PADDQ(X64Reg dest, const OpArg& arg);
+
+ void PADDSB(X64Reg dest, const OpArg& arg);
+ void PADDSW(X64Reg dest, const OpArg& arg);
+ void PADDUSB(X64Reg dest, const OpArg& arg);
+ void PADDUSW(X64Reg dest, const OpArg& arg);
+
+ void PSUBB(X64Reg dest, const OpArg& arg);
+ void PSUBW(X64Reg dest, const OpArg& arg);
+ void PSUBD(X64Reg dest, const OpArg& arg);
+ void PSUBQ(X64Reg dest, const OpArg& arg);
+
+ void PSUBSB(X64Reg dest, const OpArg& arg);
+ void PSUBSW(X64Reg dest, const OpArg& arg);
+ void PSUBUSB(X64Reg dest, const OpArg& arg);
+ void PSUBUSW(X64Reg dest, const OpArg& arg);
+
+ void PAVGB(X64Reg dest, const OpArg& arg);
+ void PAVGW(X64Reg dest, const OpArg& arg);
+
+ void PCMPEQB(X64Reg dest, const OpArg& arg);
+ void PCMPEQW(X64Reg dest, const OpArg& arg);
+ void PCMPEQD(X64Reg dest, const OpArg& arg);
+
+ void PCMPGTB(X64Reg dest, const OpArg& arg);
+ void PCMPGTW(X64Reg dest, const OpArg& arg);
+ void PCMPGTD(X64Reg dest, const OpArg& arg);
+
+ void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg);
+ void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg);
+
+ void PMADDWD(X64Reg dest, const OpArg& arg);
+ void PSADBW(X64Reg dest, const OpArg& arg);
+
+ void PMAXSW(X64Reg dest, const OpArg& arg);
+ void PMAXUB(X64Reg dest, const OpArg& arg);
+ void PMINSW(X64Reg dest, const OpArg& arg);
+ void PMINUB(X64Reg dest, const OpArg& arg);
// SSE4: More MAX/MIN instructions.
- void PMINSB(X64Reg dest, OpArg arg);
- void PMINSD(X64Reg dest, OpArg arg);
- void PMINUW(X64Reg dest, OpArg arg);
- void PMINUD(X64Reg dest, OpArg arg);
- void PMAXSB(X64Reg dest, OpArg arg);
- void PMAXSD(X64Reg dest, OpArg arg);
- void PMAXUW(X64Reg dest, OpArg arg);
- void PMAXUD(X64Reg dest, OpArg arg);
-
- void PMOVMSKB(X64Reg dest, OpArg arg);
- void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle);
- void PSHUFB(X64Reg dest, OpArg arg);
-
- void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle);
- void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle);
+ void PMINSB(X64Reg dest, const OpArg& arg);
+ void PMINSD(X64Reg dest, const OpArg& arg);
+ void PMINUW(X64Reg dest, const OpArg& arg);
+ void PMINUD(X64Reg dest, const OpArg& arg);
+ void PMAXSB(X64Reg dest, const OpArg& arg);
+ void PMAXSD(X64Reg dest, const OpArg& arg);
+ void PMAXUW(X64Reg dest, const OpArg& arg);
+ void PMAXUD(X64Reg dest, const OpArg& arg);
+
+ void PMOVMSKB(X64Reg dest, const OpArg& arg);
+ void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle);
+ void PSHUFB(X64Reg dest, const OpArg& arg);
+
+ void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle);
+ void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle);
void PSRLW(X64Reg reg, int shift);
void PSRLD(X64Reg reg, int shift);
void PSRLQ(X64Reg reg, int shift);
- void PSRLQ(X64Reg reg, OpArg arg);
+ void PSRLQ(X64Reg reg, const OpArg& arg);
void PSRLDQ(X64Reg reg, int shift);
void PSLLW(X64Reg reg, int shift);
@@ -833,198 +813,198 @@ public:
void PSRAD(X64Reg reg, int shift);
// SSE4: data type conversions
- void PMOVSXBW(X64Reg dest, OpArg arg);
- void PMOVSXBD(X64Reg dest, OpArg arg);
- void PMOVSXBQ(X64Reg dest, OpArg arg);
- void PMOVSXWD(X64Reg dest, OpArg arg);
- void PMOVSXWQ(X64Reg dest, OpArg arg);
- void PMOVSXDQ(X64Reg dest, OpArg arg);
- void PMOVZXBW(X64Reg dest, OpArg arg);
- void PMOVZXBD(X64Reg dest, OpArg arg);
- void PMOVZXBQ(X64Reg dest, OpArg arg);
- void PMOVZXWD(X64Reg dest, OpArg arg);
- void PMOVZXWQ(X64Reg dest, OpArg arg);
- void PMOVZXDQ(X64Reg dest, OpArg arg);
+ void PMOVSXBW(X64Reg dest, const OpArg& arg);
+ void PMOVSXBD(X64Reg dest, const OpArg& arg);
+ void PMOVSXBQ(X64Reg dest, const OpArg& arg);
+ void PMOVSXWD(X64Reg dest, const OpArg& arg);
+ void PMOVSXWQ(X64Reg dest, const OpArg& arg);
+ void PMOVSXDQ(X64Reg dest, const OpArg& arg);
+ void PMOVZXBW(X64Reg dest, const OpArg& arg);
+ void PMOVZXBD(X64Reg dest, const OpArg& arg);
+ void PMOVZXBQ(X64Reg dest, const OpArg& arg);
+ void PMOVZXWD(X64Reg dest, const OpArg& arg);
+ void PMOVZXWQ(X64Reg dest, const OpArg& arg);
+ void PMOVZXDQ(X64Reg dest, const OpArg& arg);
// SSE4: variable blend instructions (xmm0 implicit argument)
- void PBLENDVB(X64Reg dest, OpArg arg);
- void BLENDVPS(X64Reg dest, OpArg arg);
- void BLENDVPD(X64Reg dest, OpArg arg);
+ void PBLENDVB(X64Reg dest, const OpArg& arg);
+ void BLENDVPS(X64Reg dest, const OpArg& arg);
+ void BLENDVPD(X64Reg dest, const OpArg& arg);
void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend);
void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend);
// SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.)
- void ROUNDSS(X64Reg dest, OpArg arg, u8 mode);
- void ROUNDSD(X64Reg dest, OpArg arg, u8 mode);
- void ROUNDPS(X64Reg dest, OpArg arg, u8 mode);
- void ROUNDPD(X64Reg dest, OpArg arg, u8 mode);
-
- inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
- inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
- inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
- inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
-
- inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
- inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
- inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
- inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
-
- inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
- inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
- inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
- inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
-
- inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
- inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
- inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
- inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
+ void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode);
+ void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode);
+ void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode);
+ void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode);
+
+ void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
+ void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
+ void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
+ void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
+
+ void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
+ void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
+ void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
+ void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
+
+ void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
+ void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
+ void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
+ void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
+
+ void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
+ void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
+ void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
+ void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
// AVX
- void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle);
- void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-
- void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-
- void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
+ void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle);
+ void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+
+ void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+
+ void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
// FMA3
- void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
+ void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
// VEX GPR instructions
- void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
- void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
- void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
- void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate);
- void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
- void BLSR(int bits, X64Reg regOp, OpArg arg);
- void BLSMSK(int bits, X64Reg regOp, OpArg arg);
- void BLSI(int bits, X64Reg regOp, OpArg arg);
- void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
- void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
+ void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+ void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+ void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+ void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate);
+ void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+ void BLSR(int bits, X64Reg regOp, const OpArg& arg);
+ void BLSMSK(int bits, X64Reg regOp, const OpArg& arg);
+ void BLSI(int bits, X64Reg regOp, const OpArg& arg);
+ void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+ void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void RDTSC();
// Utility functions
// The difference between this and CALL is that this aligns the stack
// where appropriate.
- void ABI_CallFunction(const void *func);
+ void ABI_CallFunction(const void* func);
template <typename T>
void ABI_CallFunction(T (*func)()) {
- ABI_CallFunction((const void *)func);
+ ABI_CallFunction((const void*)func);
}
- void ABI_CallFunction(const u8 *func) {
- ABI_CallFunction((const void *)func);
+ void ABI_CallFunction(const u8* func) {
+ ABI_CallFunction((const void*)func);
}
- void ABI_CallFunctionC16(const void *func, u16 param1);
- void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2);
+ void ABI_CallFunctionC16(const void* func, u16 param1);
+ void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);
// These only support u32 parameters, but that's enough for a lot of uses.
// These will destroy the 1 or 2 first "parameter regs".
- void ABI_CallFunctionC(const void *func, u32 param1);
- void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2);
- void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3);
- void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3);
- void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4);
- void ABI_CallFunctionP(const void *func, void *param1);
- void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2);
- void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3);
- void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3);
- void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2);
- void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3);
- void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1);
- void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2);
+ void ABI_CallFunctionC(const void* func, u32 param1);
+ void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2);
+ void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3);
+ void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3);
+ void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4);
+ void ABI_CallFunctionP(const void* func, void* param1);
+ void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2);
+ void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3);
+ void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3);
+ void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2);
+ void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3);
+ void ABI_CallFunctionA(const void* func, const OpArg& arg1);
+ void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2);
// Pass a register as a parameter.
- void ABI_CallFunctionR(const void *func, X64Reg reg1);
- void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2);
+ void ABI_CallFunctionR(const void* func, X64Reg reg1);
+ void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2);
template <typename Tr, typename T1>
void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) {
- ABI_CallFunctionC((const void *)func, param1);
+ ABI_CallFunctionC((const void*)func, param1);
}
// A function that doesn't have any control over what it will do to regs,
@@ -1048,9 +1028,9 @@ public:
void ABI_EmitEpilogue(int maxCallParams);
#ifdef _M_IX86
- inline int ABI_GetNumXMMRegs() { return 8; }
+ static int ABI_GetNumXMMRegs() { return 8; }
#else
- inline int ABI_GetNumXMMRegs() { return 16; }
+ static int ABI_GetNumXMMRegs() { return 16; }
#endif
}; // class XEmitter
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 6cc60fd58..c17290b9b 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -29,6 +29,7 @@ set(SRCS
hle/kernel/address_arbiter.cpp
hle/kernel/event.cpp
hle/kernel/kernel.cpp
+ hle/kernel/memory.cpp
hle/kernel/mutex.cpp
hle/kernel/process.cpp
hle/kernel/resource_limit.cpp
@@ -115,7 +116,6 @@ set(SRCS
loader/loader.cpp
loader/ncch.cpp
tracer/recorder.cpp
- mem_map.cpp
memory.cpp
settings.cpp
system.cpp
@@ -157,6 +157,7 @@ set(HEADERS
hle/kernel/address_arbiter.h
hle/kernel/event.h
hle/kernel/kernel.h
+ hle/kernel/memory.h
hle/kernel/mutex.h
hle/kernel/process.h
hle/kernel/resource_limit.h
@@ -245,7 +246,6 @@ set(HEADERS
loader/ncch.h
tracer/recorder.h
tracer/citrace.h
- mem_map.h
memory.h
memory_setup.h
settings.h
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 422e80b50..0fddb07a0 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/memory.h"
@@ -48,65 +49,47 @@ enum {
typedef unsigned int (*shtop_fp_t)(ARMul_State* cpu, unsigned int sht_oper);
-static int CondPassed(ARMul_State* cpu, unsigned int cond) {
- const u32 NFLAG = cpu->NFlag;
- const u32 ZFLAG = cpu->ZFlag;
- const u32 CFLAG = cpu->CFlag;
- const u32 VFLAG = cpu->VFlag;
-
- int temp = 0;
+static bool CondPassed(ARMul_State* cpu, unsigned int cond) {
+ const bool n_flag = cpu->NFlag != 0;
+ const bool z_flag = cpu->ZFlag != 0;
+ const bool c_flag = cpu->CFlag != 0;
+ const bool v_flag = cpu->VFlag != 0;
switch (cond) {
- case 0x0:
- temp = ZFLAG;
- break;
- case 0x1: // NE
- temp = !ZFLAG;
- break;
- case 0x2: // CS
- temp = CFLAG;
- break;
- case 0x3: // CC
- temp = !CFLAG;
- break;
- case 0x4: // MI
- temp = NFLAG;
- break;
- case 0x5: // PL
- temp = !NFLAG;
- break;
- case 0x6: // VS
- temp = VFLAG;
- break;
- case 0x7: // VC
- temp = !VFLAG;
- break;
- case 0x8: // HI
- temp = (CFLAG && !ZFLAG);
- break;
- case 0x9: // LS
- temp = (!CFLAG || ZFLAG);
- break;
- case 0xa: // GE
- temp = ((!NFLAG && !VFLAG) || (NFLAG && VFLAG));
- break;
- case 0xb: // LT
- temp = ((NFLAG && !VFLAG) || (!NFLAG && VFLAG));
- break;
- case 0xc: // GT
- temp = ((!NFLAG && !VFLAG && !ZFLAG) || (NFLAG && VFLAG && !ZFLAG));
- break;
- case 0xd: // LE
- temp = ((NFLAG && !VFLAG) || (!NFLAG && VFLAG)) || ZFLAG;
- break;
- case 0xe: // AL
- temp = 1;
- break;
- case 0xf:
- temp = 1;
- break;
- }
- return temp;
+ case ConditionCode::EQ:
+ return z_flag;
+ case ConditionCode::NE:
+ return !z_flag;
+ case ConditionCode::CS:
+ return c_flag;
+ case ConditionCode::CC:
+ return !c_flag;
+ case ConditionCode::MI:
+ return n_flag;
+ case ConditionCode::PL:
+ return !n_flag;
+ case ConditionCode::VS:
+ return v_flag;
+ case ConditionCode::VC:
+ return !v_flag;
+ case ConditionCode::HI:
+ return (c_flag && !z_flag);
+ case ConditionCode::LS:
+ return (!c_flag || z_flag);
+ case ConditionCode::GE:
+ return (n_flag == v_flag);
+ case ConditionCode::LT:
+ return (n_flag != v_flag);
+ case ConditionCode::GT:
+ return (!z_flag && (n_flag == v_flag));
+ case ConditionCode::LE:
+ return (z_flag || (n_flag != v_flag));
+ case ConditionCode::AL:
+ case ConditionCode::NV: // Unconditional
+ return true;
+ }
+
+ return false;
}
static unsigned int DPO(Immediate)(ARMul_State* cpu, unsigned int sht_oper) {
@@ -3522,8 +3505,11 @@ enum {
FETCH_EXCEPTION
};
+MICROPROFILE_DEFINE(DynCom_Decode, "DynCom", "Decode", MP_RGB(255, 64, 64));
+
static int InterpreterTranslate(ARMul_State* cpu, int& bb_start, u32 addr) {
Common::Profiling::ScopeTimer timer_decode(profile_decode);
+ MICROPROFILE_SCOPE(DynCom_Decode);
// Decode instruction, get index
// Allocate memory and init InsCream
@@ -3588,8 +3574,11 @@ static int clz(unsigned int x) {
return n;
}
+MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0));
+
unsigned InterpreterMainLoop(ARMul_State* cpu) {
Common::Profiling::ScopeTimer timer_execute(profile_execute);
+ MICROPROFILE_SCOPE(DynCom_Execute);
#undef RM
#undef RS
diff --git a/src/core/arm/skyeye_common/armstate.cpp b/src/core/arm/skyeye_common/armstate.cpp
index ccb2eb0eb..0491717dc 100644
--- a/src/core/arm/skyeye_common/armstate.cpp
+++ b/src/core/arm/skyeye_common/armstate.cpp
@@ -4,7 +4,6 @@
#include "common/swap.h"
#include "common/logging/log.h"
-#include "core/mem_map.h"
#include "core/memory.h"
#include "core/arm/skyeye_common/armstate.h"
#include "core/arm/skyeye_common/vfp/vfp.h"
diff --git a/src/core/arm/skyeye_common/armsupp.cpp b/src/core/arm/skyeye_common/armsupp.cpp
index d31fb9449..883713e86 100644
--- a/src/core/arm/skyeye_common/armsupp.cpp
+++ b/src/core/arm/skyeye_common/armsupp.cpp
@@ -17,7 +17,6 @@
#include "common/logging/log.h"
-#include "core/mem_map.h"
#include "core/arm/skyeye_common/arm_regformat.h"
#include "core/arm/skyeye_common/armstate.h"
#include "core/arm/skyeye_common/armsupp.h"
diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp
index aea936d2d..b1a72dc0c 100644
--- a/src/core/hle/config_mem.cpp
+++ b/src/core/hle/config_mem.cpp
@@ -25,10 +25,6 @@ void Init() {
config_mem.sys_core_ver = 0x2;
config_mem.unit_info = 0x1; // Bit 0 set for Retail
config_mem.prev_firm = 0;
- config_mem.app_mem_type = 0x2; // Default app mem type is 0
- config_mem.app_mem_alloc = 0x06000000; // Set to 96MB, since some games use more than the default (64MB)
- config_mem.base_mem_alloc = 0x01400000; // Default base memory is 20MB
- config_mem.sys_mem_alloc = Memory::FCRAM_SIZE - (config_mem.app_mem_alloc + config_mem.base_mem_alloc);
config_mem.firm_unk = 0;
config_mem.firm_version_rev = 0;
config_mem.firm_version_min = 0x40;
@@ -36,7 +32,4 @@ void Init() {
config_mem.firm_sys_core_ver = 0x2;
}
-void Shutdown() {
-}
-
} // namespace
diff --git a/src/core/hle/config_mem.h b/src/core/hle/config_mem.h
index 9825a09e8..24a1254f2 100644
--- a/src/core/hle/config_mem.h
+++ b/src/core/hle/config_mem.h
@@ -52,6 +52,5 @@ static_assert(sizeof(ConfigMemDef) == Memory::CONFIG_MEMORY_SIZE, "Config Memory
extern ConfigMemDef config_mem;
void Init();
-void Shutdown();
} // namespace
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h
index 1a0518926..5846a161b 100644
--- a/src/core/hle/function_wrappers.h
+++ b/src/core/hle/function_wrappers.h
@@ -172,6 +172,14 @@ template<ResultCode func(u32, s64, s64)> void Wrap() {
FuncReturn(func(PARAM(0), param1, param2).raw);
}
+template<ResultCode func(s64*, Handle, u32)> void Wrap() {
+ s64 param_1 = 0;
+ u32 retval = func(&param_1, PARAM(1), PARAM(2)).raw;
+ Core::g_app_core->SetReg(1, (u32)param_1);
+ Core::g_app_core->SetReg(2, (u32)(param_1 >> 32));
+ FuncReturn(retval);
+}
+
////////////////////////////////////////////////////////////////////////////////////////////////////
// Function wrappers that return type u32
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index cd0a400dc..331b1b22a 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -34,8 +34,6 @@ void Reschedule(const char *reason) {
void Init() {
Service::Init();
- ConfigMem::Init();
- SharedPage::Init();
g_reschedule = false;
@@ -43,8 +41,6 @@ void Init() {
}
void Shutdown() {
- ConfigMem::Shutdown();
- SharedPage::Shutdown();
Service::Shutdown();
LOG_DEBUG(Kernel, "shutdown OK");
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 5711c0405..7a401a965 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -7,11 +7,14 @@
#include "common/assert.h"
#include "common/logging/log.h"
+#include "core/hle/config_mem.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/resource_limit.h"
+#include "core/hle/kernel/memory.h"
#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/timer.h"
+#include "core/hle/shared_page.h"
namespace Kernel {
@@ -119,6 +122,13 @@ void HandleTable::Clear() {
/// Initialize the kernel
void Init() {
+ ConfigMem::Init();
+ SharedPage::Init();
+
+ // TODO(yuriks): The memory type parameter needs to be determined by the ExHeader field instead
+ // For now it defaults to the one with the largest allocation to the app
+ Kernel::MemoryInit(2); // Allocates 96MB to the application
+
Kernel::ResourceLimitsInit();
Kernel::ThreadingInit();
Kernel::TimersInit();
@@ -131,11 +141,14 @@ void Init() {
/// Shutdown the kernel
void Shutdown() {
+ g_handle_table.Clear(); // Free all kernel objects
+
Kernel::ThreadingShutdown();
+ g_current_process = nullptr;
+
Kernel::TimersShutdown();
Kernel::ResourceLimitsShutdown();
- g_handle_table.Clear(); // Free all kernel objects
- g_current_process = nullptr;
+ Kernel::MemoryShutdown();
}
} // namespace
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
new file mode 100644
index 000000000..e4fc5f3c4
--- /dev/null
+++ b/src/core/hle/kernel/memory.cpp
@@ -0,0 +1,136 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/logging/log.h"
+
+#include "core/hle/config_mem.h"
+#include "core/hle/kernel/memory.h"
+#include "core/hle/kernel/vm_manager.h"
+#include "core/hle/result.h"
+#include "core/hle/shared_page.h"
+#include "core/memory.h"
+#include "core/memory_setup.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace Kernel {
+
+static MemoryRegionInfo memory_regions[3];
+
+/// Size of the APPLICATION, SYSTEM and BASE memory regions (respectively) for each system
+/// memory configuration type.
+static const u32 memory_region_sizes[8][3] = {
+ // Old 3DS layouts
+ {0x04000000, 0x02C00000, 0x01400000}, // 0
+ { /* This appears to be unused. */ }, // 1
+ {0x06000000, 0x00C00000, 0x01400000}, // 2
+ {0x05000000, 0x01C00000, 0x01400000}, // 3
+ {0x04800000, 0x02400000, 0x01400000}, // 4
+ {0x02000000, 0x04C00000, 0x01400000}, // 5
+
+ // New 3DS layouts
+ {0x07C00000, 0x06400000, 0x02000000}, // 6
+ {0x0B200000, 0x02E00000, 0x02000000}, // 7
+};
+
+void MemoryInit(u32 mem_type) {
+ // TODO(yuriks): On the n3DS, all o3DS configurations (<=5) are forced to 6 instead.
+ ASSERT_MSG(mem_type <= 5, "New 3DS memory configuration aren't supported yet!");
+ ASSERT(mem_type != 1);
+
+ // The kernel allocation regions (APPLICATION, SYSTEM and BASE) are laid out in sequence, with
+ // the sizes specified in the memory_region_sizes table.
+ VAddr base = 0;
+ for (int i = 0; i < 3; ++i) {
+ memory_regions[i].base = base;
+ memory_regions[i].size = memory_region_sizes[mem_type][i];
+ memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>();
+
+ base += memory_regions[i].size;
+ }
+
+ // We must've allocated the entire FCRAM by the end
+ ASSERT(base == Memory::FCRAM_SIZE);
+
+ using ConfigMem::config_mem;
+ config_mem.app_mem_type = mem_type;
+ // app_mem_alloc does not always match the configured size for memory_regions[0]: in case the
+ // n3DS type override is in effect it reports the size the game expects, not the real one.
+ config_mem.app_mem_alloc = memory_region_sizes[mem_type][0];
+ config_mem.sys_mem_alloc = memory_regions[1].size;
+ config_mem.base_mem_alloc = memory_regions[2].size;
+}
+
+void MemoryShutdown() {
+ for (auto& region : memory_regions) {
+ region.base = 0;
+ region.size = 0;
+ region.linear_heap_memory = nullptr;
+ }
+}
+
+MemoryRegionInfo* GetMemoryRegion(MemoryRegion region) {
+ switch (region) {
+ case MemoryRegion::APPLICATION:
+ return &memory_regions[0];
+ case MemoryRegion::SYSTEM:
+ return &memory_regions[1];
+ case MemoryRegion::BASE:
+ return &memory_regions[2];
+ default:
+ UNREACHABLE();
+ }
+}
+
+}
+
+namespace Memory {
+
+namespace {
+
+struct MemoryArea {
+ u32 base;
+ u32 size;
+ const char* name;
+};
+
+// We don't declare the IO regions in here since they're handled by other means.
+static MemoryArea memory_areas[] = {
+ {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
+ {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
+ {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory
+ {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
+};
+
+}
+
+void Init() {
+ InitMemoryMap();
+ LOG_DEBUG(HW_Memory, "initialized OK");
+}
+
+void InitLegacyAddressSpace(Kernel::VMManager& address_space) {
+ using namespace Kernel;
+
+ for (MemoryArea& area : memory_areas) {
+ auto block = std::make_shared<std::vector<u8>>(area.size);
+ address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap();
+ }
+
+ auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR,
+ (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom();
+ address_space.Reprotect(cfg_mem_vma, VMAPermission::Read);
+
+ auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR,
+ (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom();
+ address_space.Reprotect(shared_page_vma, VMAPermission::Read);
+}
+
+} // namespace
diff --git a/src/core/hle/kernel/memory.h b/src/core/hle/kernel/memory.h
new file mode 100644
index 000000000..36690b091
--- /dev/null
+++ b/src/core/hle/kernel/memory.h
@@ -0,0 +1,35 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+#include "core/hle/kernel/process.h"
+
+namespace Kernel {
+
+class VMManager;
+
+struct MemoryRegionInfo {
+ u32 base; // Not an address, but offset from start of FCRAM
+ u32 size;
+
+ std::shared_ptr<std::vector<u8>> linear_heap_memory;
+};
+
+void MemoryInit(u32 mem_type);
+void MemoryShutdown();
+MemoryRegionInfo* GetMemoryRegion(MemoryRegion region);
+
+}
+
+namespace Memory {
+
+void Init();
+void InitLegacyAddressSpace(Kernel::VMManager& address_space);
+
+} // namespace
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index a7892c652..c2b4963d4 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -7,11 +7,11 @@
#include "common/logging/log.h"
#include "common/make_unique.h"
+#include "core/hle/kernel/memory.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/vm_manager.h"
-#include "core/mem_map.h"
#include "core/memory.h"
namespace Kernel {
@@ -36,8 +36,7 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) {
process->codeset = std::move(code_set);
process->flags.raw = 0;
process->flags.memory_region = MemoryRegion::APPLICATION;
- process->address_space = Common::make_unique<VMManager>();
- Memory::InitLegacyAddressSpace(*process->address_space);
+ Memory::InitLegacyAddressSpace(process->vm_manager);
return process;
}
@@ -93,9 +92,11 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
mapping.unk_flag = false;
} else if ((type & 0xFE0) == 0xFC0) { // 0x01FF
// Kernel version
- int minor = descriptor & 0xFF;
- int major = (descriptor >> 8) & 0xFF;
- LOG_INFO(Loader, "ExHeader kernel version ignored: %d.%d", major, minor);
+ kernel_version = descriptor & 0xFFFF;
+
+ int minor = kernel_version & 0xFF;
+ int major = (kernel_version >> 8) & 0xFF;
+ LOG_INFO(Loader, "ExHeader kernel version: %d.%d", major, minor);
} else {
LOG_ERROR(Loader, "Unhandled kernel caps descriptor: 0x%08X", descriptor);
}
@@ -103,20 +104,161 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
}
void Process::Run(s32 main_thread_priority, u32 stack_size) {
+ memory_region = GetMemoryRegion(flags.memory_region);
+
auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, MemoryState memory_state) {
- auto vma = address_space->MapMemoryBlock(segment.addr, codeset->memory,
+ auto vma = vm_manager.MapMemoryBlock(segment.addr, codeset->memory,
segment.offset, segment.size, memory_state).Unwrap();
- address_space->Reprotect(vma, permissions);
+ vm_manager.Reprotect(vma, permissions);
+ misc_memory_used += segment.size;
};
+ // Map CodeSet segments
MapSegment(codeset->code, VMAPermission::ReadExecute, MemoryState::Code);
MapSegment(codeset->rodata, VMAPermission::Read, MemoryState::Code);
MapSegment(codeset->data, VMAPermission::ReadWrite, MemoryState::Private);
- address_space->LogLayout();
+ // Allocate and map stack
+ vm_manager.MapMemoryBlock(Memory::HEAP_VADDR_END - stack_size,
+ std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size, MemoryState::Locked
+ ).Unwrap();
+ misc_memory_used += stack_size;
+
+ vm_manager.LogLayout(Log::Level::Debug);
Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority);
}
+/// Returns the first virtual address of this process' linear heap: the base offset of its
+/// memory region added to the linear heap window selected by the process' kernel version.
+VAddr Process::GetLinearHeapBase() const {
+    // Kernel versions below 0x22C use the old linear heap window, all others the new one.
+    VAddr window_start;
+    if (kernel_version < 0x22C) {
+        window_start = Memory::LINEAR_HEAP_VADDR;
+    } else {
+        window_start = Memory::NEW_LINEAR_HEAP_VADDR;
+    }
+    return window_start + memory_region->base;
+}
+
+/// Returns the address one past the end of this process' linear heap, i.e. the linear heap
+/// base plus the size of the process' memory region.
+VAddr Process::GetLinearHeapLimit() const {
+    const VAddr linear_base = GetLinearHeapBase();
+    return linear_base + memory_region->size;
+}
+
+/**
+ * Allocates and maps `size` bytes of regular heap memory at `target`.
+ *
+ * The backing vector is grown on either side as needed so that it always spans
+ * [heap_start, heap_end); the requested range is then mapped from it and reprotected.
+ *
+ * @param target Virtual address to map at; must lie inside the HEAP_VADDR window.
+ * @param size Size of the allocation in bytes.
+ * @param perms Permissions applied to the new mapping.
+ * @returns The address of the newly mapped block (`target`), ERR_INVALID_ADDRESS if the range
+ *          falls outside the heap window or wraps around, or the error reported by
+ *          MapMemoryBlock.
+ */
+ResultVal<VAddr> Process::HeapAllocate(VAddr target, u32 size, VMAPermission perms) {
+    // `target + size < target` catches wrap-around of the end address.
+    if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (heap_memory == nullptr) {
+        // Initialize heap
+        heap_memory = std::make_shared<std::vector<u8>>();
+        heap_start = heap_end = target;
+    }
+
+    // If necessary, expand backing vector to cover new heap extents. Growing the vector may
+    // reallocate it, so every existing mapping that points into it has to be refreshed.
+    if (target < heap_start) {
+        heap_memory->insert(begin(*heap_memory), heap_start - target, 0);
+        heap_start = target;
+        vm_manager.RefreshMemoryBlockMappings(heap_memory.get());
+    }
+    if (target + size > heap_end) {
+        heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0);
+        heap_end = target + size;
+        vm_manager.RefreshMemoryBlockMappings(heap_memory.get());
+    }
+    ASSERT(heap_end - heap_start == heap_memory->size());
+
+    CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, heap_memory, target - heap_start, size, MemoryState::Private));
+    vm_manager.Reprotect(vma, perms);
+
+    heap_used += size;
+
+    // Report the address that was actually mapped. `heap_end - size` only equals `target` when
+    // the allocation ends exactly at heap_end; for an allocation that fills a hole or extends
+    // the heap downwards it would name a different address than the one just mapped.
+    return MakeResult<VAddr>(target);
+}
+
+/**
+ * Unmaps the range [target, target + size) from the regular heap and deducts it from
+ * heap_used.
+ * @returns RESULT_SUCCESS on success (including an empty range), ERR_INVALID_ADDRESS if the
+ *          range lies outside the heap window or wraps around, or the error from UnmapRange.
+ */
+ResultCode Process::HeapFree(VAddr target, u32 size) {
+    const VAddr range_end = target + size;
+    const bool starts_below_heap = target < Memory::HEAP_VADDR;
+    const bool ends_above_heap = range_end > Memory::HEAP_VADDR_END;
+    const bool wraps_around = range_end < target;
+    if (starts_below_heap || ends_above_heap || wraps_around) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    // An empty range has nothing to unmap.
+    if (size == 0) {
+        return RESULT_SUCCESS;
+    }
+
+    ResultCode unmap_result = vm_manager.UnmapRange(target, size);
+    if (unmap_result.IsError()) {
+        return unmap_result;
+    }
+
+    heap_used -= size;
+    return RESULT_SUCCESS;
+}
+
+/**
+ * Allocates `size` bytes from the linear heap of this process' memory region and maps them
+ * with the requested permissions.
+ * @param target Address to map at; 0 selects the current end of the linear heap.
+ * @param size Size of the allocation in bytes.
+ * @param perms Permissions applied to the new mapping.
+ * @returns The address the block was mapped at, ERR_INVALID_ADDRESS if the range fails the
+ *          bounds checks below, or (presumably, via CASCADE_RESULT) the error reported by
+ *          MapMemoryBlock.
+ */
+ResultVal<VAddr> Process::LinearAllocate(VAddr target, u32 size, VMAPermission perms) {
+ auto& linheap_memory = memory_region->linear_heap_memory;
+
+ // Current end of the linear heap, derived from the size of the region's backing vector.
+ VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size();
+ // Games and homebrew only ever seem to pass 0 here (which lets the kernel decide the address),
+ // but explicit addresses are also accepted and respected.
+ if (target == 0) {
+ target = heap_end;
+ }
+
+ // Reject ranges outside the linear heap, ranges starting past its current end (which would
+ // leave a gap), and ranges whose end address wraps around.
+ if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() ||
+ target > heap_end || target + size < target) {
+
+ return ERR_INVALID_ADDRESS;
+ }
+
+ // Expansion of the linear heap is only allowed if you do an allocation immediately at its
+ // end. It's possible to free gaps in the middle of the heap and then reallocate them later,
+ // but expansions are only allowed at the end.
+ if (target == heap_end) {
+ linheap_memory->insert(linheap_memory->end(), size, 0);
+ // Growing the vector may reallocate it, so existing mappings into it are refreshed.
+ vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
+ }
+
+ // TODO(yuriks): As is, this lets processes map memory allocated by other processes from the
+ // same region. It is unknown if or how the 3DS kernel checks against this.
+ size_t offset = target - GetLinearHeapBase();
+ CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, linheap_memory, offset, size, MemoryState::Continuous));
+ vm_manager.Reprotect(vma, perms);
+
+ linear_heap_used += size;
+
+ return MakeResult<VAddr>(target);
+}
+
+/**
+ * Unmaps the range [target, target + size) from the linear heap and, when the freed range
+ * reaches the current end of the backing vector, shrinks that vector.
+ * @returns RESULT_SUCCESS on success (including an empty range), ERR_INVALID_ADDRESS if the
+ *          range lies outside the linear heap limits or wraps around,
+ *          ERR_INVALID_ADDRESS_STATE if it extends past the currently allocated end, or the
+ *          error from UnmapRange.
+ */
+ResultCode Process::LinearFree(VAddr target, u32 size) {
+ auto& linheap_memory = memory_region->linear_heap_memory;
+
+ if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() ||
+ target + size < target) {
+
+ return ERR_INVALID_ADDRESS;
+ }
+
+ // An empty range has nothing to unmap.
+ if (size == 0) {
+ return RESULT_SUCCESS;
+ }
+
+ // Current end of the linear heap, derived from the size of the region's backing vector.
+ VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size();
+ if (target + size > heap_end) {
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+
+ ResultCode result = vm_manager.UnmapRange(target, size);
+ if (result.IsError()) return result;
+
+ linear_heap_used -= size;
+
+ if (target + size == heap_end) {
+ // End of linear heap has been freed, so check what's the last allocated block in it and
+ // reduce the size.
+ auto vma = vm_manager.FindVMA(target);
+ ASSERT(vma != vm_manager.vma_map.end());
+ ASSERT(vma->second.type == VMAType::Free);
+ VAddr new_end = vma->second.base;
+ // The vector is only shrunk when the free VMA starts inside the linear heap; if it
+ // starts below the heap base the vector is left at its current size.
+ // NOTE(review): confirm whether the vector should be emptied in that case instead.
+ if (new_end >= GetLinearHeapBase()) {
+ linheap_memory->resize(new_end - GetLinearHeapBase());
+ }
+ }
+
+ return RESULT_SUCCESS;
+}
+
Kernel::Process::Process() {}
Kernel::Process::~Process() {}
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 83d3aceae..60e17f251 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -15,6 +15,7 @@
#include "common/common_types.h"
#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/vm_manager.h"
namespace Kernel {
@@ -48,7 +49,7 @@ union ProcessFlags {
};
class ResourceLimit;
-class VMManager;
+struct MemoryRegionInfo;
struct CodeSet final : public Object {
static SharedPtr<CodeSet> Create(std::string name, u64 program_id);
@@ -104,14 +105,12 @@ public:
/// processes access to specific I/O regions and device memory.
boost::container::static_vector<AddressMapping, 8> address_mappings;
ProcessFlags flags;
+ /// Kernel compatibility version for this process
+ u16 kernel_version = 0;
/// The id of this process
u32 process_id = next_process_id++;
- /// Bitmask of the used TLS slots
- std::bitset<300> used_tls_slots;
- std::unique_ptr<VMManager> address_space;
-
/**
* Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them
* to this process.
@@ -123,6 +122,36 @@ public:
*/
void Run(s32 main_thread_priority, u32 stack_size);
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////
+ // Memory Management
+
+ VMManager vm_manager;
+
+ // Memory used to back the allocations in the regular heap. A single vector is used to cover
+ // the entire virtual address space extents that bound the allocations, including any holes.
+ // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
+ // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
+ std::shared_ptr<std::vector<u8>> heap_memory;
+ // The left/right bounds of the address space covered by heap_memory.
+ VAddr heap_start = 0, heap_end = 0;
+
+ u32 heap_used = 0, linear_heap_used = 0, misc_memory_used = 0;
+
+ MemoryRegionInfo* memory_region = nullptr;
+
+ /// Bitmask of the used TLS slots
+ std::bitset<300> used_tls_slots;
+
+ VAddr GetLinearHeapBase() const;
+ VAddr GetLinearHeapLimit() const;
+
+ ResultVal<VAddr> HeapAllocate(VAddr target, u32 size, VMAPermission perms);
+ ResultCode HeapFree(VAddr target, u32 size);
+
+ ResultVal<VAddr> LinearAllocate(VAddr target, u32 size, VMAPermission perms);
+ ResultCode LinearFree(VAddr target, u32 size);
+
private:
Process();
~Process() override;
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index 94b3e3298..67dde08c2 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -6,7 +6,6 @@
#include "common/logging/log.h"
-#include "core/mem_map.h"
#include "core/hle/kernel/resource_limit.h"
namespace Kernel {
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 4137683b5..1f477664b 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -20,6 +20,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(u32 size, MemoryPermission permissi
shared_memory->name = std::move(name);
shared_memory->base_address = 0x0;
+ shared_memory->fixed_address = 0x0;
shared_memory->size = size;
shared_memory->permissions = permissions;
shared_memory->other_permissions = other_permissions;
@@ -30,9 +31,31 @@ SharedPtr<SharedMemory> SharedMemory::Create(u32 size, MemoryPermission permissi
ResultCode SharedMemory::Map(VAddr address, MemoryPermission permissions,
MemoryPermission other_permissions) {
+ if (base_address != 0) {
+ LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: already mapped at 0x%08X!",
+ GetObjectId(), address, name.c_str(), base_address);
+ // TODO: Verify error code with hardware
+ return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,
+ ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
+ }
+
+ if (fixed_address != 0) {
+ if (address != 0 && address != fixed_address) {
+ LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: fixed_addres is 0x%08X!",
+ GetObjectId(), address, name.c_str(), fixed_address);
+ // TODO: Verify error code with hardware
+ return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,
+ ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
+ }
+
+ // HACK(yuriks): This is only here to support the APT shared font mapping right now.
+ // Later, this should actually map the memory block onto the address space.
+ return RESULT_SUCCESS;
+ }
+
if (address < Memory::SHARED_MEMORY_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) {
- LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X outside of shared mem bounds!",
- GetObjectId(), address);
+ LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s outside of shared mem bounds!",
+ GetObjectId(), address, name.c_str());
// TODO: Verify error code with hardware
return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,
ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index 7a2922776..35b550d12 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -61,6 +61,8 @@ public:
/// Address of shared memory block in the process.
VAddr base_address;
+ /// Fixed address to allow mapping to. Used for blocks created from the linear heap.
+ VAddr fixed_address;
/// Size of the memory block. Page-aligned.
u32 size;
/// Permission restrictions applied to the process which created the block.
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 29ea6d531..c10126513 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -117,6 +117,7 @@ void Thread::Stop() {
wait_objects.clear();
Kernel::g_current_process->used_tls_slots[tls_index] = false;
+ g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE;
HLE::Reschedule(__func__);
}
@@ -414,6 +415,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
}
ASSERT_MSG(thread->tls_index != -1, "Out of TLS space");
+ g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE;
// TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
// to initialize the context
@@ -504,7 +506,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
}
VAddr Thread::GetTLSAddress() const {
- return Memory::TLS_AREA_VADDR + tls_index * 0x200;
+ return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 205cc7b53..2610acf76 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -11,6 +11,15 @@
namespace Kernel {
+/// Returns a printable name for `state`, used when logging the address space layout.
+/// Values that fall outside the name table yield "Unknown" instead of reading past the array.
+static const char* GetMemoryStateName(MemoryState state) {
+    // NOTE(review): the table is indexed by the numeric value of MemoryState, so its order is
+    // assumed to follow the enum declaration - confirm when adding new states.
+    static const char* names[] = {
+        "Free", "Reserved", "IO", "Static", "Code", "Private", "Shared", "Continuous", "Aliased",
+        "Alias", "AliasCode", "Locked",
+    };
+
+    const size_t index = static_cast<size_t>(state);
+    if (index >= sizeof(names) / sizeof(names[0])) {
+        return "Unknown";
+    }
+    return names[index];
+}
+
bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
ASSERT(base + size == next.base);
if (permissions != next.permissions ||
@@ -51,11 +60,15 @@ void VMManager::Reset() {
}
VMManager::VMAHandle VMManager::FindVMA(VAddr target) const {
- return std::prev(vma_map.upper_bound(target));
+ if (target >= MAX_ADDRESS) {
+ return vma_map.end();
+ } else {
+ return std::prev(vma_map.upper_bound(target));
+ }
}
ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
- std::shared_ptr<std::vector<u8>> block, u32 offset, u32 size, MemoryState state) {
+ std::shared_ptr<std::vector<u8>> block, size_t offset, u32 size, MemoryState state) {
ASSERT(block != nullptr);
ASSERT(offset + size <= block->size());
@@ -106,10 +119,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u3
return MakeResult<VMAHandle>(MergeAdjacent(vma_handle));
}
-void VMManager::Unmap(VMAHandle vma_handle) {
- VMAIter iter = StripIterConstness(vma_handle);
-
- VirtualMemoryArea& vma = iter->second;
+VMManager::VMAIter VMManager::Unmap(VMAIter vma_handle) {
+ VirtualMemoryArea& vma = vma_handle->second;
vma.type = VMAType::Free;
vma.permissions = VMAPermission::None;
vma.meminfo_state = MemoryState::Free;
@@ -121,26 +132,67 @@ void VMManager::Unmap(VMAHandle vma_handle) {
UpdatePageTableForVMA(vma);
- MergeAdjacent(iter);
+ return MergeAdjacent(vma_handle);
+}
+
+/**
+ * Unmaps every VMA covering [target, target + size).
+ * CarveVMARange first splits the VMAs at both ends of the range; if it reports an error
+ * (e.g. part of the range is already Free) that error is presumably propagated by
+ * CASCADE_RESULT and nothing is unmapped.
+ */
+ResultCode VMManager::UnmapRange(VAddr target, u32 size) {
+ CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size));
+ VAddr target_end = target + size;
+
+ VMAIter end = vma_map.end();
+ // The comparison against the end of the range must be done using addresses since VMAs can be
+ // merged during this process, causing invalidation of the iterators.
+ while (vma != end && vma->second.base < target_end) {
+ // Unmap returns the (possibly merged) VMA, so continue from the one following it.
+ vma = std::next(Unmap(vma));
+ }
+
+ // Sanity check: the VMA now containing target must span at least the requested size.
+ ASSERT(FindVMA(target)->second.size >= size);
+ return RESULT_SUCCESS;
}
-void VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) {
+VMManager::VMAHandle VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) {
VMAIter iter = StripIterConstness(vma_handle);
VirtualMemoryArea& vma = iter->second;
vma.permissions = new_perms;
UpdatePageTableForVMA(vma);
- MergeAdjacent(iter);
+ return MergeAdjacent(iter);
+}
+
+/**
+ * Applies `new_perms` to every VMA covering [target, target + size).
+ * CarveVMARange first splits the VMAs at both ends of the range; if it reports an error
+ * (e.g. part of the range is Free) that error is presumably propagated by CASCADE_RESULT and
+ * no permissions are changed.
+ */
+ResultCode VMManager::ReprotectRange(VAddr target, u32 size, VMAPermission new_perms) {
+ CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size));
+ VAddr target_end = target + size;
+
+ VMAIter end = vma_map.end();
+ // The comparison against the end of the range must be done using addresses since VMAs can be
+ // merged during this process, causing invalidation of the iterators.
+ while (vma != end && vma->second.base < target_end) {
+ // Reprotect returns the (possibly merged) VMA, so continue from the one following it.
+ vma = std::next(StripIterConstness(Reprotect(vma, new_perms)));
+ }
+
+ return RESULT_SUCCESS;
}
-void VMManager::LogLayout() const {
+void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) {
+ // If this ever proves to have a noticeable performance impact, allow users of the function to
+ // specify a specific range of addresses to limit the scan to.
for (const auto& p : vma_map) {
const VirtualMemoryArea& vma = p.second;
- LOG_DEBUG(Kernel, "%08X - %08X size: %8X %c%c%c", vma.base, vma.base + vma.size, vma.size,
+ if (block == vma.backing_block.get()) {
+ UpdatePageTableForVMA(vma);
+ }
+ }
+}
+
+void VMManager::LogLayout(Log::Level log_level) const {
+ for (const auto& p : vma_map) {
+ const VirtualMemoryArea& vma = p.second;
+ LOG_GENERIC(Log::Class::Kernel, log_level, "%08X - %08X size: %8X %c%c%c %s",
+ vma.base, vma.base + vma.size, vma.size,
(u8)vma.permissions & (u8)VMAPermission::Read ? 'R' : '-',
(u8)vma.permissions & (u8)VMAPermission::Write ? 'W' : '-',
- (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-');
+ (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-', GetMemoryStateName(vma.meminfo_state));
}
}
@@ -151,21 +203,19 @@ VMManager::VMAIter VMManager::StripIterConstness(const VMAHandle & iter) {
}
ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {
- ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: %8X", size);
- ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: %08X", base);
+ ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size);
+ ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", base);
VMAIter vma_handle = StripIterConstness(FindVMA(base));
if (vma_handle == vma_map.end()) {
// Target address is outside the range managed by the kernel
- return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS,
- ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E01BF5
+ return ERR_INVALID_ADDRESS;
}
VirtualMemoryArea& vma = vma_handle->second;
if (vma.type != VMAType::Free) {
// Region is already allocated
- return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS,
- ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5
+ return ERR_INVALID_ADDRESS_STATE;
}
u32 start_in_vma = base - vma.base;
@@ -173,8 +223,7 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {
if (end_in_vma > vma.size) {
// Requested allocation doesn't fit inside VMA
- return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS,
- ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5
+ return ERR_INVALID_ADDRESS_STATE;
}
if (end_in_vma != vma.size) {
@@ -189,6 +238,35 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {
return MakeResult<VMAIter>(vma_handle);
}
+/**
+ * Ensures that VMA boundaries exist exactly at `target` and at `target + size`, splitting
+ * the VMAs at the edges of the range where necessary.
+ * @param target Page-aligned start address of the range.
+ * @param size Page-aligned, non-zero size of the range.
+ * @returns An iterator to the VMA starting at `target`, or ERR_INVALID_ADDRESS_STATE if any
+ *          VMA inside the range is Free.
+ */
+ResultVal<VMManager::VMAIter> VMManager::CarveVMARange(VAddr target, u32 size) {
+ ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size);
+ ASSERT_MSG((target & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", target);
+
+ VAddr target_end = target + size;
+ // Callers must pass a non-empty range that neither wraps around nor exceeds MAX_ADDRESS.
+ ASSERT(target_end >= target);
+ ASSERT(target_end <= MAX_ADDRESS);
+ ASSERT(size > 0);
+
+ // Validate the whole range before splitting anything, so a failure leaves the map untouched.
+ VMAIter begin_vma = StripIterConstness(FindVMA(target));
+ VMAIter i_end = vma_map.lower_bound(target_end);
+ for (auto i = begin_vma; i != i_end; ++i) {
+ if (i->second.type == VMAType::Free) {
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+ }
+
+ // Split off the part of the first VMA that lies before the range.
+ if (target != begin_vma->second.base) {
+ begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
+ }
+
+ // Split the VMA containing the end of the range, unless a boundary already exists there.
+ VMAIter end_vma = StripIterConstness(FindVMA(target_end));
+ if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
+ end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
+ }
+
+ return MakeResult<VMAIter>(begin_vma);
+}
+
VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u32 offset_in_vma) {
VirtualMemoryArea& old_vma = vma_handle->second;
VirtualMemoryArea new_vma = old_vma; // Make a copy of the VMA
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index b3795a94a..4e95f1f0c 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -14,6 +14,14 @@
namespace Kernel {
+/// Error returned when an address or range is rejected outright, e.g. by CarveVMA when the
+/// target lies outside the range managed by the kernel.
+const ResultCode ERR_INVALID_ADDRESS{ // 0xE0E01BF5
+ ErrorDescription::InvalidAddress, ErrorModule::OS,
+ ErrorSummary::InvalidArgument, ErrorLevel::Usage};
+
+/// Error returned when the address is valid but the memory there is in the wrong state, e.g.
+/// by CarveVMA when the region is already allocated or the request does not fit in the VMA.
+const ResultCode ERR_INVALID_ADDRESS_STATE{ // 0xE0A01BF5
+ ErrorDescription::InvalidAddress, ErrorModule::OS,
+ ErrorSummary::InvalidState, ErrorLevel::Usage};
+
enum class VMAType : u8 {
/// VMA represents an unmapped region of the address space.
Free,
@@ -75,7 +83,7 @@ struct VirtualMemoryArea {
/// Memory block backing this VMA.
std::shared_ptr<std::vector<u8>> backing_block = nullptr;
/// Offset into the backing_memory the mapping starts from.
- u32 offset = 0;
+ size_t offset = 0;
// Settings for type = BackingMemory
/// Pointer backing this VMA. It will not be destroyed or freed when the VMA is removed.
@@ -141,7 +149,7 @@ public:
* @param state MemoryState tag to attach to the VMA.
*/
ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
- u32 offset, u32 size, MemoryState state);
+ size_t offset, u32 size, MemoryState state);
/**
* Maps an unmanaged host memory pointer at a given address.
@@ -163,14 +171,23 @@ public:
*/
ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u32 size, MemoryState state);
- /// Unmaps the given VMA.
- void Unmap(VMAHandle vma);
+ /// Unmaps a range of addresses, splitting VMAs as necessary.
+ ResultCode UnmapRange(VAddr target, u32 size);
/// Changes the permissions of the given VMA.
- void Reprotect(VMAHandle vma, VMAPermission new_perms);
+ VMAHandle Reprotect(VMAHandle vma, VMAPermission new_perms);
+
+ /// Changes the permissions of a range of addresses, splitting VMAs as necessary.
+ ResultCode ReprotectRange(VAddr target, u32 size, VMAPermission new_perms);
+
+ /**
+ * Scans all VMAs and updates the page table range of any that use the given vector as backing
+ * memory. This should be called after any operation that causes reallocation of the vector.
+ */
+ void RefreshMemoryBlockMappings(const std::vector<u8>* block);
/// Dumps the address space layout to the log, for debugging
- void LogLayout() const;
+ void LogLayout(Log::Level log_level) const;
private:
using VMAIter = decltype(vma_map)::iterator;
@@ -178,6 +195,9 @@ private:
/// Converts a VMAHandle to a mutable VMAIter.
VMAIter StripIterConstness(const VMAHandle& iter);
+ /// Unmaps the given VMA.
+ VMAIter Unmap(VMAIter vma);
+
/**
* Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
* the appropriate error checking.
@@ -185,6 +205,12 @@ private:
ResultVal<VMAIter> CarveVMA(VAddr base, u32 size);
/**
+ * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
+ * end of the range.
+ */
+ ResultVal<VMAIter> CarveVMARange(VAddr base, u32 size);
+
+ /**
* Splits a VMA in two, at the specified offset.
* @returns the right side of the split, with the original iterator becoming the left side.
*/
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index 35402341b..ba66569b4 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -16,6 +16,7 @@
#include "core/hle/hle.h"
#include "core/hle/kernel/event.h"
#include "core/hle/kernel/mutex.h"
+#include "core/hle/kernel/process.h"
#include "core/hle/kernel/shared_memory.h"
#include "core/hle/kernel/thread.h"
@@ -37,7 +38,7 @@ static Kernel::SharedPtr<Kernel::Mutex> lock;
static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event
static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event
-static std::vector<u8> shared_font;
+static std::shared_ptr<std::vector<u8>> shared_font;
static u32 cpu_percent; ///< CPU time available to the running application
@@ -74,11 +75,12 @@ void Initialize(Service::Interface* self) {
void GetSharedFont(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- if (!shared_font.empty()) {
- // TODO(bunnei): This function shouldn't copy the shared font every time it's called.
- // Instead, it should probably map the shared font as RO memory. We don't currently have
- // an easy way to do this, but the copy should be sufficient for now.
- memcpy(Memory::GetPointer(SHARED_FONT_VADDR), shared_font.data(), shared_font.size());
+ if (shared_font != nullptr) {
+ // TODO(yuriks): This is a hack to keep this working right now even with our completely
+ // broken shared memory system.
+ shared_font_mem->fixed_address = SHARED_FONT_VADDR;
+ Kernel::g_current_process->vm_manager.MapMemoryBlock(shared_font_mem->fixed_address,
+ shared_font, 0, shared_font_mem->size, Kernel::MemoryState::Shared);
cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
@@ -391,7 +393,6 @@ void Init() {
// a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file
// "shared_font.bin" in the Citra "sysdata" directory.
- shared_font.clear();
std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT;
FileUtil::CreateFullPath(filepath); // Create path if not already created
@@ -399,8 +400,8 @@ void Init() {
if (file.IsOpen()) {
// Read shared font data
- shared_font.resize((size_t)file.GetSize());
- file.ReadBytes(shared_font.data(), (size_t)file.GetSize());
+ shared_font = std::make_shared<std::vector<u8>>((size_t)file.GetSize());
+ file.ReadBytes(shared_font->data(), shared_font->size());
// Create shared font memory object
using Kernel::MemoryPermission;
@@ -424,7 +425,7 @@ void Init() {
}
void Shutdown() {
- shared_font.clear();
+ shared_font = nullptr;
shared_font_mem = nullptr;
lock = nullptr;
notification_event = nullptr;
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index e93c1b436..c3d0d28a5 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -3,8 +3,8 @@
// Refer to the license.txt file included.
#include "common/bit_field.h"
+#include "common/microprofile.h"
-#include "core/mem_map.h"
#include "core/memory.h"
#include "core/hle/kernel/event.h"
#include "core/hle/kernel/shared_memory.h"
@@ -230,6 +230,10 @@ void SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) {
if (Pica::g_debug_context)
Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::BufferSwapped, nullptr);
+
+ if (screen_id == 0) {
+ MicroProfileFlip();
+ }
}
/**
@@ -418,7 +422,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
case CommandId::SET_DISPLAY_TRANSFER:
{
- auto& params = command.image_copy;
+ auto& params = command.display_transfer;
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
@@ -433,17 +437,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
// TODO: Check if texture copies are implemented correctly..
case CommandId::SET_TEXTURE_COPY:
{
- auto& params = command.image_copy;
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
+ auto& params = command.texture_copy;
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address),
Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address),
Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size);
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size);
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags);
-
- // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1?
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1);
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size),
+ params.size);
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size),
+ params.in_width_gap);
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size),
+ params.out_width_gap);
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags),
+ params.flags);
+
+ // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter.
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1);
break;
}
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h
index c89d0a467..8bcb30ad1 100644
--- a/src/core/hle/service/gsp_gpu.h
+++ b/src/core/hle/service/gsp_gpu.h
@@ -127,7 +127,16 @@ struct Command {
u32 in_buffer_size;
u32 out_buffer_size;
u32 flags;
- } image_copy;
+ } display_transfer;
+
+ struct {
+ u32 in_buffer_address;
+ u32 out_buffer_address;
+ u32 size;
+ u32 in_width_gap;
+ u32 out_width_gap;
+ u32 flags;
+ } texture_copy;
u8 raw_data[0x1C];
};
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 6e7dafaad..6b1b71fe4 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -10,7 +10,6 @@
#include "core/hle/kernel/event.h"
#include "core/hle/service/y2r_u.h"
#include "core/hw/y2r.h"
-#include "core/mem_map.h"
#include "video_core/renderer_base.h"
#include "video_core/utils.h"
diff --git a/src/core/hle/shared_page.cpp b/src/core/hle/shared_page.cpp
index 26d87c7e2..50c5bc01b 100644
--- a/src/core/hle/shared_page.cpp
+++ b/src/core/hle/shared_page.cpp
@@ -18,7 +18,4 @@ void Init() {
shared_page.running_hw = 0x1; // product
}
-void Shutdown() {
-}
-
} // namespace
diff --git a/src/core/hle/shared_page.h b/src/core/hle/shared_page.h
index db6a5340b..379bb7b63 100644
--- a/src/core/hle/shared_page.h
+++ b/src/core/hle/shared_page.h
@@ -54,6 +54,5 @@ static_assert(sizeof(SharedPageDef) == Memory::SHARED_PAGE_SIZE, "Shared page st
extern SharedPageDef shared_page;
void Init();
-void Shutdown();
} // namespace
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index bb64fdfb7..19f750d72 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -5,16 +5,17 @@
#include <map>
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "common/string_util.h"
#include "common/symbols.h"
#include "core/core_timing.h"
-#include "core/mem_map.h"
#include "core/arm/arm_interface.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/event.h"
+#include "core/hle/kernel/memory.h"
#include "core/hle/kernel/mutex.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
@@ -41,32 +42,114 @@ const ResultCode ERR_NOT_FOUND(ErrorDescription::NotFound, ErrorModule::Kernel,
const ResultCode ERR_PORT_NAME_TOO_LONG(ErrorDescription(30), ErrorModule::OS,
ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E0181E
+/// Returned by ControlMemory when addr0 or addr1 is not page-aligned.
+const ResultCode ERR_MISALIGNED_ADDRESS{ // 0xE0E01BF1
+ ErrorDescription::MisalignedAddress, ErrorModule::OS,
+ ErrorSummary::InvalidArgument, ErrorLevel::Usage};
+/// Returned by ControlMemory when the size is not page-aligned.
+const ResultCode ERR_MISALIGNED_SIZE{ // 0xE0E01BF2
+ ErrorDescription::MisalignedSize, ErrorModule::OS,
+ ErrorSummary::InvalidArgument, ErrorLevel::Usage};
+/// Returned by ControlMemory when the permissions contain bits outside of ReadWrite.
+const ResultCode ERR_INVALID_COMBINATION{ // 0xE0E01BEE
+ ErrorDescription::InvalidCombination, ErrorModule::OS,
+ ErrorSummary::InvalidArgument, ErrorLevel::Usage};
+
enum ControlMemoryOperation {
- MEMORY_OPERATION_HEAP = 0x00000003,
- MEMORY_OPERATION_GSP_HEAP = 0x00010003,
+ MEMOP_FREE = 1,
+ MEMOP_RESERVE = 2, // This operation seems to be unsupported in the kernel
+ MEMOP_COMMIT = 3,
+ MEMOP_MAP = 4,
+ MEMOP_UNMAP = 5,
+ MEMOP_PROTECT = 6,
+ MEMOP_OPERATION_MASK = 0xFF,
+
+ MEMOP_REGION_APP = 0x100,
+ MEMOP_REGION_SYSTEM = 0x200,
+ MEMOP_REGION_BASE = 0x300,
+ MEMOP_REGION_MASK = 0xF00,
+
+ MEMOP_LINEAR = 0x10000,
};
/// Map application or GSP heap memory
static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) {
- LOG_TRACE(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=%08X, permissions=0x%08X",
+ using namespace Kernel;
+
+ LOG_DEBUG(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=0x%X, permissions=0x%08X",
operation, addr0, addr1, size, permissions);
- switch (operation) {
+ if ((addr0 & Memory::PAGE_MASK) != 0 || (addr1 & Memory::PAGE_MASK) != 0) { // both addresses must be page-aligned
+ return ERR_MISALIGNED_ADDRESS;
+ }
+ if ((size & Memory::PAGE_MASK) != 0) { // size must be a multiple of the page size
+ return ERR_MISALIGNED_SIZE;
+ }
+
+ u32 region = operation & MEMOP_REGION_MASK; // region selector bits (MEMOP_REGION_*)
+ operation &= ~MEMOP_REGION_MASK; // drop the region bits; the operation number and MEMOP_LINEAR remain
+
+ if (region != 0) {
+ LOG_WARNING(Kernel_SVC, "ControlMemory with specified region not supported, region=%X", region);
+ }
- // Map normal heap memory
- case MEMORY_OPERATION_HEAP:
- *out_addr = Memory::MapBlock_Heap(size, operation, permissions);
+ if ((permissions & (u32)MemoryPermission::ReadWrite) != permissions) { // reject any permission bit outside ReadWrite
+ return ERR_INVALID_COMBINATION;
+ }
+ VMAPermission vma_permissions = (VMAPermission)permissions;
+
+ auto& process = *g_current_process; // every operation below acts on the current process
+
+ switch (operation & MEMOP_OPERATION_MASK) {
+ case MEMOP_FREE:
+ {
+ if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) { // address lies in the regular heap range
+ ResultCode result = process.HeapFree(addr0, size);
+ if (result.IsError()) return result;
+ } else if (addr0 >= process.GetLinearHeapBase() && addr0 < process.GetLinearHeapLimit()) { // address lies in this process's linear heap range
+ ResultCode result = process.LinearFree(addr0, size);
+ if (result.IsError()) return result;
+ } else {
+ return ERR_INVALID_ADDRESS;
+ }
+ *out_addr = addr0; // on success the output is the address that was passed in
break;
+ }
- // Map GSP heap memory
- case MEMORY_OPERATION_GSP_HEAP:
- *out_addr = Memory::MapBlock_HeapLinear(size, operation, permissions);
+ case MEMOP_COMMIT:
+ {
+ if (operation & MEMOP_LINEAR) { // MEMOP_LINEAR selects the linear heap allocator
+ CASCADE_RESULT(*out_addr, process.LinearAllocate(addr0, size, vma_permissions));
+ } else {
+ CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions));
+ }
+ break;
+ }
+
+ case MEMOP_MAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented
+ {
+ CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions));
break;
+ }
+
+ case MEMOP_UNMAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented
+ {
+ ResultCode result = process.HeapFree(addr0, size);
+ if (result.IsError()) return result;
+ break;
+ }
+
+ case MEMOP_PROTECT:
+ {
+ ResultCode result = process.vm_manager.ReprotectRange(addr0, size, vma_permissions);
+ if (result.IsError()) return result;
+ break;
+ }
- // Unknown ControlMemory operation
default:
LOG_ERROR(Kernel_SVC, "unknown operation=0x%08X", operation);
+ return ERR_INVALID_COMBINATION; // unknown operation numbers are rejected
}
+
+ process.vm_manager.LogLayout(Log::Level::Trace); // log the resulting address-space layout at Trace level
+
return RESULT_SUCCESS;
}
@@ -537,9 +620,9 @@ static ResultCode QueryProcessMemory(MemoryInfo* memory_info, PageInfo* page_inf
if (process == nullptr)
return ERR_INVALID_HANDLE;
- auto vma = process->address_space->FindVMA(addr);
+ auto vma = process->vm_manager.FindVMA(addr);
- if (vma == process->address_space->vma_map.end())
+ // Compare against the end of the same map that FindVMA() searched: the handle may name a
+ // process other than the current one, and iterators from different maps must not be compared.
+ if (vma == process->vm_manager.vma_map.end())
return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
memory_info->base_address = vma->second.base;
@@ -692,6 +775,52 @@ static ResultCode CreateMemoryBlock(Handle* out_handle, u32 addr, u32 size, u32
return RESULT_SUCCESS;
}
+/**
+ * Queries a piece of information about a process.
+ * @param out Receives the queried value; written on every path that returns RESULT_SUCCESS
+ * @param process_handle Handle of the process to query
+ * @param type Selects the value to return:
+ *             0 and 2 give heap_used + linear_heap_used + misc_memory_used,
+ *             20 gives FCRAM_PADDR minus the process's linear heap base,
+ *             1 and 3-8 are accepted but unimplemented (0 is reported)
+ * @return RESULT_SUCCESS, ERR_INVALID_HANDLE if the handle is not a process, a NotImplemented
+ *         error for types 21-23, or an InvalidEnumValue error for any other type
+ */
+static ResultCode GetProcessInfo(s64* out, Handle process_handle, u32 type) {
+    LOG_TRACE(Kernel_SVC, "called process=0x%08X type=%u", process_handle, type);
+
+    using Kernel::Process;
+    Kernel::SharedPtr<Process> process = Kernel::g_handle_table.Get<Process>(process_handle);
+    if (process == nullptr)
+        return ERR_INVALID_HANDLE;
+
+    switch (type) {
+    case 0:
+    case 2:
+        // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure
+        // what's the difference between them.
+        *out = process->heap_used + process->linear_heap_used + process->misc_memory_used;
+        break;
+    case 1:
+    case 3:
+    case 4:
+    case 5:
+    case 6:
+    case 7:
+    case 8:
+        // These are valid, but not implemented yet
+        LOG_ERROR(Kernel_SVC, "unimplemented GetProcessInfo type=%u", type);
+        // Success is still reported, so give the caller a defined value instead of leaving
+        // the output untouched.
+        *out = 0;
+        break;
+    case 20:
+        *out = Memory::FCRAM_PADDR - process->GetLinearHeapBase();
+        break;
+    default:
+        LOG_ERROR(Kernel_SVC, "unknown GetProcessInfo type=%u", type);
+
+        if (type >= 21 && type <= 23) {
+            return ResultCode( // 0xE0E01BF4
+                ErrorDescription::NotImplemented, ErrorModule::OS,
+                ErrorSummary::InvalidArgument, ErrorLevel::Usage);
+        }
+        return ResultCode( // 0xD8E007ED
+            ErrorDescription::InvalidEnumValue, ErrorModule::Kernel,
+            ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
+    }
+
+    return RESULT_SUCCESS;
+}
+
namespace {
struct FunctionDef {
using Func = void();
@@ -746,7 +875,7 @@ static const FunctionDef SVC_Table[] = {
{0x28, HLE::Wrap<GetSystemTick>, "GetSystemTick"},
{0x29, nullptr, "GetHandleInfo"},
{0x2A, nullptr, "GetSystemInfo"},
- {0x2B, nullptr, "GetProcessInfo"},
+ {0x2B, HLE::Wrap<GetProcessInfo>, "GetProcessInfo"},
{0x2C, nullptr, "GetThreadInfo"},
{0x2D, HLE::Wrap<ConnectToPort>, "ConnectToPort"},
{0x2E, nullptr, "SendSyncRequest1"},
@@ -841,8 +970,11 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
return &SVC_Table[func_num];
}
+MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
+
void CallSVC(u32 immediate) {
Common::Profiling::ScopeTimer timer_svc(profiler_svc);
+ MICROPROFILE_SCOPE(Kernel_SVC);
const FunctionDef* info = GetSVCInfo(immediate);
if (info) {
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 3ccbc03b2..bc7bde903 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -3,11 +3,13 @@
// Refer to the license.txt file included.
#include <cstring>
+#include <numeric>
#include <type_traits>
#include "common/color.h"
#include "common/common_types.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/vector_math.h"
#include "core/settings.h"
@@ -84,6 +86,9 @@ static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_
}
}
+MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255));
+MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100));
+
template <typename T>
inline void Write(u32 addr, const T data) {
addr -= HW::VADDR_GPU;
@@ -149,6 +154,8 @@ inline void Write(u32 addr, const T data) {
case GPU_REG_INDEX(display_transfer_config.trigger):
{
+ MICROPROFILE_SCOPE(GPU_DisplayTransfer);
+
const auto& config = g_regs.display_transfer_config;
if (config.trigger & 1) {
@@ -158,14 +165,59 @@ inline void Write(u32 addr, const T data) {
u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
+ if (config.is_texture_copy) {
+     // Texture copy moves raw bytes line by line: each line is `width` bytes of data followed
+     // by `gap` bytes that are skipped. The register fields are scaled by 16 to get byte counts.
+     u32 input_width = config.texture_copy.input_width * 16;
+     u32 input_gap = config.texture_copy.input_gap * 16;
+     u32 output_width = config.texture_copy.output_width * 16;
+     u32 output_gap = config.texture_copy.output_gap * 16;
+
+     if (input_width == 0 || output_width == 0) {
+         // A zero line width would divide by zero in the size computations below and would
+         // keep the copy loop from ever making progress, so reject the transfer up front.
+         // No completion interrupt is signalled for the rejected transfer.
+         LOG_CRITICAL(HW_GPU, "Texture copy with zero line width (input %u, output %u), ignoring transfer",
+                      input_width, output_width);
+         break;
+     }
+
+     // Let the hardware rasterizer know about the whole source span (data plus gaps) before reading it
+     size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
+     VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size);
+
+     u32 remaining_size = config.texture_copy.size;
+     u32 remaining_input = input_width;
+     u32 remaining_output = output_width;
+     while (remaining_size > 0) {
+         // Copy up to whichever comes first: end of input line, end of output line, or end of data
+         u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
+
+         std::memcpy(dst_pointer, src_pointer, copy_size);
+         src_pointer += copy_size;
+         dst_pointer += copy_size;
+
+         remaining_input -= copy_size;
+         remaining_output -= copy_size;
+         remaining_size -= copy_size;
+
+         if (remaining_input == 0) {
+             // Finished an input line: skip its gap and start the next one
+             remaining_input = input_width;
+             src_pointer += input_gap;
+         }
+         if (remaining_output == 0) {
+             // Finished an output line: skip its gap and start the next one
+             remaining_output = output_width;
+             dst_pointer += output_gap;
+         }
+     }
+
+     LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
+               config.texture_copy.size,
+               config.GetPhysicalInputAddress(), input_width, input_gap,
+               config.GetPhysicalOutputAddress(), output_width, output_gap,
+               config.flags);
+
+     // Report the whole destination span (data plus gaps) that was written
+     size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
+     VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size);
+
+     GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
+     break;
+ }
+
if (config.scaling > config.ScaleXY) {
LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
UNIMPLEMENTED();
break;
}
- if (config.output_tiled &&
- (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) {
+ if (config.input_linear && config.scaling != config.NoScale) {
LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
UNIMPLEMENTED();
break;
@@ -182,23 +234,6 @@ inline void Write(u32 addr, const T data) {
VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size);
- if (config.raw_copy) {
- // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions
- // TODO(Subv): Verify if raw copies perform scaling
- memcpy(dst_pointer, src_pointer, output_size);
-
- LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy",
- output_size,
- config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
- config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(),
- config.output_format.Value(), config.flags);
-
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
-
- VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size);
- break;
- }
-
for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) {
Math::Vec4<u8> src_color;
@@ -220,7 +255,7 @@ inline void Write(u32 addr, const T data) {
u32 src_offset;
u32 dst_offset;
- if (config.output_tiled) {
+ if (config.input_linear) {
if (!config.dont_swizzle) {
// Interpret the input as linear and the output as tiled
u32 coarse_y = y & ~7;
@@ -315,6 +350,8 @@ inline void Write(u32 addr, const T data) {
const auto& config = g_regs.command_processor_config;
if (config.trigger & 1)
{
+ MICROPROFILE_SCOPE(GPU_CmdlistProcessing);
+
u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index daad506fe..2e3a9f779 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -201,12 +201,14 @@ struct Regs {
u32 flags;
BitField< 0, 1, u32> flip_vertically; // flips input data vertically
- BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format
- BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing
+ BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format
+ BitField< 2, 1, u32> crop_input_lines;
+ BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields
BitField< 5, 1, u32> dont_swizzle;
BitField< 8, 3, PixelFormat> input_format;
BitField<12, 3, PixelFormat> output_format;
-
+ /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
+ BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
};
@@ -214,10 +216,30 @@ struct Regs {
// it seems that writing to this field triggers the display transfer
u32 trigger;
+
+ INSERT_PADDING_WORDS(0x1);
+
+ struct {
+ u32 size;
+
+ union {
+ u32 input_size;
+
+ BitField< 0, 16, u32> input_width;
+ BitField<16, 16, u32> input_gap;
+ };
+
+ union {
+ u32 output_size;
+
+ BitField< 0, 16, u32> output_width;
+ BitField<16, 16, u32> output_gap;
+ };
+ } texture_copy;
} display_transfer_config;
- ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c);
+ ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c);
- INSERT_PADDING_WORDS(0x331);
+ INSERT_PADDING_WORDS(0x32D);
struct {
// command list size (in bytes)
diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp
deleted file mode 100644
index cbe993fbe..000000000
--- a/src/core/mem_map.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <map>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "common/common_types.h"
-#include "common/logging/log.h"
-
-#include "core/hle/config_mem.h"
-#include "core/hle/kernel/vm_manager.h"
-#include "core/hle/result.h"
-#include "core/hle/shared_page.h"
-#include "core/mem_map.h"
-#include "core/memory.h"
-#include "core/memory_setup.h"
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-namespace Memory {
-
-namespace {
-
-struct MemoryArea {
- u32 base;
- u32 size;
- const char* name;
-};
-
-// We don't declare the IO regions in here since its handled by other means.
-static MemoryArea memory_areas[] = {
- {HEAP_VADDR, HEAP_SIZE, "Heap"}, // Application heap (main memory)
- {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
- {LINEAR_HEAP_VADDR, LINEAR_HEAP_SIZE, "Linear Heap"}, // Linear heap (main memory)
- {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
- {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory
- {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
-};
-
-/// Represents a block of memory mapped by ControlMemory/MapMemoryBlock
-struct MemoryBlock {
- MemoryBlock() : handle(0), base_address(0), address(0), size(0), operation(0), permissions(0) {
- }
- u32 handle;
- u32 base_address;
- u32 address;
- u32 size;
- u32 operation;
- u32 permissions;
-
- const u32 GetVirtualAddress() const{
- return base_address + address;
- }
-};
-
-static std::map<u32, MemoryBlock> heap_map;
-static std::map<u32, MemoryBlock> heap_linear_map;
-
-}
-
-u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) {
- MemoryBlock block;
-
- block.base_address = HEAP_VADDR;
- block.size = size;
- block.operation = operation;
- block.permissions = permissions;
-
- if (heap_map.size() > 0) {
- const MemoryBlock last_block = heap_map.rbegin()->second;
- block.address = last_block.address + last_block.size;
- }
- heap_map[block.GetVirtualAddress()] = block;
-
- return block.GetVirtualAddress();
-}
-
-u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions) {
- MemoryBlock block;
-
- block.base_address = LINEAR_HEAP_VADDR;
- block.size = size;
- block.operation = operation;
- block.permissions = permissions;
-
- if (heap_linear_map.size() > 0) {
- const MemoryBlock last_block = heap_linear_map.rbegin()->second;
- block.address = last_block.address + last_block.size;
- }
- heap_linear_map[block.GetVirtualAddress()] = block;
-
- return block.GetVirtualAddress();
-}
-
-PAddr VirtualToPhysicalAddress(const VAddr addr) {
- if (addr == 0) {
- return 0;
- } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) {
- return addr - VRAM_VADDR + VRAM_PADDR;
- } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
- return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR;
- } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) {
- return addr - DSP_RAM_VADDR + DSP_RAM_PADDR;
- } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) {
- return addr - IO_AREA_VADDR + IO_AREA_PADDR;
- }
-
- LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08x", addr);
- // To help with debugging, set bit on address so that it's obviously invalid.
- return addr | 0x80000000;
-}
-
-VAddr PhysicalToVirtualAddress(const PAddr addr) {
- if (addr == 0) {
- return 0;
- } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) {
- return addr - VRAM_PADDR + VRAM_VADDR;
- } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) {
- return addr - FCRAM_PADDR + LINEAR_HEAP_VADDR;
- } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) {
- return addr - DSP_RAM_PADDR + DSP_RAM_VADDR;
- } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) {
- return addr - IO_AREA_PADDR + IO_AREA_VADDR;
- }
-
- LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08x", addr);
- // To help with debugging, set bit on address so that it's obviously invalid.
- return addr | 0x80000000;
-}
-
-void Init() {
- InitMemoryMap();
- LOG_DEBUG(HW_Memory, "initialized OK");
-}
-
-void InitLegacyAddressSpace(Kernel::VMManager& address_space) {
- using namespace Kernel;
-
- for (MemoryArea& area : memory_areas) {
- auto block = std::make_shared<std::vector<u8>>(area.size);
- address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap();
- }
-
- auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR,
- (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom();
- address_space.Reprotect(cfg_mem_vma, VMAPermission::Read);
-
- auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR,
- (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom();
- address_space.Reprotect(shared_page_vma, VMAPermission::Read);
-}
-
-void Shutdown() {
- heap_map.clear();
- heap_linear_map.clear();
-
- LOG_DEBUG(HW_Memory, "shutdown OK");
-}
-
-} // namespace
diff --git a/src/core/mem_map.h b/src/core/mem_map.h
deleted file mode 100644
index 229ef82c5..000000000
--- a/src/core/mem_map.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace Kernel {
-class VMManager;
-}
-
-namespace Memory {
-
-void Init();
-void InitLegacyAddressSpace(Kernel::VMManager& address_space);
-void Shutdown();
-
-/**
- * Maps a block of memory on the heap
- * @param size Size of block in bytes
- * @param operation Memory map operation type
- * @param permissions Memory allocation permissions
- */
-u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions);
-
-/**
- * Maps a block of memory on the GSP heap
- * @param size Size of block in bytes
- * @param operation Memory map operation type
- * @param permissions Control memory permissions
- */
-u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions);
-
-/**
- * Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical
- * address. This should be used by services to translate addresses for use by the hardware.
- */
-PAddr VirtualToPhysicalAddress(VAddr addr);
-
-/**
- * Undoes a mapping performed by VirtualToPhysicalAddress().
- */
-VAddr PhysicalToVirtualAddress(PAddr addr);
-
-} // namespace
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 1f66bb27d..cde390b8a 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -9,7 +9,7 @@
#include "common/logging/log.h"
#include "common/swap.h"
-#include "core/mem_map.h"
+#include "core/hle/kernel/process.h"
#include "core/memory.h"
#include "core/memory_setup.h"
@@ -198,4 +198,42 @@ void WriteBlock(const VAddr addr, const u8* data, const size_t size) {
Write8(addr + offset, data[offset]);
}
+PAddr VirtualToPhysicalAddress(const VAddr addr) {
+ if (addr == 0) { // a null virtual address translates to a null physical address
+ return 0;
+ } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) {
+ return addr - VRAM_VADDR + VRAM_PADDR;
+ } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
+ return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR;
+ } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) {
+ return addr - DSP_RAM_VADDR + DSP_RAM_PADDR;
+ } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) {
+ return addr - IO_AREA_VADDR + IO_AREA_PADDR;
+ } else if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) { // second linear heap window, also based at FCRAM_PADDR
+ return addr - NEW_LINEAR_HEAP_VADDR + FCRAM_PADDR;
+ }
+
+ LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08X", addr);
+ // To help with debugging, set bit on address so that it's obviously invalid.
+ return addr | 0x80000000;
+}
+
+VAddr PhysicalToVirtualAddress(const PAddr addr) {
+ if (addr == 0) { // a null physical address translates to a null virtual address
+ return 0;
+ } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) {
+ return addr - VRAM_PADDR + VRAM_VADDR;
+ } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) { // FCRAM maps back relative to the current process's linear heap base
+ return addr - FCRAM_PADDR + Kernel::g_current_process->GetLinearHeapBase(); // NOTE(review): g_current_process is dereferenced unchecked - confirm a process is always set here
+ } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) {
+ return addr - DSP_RAM_PADDR + DSP_RAM_VADDR;
+ } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) {
+ return addr - IO_AREA_PADDR + IO_AREA_VADDR;
+ }
+
+ LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08X", addr);
+ // To help with debugging, set bit on address so that it's obviously invalid.
+ return addr | 0x80000000;
+}
+
} // namespace
diff --git a/src/core/memory.h b/src/core/memory.h
index 418609de0..5af72b7a7 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -15,6 +15,8 @@ namespace Memory {
* be mapped.
*/
const u32 PAGE_SIZE = 0x1000;
+const u32 PAGE_MASK = PAGE_SIZE - 1;
+const int PAGE_BITS = 12;
/// Physical memory regions as seen from the ARM11
enum : PAddr {
@@ -103,8 +105,15 @@ enum : VAddr {
// hardcoded value.
/// Area where TLS (Thread-Local Storage) buffers are allocated.
TLS_AREA_VADDR = 0x1FF82000,
- TLS_AREA_SIZE = 0x00030000, // Each TLS buffer is 0x200 bytes, allows for 300 threads
+ TLS_ENTRY_SIZE = 0x200,
+ TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size
TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
+
+
+ /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.
+ NEW_LINEAR_HEAP_VADDR = 0x30000000,
+ NEW_LINEAR_HEAP_SIZE = 0x10000000,
+ NEW_LINEAR_HEAP_VADDR_END = NEW_LINEAR_HEAP_VADDR + NEW_LINEAR_HEAP_SIZE,
};
u8 Read8(VAddr addr);
@@ -122,6 +131,17 @@ void WriteBlock(VAddr addr, const u8* data, size_t size);
u8* GetPointer(VAddr virtual_address);
/**
+* Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical
+* address. This should be used by services to translate addresses for use by the hardware.
+*/
+PAddr VirtualToPhysicalAddress(VAddr addr);
+
+/**
+* Undoes a mapping performed by VirtualToPhysicalAddress().
+*/
+VAddr PhysicalToVirtualAddress(PAddr addr);
+
+/**
* Gets a pointer to the memory region beginning at the specified physical address.
*
* @note This is currently implemented using PhysicalToVirtualAddress().
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 361bfc816..84ff30120 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -10,9 +10,6 @@
namespace Memory {
-const u32 PAGE_MASK = PAGE_SIZE - 1;
-const int PAGE_BITS = 12;
-
void InitMemoryMap();
/**
diff --git a/src/core/system.cpp b/src/core/system.cpp
index 561ff82f0..3cd84bf5e 100644
--- a/src/core/system.cpp
+++ b/src/core/system.cpp
@@ -4,11 +4,11 @@
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/mem_map.h"
#include "core/system.h"
#include "core/hw/hw.h"
#include "core/hle/hle.h"
#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/memory.h"
#include "video_core/video_core.h"
@@ -29,7 +29,6 @@ void Shutdown() {
HLE::Shutdown();
Kernel::Shutdown();
HW::Shutdown();
- Memory::Shutdown();
CoreTiming::Shutdown();
Core::Shutdown();
}
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d82e20f86..a78985510 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -4,6 +4,7 @@
#include <boost/range/algorithm/fill.hpp>
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/hle/service/gsp_gpu.h"
@@ -43,6 +44,8 @@ static const u32 expand_bits_to_bytes[] = {
0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff
};
+MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
+
static void WritePicaReg(u32 id, u32 value, u32 mask) {
auto& regs = g_state.regs;
@@ -126,6 +129,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(trigger_draw_indexed):
{
Common::Profiling::ScopeTimer scope_timer(category_drawing);
+ MICROPROFILE_SCOPE(GPU_Drawing);
#if PICA_LOG_TEV
DebugUtils::DumpTevStageConfig(regs.GetTevStages());
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 8ad77f0c8..059445f7d 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -25,6 +25,8 @@
#include "common/math_util.h"
#include "common/vector_math.h"
+#include "core/settings.h"
+
#include "video_core/pica.h"
#include "video_core/renderer_base.h"
#include "video_core/utils.h"
@@ -45,8 +47,10 @@ void DebugContext::OnEvent(Event event, void* data) {
{
std::unique_lock<std::mutex> lock(breakpoint_mutex);
- // Commit the hardware renderer's framebuffer so it will show on debug widgets
- VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer();
+ if (Settings::values.use_hw_renderer) {
+ // Commit the hardware renderer's framebuffer so it will show on debug widgets
+ VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer();
+ }
// TODO: Should stop the CPU thread here once we multithread emulation.
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 58b924f9e..f40684d83 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -441,8 +441,14 @@ struct Regs {
};
enum class StencilAction : u32 {
- Keep = 0,
- Xor = 5,
+ Keep = 0,
+ Zero = 1,
+ Replace = 2,
+ Increment = 3,
+ Decrement = 4,
+ Invert = 5,
+ IncrementWrap = 6,
+ DecrementWrap = 7
};
struct {
@@ -481,23 +487,29 @@ struct Regs {
struct {
union {
+ // Raw value of this register
+ u32 raw_func;
+
// If true, enable stencil testing
BitField< 0, 1, u32> enable;
// Comparison operation for stencil testing
BitField< 4, 3, CompareFunc> func;
- // Value to calculate the new stencil value from
- BitField< 8, 8, u32> replacement_value;
+ // Mask used to control writing to the stencil buffer
+ BitField< 8, 8, u32> write_mask;
// Value to compare against for stencil testing
BitField<16, 8, u32> reference_value;
// Mask to apply on stencil test inputs
- BitField<24, 8, u32> mask;
+ BitField<24, 8, u32> input_mask;
};
union {
+ // Raw value of this register
+ u32 raw_op;
+
// Action to perform when the stencil test fails
BitField< 0, 3, StencilAction> action_stencil_fail;
@@ -1021,12 +1033,20 @@ struct float24 {
return ret;
}
+ static float24 Zero() {
+ return FromFloat32(0.f);
+ }
+
// Not recommended for anything but logging
float ToFloat32() const {
return value;
}
float24 operator * (const float24& flt) const {
+ if ((this->value == 0.f && !std::isnan(flt.value)) ||
+ (flt.value == 0.f && !std::isnan(this->value)))
+ // PICA gives 0 instead of NaN when multiplying by inf
+ return Zero();
return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
}
@@ -1043,7 +1063,11 @@ struct float24 {
}
float24& operator *= (const float24& flt) {
- value *= flt.ToFloat32();
+ if ((this->value == 0.f && !std::isnan(flt.value)) ||
+ (flt.value == 0.f && !std::isnan(this->value)))
+ // PICA gives 0 instead of NaN when multiplying by inf
+ *this = Zero();
+ else value *= flt.ToFloat32();
return *this;
}
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index b83798b0f..77eadda9e 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -7,6 +7,7 @@
#include "common/color.h"
#include "common/common_types.h"
#include "common/math_util.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/hw/gpu.h"
@@ -215,14 +216,33 @@ static void SetStencil(int x, int y, u8 value) {
}
}
-// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not!
-static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) {
+static u8 PerformStencilAction(Regs::StencilAction action, u8 old_stencil, u8 ref) {
switch (action) {
case Regs::StencilAction::Keep:
- return dest;
+ return old_stencil;
- case Regs::StencilAction::Xor:
- return dest ^ ref;
+ case Regs::StencilAction::Zero:
+ return 0;
+
+ case Regs::StencilAction::Replace:
+ return ref;
+
+ case Regs::StencilAction::Increment:
+ // Saturated increment
+ return std::min<u8>(old_stencil, 254) + 1;
+
+ case Regs::StencilAction::Decrement:
+ // Saturated decrement
+ return std::max<u8>(old_stencil, 1) - 1;
+
+ case Regs::StencilAction::Invert:
+ return ~old_stencil;
+
+ case Regs::StencilAction::IncrementWrap:
+ return old_stencil + 1;
+
+ case Regs::StencilAction::DecrementWrap:
+ return old_stencil - 1;
default:
LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action);
@@ -267,6 +287,7 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
};
static Common::Profiling::TimingCategory rasterization_category("Rasterization");
+MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
/**
* Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
@@ -279,6 +300,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
{
const auto& regs = g_state.regs;
Common::Profiling::ScopeTimer timer(rasterization_category);
+ MICROPROFILE_SCOPE(GPU_Rasterization);
// vertex positions in rasterizer coordinates
static auto FloatToFix = [](float24 flt) {
@@ -780,10 +802,16 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
}
u8 old_stencil = 0;
+
+ auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) {
+ u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value);
+ SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask));
+ };
+
if (stencil_action_enable) {
old_stencil = GetStencil(x >> 4, y >> 4);
- u8 dest = old_stencil & stencil_test.mask;
- u8 ref = stencil_test.reference_value & stencil_test.mask;
+ u8 dest = old_stencil & stencil_test.input_mask;
+ u8 ref = stencil_test.reference_value & stencil_test.input_mask;
bool pass = false;
switch (stencil_test.func) {
@@ -821,8 +849,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
}
if (!pass) {
- u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value);
- SetStencil(x >> 4, y >> 4, new_stencil);
+ UpdateStencil(stencil_test.action_stencil_fail);
continue;
}
}
@@ -872,23 +899,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
}
if (!pass) {
- if (stencil_action_enable) {
- u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value);
- SetStencil(x >> 4, y >> 4, new_stencil);
- }
+ if (stencil_action_enable)
+ UpdateStencil(stencil_test.action_depth_fail);
continue;
}
if (output_merger.depth_write_enable)
SetDepth(x >> 4, y >> 4, z);
-
- if (stencil_action_enable) {
- // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway?
- u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value);
- SetStencil(x >> 4, y >> 4, new_stencil);
- }
}
+ // The stencil depth_pass action is executed even if depth testing is disabled
+ if (stencil_action_enable)
+ UpdateStencil(stencil_test.action_depth_pass);
+
auto dest = GetPixel(x >> 4, y >> 4);
Math::Vec4<u8> blend_output = combiner_output;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9f1552adf..d29049508 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -7,6 +7,7 @@
#include "common/color.h"
#include "common/math_util.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/hw/gpu.h"
@@ -230,8 +231,8 @@ void RasterizerOpenGL::DrawTriangles() {
u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format)
* regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
- res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size);
- res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size);
+ res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size, true);
+ res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size, true);
}
void RasterizerOpenGL::CommitFramebuffer() {
@@ -268,7 +269,8 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
break;
// Stencil test
- case PICA_REG_INDEX(output_merger.stencil_test):
+ case PICA_REG_INDEX(output_merger.stencil_test.raw_func):
+ case PICA_REG_INDEX(output_merger.stencil_test.raw_op):
SyncStencilTest();
break;
@@ -675,7 +677,15 @@ void RasterizerOpenGL::SyncLogicOp() {
}
void RasterizerOpenGL::SyncStencilTest() {
- // TODO: Implement stencil test, mask, and op
+ const auto& regs = Pica::g_state.regs;
+ state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
+ state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func);
+ state.stencil.test_ref = regs.output_merger.stencil_test.reference_value;
+ state.stencil.test_mask = regs.output_merger.stencil_test.input_mask;
+ state.stencil.write_mask = regs.output_merger.stencil_test.write_mask;
+ state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail);
+ state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail);
+ state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass);
}
void RasterizerOpenGL::SyncDepthTest() {
@@ -777,12 +787,16 @@ void RasterizerOpenGL::SyncDrawState() {
state.Apply();
}
+MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
+
void RasterizerOpenGL::ReloadColorBuffer() {
u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress());
if (color_buffer == nullptr)
return;
+ MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
+
u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
@@ -822,6 +836,8 @@ void RasterizerOpenGL::ReloadDepthBuffer() {
if (depth_buffer == nullptr)
return;
+ MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
+
u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
// OpenGL needs 4 bpp alignment for D24
@@ -860,14 +876,22 @@ void RasterizerOpenGL::ReloadDepthBuffer() {
state.Apply();
glActiveTexture(GL_TEXTURE0);
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height,
- fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get());
+ if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
+ // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer.
+ // The bug has been reported to Intel (https://communities.intel.com/message/324464)
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0,
+ GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get());
+ } else {
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height,
+ fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get());
+ }
state.texture_units[0].texture_2d = 0;
state.Apply();
}
Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
+MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
void RasterizerOpenGL::CommitColorBuffer() {
if (last_fb_color_addr != 0) {
@@ -875,6 +899,7 @@ void RasterizerOpenGL::CommitColorBuffer() {
if (color_buffer != nullptr) {
Common::Profiling::ScopeTimer timer(buffer_commit_category);
+ MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
@@ -911,6 +936,7 @@ void RasterizerOpenGL::CommitDepthBuffer() {
if (depth_buffer != nullptr) {
Common::Profiling::ScopeTimer timer(buffer_commit_category);
+ MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 70f0ba5f1..1e38c2e6d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -2,8 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/hash.h"
#include "common/make_unique.h"
#include "common/math_util.h"
+#include "common/microprofile.h"
#include "common/vector_math.h"
#include "core/memory.h"
@@ -16,15 +18,18 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
FullFlush();
}
+MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
+
void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) {
PAddr texture_addr = config.config.GetPhysicalAddress();
-
const auto cached_texture = texture_cache.find(texture_addr);
if (cached_texture != texture_cache.end()) {
state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle;
state.Apply();
} else {
+ MICROPROFILE_SCOPE(OpenGL_TextureUpload);
+
std::unique_ptr<CachedTexture> new_texture = Common::make_unique<CachedTexture>();
new_texture->texture.Create();
@@ -46,12 +51,14 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text
}
const auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
+ u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr);
new_texture->width = info.width;
new_texture->height = info.height;
- new_texture->size = info.width * info.height * Pica::Regs::NibblesPerPixel(info.format);
+ new_texture->size = info.stride * info.height;
+ new_texture->addr = texture_addr;
+ new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size);
- u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr);
std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]);
for (int y = 0; y < info.height; ++y) {
@@ -66,12 +73,18 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text
}
}
-void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size) {
+void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size, bool ignore_hash) {
// Flush any texture that falls in the flushed region
// TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound
auto cache_upper_bound = texture_cache.upper_bound(addr + size);
+
for (auto it = texture_cache.begin(); it != cache_upper_bound;) {
- if (MathUtil::IntervalsIntersect(addr, size, it->first, it->second->size)) {
+ const auto& info = *it->second;
+
+ // Flush the texture only if the memory region intersects and a change is detected
+ if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) &&
+ (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) {
+
it = texture_cache.erase(it);
} else {
++it;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 96f3a925c..d8f9edf59 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -19,7 +19,7 @@ public:
void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config);
/// Flush any cached resource that touches the flushed region
- void NotifyFlush(PAddr addr, u32 size);
+ void NotifyFlush(PAddr addr, u32 size, bool ignore_hash = false);
/// Flush all cached OpenGL resources tracked by this cache manager
void FullFlush();
@@ -30,6 +30,8 @@ private:
GLuint width;
GLuint height;
u32 size;
+ u64 hash;
+ PAddr addr;
};
std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache;
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 871324014..ba47ce8b8 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -26,6 +26,9 @@ OpenGLState::OpenGLState() {
stencil.test_ref = 0;
stencil.test_mask = -1;
stencil.write_mask = -1;
+ stencil.action_depth_fail = GL_KEEP;
+ stencil.action_depth_pass = GL_KEEP;
+ stencil.action_stencil_fail = GL_KEEP;
blend.enabled = false;
blend.src_rgb_func = GL_ONE;
@@ -105,6 +108,12 @@ void OpenGLState::Apply() {
glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask);
}
+ if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail ||
+ stencil.action_depth_pass != cur_state.stencil.action_depth_pass ||
+ stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) {
+ glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, stencil.action_depth_pass);
+ }
+
// Stencil mask
if (stencil.write_mask != cur_state.stencil.write_mask) {
glStencilMask(stencil.write_mask);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 3e2379021..81e7e0877 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -32,6 +32,9 @@ public:
GLint test_ref; // GL_STENCIL_REF
GLuint test_mask; // GL_STENCIL_VALUE_MASK
GLuint write_mask; // GL_STENCIL_WRITEMASK
+ GLenum action_stencil_fail; // GL_STENCIL_FAIL
+ GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL
+ GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS
} stencil;
struct {
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 3b562da86..12806fad5 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -152,6 +152,29 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) {
return compare_func_table[(unsigned)func];
}
+inline GLenum StencilOp(Pica::Regs::StencilAction action) {
+ static const GLenum stencil_op_table[] = {
+ GL_KEEP, // StencilAction::Keep
+ GL_ZERO, // StencilAction::Zero
+ GL_REPLACE, // StencilAction::Replace
+ GL_INCR, // StencilAction::Increment
+ GL_DECR, // StencilAction::Decrement
+ GL_INVERT, // StencilAction::Invert
+ GL_INCR_WRAP, // StencilAction::IncrementWrap
+ GL_DECR_WRAP // StencilAction::DecrementWrap
+ };
+
+ // Range check table for input
+ if ((unsigned)action >= ARRAY_SIZE(stencil_op_table)) {
+ LOG_CRITICAL(Render_OpenGL, "Unknown stencil op %d", action);
+ UNREACHABLE();
+
+ return GL_KEEP;
+ }
+
+ return stencil_op_table[(unsigned)action];
+}
+
inline std::array<GLfloat, 4> ColorRGBA8(const u8* bytes) {
return { { bytes[0] / 255.0f,
bytes[1] / 255.0f,
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 4e9836c80..f89117521 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -9,6 +9,7 @@
#include "common/hash.h"
#include "common/make_unique.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "video_core/debug_utils/debug_utils.h"
@@ -51,15 +52,19 @@ void Setup(UnitState<false>& state) {
}
void Shutdown() {
+#ifdef ARCHITECTURE_x86_64
shader_map.clear();
+#endif // ARCHITECTURE_x86_64
}
static Common::Profiling::TimingCategory shader_category("Vertex Shader");
+MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
auto& config = g_state.regs.vs;
Common::Profiling::ScopeTimer timer(shader_category);
+ MICROPROFILE_SCOPE(GPU_VertexShader);
state.program_counter = config.main_offset;
state.debug.max_offset = 0;
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index e14de0768..69e4efa68 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -177,7 +177,10 @@ void RunInterpreter(UnitState<Debug>& state) {
if (!swizzle.DestComponentEnabled(i))
continue;
- dest[i] = std::max(src1[i], src2[i]);
+ // NOTE: Exact form required to match NaN semantics to hardware:
+ // max(0, NaN) -> NaN
+ // max(NaN, 0) -> 0
+ dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
@@ -190,19 +193,29 @@ void RunInterpreter(UnitState<Debug>& state) {
if (!swizzle.DestComponentEnabled(i))
continue;
- dest[i] = std::min(src1[i], src2[i]);
+ // NOTE: Exact form required to match NaN semantics to hardware:
+ // min(0, NaN) -> NaN
+ // min(NaN, 0) -> 0
+ dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
case OpCode::Id::DP3:
case OpCode::Id::DP4:
+ case OpCode::Id::DPH:
+ case OpCode::Id::DPHI:
{
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+
+ OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode();
+ if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI)
+ src1[3] = float24::FromFloat32(1.0f);
+
float24 dot = float24::FromFloat32(0.f);
- int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4;
+ int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
for (int i = 0; i < num_components; ++i)
dot = dot + src1[i] * src2[i];
@@ -221,13 +234,12 @@ void RunInterpreter(UnitState<Debug>& state) {
{
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+ float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
- // TODO: Be stable against division by zero!
- // TODO: I think this might be wrong... we should only use one component here
- dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32());
+ dest[i] = rcp_res;
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
@@ -238,13 +250,12 @@ void RunInterpreter(UnitState<Debug>& state) {
{
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+ float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
- // TODO: Be stable against division by zero!
- // TODO: I think this might be wrong... we should only use one component here
- dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32()));
+ dest[i] = rsq_res;
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
@@ -278,6 +289,20 @@ void RunInterpreter(UnitState<Debug>& state) {
break;
}
+ case OpCode::Id::SGE:
+ case OpCode::Id::SGEI:
+ Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
+ Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
+ Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i))
+ continue;
+
+ dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
+ }
+ Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
+ break;
+
case OpCode::Id::SLT:
case OpCode::Id::SLTI:
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
@@ -334,6 +359,42 @@ void RunInterpreter(UnitState<Debug>& state) {
Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
break;
+ case OpCode::Id::EX2:
+ {
+ Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
+ Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+
+ // EX2 computes exp2 of the first source component only and writes the result to all enabled dest components
+ float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32()));
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i))
+ continue;
+
+ dest[i] = ex2_res;
+ }
+
+ Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
+ break;
+ }
+
+ case OpCode::Id::LG2:
+ {
+ Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
+ Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+
+ // LG2 computes log2 of the first source component only and writes the result to all enabled dest components
+ float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32()));
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i))
+ continue;
+
+ dest[i] = lg2_res;
+ }
+
+ Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
+ break;
+ }
+
default:
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 836942c6b..d3cfe109e 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -23,14 +23,14 @@ const JitFunction instr_table[64] = {
&JitCompiler::Compile_ADD, // add
&JitCompiler::Compile_DP3, // dp3
&JitCompiler::Compile_DP4, // dp4
- nullptr, // dph
+ &JitCompiler::Compile_DPH, // dph
nullptr, // unknown
- nullptr, // ex2
- nullptr, // lg2
+ &JitCompiler::Compile_EX2, // ex2
+ &JitCompiler::Compile_LG2, // lg2
nullptr, // unknown
&JitCompiler::Compile_MUL, // mul
- nullptr, // lge
- nullptr, // slt
+ &JitCompiler::Compile_SGE, // sge
+ &JitCompiler::Compile_SLT, // slt
&JitCompiler::Compile_FLR, // flr
&JitCompiler::Compile_MAX, // max
&JitCompiler::Compile_MIN, // min
@@ -44,10 +44,10 @@ const JitFunction instr_table[64] = {
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
- nullptr, // dphi
+ &JitCompiler::Compile_DPH, // dphi
nullptr, // unknown
- nullptr, // sgei
- &JitCompiler::Compile_SLTI, // slti
+ &JitCompiler::Compile_SGE, // sgei
+ &JitCompiler::Compile_SLT, // slti
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
@@ -115,6 +115,8 @@ static const X64Reg SRC1 = XMM1;
static const X64Reg SRC2 = XMM2;
/// Loaded with the third swizzled source register, otherwise can be used as a scratch register
static const X64Reg SRC3 = XMM3;
+/// Additional scratch register
+static const X64Reg SCRATCH2 = XMM4;
/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
static const X64Reg ONE = XMM14;
/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
@@ -227,8 +229,8 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
BLENDPS(SCRATCH, R(src), mask);
} else {
- MOVAPS(XMM4, R(src));
- UNPCKHPS(XMM4, R(SCRATCH)); // Unpack X/Y components of source and destination
+ MOVAPS(SCRATCH2, R(src));
+ UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination
UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
// Compute selector to selectively copy source components to destination for SHUFPS instruction
@@ -236,7 +238,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
((swiz.DestComponentEnabled(3) ? 2 : 3) << 6);
- SHUFPS(SCRATCH, R(XMM4), sel);
+ SHUFPS(SCRATCH, R(SCRATCH2), sel);
}
// Store dest back to memory
@@ -244,6 +246,19 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
}
}
+void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
+ MOVAPS(scratch, R(src1));
+ CMPPS(scratch, R(src2), CMP_ORD);
+
+ MULPS(src1, R(src2));
+
+ MOVAPS(src2, R(src1));
+ CMPPS(src2, R(src2), CMP_UNORD);
+
+ XORPS(scratch, R(src2));
+ ANDPS(src1, R(scratch));
+}
+
void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
// Note: NXOR is used below to check for equality
switch (instr.flow_control.op) {
@@ -280,6 +295,22 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) {
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
}
+void JitCompiler::Compile_PushCallerSavedXMM() {
+#ifndef _WIN32
+ SUB(64, R(RSP), Imm8(2 * 16));
+ MOVUPS(MDisp(RSP, 16), ONE);
+ MOVUPS(MDisp(RSP, 0), NEGBIT);
+#endif
+}
+
+void JitCompiler::Compile_PopCallerSavedXMM() {
+#ifndef _WIN32
+ MOVUPS(NEGBIT, MDisp(RSP, 0));
+ MOVUPS(ONE, MDisp(RSP, 16));
+ ADD(64, R(RSP), Imm8(2 * 16));
+#endif
+}
+
void JitCompiler::Compile_ADD(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
@@ -291,21 +322,17 @@ void JitCompiler::Compile_DP3(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- if (Common::GetCPUCaps().sse4_1) {
- DPPS(SRC1, R(SRC2), 0x7f);
- } else {
- MULPS(SRC1, R(SRC2));
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
- MOVAPS(SRC2, R(SRC1));
- SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1));
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1));
- MOVAPS(SRC3, R(SRC1));
- SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2));
+ MOVAPS(SRC3, R(SRC1));
+ SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2));
- SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0));
- ADDPS(SRC1, R(SRC2));
- ADDPS(SRC1, R(SRC3));
- }
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0));
+ ADDPS(SRC1, R(SRC2));
+ ADDPS(SRC1, R(SRC3));
Compile_DestEnable(instr, SRC1);
}
@@ -314,27 +341,117 @@ void JitCompiler::Compile_DP4(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> YXWZ
+ ADDPS(SRC1, R(SRC2));
+
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
+ ADDPS(SRC1, R(SRC2));
+
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_DPH(Instruction instr) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+
if (Common::GetCPUCaps().sse4_1) {
- DPPS(SRC1, R(SRC2), 0xff);
+ // Set 4th component to 1.0
+ BLENDPS(SRC1, R(ONE), 0x8); // 0b1000
} else {
- MULPS(SRC1, R(SRC2));
+ // Set 4th component to 1.0
+ MOVAPS(SCRATCH, R(SRC1));
+ UNPCKHPS(SCRATCH, R(ONE)); // XYZW, 1111 -> Z1__
+ UNPCKLPD(SRC1, R(SCRATCH)); // XYZW, Z1__ -> XYZ1
+ }
- MOVAPS(SRC2, R(SRC1));
- SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
- ADDPS(SRC1, R(SRC2));
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
- MOVAPS(SRC2, R(SRC1));
- SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
- ADDPS(SRC1, R(SRC2));
- }
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> YXWZ
+ ADDPS(SRC1, R(SRC2));
+
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
+ ADDPS(SRC1, R(SRC2));
+
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_EX2(Instruction instr) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ MOVSS(XMM0, R(SRC1));
+ // The following will actually break the stack alignment
+ ABI_PushAllCallerSavedRegsAndAdjustStack();
+ Compile_PushCallerSavedXMM();
+ ABI_CallFunction(reinterpret_cast<const void*>(exp2f));
+ Compile_PopCallerSavedXMM();
+ ABI_PopAllCallerSavedRegsAndAdjustStack();
+
+ SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
+ MOVAPS(SRC1, R(XMM0));
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_LG2(Instruction instr) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ MOVSS(XMM0, R(SRC1));
+
+ // The following will actually break the stack alignment
+ ABI_PushAllCallerSavedRegsAndAdjustStack();
+ Compile_PushCallerSavedXMM();
+ ABI_CallFunction(reinterpret_cast<const void*>(log2f));
+ Compile_PopCallerSavedXMM();
+ ABI_PopAllCallerSavedRegsAndAdjustStack();
+
+ SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
+ MOVAPS(SRC1, R(XMM0));
Compile_DestEnable(instr, SRC1);
}
void JitCompiler::Compile_MUL(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- MULPS(SRC1, R(SRC2));
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_SGE(Instruction instr) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+
+ CMPPS(SRC1, R(SRC2), CMP_NLT);
+ ANDPS(SRC1, R(ONE));
+
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_SLT(Instruction instr) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+
+ CMPPS(SRC1, R(SRC2), CMP_LT);
+ ANDPS(SRC1, R(ONE));
+
Compile_DestEnable(instr, SRC1);
}
@@ -354,6 +471,7 @@ void JitCompiler::Compile_FLR(Instruction instr) {
void JitCompiler::Compile_MAX(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
MAXPS(SRC1, R(SRC2));
Compile_DestEnable(instr, SRC1);
}
@@ -361,6 +479,7 @@ void JitCompiler::Compile_MAX(Instruction instr) {
void JitCompiler::Compile_MIN(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
MINPS(SRC1, R(SRC2));
Compile_DestEnable(instr, SRC1);
}
@@ -374,8 +493,8 @@ void JitCompiler::Compile_MOVA(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // Convert floats to integers (only care about X and Y components)
- CVTPS2DQ(SRC1, R(SRC1));
+ // Convert floats to integers using truncation (only care about X and Y components)
+ CVTTPS2DQ(SRC1, R(SRC1));
// Get result
MOVQ_xmm(R(RAX), SRC1);
@@ -415,22 +534,13 @@ void JitCompiler::Compile_MOV(Instruction instr) {
Compile_DestEnable(instr, SRC1);
}
-void JitCompiler::Compile_SLTI(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
- Compile_SwizzleSrc(instr, 1, instr.common.src2i, SRC2);
-
- CMPSS(SRC1, R(SRC2), CMP_LT);
- ANDPS(SRC1, R(ONE));
-
- Compile_DestEnable(instr, SRC1);
-}
-
void JitCompiler::Compile_RCP(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // TODO(bunnei): RCPPS is a pretty rough approximation, this might cause problems if Pica
+ // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
// performs this operation more accurately. This should be checked on hardware.
- RCPPS(SRC1, R(SRC1));
+ RCPSS(SRC1, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYZW -> XXXX
Compile_DestEnable(instr, SRC1);
}
@@ -438,9 +548,10 @@ void JitCompiler::Compile_RCP(Instruction instr) {
void JitCompiler::Compile_RSQ(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // TODO(bunnei): RSQRTPS is a pretty rough approximation, this might cause problems if Pica
+ // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
// performs this operation more accurately. This should be checked on hardware.
- RSQRTPS(SRC1, R(SRC1));
+ RSQRTSS(SRC1, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYZW -> XXXX
Compile_DestEnable(instr, SRC1);
}
@@ -475,27 +586,42 @@ void JitCompiler::Compile_CALLU(Instruction instr) {
}
void JitCompiler::Compile_CMP(Instruction instr) {
+ using Op = Instruction::Common::CompareOpType::Op;
+ Op op_x = instr.common.compare_op.x;
+ Op op_y = instr.common.compare_op.y;
+
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_NLE, CMP_NLT };
+ // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
+ // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
+ // because they don't match when used with NaNs.
+ static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE };
+
+ bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
+ Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1;
+ Gen::X64Reg rhs_x = invert_op_x ? SRC1 : SRC2;
- if (instr.common.compare_op.x == instr.common.compare_op.y) {
+ if (op_x == op_y) {
// Compare X-component and Y-component together
- CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.x]);
+ CMPPS(lhs_x, R(rhs_x), cmp[op_x]);
+ MOVQ_xmm(R(COND0), lhs_x);
- MOVQ_xmm(R(COND0), SRC1);
MOV(64, R(COND1), R(COND0));
} else {
+ bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual);
+ Gen::X64Reg lhs_y = invert_op_y ? SRC2 : SRC1;
+ Gen::X64Reg rhs_y = invert_op_y ? SRC1 : SRC2;
+
// Compare X-component
- MOVAPS(SCRATCH, R(SRC1));
- CMPSS(SCRATCH, R(SRC2), cmp[instr.common.compare_op.x]);
+ MOVAPS(SCRATCH, R(lhs_x));
+ CMPSS(SCRATCH, R(rhs_x), cmp[op_x]);
// Compare Y-component
- CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.y]);
+ CMPPS(lhs_y, R(rhs_y), cmp[op_y]);
MOVQ_xmm(R(COND0), SCRATCH);
- MOVQ_xmm(R(COND1), SRC1);
+ MOVQ_xmm(R(COND1), lhs_y);
}
SHR(32, R(COND0), Imm8(31));
@@ -513,12 +639,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3);
}
- if (Common::GetCPUCaps().fma) {
- VFMADD213PS(SRC1, SRC2, R(SRC3));
- } else {
- MULPS(SRC1, R(SRC2));
- ADDPS(SRC1, R(SRC3));
- }
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+ ADDPS(SRC1, R(SRC3));
Compile_DestEnable(instr, SRC1);
}
@@ -646,12 +768,12 @@ CompiledShader* JitCompiler::Compile() {
// Used to set a register to one
static const __m128 one = { 1.f, 1.f, 1.f, 1.f };
MOV(PTRBITS, R(RAX), ImmPtr(&one));
- MOVAPS(ONE, MDisp(RAX, 0));
+ MOVAPS(ONE, MatR(RAX));
// Used to negate registers
static const __m128 neg = { -0.f, -0.f, -0.f, -0.f };
MOV(PTRBITS, R(RAX), ImmPtr(&neg));
- MOVAPS(NEGBIT, MDisp(RAX, 0));
+ MOVAPS(NEGBIT, MatR(RAX));
looping = false;
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index b88f2a0d2..58828ecc8 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -37,7 +37,12 @@ public:
void Compile_ADD(Instruction instr);
void Compile_DP3(Instruction instr);
void Compile_DP4(Instruction instr);
+ void Compile_DPH(Instruction instr);
+ void Compile_EX2(Instruction instr);
+ void Compile_LG2(Instruction instr);
void Compile_MUL(Instruction instr);
+ void Compile_SGE(Instruction instr);
+ void Compile_SLT(Instruction instr);
void Compile_FLR(Instruction instr);
void Compile_MAX(Instruction instr);
void Compile_MIN(Instruction instr);
@@ -45,7 +50,6 @@ public:
void Compile_RSQ(Instruction instr);
void Compile_MOVA(Instruction instr);
void Compile_MOV(Instruction instr);
- void Compile_SLTI(Instruction instr);
void Compile_NOP(Instruction instr);
void Compile_END(Instruction instr);
void Compile_CALL(Instruction instr);
@@ -64,9 +68,18 @@ private:
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
+ /**
+ * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying
+ * zero by inf. Clobbers `src2` and `scratch`.
+ */
+ void Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch);
+
void Compile_EvaluateCondition(Instruction instr);
void Compile_UniformCondition(Instruction instr);
+ void Compile_PushCallerSavedXMM();
+ void Compile_PopCallerSavedXMM();
+
/// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
unsigned* offset_ptr = nullptr;