diff options
Diffstat (limited to '')
-rw-r--r-- | external/optick/optick_gpu.d3d12.cpp | 382 |
1 files changed, 382 insertions, 0 deletions
diff --git a/external/optick/optick_gpu.d3d12.cpp b/external/optick/optick_gpu.d3d12.cpp new file mode 100644 index 0000000..1ee4dd9 --- /dev/null +++ b/external/optick/optick_gpu.d3d12.cpp @@ -0,0 +1,382 @@ +#include "optick.config.h" +#if USE_OPTICK +#if OPTICK_ENABLE_GPU_D3D12 + +#include "optick_common.h" +#include "optick_memory.h" +#include "optick_core.h" +#include "optick_gpu.h" + +#include <atomic> +#include <thread> + +#include <d3d12.h> +#include <dxgi.h> +#include <dxgi1_4.h> + + +#define OPTICK_CHECK(args) do { HRESULT __hr = args; (void)__hr; OPTICK_ASSERT(__hr == S_OK, "Failed check"); } while(false); + +namespace Optick +{ + class GPUProfilerD3D12 : public GPUProfiler + { + struct Frame + { + ID3D12CommandAllocator* commandAllocator; + ID3D12GraphicsCommandList* commandList; + + Frame() : commandAllocator(nullptr), commandList(nullptr) + { + Reset(); + } + + void Reset() + { + } + + void Shutdown(); + + ~Frame() + { + Shutdown(); + } + }; + + struct NodePayload + { + ID3D12CommandQueue* commandQueue; + ID3D12QueryHeap* queryHeap; + ID3D12Fence* syncFence; + array<Frame, NUM_FRAMES_DELAY> frames; + + NodePayload() : commandQueue(nullptr), queryHeap(nullptr), syncFence(nullptr) {} + ~NodePayload(); + }; + vector<NodePayload*> nodePayloads; + + ID3D12Resource* queryBuffer; + ID3D12Device* device; + + // VSync Stats + DXGI_FRAME_STATISTICS prevFrameStatistics; + + //void UpdateRange(uint32_t start, uint32_t finish) + void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue); + + void ResolveTimestamps(uint32_t startIndex, uint32_t count); + + void WaitForFrame(uint64_t frameNumber); + + public: + GPUProfilerD3D12(); + ~GPUProfilerD3D12(); + + void InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues); + + void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp); + + void Flip(IDXGISwapChain* swapChain); + + + // Interface implementation + ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override; + + void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override + { + QueryTimestamp((ID3D12GraphicsCommandList*)context, outCpuTimestamp); + } + + void Flip(void* swapChain) override + { + Flip(static_cast<IDXGISwapChain*>(swapChain)); + } + }; + + template <class T> void SafeRelease(T **ppT) + { + if (*ppT) + { + (*ppT)->Release(); + *ppT = NULL; + } + } + + void InitGpuD3D12(void* device, void** cmdQueues, uint32_t numQueues) + { + GPUProfilerD3D12* gpuProfiler = Memory::New<GPUProfilerD3D12>(); + gpuProfiler->InitDevice((ID3D12Device*)device, (ID3D12CommandQueue**)cmdQueues, numQueues); + Core::Get().InitGPUProfiler(gpuProfiler); + } + + GPUProfilerD3D12::GPUProfilerD3D12() : queryBuffer(nullptr), device(nullptr) + { + prevFrameStatistics = { 0 }; + } + + GPUProfilerD3D12::~GPUProfilerD3D12() + { + WaitForFrame(frameNumber - 1); + + for (NodePayload* payload : nodePayloads) + Memory::Delete(payload); + nodePayloads.clear(); + + for (Node* node : nodes) + Memory::Delete(node); + nodes.clear(); + + SafeRelease(&queryBuffer); + } + + void GPUProfilerD3D12::InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues) + { + device = pDevice; + + uint32_t nodeCount = numCommandQueues; // device->GetNodeCount(); + + nodes.resize(nodeCount); + nodePayloads.resize(nodeCount); + + D3D12_HEAP_PROPERTIES heapDesc; + heapDesc.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapDesc.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapDesc.CreationNodeMask = 0; + heapDesc.VisibleNodeMask = (1u << nodeCount) - 1u; + heapDesc.Type = D3D12_HEAP_TYPE_READBACK; + + D3D12_RESOURCE_DESC resourceDesc; + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resourceDesc.Alignment = 0; + resourceDesc.Width = MAX_QUERIES_COUNT * sizeof(int64_t); + resourceDesc.Height = 1; + resourceDesc.DepthOrArraySize = 1; + resourceDesc.MipLevels = 1; + resourceDesc.Format = DXGI_FORMAT_UNKNOWN; + resourceDesc.SampleDesc.Count = 1; + resourceDesc.SampleDesc.Quality = 0; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + OPTICK_CHECK(device->CreateCommittedResource( + &heapDesc, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&queryBuffer))); + + // Get Device Name + LUID adapterLUID = pDevice->GetAdapterLuid(); + + IDXGIFactory4* factory; + OPTICK_CHECK(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory))); + + IDXGIAdapter1* adapter; + factory->EnumAdapterByLuid(adapterLUID, IID_PPV_ARGS(&adapter)); + + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + adapter->Release(); + factory->Release(); + + char deviceName[128] = { 0 }; + wcstombs_s(deviceName, desc.Description, OPTICK_ARRAY_SIZE(deviceName) - 1); + + for (uint32_t nodeIndex = 0; nodeIndex < nodeCount; ++nodeIndex) + InitNodeInternal(deviceName, nodeIndex, pCommandQueues[nodeIndex]); + } + + void GPUProfilerD3D12::InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue) + { + GPUProfiler::InitNode(nodeName, nodeIndex); + + NodePayload* node = Memory::New<NodePayload>(); + nodePayloads[nodeIndex] = node; + node->commandQueue = pCmdQueue; + + D3D12_QUERY_HEAP_DESC queryHeapDesc; + queryHeapDesc.Count = MAX_QUERIES_COUNT; + queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + queryHeapDesc.NodeMask = 1u << nodeIndex; + OPTICK_CHECK(device->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(&node->queryHeap))); + + OPTICK_CHECK(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&node->syncFence))); + + for (Frame& frame : node->frames) + { + OPTICK_CHECK(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&frame.commandAllocator))); + OPTICK_CHECK(device->CreateCommandList(1u << nodeIndex, D3D12_COMMAND_LIST_TYPE_DIRECT, frame.commandAllocator, nullptr, IID_PPV_ARGS(&frame.commandList))); + OPTICK_CHECK(frame.commandList->Close()); + } + } + + void GPUProfilerD3D12::QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp) + { + if (currentState == STATE_RUNNING) + { + uint32_t index = nodes[currentNode]->QueryTimestamp(outCpuTimestamp); + context->EndQuery(nodePayloads[currentNode]->queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, index); + } + } + + void GPUProfilerD3D12::ResolveTimestamps(uint32_t startIndex, uint32_t count) + { + if (count) + { + Node* node = nodes[currentNode]; + + D3D12_RANGE range = { sizeof(uint64_t)*startIndex, sizeof(uint64_t)*(startIndex + count) }; + void* pData = nullptr; + queryBuffer->Map(0, &range, &pData); + memcpy(&node->queryGpuTimestamps[startIndex], (uint64_t*)pData + startIndex, sizeof(uint64_t) * count); + queryBuffer->Unmap(0, 0); + + // Convert GPU timestamps => CPU Timestamps + for (uint32_t index = startIndex; index < startIndex + count; ++index) + *node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]); + } + } + + void GPUProfilerD3D12::WaitForFrame(uint64_t frameNumberToWait) + { + OPTICK_EVENT(); + + NodePayload* payload = nodePayloads[currentNode]; + while (frameNumberToWait > payload->syncFence->GetCompletedValue()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + + void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain) + { + OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug); + + std::lock_guard<std::recursive_mutex> lock(updateLock); + + if (currentState == STATE_STARTING) + currentState = STATE_RUNNING; + + if (currentState == STATE_RUNNING) + { + Node& node = *nodes[currentNode]; + NodePayload& payload = *nodePayloads[currentNode]; + + uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY; + uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY; + + //Frame& currentFrame = frames[frameNumber % NUM_FRAMES_DELAY]; + //Frame& nextFrame = frames[(frameNumber + 1) % NUM_FRAMES_DELAY]; + + QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex]; + QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex]; + + ID3D12GraphicsCommandList* commandList = payload.frames[currentFrameIndex].commandList; + ID3D12CommandAllocator* commandAllocator = payload.frames[currentFrameIndex].commandAllocator; + commandAllocator->Reset(); + commandList->Reset(commandAllocator, nullptr); + + if (EventData* frameEvent = currentFrame.frameEvent) + QueryTimestamp(commandList, &frameEvent->finish); + + // Generate GPU Frame event for the next frame + EventData& event = AddFrameEvent(); + QueryTimestamp(commandList, &event.start); + QueryTimestamp(commandList, &AddFrameTag().timestamp); + nextFrame.frameEvent = &event; + + uint32_t queryBegin = currentFrame.queryIndexStart; + uint32_t queryEnd = node.queryIndex; + + if (queryBegin != (uint32_t)-1) + { + OPTICK_ASSERT(queryEnd - queryBegin <= MAX_QUERIES_COUNT, "Too many queries in one frame? Increase GPUProfiler::MAX_QUERIES_COUNT to fix the problem!"); + currentFrame.queryIndexCount = queryEnd - queryBegin; + + uint32_t startIndex = queryBegin % MAX_QUERIES_COUNT; + uint32_t finishIndex = queryEnd % MAX_QUERIES_COUNT; + + if (startIndex < finishIndex) + { + commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, queryEnd - queryBegin, queryBuffer, startIndex * sizeof(int64_t)); + } + else + { + commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, MAX_QUERIES_COUNT - startIndex, queryBuffer, startIndex * sizeof(int64_t)); + commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, finishIndex, queryBuffer, 0); + } + } + + commandList->Close(); + + payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList); + payload.commandQueue->Signal(payload.syncFence, frameNumber); + + // Preparing Next Frame + // Try resolve timestamps for the current frame + if (frameNumber >= NUM_FRAMES_DELAY && nextFrame.queryIndexCount) + { + WaitForFrame(frameNumber + 1 - NUM_FRAMES_DELAY); + + uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT; + uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount; + ResolveTimestamps(resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart); + if (resolveFinish > MAX_QUERIES_COUNT) + ResolveTimestamps(0, resolveFinish - MAX_QUERIES_COUNT); + } + + nextFrame.queryIndexStart = queryEnd; + nextFrame.queryIndexCount = 0; + + // Process VSync + DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 }; + HRESULT result = swapChain->GetFrameStatistics(¤tFrameStatistics); + if ((result == S_OK) && (prevFrameStatistics.PresentCount + 1 == currentFrameStatistics.PresentCount)) + { + EventData& data = AddVSyncEvent(); + data.start = prevFrameStatistics.SyncQPCTime.QuadPart; + data.finish = currentFrameStatistics.SyncQPCTime.QuadPart; + } + prevFrameStatistics = currentFrameStatistics; + } + + ++frameNumber; + } + + GPUProfiler::ClockSynchronization GPUProfilerD3D12::GetClockSynchronization(uint32_t nodeIndex) + { + ClockSynchronization clock; + clock.frequencyCPU = GetHighPrecisionFrequency(); + nodePayloads[nodeIndex]->commandQueue->GetTimestampFrequency((uint64_t*)&clock.frequencyGPU); + nodePayloads[nodeIndex]->commandQueue->GetClockCalibration((uint64_t*)&clock.timestampGPU, (uint64_t*)&clock.timestampCPU); + return clock; + } + + GPUProfilerD3D12::NodePayload::~NodePayload() + { + SafeRelease(&queryHeap); + SafeRelease(&syncFence); + } + + void GPUProfilerD3D12::Frame::Shutdown() + { + SafeRelease(&commandAllocator); + SafeRelease(&commandList); + } +} + +#else +#include "optick_common.h" + +namespace Optick +{ + void InitGpuD3D12(void* /*device*/, void** /*cmdQueues*/, uint32_t /*numQueues*/) + { + OPTICK_FAILED("OPTICK_ENABLE_GPU_D3D12 is disabled! Can't initialize GPU Profiler!"); + } +} + +#endif //OPTICK_ENABLE_GPU_D3D12 +#endif //USE_OPTICK
\ No newline at end of file |