Merge remote-tracking branch 'tracy/master' into manual-lifetime

# Conflicts:
#	AUTHORS
This commit is contained in:
Andrey Voroshilov 2020-07-13 01:49:11 -07:00
commit cbfb19816b
33 changed files with 1551 additions and 200 deletions

View File

@ -15,11 +15,13 @@ build_script:
- cmd: msbuild .\update\build\win32\update.vcxproj
- cmd: msbuild .\profiler\build\win32\Tracy.vcxproj
- cmd: msbuild .\capture\build\win32\capture.vcxproj
- cmd: msbuild .\csvexport\build\win32\csvexport.vcxproj
- cmd: msbuild .\library\win32\TracyProfiler.vcxproj /property:Configuration=Release
- sh: sudo apt-get update && sudo apt-get -y install libglfw3-dev libgtk2.0-dev libcapstone-dev
- sh: make -C update/build/unix debug release
- sh: make -C profiler/build/unix debug release
- sh: make -C capture/build/unix debug release
- sh: make -C csvexport/build/unix debug release
- sh: make -C library/unix debug release
- sh: make -C test
- sh: make -C test clean

View File

@ -29,6 +29,8 @@ jobs:
run: make -j -C update/build/unix debug release
- name: Capture utility
run: make -j -C capture/build/unix debug release
- name: Csvexport utility
run: make -j -C csvexport/build/unix debug release
- name: Import-chrome utility
run: make -j -C import-chrome/build/unix debug release
- name: Library

View File

@ -30,6 +30,10 @@ jobs:
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Debug /property:Platform=x64
- name: Capture utility Release
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Release /property:Platform=x64
- name: Csvexport utility Debug
run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Debug /property:Platform=x64
- name: Csvexport utility Release
run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Release /property:Platform=x64
- name: Import-chrome utility Debug
run: msbuild .\import-chrome\build\win32\import-chrome.vcxproj /property:Configuration=Debug /property:Platform=x64
- name: Import-chrome utility Release

View File

@ -10,4 +10,5 @@ Michał Cichoń <michcic@gmail.com> (OSX call stack decoding b
Thales Sabino <thales@codeplay.com> (OpenCL support)
Andrew Depke <andrewdepke@gmail.com> (Direct3D 12 support)
Simonas Kazlauskas <git@kazlauskas.me> (OSX CI, external bindings)
Jakub Žádník <kubouch@gmail.com> (csvexport utility)
Andrey Voroshilov <andrew.voroshilov@gmail.com> (multi-DLL fixes)

4
NEWS
View File

@ -19,6 +19,10 @@ v0.7.1 (xxxx-xx-xx)
- Fixed attachment of postponed frame images.
- Source location data can be now copied to clipboard from zone info window.
- Zones in find zones menu can be now grouped by zone name.
- Vulkan and D3D12 GPU contexts can be now calibrated.
- Added CSV export utility.
- "Go to frame" popup no longer has a dedicated button. To show it, click on
the frame counter.
v0.7 (2020-06-11)
-----------------

View File

@ -50,8 +50,8 @@ namespace tracy
bool m_initialized = false;
ID3D12Device* m_device;
ID3D12CommandQueue* m_queue;
ID3D12Device* m_device = nullptr;
ID3D12CommandQueue* m_queue = nullptr;
uint8_t m_context;
Microsoft::WRL::ComPtr<ID3D12QueryHeap> m_queryHeap;
Microsoft::WRL::ComPtr<ID3D12Resource> m_readbackBuffer;
@ -65,6 +65,9 @@ namespace tracy
Microsoft::WRL::ComPtr<ID3D12Fence> m_payloadFence;
std::queue<D3D12QueryPayload> m_payloadQueue;
int64_t m_prevCalibration = 0;
int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() };
public:
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
: m_device(device)
@ -98,6 +101,9 @@ namespace tracy
assert(false && "Failed to get queue clock calibration.");
}
// Save the device cpu timestamp, not the profiler's timestamp.
m_prevCalibration = cpuTimestamp * m_qpcToNs;
cpuTimestamp = Profiler::GetTime();
D3D12_QUERY_HEAP_DESC heapDesc{};
@ -150,7 +156,7 @@ namespace tracy
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
MemWrite(&item->gpuNewContext.context, m_context);
MemWrite(&item->gpuNewContext.accuracyBits, uint8_t{ 0 });
MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
#ifdef TRACY_ON_DEMAND
@ -233,6 +239,34 @@ namespace tracy
}
m_readbackBuffer->Unmap(0, nullptr);
// Recalibrate to account for drift.
uint64_t cpuTimestamp;
uint64_t gpuTimestamp;
if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
{
assert(false && "Failed to get queue clock calibration.");
}
cpuTimestamp *= m_qpcToNs;
const auto cpuDelta = cpuTimestamp - m_prevCalibration;
if (cpuDelta > 0)
{
m_prevCalibration = cpuTimestamp;
cpuTimestamp = Profiler::GetTime();
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuCalibration);
MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta);
MemWrite(&item->gpuCalibration.context, m_context);
Profiler::QueueSerialFinish();
}
}
private:

View File

@ -72,7 +72,7 @@ namespace tracy {
MemWrite(&item->gpuNewContext.period, 1.0f);
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
MemWrite(&item->gpuNewContext.accuracyBits, (uint8_t)0);
MemWrite(&item->gpuNewContext.flags, (uint8_t)0);
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem(*item);
#endif

View File

@ -110,7 +110,7 @@ public:
MemWrite( &item->gpuNewContext.thread, thread );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits );
MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
#ifdef TRACY_ON_DEMAND

View File

@ -4,6 +4,7 @@
#if !defined TRACY_ENABLE
#define TracyVkContext(x,y,z,w) nullptr
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
#define TracyVkDestroy(x)
#define TracyVkNamedZone(c,x,y,z,w)
#define TracyVkNamedZoneC(c,x,y,z,w,a)
@ -42,16 +43,36 @@ class VkCtx
enum { QueryCount = 64 * 1024 };
public:
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT )
: m_device( device )
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 )
, m_tail( 0 )
, m_oldCnt( 0 )
, m_queryCount( QueryCount )
, m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT )
{
assert( m_context != 255 );
if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT )
{
uint32_t num;
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr );
if( num > 4 ) num = 4;
VkTimeDomainEXT data[4];
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data );
for( uint32_t i=0; i<num; i++ )
{
// TODO VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT
if( data[i] == VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT )
{
m_timeDomain = data[i];
break;
}
}
}
VkPhysicalDeviceProperties prop;
vkGetPhysicalDeviceProperties( physdev, &prop );
const float period = prop.limits.timestampPeriod;
@ -81,21 +102,56 @@ public:
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
int64_t tcpu, tgpu;
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
{
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
int64_t tcpu = Profiler::GetTime();
int64_t tgpu;
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
tcpu = Profiler::GetTime();
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
}
else
{
enum { NumProbes = 32 };
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation[NumProbes];
for( int i=0; i<NumProbes; i++ )
{
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
}
uint64_t minDeviation = deviation[0];
for( int i=1; i<NumProbes; i++ )
{
if( minDeviation > deviation[i] )
{
minDeviation = deviation[i];
}
}
m_deviation = minDeviation * 3 / 2;
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
Calibrate( device, m_prevCalibration, tgpu );
tcpu = Profiler::GetTime();
}
uint8_t flags = 0;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
@ -104,7 +160,7 @@ public:
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) );
MemWrite( &item->gpuNewContext.flags, flags );
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
#ifdef TRACY_ON_DEMAND
@ -132,6 +188,8 @@ public:
{
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
m_head = m_tail = 0;
int64_t tgpu;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
return;
}
#endif
@ -163,6 +221,25 @@ public:
Profiler::QueueSerialFinish();
}
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
{
int64_t tgpu, tcpu;
Calibrate( m_device, tcpu, tgpu );
const auto refCpu = Profiler::GetTime();
const auto delta = tcpu - m_prevCalibration;
if( delta > 0 )
{
m_prevCalibration = tcpu;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
MemWrite( &item->gpuCalibration.cpuDelta, delta );
MemWrite( &item->gpuCalibration.context, m_context );
Profiler::QueueSerialFinish();
}
}
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
m_tail += cnt;
@ -183,8 +260,35 @@ private:
return m_context;
}
tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu )
{
assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation;
do
{
m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation );
}
while( deviation > m_deviation );
#if defined _WIN32 || defined __CYGWIN__
tGpu = ts[0];
tCpu = ts[1] * m_qpcToNs;
#else
assert( false );
#endif
}
VkDevice m_device;
VkQueryPool m_query;
VkTimeDomainEXT m_timeDomain;
uint64_t m_deviation;
int64_t m_qpcToNs;
int64_t m_prevCalibration;
uint8_t m_context;
unsigned int m_head;
@ -193,6 +297,8 @@ private:
unsigned int m_queryCount;
int64_t* m_res;
PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT;
};
class VkCtxScope
@ -271,11 +377,11 @@ private:
VkCtx* m_ctx;
};
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
{
InitRPMallocThread();
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
new(ctx) VkCtx( physdev, device, queue, cmdbuf );
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
return ctx;
}
@ -289,7 +395,8 @@ static inline void DestroyVkContext( VkCtx* ctx )
using TracyVkCtx = tracy::VkCtx*;
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf );
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );

View File

@ -138,6 +138,7 @@
<ClCompile Include="..\..\..\common\TracySystem.cpp" />
<ClCompile Include="..\..\..\common\tracy_lz4.cpp" />
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp" />
<ClCompile Include="..\..\..\getopt\getopt.c" />
<ClCompile Include="..\..\..\server\TracyMemory.cpp" />
<ClCompile Include="..\..\..\server\TracyMmap.cpp" />
<ClCompile Include="..\..\..\server\TracyPrint.cpp" />
@ -171,7 +172,6 @@
<ClCompile Include="..\..\..\zstd\zstd_ldm.c" />
<ClCompile Include="..\..\..\zstd\zstd_opt.c" />
<ClCompile Include="..\..\src\capture.cpp" />
<ClCompile Include="..\..\src\getopt.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
@ -184,6 +184,7 @@
<ClInclude Include="..\..\..\common\TracySystem.hpp" />
<ClInclude Include="..\..\..\common\tracy_lz4.hpp" />
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp" />
<ClInclude Include="..\..\..\getopt\getopt.h" />
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp" />
<ClInclude Include="..\..\..\server\TracyEvent.hpp" />
<ClInclude Include="..\..\..\server\TracyFileRead.hpp" />
@ -227,7 +228,6 @@
<ClInclude Include="..\..\..\zstd\zstd_lazy.h" />
<ClInclude Include="..\..\..\zstd\zstd_ldm.h" />
<ClInclude Include="..\..\..\zstd\zstd_opt.h" />
<ClInclude Include="..\..\src\getopt.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">

View File

@ -13,6 +13,9 @@
<Filter Include="zstd">
<UniqueIdentifier>{043ecb94-f240-4986-94b0-bc5bbd415a82}</UniqueIdentifier>
</Filter>
<Filter Include="getopt">
<UniqueIdentifier>{ee9737d2-69c7-44da-b9c7-539d18f9d4b4}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
@ -33,9 +36,6 @@
<ClCompile Include="..\..\src\capture.cpp">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="..\..\src\getopt.c">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp">
<Filter>common</Filter>
</ClCompile>
@ -129,6 +129,9 @@
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\getopt\getopt.c">
<Filter>getopt</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
@ -179,9 +182,6 @@
<ClInclude Include="..\..\..\server\TracyWorker.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\src\getopt.h">
<Filter>src</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyAlign.hpp">
<Filter>common</Filter>
</ClInclude>
@ -293,5 +293,8 @@
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\getopt\getopt.h">
<Filter>getopt</Filter>
</ClInclude>
</ItemGroup>
</Project>

View File

@ -15,7 +15,7 @@
#include "../../server/TracyMemory.hpp"
#include "../../server/TracyPrint.hpp"
#include "../../server/TracyWorker.hpp"
#include "getopt.h"
#include "../../getopt/getopt.h"
bool disconnect = false;

View File

@ -873,6 +873,17 @@ static Thread* s_sysTraceThread = nullptr;
TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; }
TRACY_API int64_t GetFrequencyQpc()
{
#if defined _WIN32 || defined __CYGWIN__
LARGE_INTEGER t;
QueryPerformanceFrequency( &t );
return t.QuadPart;
#else
return 0;
#endif
}
#ifdef TRACY_DELAYED_INIT
struct ThreadNameData;
TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue();

View File

@ -64,6 +64,7 @@ TRACY_API GpuCtxWrapper& GetGpuCtx();
TRACY_API uint64_t GetThreadHandle();
TRACY_API void InitRPMallocThread();
TRACY_API bool ProfilerAvailable();
TRACY_API int64_t GetFrequencyQpc();
struct SourceLocationData
{

View File

@ -9,7 +9,7 @@ namespace tracy
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
enum : uint32_t { ProtocolVersion = 36 };
enum : uint32_t { ProtocolVersion = 37 };
enum : uint32_t { BroadcastVersion = 1 };
using lz4sz_t = uint32_t;

View File

@ -56,6 +56,7 @@ enum class QueueType : uint8_t
Terminate,
KeepAlive,
ThreadContext,
GpuCalibration,
Crash,
CrashReport,
ZoneValidation,
@ -268,6 +269,11 @@ enum class GpuContextType : uint8_t
Direct3D12
};
enum GpuContextFlags : uint8_t
{
GpuContextCalibration = 1 << 0
};
struct QueueGpuNewContext
{
int64_t cpuTime;
@ -275,7 +281,7 @@ struct QueueGpuNewContext
uint64_t thread;
float period;
uint8_t context;
uint8_t accuracyBits;
GpuContextFlags flags;
GpuContextType type;
};
@ -303,6 +309,14 @@ struct QueueGpuTime
uint8_t context;
};
struct QueueGpuCalibration
{
int64_t gpuTime;
int64_t cpuTime;
int64_t cpuDelta;
uint8_t context;
};
struct QueueMemAlloc
{
int64_t time;
@ -477,6 +491,7 @@ struct QueueItem
QueueGpuZoneBegin gpuZoneBegin;
QueueGpuZoneEnd gpuZoneEnd;
QueueGpuTime gpuTime;
QueueGpuCalibration gpuCalibration;
QueueMemAlloc memAlloc;
QueueMemFree memFree;
QueueCallstackMemory callstackMemory;
@ -553,6 +568,7 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ), // terminate
sizeof( QueueHeader ), // keep alive
sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
sizeof( QueueHeader ), // crash
sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),

View File

@ -0,0 +1,12 @@
all: debug
debug:
@+make -f debug.mk all
release:
@+make -f release.mk all
clean:
@+make -f build.mk clean
.PHONY: all clean debug release

View File

@ -0,0 +1,60 @@
CFLAGS +=
CXXFLAGS := $(CFLAGS) -std=gnu++17
# DEFINES += -DTRACY_NO_STATISTICS
INCLUDES := $(shell pkg-config --cflags capstone)
LIBS := $(shell pkg-config --libs capstone) -lpthread
PROJECT := csvexport
IMAGE := $(PROJECT)-$(BUILD)
FILTER :=
BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
SRC := $(filter-out $(FILTER),$(BASE))
SRC2 := $(filter-out $(FILTER),$(BASE2))
TBB := $(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?)
ifeq ($(TBB),0)
LIBS += -ltbb
endif
OBJDIRBASE := obj/$(BUILD)
OBJDIR := $(OBJDIRBASE)/o/o/o
OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o))
OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o))
all: $(IMAGE)
$(OBJDIR)/%.o: %.cpp
$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
$(OBJDIR)/%.d : %.cpp
@echo Resolving dependencies of $<
@mkdir -p $(@D)
@$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
rm -f $@.$$$$
$(OBJDIR)/%.o: %.c
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(OBJDIR)/%.d : %.c
@echo Resolving dependencies of $<
@mkdir -p $(@D)
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
rm -f $@.$$$$
$(IMAGE): $(OBJ) $(OBJ2)
$(CXX) $(CXXFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(LIBS) -o $@
ifneq "$(MAKECMDGOALS)" "clean"
-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) $(addprefix $(OBJDIR)/,$(SRC2:.c=.d))
endif
clean:
rm -rf $(OBJDIRBASE) $(IMAGE)*
.PHONY: clean all

View File

@ -0,0 +1,11 @@
ARCH := $(shell uname -m)
CFLAGS := -g3 -Wall
DEFINES := -DDEBUG
BUILD := debug
ifeq ($(ARCH),x86_64)
CFLAGS += -msse4.1
endif
include build.mk

View File

@ -0,0 +1,7 @@
ARCH := $(shell uname -m)
CFLAGS := -O3 -s -march=native
DEFINES := -DNDEBUG
BUILD := release
include build.mk

View File

@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30225.117
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "csvexport", "csvexport.vcxproj", "{447D58BF-94CD-4469-BB90-549C05D03E00}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.ActiveCfg = Debug|x64
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.Build.0 = Debug|x64
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x86.ActiveCfg = Debug|Win32
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x86.Build.0 = Debug|Win32
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.ActiveCfg = Release|x64
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.Build.0 = Release|x64
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x86.ActiveCfg = Release|Win32
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {3E51386C-43EA-44AC-9F24-AFAFE4D63ADE}
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,235 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{447D58BF-94CD-4469-BB90-549C05D03E00}</ProjectGuid>
<RootNamespace>capture</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
<VcpkgTriplet>x64-windows-static</VcpkgTriplet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<PreprocessorDefinitions>_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<LanguageStandard>stdcpplatest</LanguageStandard>
<AdditionalIncludeDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\include</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
<AdditionalLibraryDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\debug\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<PreprocessorDefinitions>NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<LanguageStandard>stdcpplatest</LanguageStandard>
<AdditionalIncludeDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\include</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
<AdditionalLibraryDirectories>..\..\..\vcpkg\vcpkg\installed\x64-windows-static\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\..\..\common\TracySocket.cpp" />
<ClCompile Include="..\..\..\common\TracySystem.cpp" />
<ClCompile Include="..\..\..\common\tracy_lz4.cpp" />
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp" />
<ClCompile Include="..\..\..\getopt\getopt.c" />
<ClCompile Include="..\..\..\server\TracyMemory.cpp" />
<ClCompile Include="..\..\..\server\TracyMmap.cpp" />
<ClCompile Include="..\..\..\server\TracyPrint.cpp" />
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp" />
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp" />
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp" />
<ClCompile Include="..\..\..\server\TracyWorker.cpp" />
<ClCompile Include="..\..\..\zstd\debug.c" />
<ClCompile Include="..\..\..\zstd\entropy_common.c" />
<ClCompile Include="..\..\..\zstd\error_private.c" />
<ClCompile Include="..\..\..\zstd\fse_compress.c" />
<ClCompile Include="..\..\..\zstd\fse_decompress.c" />
<ClCompile Include="..\..\..\zstd\hist.c" />
<ClCompile Include="..\..\..\zstd\huf_compress.c" />
<ClCompile Include="..\..\..\zstd\huf_decompress.c" />
<ClCompile Include="..\..\..\zstd\pool.c" />
<ClCompile Include="..\..\..\zstd\threading.c" />
<ClCompile Include="..\..\..\zstd\xxhash.c" />
<ClCompile Include="..\..\..\zstd\zstdmt_compress.c" />
<ClCompile Include="..\..\..\zstd\zstd_common.c" />
<ClCompile Include="..\..\..\zstd\zstd_compress.c" />
<ClCompile Include="..\..\..\zstd\zstd_compress_literals.c" />
<ClCompile Include="..\..\..\zstd\zstd_compress_sequences.c" />
<ClCompile Include="..\..\..\zstd\zstd_compress_superblock.c" />
<ClCompile Include="..\..\..\zstd\zstd_ddict.c" />
<ClCompile Include="..\..\..\zstd\zstd_decompress.c" />
<ClCompile Include="..\..\..\zstd\zstd_decompress_block.c" />
<ClCompile Include="..\..\..\zstd\zstd_double_fast.c" />
<ClCompile Include="..\..\..\zstd\zstd_fast.c" />
<ClCompile Include="..\..\..\zstd\zstd_lazy.c" />
<ClCompile Include="..\..\..\zstd\zstd_ldm.c" />
<ClCompile Include="..\..\..\zstd\zstd_opt.c" />
<ClCompile Include="..\..\src\csvexport.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
<ClInclude Include="..\..\..\common\TracyAlloc.hpp" />
<ClInclude Include="..\..\..\common\TracyColor.hpp" />
<ClInclude Include="..\..\..\common\TracyForceInline.hpp" />
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
<ClInclude Include="..\..\..\common\TracySystem.hpp" />
<ClInclude Include="..\..\..\common\tracy_lz4.hpp" />
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp" />
<ClInclude Include="..\..\..\getopt\getopt.h" />
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp" />
<ClInclude Include="..\..\..\server\TracyEvent.hpp" />
<ClInclude Include="..\..\..\server\TracyFileRead.hpp" />
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp" />
<ClInclude Include="..\..\..\server\TracyMemory.hpp" />
<ClInclude Include="..\..\..\server\TracyMmap.hpp" />
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp" />
<ClInclude Include="..\..\..\server\TracyPrint.hpp" />
<ClInclude Include="..\..\..\server\TracySlab.hpp" />
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp" />
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp" />
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp" />
<ClInclude Include="..\..\..\server\TracyVector.hpp" />
<ClInclude Include="..\..\..\server\TracyWorker.hpp" />
<ClInclude Include="..\..\..\zstd\bitstream.h" />
<ClInclude Include="..\..\..\zstd\compiler.h" />
<ClInclude Include="..\..\..\zstd\cpu.h" />
<ClInclude Include="..\..\..\zstd\debug.h" />
<ClInclude Include="..\..\..\zstd\error_private.h" />
<ClInclude Include="..\..\..\zstd\fse.h" />
<ClInclude Include="..\..\..\zstd\hist.h" />
<ClInclude Include="..\..\..\zstd\huf.h" />
<ClInclude Include="..\..\..\zstd\mem.h" />
<ClInclude Include="..\..\..\zstd\pool.h" />
<ClInclude Include="..\..\..\zstd\threading.h" />
<ClInclude Include="..\..\..\zstd\xxhash.h" />
<ClInclude Include="..\..\..\zstd\zstd.h" />
<ClInclude Include="..\..\..\zstd\zstdmt_compress.h" />
<ClInclude Include="..\..\..\zstd\zstd_compress_internal.h" />
<ClInclude Include="..\..\..\zstd\zstd_compress_literals.h" />
<ClInclude Include="..\..\..\zstd\zstd_compress_sequences.h" />
<ClInclude Include="..\..\..\zstd\zstd_compress_superblock.h" />
<ClInclude Include="..\..\..\zstd\zstd_cwksp.h" />
<ClInclude Include="..\..\..\zstd\zstd_ddict.h" />
<ClInclude Include="..\..\..\zstd\zstd_decompress_block.h" />
<ClInclude Include="..\..\..\zstd\zstd_decompress_internal.h" />
<ClInclude Include="..\..\..\zstd\zstd_double_fast.h" />
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
<ClInclude Include="..\..\..\zstd\zstd_fast.h" />
<ClInclude Include="..\..\..\zstd\zstd_internal.h" />
<ClInclude Include="..\..\..\zstd\zstd_lazy.h" />
<ClInclude Include="..\..\..\zstd\zstd_ldm.h" />
<ClInclude Include="..\..\..\zstd\zstd_opt.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -0,0 +1,300 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="src">
<UniqueIdentifier>{729c80ee-4d26-4a5e-8f1f-6c075783eb56}</UniqueIdentifier>
</Filter>
<Filter Include="server">
<UniqueIdentifier>{cf23ef7b-7694-4154-830b-00cf053350ea}</UniqueIdentifier>
</Filter>
<Filter Include="common">
<UniqueIdentifier>{e39d3623-47cd-4752-8da9-3ea324f964c1}</UniqueIdentifier>
</Filter>
<Filter Include="zstd">
<UniqueIdentifier>{043ecb94-f240-4986-94b0-bc5bbd415a82}</UniqueIdentifier>
</Filter>
<Filter Include="getopt">
<UniqueIdentifier>{ee9737d2-69c7-44da-b9c7-539d18f9d4b4}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\TracySocket.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\TracySystem.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyMemory.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyWorker.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyPrint.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\debug.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\entropy_common.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\error_private.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\fse_compress.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\fse_decompress.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\hist.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\huf_compress.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\huf_decompress.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\pool.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\threading.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\xxhash.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_common.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_compress.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_compress_literals.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_compress_sequences.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_compress_superblock.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_ddict.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_decompress.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_decompress_block.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_double_fast.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_fast.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_lazy.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_ldm.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstd_opt.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\zstdmt_compress.c">
<Filter>zstd</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyMmap.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\getopt\getopt.c">
<Filter>getopt</Filter>
</ClCompile>
<ClCompile Include="..\..\src\csvexport.cpp">
<Filter>src</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyAlloc.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyColor.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyForceInline.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyProtocol.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyQueue.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracySocket.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracySystem.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyEvent.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyMemory.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracySlab.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyVector.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyWorker.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyAlign.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyPrint.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\bitstream.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compiler.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\cpu.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\debug.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\error_private.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\fse.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\hist.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\huf.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\mem.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\pool.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\threading.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\xxhash.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_compress_internal.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_compress_literals.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_compress_sequences.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_compress_superblock.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_cwksp.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_ddict.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_decompress_block.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_decompress_internal.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_double_fast.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_errors.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_fast.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_internal.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_lazy.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_ldm.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_opt.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstdmt_compress.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyFileRead.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyMmap.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\getopt\getopt.h">
<Filter>getopt</Filter>
</ClInclude>
</ItemGroup>
</Project>

311
csvexport/src/csvexport.cpp Normal file
View File

@ -0,0 +1,311 @@
#ifdef _WIN32
# include <windows.h>
#endif
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include "../../server/TracyFileRead.hpp"
#include "../../server/TracyWorker.hpp"
#include "../../getopt/getopt.h"
void print_usage_exit(int e)
{
fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
fprintf(stderr, "Usage:\n");
fprintf(stderr, " extract [OPTION...] <trace file>\n");
fprintf(stderr, "\n");
fprintf(stderr, " -h, --help Print usage\n");
fprintf(stderr, " -f, --filter arg Filter zone names (default: "")\n");
fprintf(stderr, " -s, --sep arg CSV separator (default: ,)\n");
fprintf(stderr, " -c, --case Case sensitive filtering\n");
fprintf(stderr, " -e, --self Get self times\n");
fprintf(stderr, " -u, --unwrap Report each zone event\n");
exit(e);
}
struct Args {
const char* filter;
const char* separator;
const char* trace_file;
bool case_sensitive;
bool self_time;
bool unwrap;
};
Args parse_args(int argc, char** argv)
{
if (argc == 1)
{
print_usage_exit(1);
}
Args args = { "", ",", "", false, false, false };
struct option long_opts[] = {
{ "help", no_argument, NULL, 'h' },
{ "filter", optional_argument, NULL, 'f' },
{ "sep", optional_argument, NULL, 's' },
{ "case", no_argument, NULL, 'c' },
{ "self", no_argument, NULL, 'e' },
{ "unwrap", no_argument, NULL, 'u' },
{ NULL, 0, NULL, 0 }
};
int c;
while ((c = getopt_long(argc, argv, "hf:s:ceu", long_opts, NULL)) != -1)
{
switch (c)
{
case 'h':
print_usage_exit(0);
break;
case 'f':
args.filter = optarg;
break;
case 's':
args.separator = optarg;
break;
case 'c':
args.case_sensitive = true;
break;
case 'e':
args.self_time = true;
break;
case 'u':
args.unwrap = true;
break;
default:
print_usage_exit(1);
break;
}
}
if (argc != optind + 1)
{
print_usage_exit(1);
}
args.trace_file = argv[optind];
return args;
}
bool is_substring(
const char* term,
const char* s,
bool case_sensitive = false
){
auto new_term = std::string(term);
auto new_s = std::string(s);
if (!case_sensitive) {
std::transform(
new_term.begin(),
new_term.end(),
new_term.begin(),
[](unsigned char c){ return std::tolower(c); }
);
std::transform(
new_s.begin(),
new_s.end(),
new_s.begin(),
[](unsigned char c){ return std::tolower(c); }
);
}
return new_s.find(new_term) != std::string::npos;
}
const char* get_name(int32_t id, const tracy::Worker& worker)
{
auto& srcloc = worker.GetSourceLocation(id);
return worker.GetString(srcloc.name.active ? srcloc.name : srcloc.function);
}
template <typename T>
std::string join(const T& v, const char* sep) {
std::ostringstream s;
for (const auto& i : v) {
if (&i != &v[0]) {
s << sep;
}
s << i;
}
return s.str();
}
// From TracyView.cpp
int64_t GetZoneChildTimeFast(
const tracy::Worker& worker,
const tracy::ZoneEvent& zone
){
int64_t time = 0;
if( zone.HasChildren() )
{
auto& children = worker.GetZoneChildren( zone.Child() );
if( children.is_magic() )
{
auto& vec = *(tracy::Vector<tracy::ZoneEvent>*)&children;
for( auto& v : vec )
{
assert( v.IsEndValid() );
time += v.End() - v.Start();
}
}
else
{
for( auto& v : children )
{
assert( v->IsEndValid() );
time += v->End() - v->Start();
}
}
}
return time;
}
int main(int argc, char** argv)
{
#ifdef _WIN32
if (!AttachConsole(ATTACH_PARENT_PROCESS))
{
AllocConsole();
SetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), 0x07);
}
#endif
Args args = parse_args(argc, argv);
auto f = std::unique_ptr<tracy::FileRead>(
tracy::FileRead::Open(args.trace_file)
);
if (!f)
{
fprintf(stderr, "Could not open file %s\n", args.trace_file);
return 1;
}
auto worker = tracy::Worker(*f);
while (!worker.AreSourceLocationZonesReady())
{
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
auto& slz = worker.GetSourceLocationZones();
tracy::Vector<decltype(slz.begin())> slz_selected;
slz_selected.reserve(slz.size());
uint32_t total_cnt = 0;
for(auto it = slz.begin(); it != slz.end(); ++it)
{
if(it->second.total != 0)
{
++total_cnt;
if(args.filter[0] == '\0')
{
slz_selected.push_back_no_space_check(it);
}
else
{
auto name = get_name(it->first, worker);
if(is_substring(args.filter, name, args.case_sensitive))
{
slz_selected.push_back_no_space_check(it);
}
}
}
}
std::vector<const char*> columns;
if (args.unwrap)
{
columns = {
"name", "src_file", "src_line", "ns_since_start", "exec_time_ns"
};
}
else
{
columns = {
"name", "src_file", "src_line", "total_ns", "total_perc",
"counts", "mean_ns", "min_ns", "max_ns", "std_ns"
};
}
std::string header = join(columns, args.separator);
printf("%s\n", header.data());
const auto last_time = worker.GetLastTime();
for(auto& it : slz_selected)
{
std::vector<std::string> values(columns.size());
values[0] = get_name(it->first, worker);
const auto& srcloc = worker.GetSourceLocation(it->first);
values[1] = worker.GetString(srcloc.file);
values[2] = std::to_string(srcloc.line);
const auto& zone_data = it->second;
if (args.unwrap)
{
int i = 0;
for (const auto& zone_thread_data : zone_data.zones) {
const auto zone_event = zone_thread_data.Zone();
const auto start = zone_event->Start();
const auto end = zone_event->End();
values[3] = std::to_string(start);
auto timespan = end - start;
if (args.self_time) {
timespan -= GetZoneChildTimeFast(worker, *zone_event);
}
values[4] = std::to_string(timespan);
std::string row = join(values, args.separator);
printf("%s\n", row.data());
}
}
else
{
const auto time = args.self_time ? zone_data.selfTotal : zone_data.total;
values[3] = std::to_string(time);
values[4] = std::to_string(100. * time / last_time);
values[5] = std::to_string(zone_data.zones.size());
const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total)
/ zone_data.zones.size();
values[6] = std::to_string(avg);
const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min;
const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max;
values[7] = std::to_string(tmin);
values[8] = std::to_string(tmax);
const auto sz = zone_data.zones.size();
const auto ss = zone_data.sumSq
- 2. * zone_data.total * avg
+ avg * avg * sz;
const auto std = sqrt(ss / (sz - 1));
values[9] = std::to_string(std);
std::string row = join(values, args.separator);
printf("%s\n", row.data());
}
}
return 0;
}

View File

@ -104,6 +104,7 @@ Hello and welcome to the Tracy Profiler user manual! Here you will find all the
\item Chapter~\ref{client}, \emph{\nameref{client}}, provides information on how to instrument your application, in order to retrieve useful profiling data.
\item Chapter~\ref{capturing}, \emph{\nameref{capturing}}, goes into more detail on how the profiling information can be captured and stored on disk.
\item Chapter~\ref{analyzingdata}, \emph{\nameref{analyzingdata}}, guides you through the graphical user interface of the profiler.
\item Chapter~\ref{csvexport}, \emph{\nameref{csvexport}}, explains how to export some zone timing statistics into a CSV format.
\item Chapter~\ref{importingdata}, \emph{\nameref{importingdata}}, documents how to import data from other profilers.
\item Chapter~\ref{configurationfiles}, \emph{\nameref{configurationfiles}}, gives information on the profiler settings.
\end{itemize}
@ -1184,7 +1185,7 @@ This requirement is relaxed in the on-demand mode (section~\ref{ondemand}), beca
Tracy provides bindings for profiling OpenGL, Vulkan, Direct3D 12 and OpenCL execution time on GPU.
Note that the CPU and GPU timers may be not synchronized. You can correct the resulting desynchronization in the profiler's options (section~\ref{options}).
Note that the CPU and GPU timers may be not synchronized, unless a calibrated context is created. Since availability of calibrated contexts is limited, you can correct the desynchronization of uncalibrated contexts in the profiler's options (section~\ref{options}).
\subsubsection{OpenGL}
@ -1217,6 +1218,12 @@ To mark a GPU zone use the \texttt{TracyVkZone(ctx, cmdbuf, name)} macro, where
You also need to periodically collect the GPU events using the \texttt{TracyVkCollect(ctx, cmdbuf)} macro\footnote{It is considerably faster than the OpenGL's \texttt{TracyGpuCollect}.}. The provided command buffer must be in the recording state and outside of a render pass instance.
\subparagraph{Calibrated context}
In order to maintain synchronization between CPU and GPU time domains, you will need to enable the \texttt{VK\_EXT\_calibrated\_timestamps} device extension and retrieve the following function pointers: \texttt{vkGetPhysicalDeviceCalibrateableTimeDomainsEXT} and \texttt{vkGetCalibratedTimestampsEXT}.
To enable calibrated context, replace the macro \texttt{TracyVkContext} with \texttt{TracyVkContextCalibrated} and pass the two functions as additional parameters, in the order specified above.
\subsubsection{Direct3D 12}
To enable Direct3D 12 support, include the \texttt{tracy/TracyD3D12.hpp} header file. Tracing Direct3D 12 queues is nearly on par with the Vulkan implementation, where a \texttt{TracyD3D12Ctx} is returned from a call to \texttt{TracyD3D12Context(device, queue)}, which should be later cleaned up with the \texttt{TracyD3D12Destroy(ctx)} macro. Multiple contexts can be created, each with any queue type.
@ -1227,6 +1234,10 @@ Using GPU zones is the same as the Vulkan implementation, where the \texttt{Trac
The macro \texttt{TracyD3D12NewFrame(ctx)} is used to mark a new frame, and should appear before or after recording command lists, similar to \texttt{FrameMark}. This macro is a key component that enables automatic query data synchronization, so the user doesn't have to worry about synchronizing GPU execution before invoking a collection. Event data can then be collected and sent to the profiler using the \texttt{TracyD3D12Collect(ctx)} macro.
Note that due to artifacts from dynamic frequency scaling, GPU profiling may be slightly inaccurate. To counter this, \texttt{ID3D12Device::SetStablePowerState()} can be used to enable accurate profiling, at the expense of some performance. If the machine is not in developer mode, the device will be removed upon calling. Do not use this in shipping code.
Direct3D 12 contexts are always calibrated.
\subsubsection{OpenCL}
OpenCL support is achieved by including the \texttt{tracy/TracyOpenCL.hpp} header file. Tracing OpenCL requires the creation of a Tracy OpenCL context using the macro \texttt{TracyCLContext(context, device)}, which will return an instance of \texttt{TracyCLCtx} object that must be used when creating zones. The specified \texttt{device} must be part of the \texttt{context}. Cleanup is performed using the \texttt{TracyCLDestroy(ctx)} macro. Although not common, it is possible to create multiple OpenCL contexts for the same application.
@ -1913,9 +1924,8 @@ The main profiler window is split into three sections, as seen on figure~\ref{ma
\draw (0.6, -0.7) node[anchor=north west] {Frames: 364};
\draw[rounded corners=5pt] (2.8, -0.7) rectangle+(0.4, -0.5) node [midway] {\faCaretRight};
\draw[rounded corners=5pt] (3.3, -0.7) rectangle+(0.5, -0.5) node [midway] {\faCaretDown};
\draw[rounded corners=5pt] (3.9, -0.7) rectangle+(0.5, -0.5) node [midway] {\faCrosshairs};
\draw (4.5, -0.65) node[anchor=north west] {\faEye~52.7 ms \hspace{5pt} \faDatabase~6.06 s \hspace{5pt} \faMemory~195.2 MB};
\draw[dashed] (10.6, -0.75) rectangle+(3.2, -0.4) node[midway] {Notification area};
\draw (4, -0.65) node[anchor=north west] {\faEye~52.7 ms \hspace{5pt} \faDatabase~6.06 s \hspace{5pt} \faMemory~195.2 MB};
\draw[dashed] (10.1, -0.75) rectangle+(3.2, -0.4) node[midway] {Notification area};
\draw (0.1, -1.3) rectangle+(15.3, -1) node [midway] {Frame time graph};
\draw (0.1, -2.4) rectangle+(15.3, -3) node [midway] {Timeline view};
@ -1950,9 +1960,9 @@ The control menu (top row of buttons) provides access to various features of the
\end{itemize}
\end{itemize}
The frame information block consists of four elements: the current frame set name along with the number of captured frames, the two navigational buttons \faCaretLeft{} and \faCaretRight{}, which allow you to focus the timeline view on the previous or next frame, and the frame set selection button \faCaretDown{}, which is used to switch to a another frame set\footnote{See section~\ref{framesets} for another way to change the active frame set.}. The \emph{\faCrosshairs{}~Go to frame} button allows zooming the timeline view on the specified frame. For more information about marking frames, see section~\ref{markingframes}.
The frame information block consists of four elements: the current frame set name along with the number of captured frames (click on it with the \LMB{}~left mouse button to go to a specified frame), the two navigational buttons \faCaretLeft{} and \faCaretRight{}, which allow you to focus the timeline view on the previous or next frame, and the frame set selection button \faCaretDown{}, which is used to switch to a another frame set\footnote{See section~\ref{framesets} for another way to change the active frame set.}. For more information about marking frames, see section~\ref{markingframes}.
The next three items show the \emph{\faEye{}~view time range}, the \emph{\faDatabase{}~time span} of the whole capture, and the \emph{\faMemory{}~memory usage} of the profiler.
The next three items show the \emph{\faEye{}~view time range}, the \emph{\faDatabase{}~time span} of the whole capture (clicking on it with the \MMB{} middle mouse button will set the view range to the entire capture), and the \emph{\faMemory{}~memory usage} of the profiler.
\paragraph{Notification area}
@ -2366,7 +2376,7 @@ In this window you can set various trace-related options. The timeline view migh
\begin{itemize}
\item \emph{\faSignature{} Draw CPU usage graph} -- You can disable drawing of the CPU usage graph here.
\end{itemize}
\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Direct3D/OpenCL zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}.
\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Direct3D/OpenCL zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets of uncalibrated contexts (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}.
\item \emph{\faMicrochip{} Draw CPU zones} -- Determines whether CPU zones are displayed.
\begin{itemize}
\item \emph{\faGhost{} Draw ghost zones} -- Controls if ghost zones should be displayed in threads which don't have any instrumented zones available.
@ -3011,6 +3021,37 @@ This window lists all annotations marked on the timeline. Each annotation is pre
\label{figannlist}
\end{figure}
\section{Exporting zone statistics to CSV}
\label{csvexport}
You can use a command-line utility in the \texttt{csvexport} directory to export basic zone statistics from a saved trace into a CSV format.
The tool requires a single .tracy file as an argument and prints results into the standard output (stdout) from where you can redirect it into a file or use it as an input into another tool.
By default, the utility will list all zones with the following columns:
\begin{itemize}
\item \texttt{name} -- Zone name
\item \texttt{src\_file} -- Source file where the zone was set
\item \texttt{src\_line} -- Line in the source file where the zone was set
\item \texttt{total\_ns} -- Total zone time in nanoseconds
\item \texttt{total\_perc} -- Total zone time as a percentage of the program's execution time
\item \texttt{counts} -- Zone count
\item \texttt{mean\_ns} -- Mean zone time (equivalent in MPTC in the profiler GUI) in nanoseconds
\item \texttt{min\_ns} -- Minimum zone time in nanoseconds
\item \texttt{max\_ns} -- Maximum zone time in nanoseconds
\item \texttt{std\_ns} -- Standard deviation of the zone time in nanoseconds
\end{itemize}
You can customize the output with the following command line options:
\begin{itemize}
\item \texttt{-h, -\hspace{-1.25ex} -help} -- display a help message
\item \texttt{-f, -\hspace{-1.25ex} -filter <name>} -- filter the zone names
\item \texttt{-c, -\hspace{-1.25ex} -case} -- make the name filtering case sensitive
\item \texttt{-s, -\hspace{-1.25ex} -sep <separator>} -- customize the CSV separator (default is ``\texttt{,}'')
\item \texttt{-e, -\hspace{-1.25ex} -self} -- use self time (equivalent to the ``Self time'' toggle in the profiler GUI)
\item \texttt{-u, -\hspace{-1.25ex} -unwrap} -- report each zone individually; this will discard the statistics columns and instead reports for each zone entry its timestamp and the duration of the zone entry.
\end{itemize}
\section{Importing external profiling data}
\label{importingdata}

View File

@ -562,10 +562,13 @@ struct GpuCtxData
int64_t timeDiff;
uint64_t thread;
uint64_t count;
uint8_t accuracyBits;
float period;
GpuContextType type;
bool hasPeriod;
bool hasCalibration;
int64_t calibratedGpuTime;
int64_t calibratedCpuTime;
double calibrationMod;
unordered_flat_map<uint64_t, GpuCtxThreadData> threadData;
short_ptr<GpuEvent> query[64*1024];
};

View File

@ -7,7 +7,7 @@ namespace Version
{
enum { Major = 0 };
enum { Minor = 7 };
enum { Patch = 0 };
enum { Patch = 1 };
}
}

View File

@ -616,6 +616,7 @@ bool View::DrawImpl()
{
ImGui::PopStyleColor();
}
if( ImGui::IsItemClicked() ) ImGui::OpenPopup( "GoToFramePopup" );
}
ImGui::SameLine();
if( ImGui::SmallButton( " " ICON_FA_CARET_RIGHT " " ) ) ZoomToNextFrame();
@ -639,14 +640,6 @@ bool View::DrawImpl()
}
ImGui::EndCombo();
}
ImGui::SameLine();
if( ImGui::Button( ICON_FA_CROSSHAIRS ) ) ImGui::OpenPopup( "GoToFramePopup" );
if( ImGui::IsItemHovered() )
{
ImGui::BeginTooltip();
ImGui::TextUnformatted( "Go to frame" );
ImGui::EndTooltip();
}
if( ImGui::BeginPopup( "GoToFramePopup" ) )
{
static int frameNum = 1;
@ -691,6 +684,10 @@ bool View::DrawImpl()
ImGui::BeginTooltip();
ImGui::Text( "Time span" );
ImGui::EndTooltip();
if( ImGui::IsItemClicked( 2 ) )
{
ZoomToRange( 0, m_worker.GetLastTime() );
}
}
ImGui::SameLine();
dx = ImGui::GetCursorPosX() - cx;
@ -2170,6 +2167,13 @@ bool View::DrawZoneFrames( const FrameData& frames )
auto tx = ImGui::CalcTextSize( buf ).x;
uint32_t color = ( frames.name == 0 && i == 0 ) ? redColor : activeColor;
if( fsz - 7 <= tx )
{
static char tmp[256];
sprintf( tmp, "%s (%s)", RealToString( i ), TimeToString( ftime ) );
buf = tmp;
tx = ImGui::CalcTextSize( buf ).x;
}
if( fsz - 7 <= tx )
{
buf = TimeToString( ftime );
@ -2595,17 +2599,10 @@ void View::DrawZones()
}
}
TextFocused( "Zone count:", RealToString( v->count ) );
//TextFocused( "Top-level zones:", RealToString( v->timeline.size() ) );
if( isMultithreaded )
{
TextFocused( "Timestamp accuracy:", TimeToString( v->period ) );
}
else
{
TextDisabledUnformatted( "Query accuracy bits:" );
ImGui::SameLine();
ImGui::Text( "%i", v->accuracyBits );
}
ImGui::EndTooltip();
}
}
@ -7913,80 +7910,83 @@ void View::DrawOptions()
{
ImGui::TextDisabled( "%s threads", RealToString( gpuData[i]->threadData.size() ) );
}
ImGui::TreePush();
auto& drift = GpuDrift( gpuData[i] );
ImGui::SetNextItemWidth( 120 );
ImGui::PushID( i );
ImGui::InputInt( "Drift (ns/s)", &drift );
ImGui::PopID();
if( timeline.size() > 1 )
if( !gpuData[i]->hasCalibration )
{
ImGui::SameLine();
if( ImGui::Button( ICON_FA_ROBOT " Auto" ) )
ImGui::TreePush();
auto& drift = GpuDrift( gpuData[i] );
ImGui::SetNextItemWidth( 120 );
ImGui::PushID( i );
ImGui::InputInt( "Drift (ns/s)", &drift );
ImGui::PopID();
if( timeline.size() > 1 )
{
size_t lastidx = 0;
if( timeline.is_magic() )
ImGui::SameLine();
if( ImGui::Button( ICON_FA_ROBOT " Auto" ) )
{
auto& tl = *((Vector<GpuEvent>*)&timeline);
for( size_t j=tl.size()-1; j > 0; j-- )
size_t lastidx = 0;
if( timeline.is_magic() )
{
if( tl[j].GpuEnd() >= 0 )
auto& tl = *((Vector<GpuEvent>*)&timeline);
for( size_t j=tl.size()-1; j > 0; j-- )
{
lastidx = j;
break;
if( tl[j].GpuEnd() >= 0 )
{
lastidx = j;
break;
}
}
}
}
else
{
for( size_t j=timeline.size()-1; j > 0; j-- )
else
{
if( timeline[j]->GpuEnd() >= 0 )
for( size_t j=timeline.size()-1; j > 0; j-- )
{
lastidx = j;
break;
if( timeline[j]->GpuEnd() >= 0 )
{
lastidx = j;
break;
}
}
}
}
enum { NumSlopes = 10000 };
std::random_device rd;
std::default_random_engine gen( rd() );
std::uniform_int_distribution<size_t> dist( 0, lastidx - 1 );
float slopes[NumSlopes];
size_t idx = 0;
if( timeline.is_magic() )
{
auto& tl = *((Vector<GpuEvent>*)&timeline);
do
enum { NumSlopes = 10000 };
std::random_device rd;
std::default_random_engine gen( rd() );
std::uniform_int_distribution<size_t> dist( 0, lastidx - 1 );
float slopes[NumSlopes];
size_t idx = 0;
if( timeline.is_magic() )
{
const auto p0 = dist( gen );
const auto p1 = dist( gen );
if( p0 != p1 )
auto& tl = *((Vector<GpuEvent>*)&timeline);
do
{
slopes[idx++] = float( 1.0 - double( tl[p1].GpuStart() - tl[p0].GpuStart() ) / double( tl[p1].CpuStart() - tl[p0].CpuStart() ) );
const auto p0 = dist( gen );
const auto p1 = dist( gen );
if( p0 != p1 )
{
slopes[idx++] = float( 1.0 - double( tl[p1].GpuStart() - tl[p0].GpuStart() ) / double( tl[p1].CpuStart() - tl[p0].CpuStart() ) );
}
}
while( idx < NumSlopes );
}
while( idx < NumSlopes );
}
else
{
do
else
{
const auto p0 = dist( gen );
const auto p1 = dist( gen );
if( p0 != p1 )
do
{
slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) );
const auto p0 = dist( gen );
const auto p1 = dist( gen );
if( p0 != p1 )
{
slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) );
}
}
while( idx < NumSlopes );
}
while( idx < NumSlopes );
std::sort( slopes, slopes+NumSlopes );
drift = int( 1000000000 * -slopes[NumSlopes/2] );
}
std::sort( slopes, slopes+NumSlopes );
drift = int( 1000000000 * -slopes[NumSlopes/2] );
}
ImGui::TreePop();
}
ImGui::TreePop();
}
ImGui::TreePop();
}
@ -8527,11 +8527,12 @@ void View::DrawMessages()
size_t tsz = 0;
for( const auto& t : m_threadOrder ) if( !t->messages.empty() ) tsz++;
m_messageFilter.Draw( ICON_FA_FILTER " Filter messages", 200 );
bool filterChanged = m_messageFilter.Draw( ICON_FA_FILTER " Filter messages", 200 );
ImGui::SameLine();
if( ImGui::Button( ICON_FA_BACKSPACE " Clear" ) )
{
m_messageFilter.Clear();
filterChanged = true;
}
ImGui::SameLine();
ImGui::Spacing();
@ -8549,6 +8550,7 @@ void View::DrawMessages()
ImGui::Checkbox( ICON_FA_IMAGE " Show frame images", &m_showMessageImages );
}
bool threadsChanged = false;
auto expand = ImGui::TreeNode( ICON_FA_RANDOM " Visible threads:" );
ImGui::SameLine();
ImGui::TextDisabled( "(%zu)", tsz );
@ -8563,6 +8565,7 @@ void View::DrawMessages()
{
VisibleMsgThread( t->id ) = true;
}
threadsChanged = true;
}
ImGui::SameLine();
if( ImGui::SmallButton( "Unselect all" ) )
@ -8571,6 +8574,7 @@ void View::DrawMessages()
{
VisibleMsgThread( t->id ) = false;
}
threadsChanged = true;
}
int idx = 0;
@ -8581,7 +8585,10 @@ void View::DrawMessages()
const auto threadColor = GetThreadColor( t->id, 0 );
SmallColorBox( threadColor );
ImGui::SameLine();
SmallCheckbox( m_worker.GetThreadName( t->id ), &VisibleMsgThread( t->id ) );
if( SmallCheckbox( m_worker.GetThreadName( t->id ), &VisibleMsgThread( t->id ) ) )
{
threadsChanged = true;
}
ImGui::PopID();
ImGui::SameLine();
ImGui::TextDisabled( "(%s)", RealToString( t->messages.size() ) );
@ -8594,6 +8601,78 @@ void View::DrawMessages()
ImGui::TreePop();
}
const bool msgsChanged = msgs.size() != m_prevMessages;
if( filterChanged || threadsChanged )
{
m_msgList.reserve( msgs.size() );
m_msgList.clear();
if( m_messageFilter.IsActive() )
{
for( size_t i=0; i<msgs.size(); i++ )
{
const auto& v = msgs[i];
const auto tid = m_worker.DecompressThread( v->thread );
if( VisibleMsgThread( tid ) )
{
const auto text = m_worker.GetString( msgs[i]->ref );
if( m_messageFilter.PassFilter( text ) )
{
m_msgList.push_back_no_space_check( uint32_t( i ) );
}
}
}
}
else
{
for( size_t i=0; i<msgs.size(); i++ )
{
const auto& v = msgs[i];
const auto tid = m_worker.DecompressThread( v->thread );
if( VisibleMsgThread( tid ) )
{
m_msgList.push_back_no_space_check( uint32_t( i ) );
}
}
}
m_visibleMessages = m_msgList.size();
if( msgsChanged ) m_prevMessages = msgs.size();
}
else if( msgsChanged )
{
assert( m_prevMessages < msgs.size() );
m_msgList.reserve( msgs.size() );
if( m_messageFilter.IsActive() )
{
for( size_t i=m_prevMessages; i<msgs.size(); i++ )
{
const auto& v = msgs[i];
const auto tid = m_worker.DecompressThread( v->thread );
if( VisibleMsgThread( tid ) )
{
const auto text = m_worker.GetString( msgs[i]->ref );
if( m_messageFilter.PassFilter( text ) )
{
m_msgList.push_back_no_space_check( uint32_t( i ) );
}
}
}
}
else
{
for( size_t i=m_prevMessages; i<msgs.size(); i++ )
{
const auto& v = msgs[i];
const auto tid = m_worker.DecompressThread( v->thread );
if( VisibleMsgThread( tid ) )
{
m_msgList.push_back_no_space_check( uint32_t( i ) );
}
}
}
m_visibleMessages = m_msgList.size();
m_prevMessages = msgs.size();
}
bool hasCallstack = m_worker.GetCallstackFrameCount() != 0;
ImGui::Separator();
ImGui::BeginChild( "##messages" );
@ -8627,85 +8706,25 @@ void View::DrawMessages()
}
ImGui::Separator();
int msgcnt = 0;
const auto filterActive = m_messageFilter.IsActive();
int idx = 0;
for( const auto& v : msgs )
if( m_msgToFocus )
{
const auto tid = m_worker.DecompressThread( v->thread );
if( VisibleMsgThread( tid ) )
for( const auto& msgIdx : m_msgList )
{
const auto text = m_worker.GetString( v->ref );
if( !filterActive || m_messageFilter.PassFilter( text ) )
DrawMessageLine( *msgs[msgIdx], hasCallstack, idx );
}
}
else
{
ImGuiListClipper clipper( m_msgList.size() );
while( clipper.Step() )
{
for( auto i=clipper.DisplayStart; i<clipper.DisplayEnd; i++ )
{
ImGui::PushID( v );
if( ImGui::Selectable( TimeToStringExact( v->time ), m_msgHighlight == v, ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_AllowItemOverlap ) )
{
CenterAtTime( v->time );
}
if( ImGui::IsItemHovered() )
{
m_msgHighlight = v;
if( m_showMessageImages )
{
const auto frameIdx = m_worker.GetFrameRange( *m_frames, v->time, v->time ).first;
auto fi = m_worker.GetFrameImage( *m_frames, frameIdx );
if( fi )
{
ImGui::BeginTooltip();
if( fi != m_frameTexturePtr )
{
if( !m_frameTexture ) m_frameTexture = MakeTexture();
UpdateTexture( m_frameTexture, m_worker.UnpackFrameImage( *fi ), fi->w, fi->h );
m_frameTexturePtr = fi;
}
if( fi->flip )
{
ImGui::Image( m_frameTexture, ImVec2( fi->w, fi->h ), ImVec2( 0, 1 ), ImVec2( 1, 0 ) );
}
else
{
ImGui::Image( m_frameTexture, ImVec2( fi->w, fi->h ) );
}
ImGui::EndTooltip();
}
}
}
if( m_msgToFocus == v )
{
ImGui::SetScrollHereY();
m_msgToFocus.Decay( nullptr );
m_messagesScrollBottom = false;
}
ImGui::PopID();
ImGui::NextColumn();
SmallColorBox( GetThreadColor( tid, 0 ) );
ImGui::SameLine();
ImGui::TextUnformatted( m_worker.GetThreadName( tid ) );
ImGui::SameLine();
ImGui::TextDisabled( "(%s)", RealToString( tid ) );
ImGui::NextColumn();
ImGui::PushStyleColor( ImGuiCol_Text, v->color );
ImGui::TextWrapped( "%s", text );
ImGui::PopStyleColor();
ImGui::NextColumn();
if( hasCallstack )
{
const auto cs = v->callstack.Val();
if( cs != 0 )
{
SmallCallstackButton( ICON_FA_ALIGN_JUSTIFY, cs, idx );
ImGui::SameLine();
DrawCallstackCalls( cs, 4 );
}
ImGui::NextColumn();
}
msgcnt++;
DrawMessageLine( *msgs[m_msgList[i]], hasCallstack, idx );
}
}
}
m_visibleMessages = msgcnt;
if( m_worker.IsConnected() && ImGui::GetScrollY() >= ImGui::GetScrollMaxY() )
{
@ -8717,6 +8736,84 @@ void View::DrawMessages()
ImGui::End();
}
void View::DrawMessageLine( const MessageData& msg, bool hasCallstack, int& idx )
{
const auto text = m_worker.GetString( msg.ref );
const auto tid = m_worker.DecompressThread( msg.thread );
ImGui::PushID( &msg );
if( ImGui::Selectable( TimeToStringExact( msg.time ), m_msgHighlight == &msg, ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_AllowItemOverlap ) )
{
CenterAtTime( msg.time );
}
if( ImGui::IsItemHovered() )
{
m_msgHighlight = &msg;
if( m_showMessageImages )
{
const auto frameIdx = m_worker.GetFrameRange( *m_frames, msg.time, msg.time ).first;
auto fi = m_worker.GetFrameImage( *m_frames, frameIdx );
if( fi )
{
ImGui::BeginTooltip();
if( fi != m_frameTexturePtr )
{
if( !m_frameTexture ) m_frameTexture = MakeTexture();
UpdateTexture( m_frameTexture, m_worker.UnpackFrameImage( *fi ), fi->w, fi->h );
m_frameTexturePtr = fi;
}
if( fi->flip )
{
ImGui::Image( m_frameTexture, ImVec2( fi->w, fi->h ), ImVec2( 0, 1 ), ImVec2( 1, 0 ) );
}
else
{
ImGui::Image( m_frameTexture, ImVec2( fi->w, fi->h ) );
}
ImGui::EndTooltip();
}
}
}
if( m_msgToFocus == &msg )
{
ImGui::SetScrollHereY();
m_msgToFocus.Decay( nullptr );
m_messagesScrollBottom = false;
}
ImGui::PopID();
ImGui::NextColumn();
SmallColorBox( GetThreadColor( tid, 0 ) );
ImGui::SameLine();
ImGui::TextUnformatted( m_worker.GetThreadName( tid ) );
ImGui::SameLine();
ImGui::TextDisabled( "(%s)", RealToString( tid ) );
ImGui::NextColumn();
ImGui::PushStyleColor( ImGuiCol_Text, msg.color );
const auto cw = ImGui::GetContentRegionAvail().x;
const auto tw = ImGui::CalcTextSize( text ).x;
ImGui::TextUnformatted( text );
if( tw > cw && ImGui::IsItemHovered() )
{
ImGui::SetNextWindowSize( ImVec2( 1000, 0 ) );
ImGui::BeginTooltip();
ImGui::TextWrapped( "%s", text );
ImGui::EndTooltip();
}
ImGui::PopStyleColor();
ImGui::NextColumn();
if( hasCallstack )
{
const auto cs = msg.callstack.Val();
if( cs != 0 )
{
SmallCallstackButton( ICON_FA_ALIGN_JUSTIFY, cs, idx );
ImGui::SameLine();
DrawCallstackCalls( cs, 4 );
}
ImGui::NextColumn();
}
}
uint64_t View::GetSelectionTarget( const Worker::ZoneThreadData& ev, FindZone::GroupBy groupBy ) const
{
switch( groupBy )

View File

@ -155,6 +155,7 @@ private:
int DrawCpuData( int offset, double pxns, const ImVec2& wpos, bool hover, float yMin, float yMax );
void DrawOptions();
void DrawMessages();
void DrawMessageLine( const MessageData& msg, bool hasCallstack, int& idx );
void DrawFindZone();
void DrawStatistics();
void DrawMemory();
@ -329,6 +330,8 @@ private:
bool m_showMessageImages = false;
ImGuiTextFilter m_statisticsFilter;
int m_visibleMessages = 0;
size_t m_prevMessages = 0;
Vector<uint32_t> m_msgList;
bool m_disconnectIssued = false;
DecayValue<uint64_t> m_drawThreadMigrations = 0;
DecayValue<uint64_t> m_drawThreadHighlight = 0;

View File

@ -923,14 +923,25 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
for( uint64_t i=0; i<sz; i++ )
{
auto ctx = m_slab.AllocInit<GpuCtxData>();
if( fileVer >= FileVersion( 0, 6, 14 ) )
if( fileVer >= FileVersion( 0, 7, 1 ) )
{
f.Read5( ctx->thread, ctx->accuracyBits, ctx->count, ctx->period, ctx->type );
uint8_t calibration;
f.Read5( ctx->thread, calibration, ctx->count, ctx->period, ctx->type );
ctx->hasCalibration = calibration;
}
else
{
f.Read4( ctx->thread, ctx->accuracyBits, ctx->count, ctx->period );
ctx->type = ctx->thread == 0 ? GpuContextType::Vulkan : GpuContextType::OpenGl;
uint8_t accuracy;
if( fileVer >= FileVersion( 0, 6, 14 ) )
{
f.Read5( ctx->thread, accuracy, ctx->count, ctx->period, ctx->type );
}
else
{
f.Read4( ctx->thread, accuracy, ctx->count, ctx->period );
ctx->type = ctx->thread == 0 ? GpuContextType::Vulkan : GpuContextType::OpenGl;
}
ctx->hasCalibration = false;
}
ctx->hasPeriod = ctx->period != 1.f;
m_data.gpuCnt += ctx->count;
@ -3955,6 +3966,9 @@ bool Worker::Process( const QueueItem& ev )
case QueueType::GpuTime:
ProcessGpuTime( ev.gpuTime );
break;
case QueueType::GpuCalibration:
ProcessGpuCalibration( ev.gpuCalibration );
break;
case QueueType::MemAlloc:
ProcessMemAlloc( ev.memAlloc );
break;
@ -4886,15 +4900,19 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev )
gpuTime = int64_t( double( ev.period ) * ev.gpuTime ); // precision loss
}
const auto cpuTime = TscTime( ev.cpuTime - m_data.baseTime );
auto gpu = m_slab.AllocInit<GpuCtxData>();
memset( gpu->query, 0, sizeof( gpu->query ) );
gpu->timeDiff = TscTime( ev.cpuTime - m_data.baseTime ) - gpuTime;
gpu->timeDiff = cpuTime - gpuTime;
gpu->thread = ev.thread;
gpu->accuracyBits = ev.accuracyBits;
gpu->period = ev.period;
gpu->count = 0;
gpu->type = ev.type;
gpu->hasPeriod = ev.period != 1.f;
gpu->hasCalibration = ev.flags & GpuContextCalibration;
gpu->calibratedGpuTime = gpuTime;
gpu->calibratedCpuTime = cpuTime;
gpu->calibrationMod = 1.;
m_data.gpuData.push_back( gpu );
m_gpuCtxMap[ev.context] = gpu;
}
@ -5028,11 +5046,25 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
int64_t gpuTime;
if( !ctx->hasPeriod )
{
gpuTime = t;
if( !ctx->hasCalibration )
{
gpuTime = t + ctx->timeDiff;
}
else
{
gpuTime = int64_t( ( t - ctx->calibratedGpuTime ) * ctx->calibrationMod + ctx->calibratedCpuTime );
}
}
else
{
gpuTime = int64_t( double( ctx->period ) * t ); // precision loss
if( !ctx->hasCalibration )
{
gpuTime = int64_t( double( ctx->period ) * t ) + ctx->timeDiff; // precision loss
}
else
{
gpuTime = int64_t( ( double( ctx->period ) * t - ctx->calibratedGpuTime ) * ctx->calibrationMod + ctx->calibratedCpuTime );
}
}
auto zone = ctx->query[ev.queryId];
@ -5041,25 +5073,46 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
if( zone->GpuStart() < 0 )
{
const auto time = ctx->timeDiff + gpuTime;
zone->SetGpuStart( time );
if( m_data.lastTime < time ) m_data.lastTime = time;
zone->SetGpuStart( gpuTime );
if( m_data.lastTime < gpuTime ) m_data.lastTime = gpuTime;
ctx->count++;
}
else
{
auto time = ctx->timeDiff + gpuTime;
if( time < zone->GpuStart() )
if( gpuTime < zone->GpuStart() )
{
auto tmp = zone->GpuStart();
std::swap( time, tmp );
std::swap( gpuTime, tmp );
zone->SetGpuStart( tmp );
}
zone->SetGpuEnd( time );
if( m_data.lastTime < time ) m_data.lastTime = time;
zone->SetGpuEnd( gpuTime );
if( m_data.lastTime < gpuTime ) m_data.lastTime = gpuTime;
}
}
void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev )
{
auto ctx = m_gpuCtxMap[ev.context];
assert( ctx );
assert( ctx->hasCalibration );
int64_t gpuTime;
if( !ctx->hasPeriod )
{
gpuTime = ev.gpuTime;
}
else
{
gpuTime = int64_t( double( ctx->period ) * ev.gpuTime ); // precision loss
}
const auto cpuDelta = ev.cpuDelta;
const auto gpuDelta = gpuTime - ctx->calibratedGpuTime;
ctx->calibrationMod = double( cpuDelta ) / gpuDelta;
ctx->calibratedGpuTime = gpuTime;
ctx->calibratedCpuTime = TscTime( ev.cpuTime - m_data.baseTime );
}
void Worker::ProcessMemAlloc( const QueueMemAlloc& ev )
{
const auto refTime = m_refTimeSerial + ev.time;
@ -6575,7 +6628,8 @@ void Worker::Write( FileWrite& f )
for( auto& ctx : m_data.gpuData )
{
f.Write( &ctx->thread, sizeof( ctx->thread ) );
f.Write( &ctx->accuracyBits, sizeof( ctx->accuracyBits ) );
uint8_t calibration = ctx->hasCalibration;
f.Write( &calibration, sizeof( calibration ) );
f.Write( &ctx->count, sizeof( ctx->count ) );
f.Write( &ctx->period, sizeof( ctx->period ) );
f.Write( &ctx->type, sizeof( ctx->type ) );

View File

@ -643,6 +643,7 @@ private:
tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial );
tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial );
tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev );
tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev );
tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev );
tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev );