From 1013ec8db7b40d8bfa485df14aaa3698c45d2cc9 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 01:21:36 +0200 Subject: [PATCH 01/37] Drop accuracy bits from GPU context. --- server/TracyEvent.hpp | 1 - server/TracyView.cpp | 7 ------- server/TracyWorker.cpp | 11 +++++++---- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index c1401fbd..a140201b 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -562,7 +562,6 @@ struct GpuCtxData int64_t timeDiff; uint64_t thread; uint64_t count; - uint8_t accuracyBits; float period; GpuContextType type; bool hasPeriod; diff --git a/server/TracyView.cpp b/server/TracyView.cpp index d142ac78..11fc23f7 100644 --- a/server/TracyView.cpp +++ b/server/TracyView.cpp @@ -2595,17 +2595,10 @@ void View::DrawZones() } } TextFocused( "Zone count:", RealToString( v->count ) ); - //TextFocused( "Top-level zones:", RealToString( v->timeline.size() ) ); if( isMultithreaded ) { TextFocused( "Timestamp accuracy:", TimeToString( v->period ) ); } - else - { - TextDisabledUnformatted( "Query accuracy bits:" ); - ImGui::SameLine(); - ImGui::Text( "%i", v->accuracyBits ); - } ImGui::EndTooltip(); } } diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 5d19a8b6..510f408f 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -923,13 +923,15 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks ) for( uint64_t i=0; i(); + // TODO remove + uint8_t accuracy; if( fileVer >= FileVersion( 0, 6, 14 ) ) { - f.Read5( ctx->thread, ctx->accuracyBits, ctx->count, ctx->period, ctx->type ); + f.Read5( ctx->thread, accuracy, ctx->count, ctx->period, ctx->type ); } else { - f.Read4( ctx->thread, ctx->accuracyBits, ctx->count, ctx->period ); + f.Read4( ctx->thread, accuracy, ctx->count, ctx->period ); ctx->type = ctx->thread == 0 ? GpuContextType::Vulkan : GpuContextType::OpenGl; } ctx->hasPeriod = ctx->period != 1.f; @@ -4890,7 +4892,6 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev ) memset( gpu->query, 0, sizeof( gpu->query ) ); gpu->timeDiff = TscTime( ev.cpuTime - m_data.baseTime ) - gpuTime; gpu->thread = ev.thread; - gpu->accuracyBits = ev.accuracyBits; gpu->period = ev.period; gpu->count = 0; gpu->type = ev.type; @@ -6575,7 +6576,9 @@ void Worker::Write( FileWrite& f ) for( auto& ctx : m_data.gpuData ) { f.Write( &ctx->thread, sizeof( ctx->thread ) ); - f.Write( &ctx->accuracyBits, sizeof( ctx->accuracyBits ) ); + // TODO remove + uint8_t zero = 0; + f.Write( &zero, sizeof( zero ) ); f.Write( &ctx->count, sizeof( ctx->count ) ); f.Write( &ctx->period, sizeof( ctx->period ) ); f.Write( &ctx->type, sizeof( ctx->type ) ); From 5e5bf928a5e6796ee72ee42fefffa901b882ca4d Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 21:24:44 +0200 Subject: [PATCH 02/37] Add QPC frequency query to API. --- client/TracyProfiler.cpp | 11 +++++++++++ client/TracyProfiler.hpp | 1 + 2 files changed, 12 insertions(+) diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index 8d2da1a8..8c329ec0 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -873,6 +873,17 @@ static Thread* s_sysTraceThread = nullptr; TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } +TRACY_API int64_t GetFrequencyQpc() +{ +#if defined _WIN32 || defined __CYGWIN__ + LARGE_INTEGER t; + QueryPerformanceFrequency( &t ); + return t.QuadPart; +#else + return 0; +#endif +} + #ifdef TRACY_DELAYED_INIT struct ThreadNameData; TRACY_API moodycamel::ConcurrentQueue& GetQueue(); diff --git a/client/TracyProfiler.hpp b/client/TracyProfiler.hpp index 1507418f..b48fd680 100644 --- a/client/TracyProfiler.hpp +++ b/client/TracyProfiler.hpp @@ -60,6 +60,7 @@ TRACY_API GpuCtxWrapper& GetGpuCtx(); TRACY_API uint64_t GetThreadHandle(); TRACY_API void InitRPMallocThread(); TRACY_API bool ProfilerAvailable(); +TRACY_API int64_t GetFrequencyQpc(); struct SourceLocationData { From dc91affdc51b9bd8c24fc74af575283dc6d809e0 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 20:03:03 +0200 Subject: [PATCH 03/37] Allow creating calibrated VK context. --- TracyVulkan.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/TracyVulkan.hpp b/TracyVulkan.hpp index 7f6b9182..fe9d10de 100644 --- a/TracyVulkan.hpp +++ b/TracyVulkan.hpp @@ -4,6 +4,7 @@ #if !defined TRACY_ENABLE #define TracyVkContext(x,y,z,w) nullptr +#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr #define TracyVkDestroy(x) #define TracyVkNamedZone(c,x,y,z,w) #define TracyVkNamedZoneC(c,x,y,z,w,a) @@ -42,7 +43,7 @@ class VkCtx enum { QueryCount = 64 * 1024 }; public: - VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf ) + VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT ) : m_device( device ) , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) , m_head( 0 ) @@ -271,11 +272,11 @@ private: VkCtx* m_ctx; }; -static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf ) +static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) { InitRPMallocThread(); auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); - new(ctx) VkCtx( physdev, device, queue, cmdbuf ); + new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct ); return ctx; } @@ -289,7 +290,8 @@ static inline void DestroyVkContext( VkCtx* ctx ) using TracyVkCtx = tracy::VkCtx*; -#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf ); +#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr ); +#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct ); #define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx ); #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK # define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active ); From f6d320ebdcb0638760b1c6c649649d719c92e882 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 20:04:17 +0200 Subject: [PATCH 04/37] Detect time domains available for calibration. --- TracyVulkan.hpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/TracyVulkan.hpp b/TracyVulkan.hpp index fe9d10de..24857814 100644 --- a/TracyVulkan.hpp +++ b/TracyVulkan.hpp @@ -45,14 +45,34 @@ class VkCtx public: VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT ) : m_device( device ) + , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) , m_head( 0 ) , m_tail( 0 ) , m_oldCnt( 0 ) , m_queryCount( QueryCount ) + , m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT ) { assert( m_context != 255 ); + if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT ) + { + uint32_t num; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr ); + if( num > 4 ) num = 4; + VkTimeDomainEXT data[4]; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data ); + for( uint32_t i=0; i Date: Tue, 7 Jul 2020 20:21:38 +0200 Subject: [PATCH 05/37] Vulkan calibration function. --- TracyVulkan.hpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/TracyVulkan.hpp b/TracyVulkan.hpp index 24857814..1cc7e20e 100644 --- a/TracyVulkan.hpp +++ b/TracyVulkan.hpp @@ -204,10 +204,35 @@ private: return m_context; } + tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu ) + { + assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ); + VkCalibratedTimestampInfoEXT spec[2] = { + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT }, + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain }, + }; + uint64_t ts[2]; + uint64_t deviation; + do + { + m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation ); + } + while( deviation > m_deviation ); + +#if defined _WIN32 || defined __CYGWIN__ + tGpu = ts[0]; + tCpu = ts[1] * m_qpcToNs; +#else + assert( false ); +#endif + } + VkDevice m_device; VkQueryPool m_query; VkTimeDomainEXT m_timeDomain; uint64_t m_deviation; + int64_t m_qpcToNs; + int64_t m_prevCalibration; uint8_t m_context; unsigned int m_head; From 1b6bc1b69adf42d33c160149f28ca38bb3024c0f Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 20:32:25 +0200 Subject: [PATCH 06/37] Send Vulkan GPU calibration messages. --- TracyD3D12.hpp | 2 +- TracyOpenCL.hpp | 2 +- TracyOpenGL.hpp | 2 +- TracyVulkan.hpp | 84 +++++++++++++++++++++++++++++++++------- common/TracyProtocol.hpp | 2 +- common/TracyQueue.hpp | 18 ++++++++- server/TracyWorker.cpp | 8 ++++ server/TracyWorker.hpp | 1 + 8 files changed, 100 insertions(+), 19 deletions(-) diff --git a/TracyD3D12.hpp b/TracyD3D12.hpp index 8f75d2ea..93d4b244 100644 --- a/TracyD3D12.hpp +++ b/TracyD3D12.hpp @@ -150,7 +150,7 @@ namespace tracy memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast(timestampFrequency)); MemWrite(&item->gpuNewContext.context, m_context); - MemWrite(&item->gpuNewContext.accuracyBits, uint8_t{ 0 }); + MemWrite(&item->gpuNewContext.flags, uint8_t{ 0 }); MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); #ifdef TRACY_ON_DEMAND diff --git a/TracyOpenCL.hpp b/TracyOpenCL.hpp index 77c7de98..5dba8488 100644 --- a/TracyOpenCL.hpp +++ b/TracyOpenCL.hpp @@ -72,7 +72,7 @@ namespace tracy { MemWrite(&item->gpuNewContext.period, 1.0f); MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL); MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId); - MemWrite(&item->gpuNewContext.accuracyBits, (uint8_t)0); + MemWrite(&item->gpuNewContext.flags, (uint8_t)0); #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem(*item); #endif diff --git a/TracyOpenGL.hpp b/TracyOpenGL.hpp index e4d831d3..90c5dcbc 100644 --- a/TracyOpenGL.hpp +++ b/TracyOpenGL.hpp @@ -110,7 +110,7 @@ public: MemWrite( &item->gpuNewContext.thread, thread ); MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits ); + MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) ); MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl ); #ifdef TRACY_ON_DEMAND diff --git a/TracyVulkan.hpp b/TracyVulkan.hpp index 1cc7e20e..9ba6738b 100644 --- a/TracyVulkan.hpp +++ b/TracyVulkan.hpp @@ -102,21 +102,56 @@ public: vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); vkQueueWaitIdle( queue ); - vkBeginCommandBuffer( cmdbuf, &beginInfo ); - vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ); - vkEndCommandBuffer( cmdbuf ); - vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); - vkQueueWaitIdle( queue ); + int64_t tcpu, tgpu; + if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT ) + { + vkBeginCommandBuffer( cmdbuf, &beginInfo ); + vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ); + vkEndCommandBuffer( cmdbuf ); + vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); + vkQueueWaitIdle( queue ); - int64_t tcpu = Profiler::GetTime(); - int64_t tgpu; - vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ); + tcpu = Profiler::GetTime(); + vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ); - vkBeginCommandBuffer( cmdbuf, &beginInfo ); - vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ); - vkEndCommandBuffer( cmdbuf ); - vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); - vkQueueWaitIdle( queue ); + vkBeginCommandBuffer( cmdbuf, &beginInfo ); + vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ); + vkEndCommandBuffer( cmdbuf ); + vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); + vkQueueWaitIdle( queue ); + } + else + { + enum { NumProbes = 32 }; + + VkCalibratedTimestampInfoEXT spec[2] = { + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT }, + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain }, + }; + uint64_t ts[2]; + uint64_t deviation[NumProbes]; + for( int i=0; i deviation[i] ) + { + minDeviation = deviation[i]; + } + } + m_deviation = minDeviation * 3 / 2; + + m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() ); + + Calibrate( device, m_prevCalibration, tgpu ); + tcpu = Profiler::GetTime(); + } + + uint8_t flags = 0; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration; auto item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuNewContext ); @@ -125,7 +160,7 @@ public: memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) ); + MemWrite( &item->gpuNewContext.flags, flags ); MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); #ifdef TRACY_ON_DEMAND @@ -153,6 +188,8 @@ public: { vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ); m_head = m_tail = 0; + int64_t tgpu; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu ); return; } #endif @@ -184,6 +221,25 @@ public: Profiler::QueueSerialFinish(); } + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) + { + int64_t tgpu, tcpu; + Calibrate( m_device, tcpu, tgpu ); + const auto refCpu = Profiler::GetTime(); + const auto delta = tcpu - m_prevCalibration; + if( delta > 0 ) + { + m_prevCalibration = tcpu; + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuCalibration ); + MemWrite( &item->gpuCalibration.gpuTime, tgpu ); + MemWrite( &item->gpuCalibration.cpuTime, refCpu ); + MemWrite( &item->gpuCalibration.cpuDelta, delta ); + MemWrite( &item->gpuCalibration.context, m_context ); + Profiler::QueueSerialFinish(); + } + } + vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ); m_tail += cnt; diff --git a/common/TracyProtocol.hpp b/common/TracyProtocol.hpp index 634b5846..cc47fe41 100644 --- a/common/TracyProtocol.hpp +++ b/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 36 }; +enum : uint32_t { ProtocolVersion = 37 }; enum : uint32_t { BroadcastVersion = 1 }; using lz4sz_t = uint32_t; diff --git a/common/TracyQueue.hpp b/common/TracyQueue.hpp index 4bd48b54..815d27ab 100644 --- a/common/TracyQueue.hpp +++ b/common/TracyQueue.hpp @@ -56,6 +56,7 @@ enum class QueueType : uint8_t Terminate, KeepAlive, ThreadContext, + GpuCalibration, Crash, CrashReport, ZoneValidation, @@ -268,6 +269,11 @@ enum class GpuContextType : uint8_t Direct3D12 }; +enum GpuContextFlags : uint8_t +{ + GpuContextCalibration = 1 << 0 +}; + struct QueueGpuNewContext { int64_t cpuTime; @@ -275,7 +281,7 @@ struct QueueGpuNewContext uint64_t thread; float period; uint8_t context; - uint8_t accuracyBits; + GpuContextFlags flags; GpuContextType type; }; @@ -303,6 +309,14 @@ struct QueueGpuTime uint8_t context; }; +struct QueueGpuCalibration +{ + int64_t gpuTime; + int64_t cpuTime; + int64_t cpuDelta; + uint8_t context; +}; + struct QueueMemAlloc { int64_t time; @@ -477,6 +491,7 @@ struct QueueItem QueueGpuZoneBegin gpuZoneBegin; QueueGpuZoneEnd gpuZoneEnd; QueueGpuTime gpuTime; + QueueGpuCalibration gpuCalibration; QueueMemAlloc memAlloc; QueueMemFree memFree; QueueCallstackMemory callstackMemory; @@ -553,6 +568,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // terminate sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ) + sizeof( QueueThreadContext ), + sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ), sizeof( QueueHeader ), // crash sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 510f408f..cefb2394 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -3957,6 +3957,9 @@ bool Worker::Process( const QueueItem& ev ) case QueueType::GpuTime: ProcessGpuTime( ev.gpuTime ); break; + case QueueType::GpuCalibration: + ProcessGpuCalibration( ev.gpuCalibration ); + break; case QueueType::MemAlloc: ProcessMemAlloc( ev.memAlloc ); break; @@ -5061,6 +5064,11 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev ) } } +void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev ) +{ + +} + void Worker::ProcessMemAlloc( const QueueMemAlloc& ev ) { const auto refTime = m_refTimeSerial + ev.time; diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 74a83c98..7390593d 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -643,6 +643,7 @@ private: tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial ); tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ); tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev ); + tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev ); tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev ); tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev ); tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev ); From 21f4981f3807c53a91ef07be214c1fa574768179 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 21:09:37 +0200 Subject: [PATCH 07/37] Process GPU calibration on server. --- server/TracyEvent.hpp | 4 +++ server/TracyWorker.cpp | 57 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index a140201b..63b74f86 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -565,6 +565,10 @@ struct GpuCtxData float period; GpuContextType type; bool hasPeriod; + bool hasCalibration; + int64_t calibratedGpuTime; + int64_t calibratedCpuTime; + double calibrationMod; unordered_flat_map threadData; short_ptr query[64*1024]; }; diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index cefb2394..3587d83b 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -4891,14 +4891,19 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev ) gpuTime = int64_t( double( ev.period ) * ev.gpuTime ); // precision loss } + const auto cpuTime = TscTime( ev.cpuTime - m_data.baseTime ); auto gpu = m_slab.AllocInit(); memset( gpu->query, 0, sizeof( gpu->query ) ); - gpu->timeDiff = TscTime( ev.cpuTime - m_data.baseTime ) - gpuTime; + gpu->timeDiff = cpuTime - gpuTime; gpu->thread = ev.thread; gpu->period = ev.period; gpu->count = 0; gpu->type = ev.type; gpu->hasPeriod = ev.period != 1.f; + gpu->hasCalibration = ev.flags & GpuContextCalibration; + gpu->calibratedGpuTime = gpuTime; + gpu->calibratedCpuTime = cpuTime; + gpu->calibrationMod = 1.; m_data.gpuData.push_back( gpu ); m_gpuCtxMap[ev.context] = gpu; } @@ -5032,11 +5037,25 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev ) int64_t gpuTime; if( !ctx->hasPeriod ) { - gpuTime = t; + if( !ctx->hasCalibration ) + { + gpuTime = t + ctx->timeDiff; + } + else + { + gpuTime = int64_t( ( t - ctx->calibratedGpuTime ) * ctx->calibrationMod + ctx->calibratedCpuTime ); + } } else { - gpuTime = int64_t( double( ctx->period ) * t ); // precision loss + if( !ctx->hasCalibration ) + { + gpuTime = int64_t( double( ctx->period ) * t ) + ctx->timeDiff; // precision loss + } + else + { + gpuTime = int64_t( ( double( ctx->period ) * t - ctx->calibratedGpuTime ) * ctx->calibrationMod + ctx->calibratedCpuTime ); + } } auto zone = ctx->query[ev.queryId]; @@ -5045,28 +5064,44 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev ) if( zone->GpuStart() < 0 ) { - const auto time = ctx->timeDiff + gpuTime; - zone->SetGpuStart( time ); - if( m_data.lastTime < time ) m_data.lastTime = time; + zone->SetGpuStart( gpuTime ); + if( m_data.lastTime < gpuTime ) m_data.lastTime = gpuTime; ctx->count++; } else { - auto time = ctx->timeDiff + gpuTime; - if( time < zone->GpuStart() ) + if( gpuTime < zone->GpuStart() ) { auto tmp = zone->GpuStart(); - std::swap( time, tmp ); + std::swap( gpuTime, tmp ); zone->SetGpuStart( tmp ); } - zone->SetGpuEnd( time ); - if( m_data.lastTime < time ) m_data.lastTime = time; + zone->SetGpuEnd( gpuTime ); + if( m_data.lastTime < gpuTime ) m_data.lastTime = gpuTime; } } void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev ) { + auto ctx = m_gpuCtxMap[ev.context]; + assert( ctx ); + assert( ctx->hasCalibration ); + int64_t gpuTime; + if( !ctx->hasPeriod ) + { + gpuTime = ev.gpuTime; + } + else + { + gpuTime = int64_t( double( ctx->period ) * ev.gpuTime ); // precision loss + } + + const auto cpuDelta = ev.cpuDelta; + const auto gpuDelta = gpuTime - ctx->calibratedGpuTime; + ctx->calibrationMod = double( cpuDelta ) / gpuDelta; + ctx->calibratedGpuTime = gpuTime; + ctx->calibratedCpuTime = TscTime( ev.cpuTime - m_data.baseTime ); } void Worker::ProcessMemAlloc( const QueueMemAlloc& ev ) From a3c51f0e7e7c473214383d7d824e2beb17fe63bb Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 21:16:08 +0200 Subject: [PATCH 08/37] Don't show drift adjustment for calibrated GPU contexts. --- server/TracyView.cpp | 105 ++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/server/TracyView.cpp b/server/TracyView.cpp index 11fc23f7..370807b8 100644 --- a/server/TracyView.cpp +++ b/server/TracyView.cpp @@ -7906,80 +7906,83 @@ void View::DrawOptions() { ImGui::TextDisabled( "%s threads", RealToString( gpuData[i]->threadData.size() ) ); } - ImGui::TreePush(); - auto& drift = GpuDrift( gpuData[i] ); - ImGui::SetNextItemWidth( 120 ); - ImGui::PushID( i ); - ImGui::InputInt( "Drift (ns/s)", &drift ); - ImGui::PopID(); - if( timeline.size() > 1 ) + if( !gpuData[i]->hasCalibration ) { - ImGui::SameLine(); - if( ImGui::Button( ICON_FA_ROBOT " Auto" ) ) + ImGui::TreePush(); + auto& drift = GpuDrift( gpuData[i] ); + ImGui::SetNextItemWidth( 120 ); + ImGui::PushID( i ); + ImGui::InputInt( "Drift (ns/s)", &drift ); + ImGui::PopID(); + if( timeline.size() > 1 ) { - size_t lastidx = 0; - if( timeline.is_magic() ) + ImGui::SameLine(); + if( ImGui::Button( ICON_FA_ROBOT " Auto" ) ) { - auto& tl = *((Vector*)&timeline); - for( size_t j=tl.size()-1; j > 0; j-- ) + size_t lastidx = 0; + if( timeline.is_magic() ) { - if( tl[j].GpuEnd() >= 0 ) + auto& tl = *((Vector*)&timeline); + for( size_t j=tl.size()-1; j > 0; j-- ) { - lastidx = j; - break; + if( tl[j].GpuEnd() >= 0 ) + { + lastidx = j; + break; + } } } - } - else - { - for( size_t j=timeline.size()-1; j > 0; j-- ) + else { - if( timeline[j]->GpuEnd() >= 0 ) + for( size_t j=timeline.size()-1; j > 0; j-- ) { - lastidx = j; - break; + if( timeline[j]->GpuEnd() >= 0 ) + { + lastidx = j; + break; + } } } - } - enum { NumSlopes = 10000 }; - std::random_device rd; - std::default_random_engine gen( rd() ); - std::uniform_int_distribution dist( 0, lastidx - 1 ); - float slopes[NumSlopes]; - size_t idx = 0; - if( timeline.is_magic() ) - { - auto& tl = *((Vector*)&timeline); - do + enum { NumSlopes = 10000 }; + std::random_device rd; + std::default_random_engine gen( rd() ); + std::uniform_int_distribution dist( 0, lastidx - 1 ); + float slopes[NumSlopes]; + size_t idx = 0; + if( timeline.is_magic() ) { - const auto p0 = dist( gen ); - const auto p1 = dist( gen ); - if( p0 != p1 ) + auto& tl = *((Vector*)&timeline); + do { - slopes[idx++] = float( 1.0 - double( tl[p1].GpuStart() - tl[p0].GpuStart() ) / double( tl[p1].CpuStart() - tl[p0].CpuStart() ) ); + const auto p0 = dist( gen ); + const auto p1 = dist( gen ); + if( p0 != p1 ) + { + slopes[idx++] = float( 1.0 - double( tl[p1].GpuStart() - tl[p0].GpuStart() ) / double( tl[p1].CpuStart() - tl[p0].CpuStart() ) ); + } } + while( idx < NumSlopes ); } - while( idx < NumSlopes ); - } - else - { - do + else { - const auto p0 = dist( gen ); - const auto p1 = dist( gen ); - if( p0 != p1 ) + do { - slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) ); + const auto p0 = dist( gen ); + const auto p1 = dist( gen ); + if( p0 != p1 ) + { + slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) ); + } } + while( idx < NumSlopes ); } - while( idx < NumSlopes ); + std::sort( slopes, slopes+NumSlopes ); + drift = int( 1000000000 * -slopes[NumSlopes/2] ); } - std::sort( slopes, slopes+NumSlopes ); - drift = int( 1000000000 * -slopes[NumSlopes/2] ); } + ImGui::TreePop(); } - ImGui::TreePop(); } ImGui::TreePop(); } From e48b5611c51bc1ebe205d760b46c7694344b75f0 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 21:19:33 +0200 Subject: [PATCH 09/37] Save GPU context calibration state. --- server/TracyVersion.hpp | 2 +- server/TracyWorker.cpp | 26 +++++++++++++++++--------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/server/TracyVersion.hpp b/server/TracyVersion.hpp index 9f745627..77825c70 100644 --- a/server/TracyVersion.hpp +++ b/server/TracyVersion.hpp @@ -7,7 +7,7 @@ namespace Version { enum { Major = 0 }; enum { Minor = 7 }; -enum { Patch = 0 }; +enum { Patch = 1 }; } } diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 3587d83b..4fa2ec62 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -923,16 +923,25 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks ) for( uint64_t i=0; i(); - // TODO remove - uint8_t accuracy; - if( fileVer >= FileVersion( 0, 6, 14 ) ) + if( fileVer >= FileVersion( 0, 7, 1 ) ) { - f.Read5( ctx->thread, accuracy, ctx->count, ctx->period, ctx->type ); + uint8_t calibration; + f.Read5( ctx->thread, calibration, ctx->count, ctx->period, ctx->type ); + ctx->hasCalibration = calibration; } else { - f.Read4( ctx->thread, accuracy, ctx->count, ctx->period ); - ctx->type = ctx->thread == 0 ? GpuContextType::Vulkan : GpuContextType::OpenGl; + uint8_t accuracy; + if( fileVer >= FileVersion( 0, 6, 14 ) ) + { + f.Read5( ctx->thread, accuracy, ctx->count, ctx->period, ctx->type ); + } + else + { + f.Read4( ctx->thread, accuracy, ctx->count, ctx->period ); + ctx->type = ctx->thread == 0 ? GpuContextType::Vulkan : GpuContextType::OpenGl; + } + ctx->hasCalibration = false; } ctx->hasPeriod = ctx->period != 1.f; m_data.gpuCnt += ctx->count; @@ -6619,9 +6628,8 @@ void Worker::Write( FileWrite& f ) for( auto& ctx : m_data.gpuData ) { f.Write( &ctx->thread, sizeof( ctx->thread ) ); - // TODO remove - uint8_t zero = 0; - f.Write( &zero, sizeof( zero ) ); + uint8_t calibration = ctx->hasCalibration; + f.Write( &calibration, sizeof( calibration ) ); f.Write( &ctx->count, sizeof( ctx->count ) ); f.Write( &ctx->period, sizeof( ctx->period ) ); f.Write( &ctx->type, sizeof( ctx->type ) ); From 9bcf7a9214f88808fe07270a002b7fdcc4b187c8 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 21:20:13 +0200 Subject: [PATCH 10/37] Update NEWS. --- NEWS | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS b/NEWS index 924ee631..0514cb7e 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,7 @@ v0.7.1 (xxxx-xx-xx) - Fixed attachment of postponed frame images. - Source location data can be now copied to clipboard from zone info window. - Zones in find zones menu can be now grouped by zone name. +- Vulkan GPU contexts can be now calibrated. v0.7 (2020-06-11) ----------------- From 660f2cec7f567cffc8e3394db22f148c5f9490ce Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 21:40:56 +0200 Subject: [PATCH 11/37] Update manual. --- manual/tracy.tex | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/manual/tracy.tex b/manual/tracy.tex index 5e85ea0d..54120fef 100644 --- a/manual/tracy.tex +++ b/manual/tracy.tex @@ -1180,7 +1180,7 @@ This requirement is relaxed in the on-demand mode (section~\ref{ondemand}), beca Tracy provides bindings for profiling OpenGL, Vulkan, Direct3D 12 and OpenCL execution time on GPU. -Note that the CPU and GPU timers may be not synchronized. You can correct the resulting desynchronization in the profiler's options (section~\ref{options}). +Note that the CPU and GPU timers may be not synchronized, unless a calibrated context is created. Since availability of calibrated contexts is limited, you can correct the desynchronization of uncalibrated contexts in the profiler's options (section~\ref{options}). \subsubsection{OpenGL} @@ -1213,6 +1213,12 @@ To mark a GPU zone use the \texttt{TracyVkZone(ctx, cmdbuf, name)} macro, where You also need to periodically collect the GPU events using the \texttt{TracyVkCollect(ctx, cmdbuf)} macro\footnote{It is considerably faster than the OpenGL's \texttt{TracyGpuCollect}.}. The provided command buffer must be in the recording state and outside of a render pass instance. +\subparagraph{Calibrated context} + +In order to maintain synchronization between CPU and GPU time domains, you will need to enable the \texttt{VK\_EXT\_calibrated\_timestamps} device extension and retrieve the following function pointers: \texttt{vkGetPhysicalDeviceCalibrateableTimeDomainsEXT} and \texttt{vkGetCalibratedTimestampsEXT}. + +To enable calibrated context, replace the macro \texttt{TracyVkContext} with \texttt{TracyVkContextCalibrated} and pass the two functions as additional parameters, in the order specified above. + \subsubsection{Direct3D 12} To enable Direct3D 12 support, include the \texttt{tracy/TracyD3D12.hpp} header file. Tracing Direct3D 12 queues is nearly on par with the Vulkan implementation, where a \texttt{TracyD3D12Ctx} is returned from a call to \texttt{TracyD3D12Context(device, queue)}, which should be later cleaned up with the \texttt{TracyD3D12Destroy(ctx)} macro. Multiple contexts can be created, each with any queue type. @@ -2362,7 +2368,7 @@ In this window you can set various trace-related options. The timeline view migh \begin{itemize} \item \emph{\faSignature{} Draw CPU usage graph} -- You can disable drawing of the CPU usage graph here. \end{itemize} -\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Direct3D/OpenCL zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}. +\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Direct3D/OpenCL zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets of uncalibrated contexts (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}. \item \emph{\faMicrochip{} Draw CPU zones} -- Determines whether CPU zones are displayed. \begin{itemize} \item \emph{\faGhost{} Draw ghost zones} -- Controls if ghost zones should be displayed in threads which don't have any instrumented zones available. From c768068ee7898847c4a7ffcb8c3bec371e0be1ae Mon Sep 17 00:00:00 2001 From: Andrew Depke Date: Wed, 8 Jul 2020 15:42:42 -0600 Subject: [PATCH 12/37] Implemented GPU synchronization protocol --- TracyD3D12.hpp | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/TracyD3D12.hpp b/TracyD3D12.hpp index 93d4b244..680c981b 100644 --- a/TracyD3D12.hpp +++ b/TracyD3D12.hpp @@ -50,8 +50,8 @@ namespace tracy bool m_initialized = false; - ID3D12Device* m_device; - ID3D12CommandQueue* m_queue; + ID3D12Device* m_device = nullptr; + ID3D12CommandQueue* m_queue = nullptr; uint8_t m_context; Microsoft::WRL::ComPtr m_queryHeap; Microsoft::WRL::ComPtr m_readbackBuffer; @@ -65,6 +65,9 @@ namespace tracy Microsoft::WRL::ComPtr m_payloadFence; std::queue m_payloadQueue; + int64_t m_prevCalibration = 0; + int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() }; + public: D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue) : m_device(device) @@ -98,6 +101,9 @@ namespace tracy assert(false && "Failed to get queue clock calibration."); } + // Save the device cpu timestamp, not the profiler's timestamp. + m_prevCalibration = cpuTimestamp * m_qpcToNs; + cpuTimestamp = Profiler::GetTime(); D3D12_QUERY_HEAP_DESC heapDesc{}; @@ -233,6 +239,34 @@ namespace tracy } m_readbackBuffer->Unmap(0, nullptr); + + // Recalibrate to account for drift. + + uint64_t cpuTimestamp; + uint64_t gpuTimestamp; + + if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + assert(false && "Failed to get queue clock calibration."); + } + + cpuTimestamp *= m_qpcToNs; + + const auto cpuDelta = cpuTimestamp - m_prevCalibration; + if (cpuDelta > 0) + { + m_prevCalibration = cpuTimestamp; + cpuTimestamp = Profiler::GetTime(); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuCalibration); + MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp); + MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp); + MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta); + MemWrite(&item->gpuCalibration.context, m_context); + + Profiler::QueueSerialFinish(); + } } private: From a75781beaff018bdaa29ab05dfcb778984eb489e Mon Sep 17 00:00:00 2001 From: Andrew Depke Date: Wed, 8 Jul 2020 16:04:12 -0600 Subject: [PATCH 13/37] Fixed missing calibration flag --- TracyD3D12.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TracyD3D12.hpp b/TracyD3D12.hpp index 680c981b..831ee9a5 100644 --- a/TracyD3D12.hpp +++ b/TracyD3D12.hpp @@ -156,7 +156,7 @@ namespace tracy memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast(timestampFrequency)); MemWrite(&item->gpuNewContext.context, m_context); - MemWrite(&item->gpuNewContext.flags, uint8_t{ 0 }); + MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); #ifdef TRACY_ON_DEMAND From 239e77db6851791b59939a13973dc9b03129692c Mon Sep 17 00:00:00 2001 From: Andrew Depke Date: Wed, 8 Jul 2020 16:10:19 -0600 Subject: [PATCH 14/37] Updated manual --- manual/tracy.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/manual/tracy.tex b/manual/tracy.tex index 54120fef..ec04eaa1 100644 --- a/manual/tracy.tex +++ b/manual/tracy.tex @@ -1229,6 +1229,8 @@ Using GPU zones is the same as the Vulkan implementation, where the \texttt{Trac The macro \texttt{TracyD3D12NewFrame(ctx)} is used to mark a new frame, and should appear before or after recording command lists, similar to \texttt{FrameMark}. This macro is a key component that enables automatic query data synchronization, so the user doesn't have to worry about synchronizing GPU execution before invoking a collection. Event data can then be collected and sent to the profiler using the \texttt{TracyD3D12Collect(ctx)} macro. +Note that due to artifacts from dynamic frequency scaling, GPU profiling may be slightly inaccurate. To counter this, \texttt{ID3D12Device::SetStablePowerState()} can be used to enable accurate profiling, at the expense of some performance. If the machine is not in developer mode, the device will be removed upon calling. Do not use this in shipping code. + \subsubsection{OpenCL} OpenCL support is achieved by including the \texttt{tracy/TracyOpenCL.hpp} header file. Tracing OpenCL requires the creation of a Tracy OpenCL context using the macro \texttt{TracyCLContext(context, device)}, which will return an instance of \texttt{TracyCLCtx} object that must be used when creating zones. The specified \texttt{device} must be part of the \texttt{context}. Cleanup is performed using the \texttt{TracyCLDestroy(ctx)} macro. Although not common, it is possible to create multiple OpenCL contexts for the same application. From 04bc3e47c52cb972999a032e47249013d5fe1f77 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Thu, 9 Jul 2020 00:19:26 +0200 Subject: [PATCH 15/37] Update NEWS. --- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 0514cb7e..b1790ade 100644 --- a/NEWS +++ b/NEWS @@ -19,7 +19,7 @@ v0.7.1 (xxxx-xx-xx) - Fixed attachment of postponed frame images. - Source location data can be now copied to clipboard from zone info window. - Zones in find zones menu can be now grouped by zone name. -- Vulkan GPU contexts can be now calibrated. +- Vulkan and D3D12 GPU contexts can be now calibrated. v0.7 (2020-06-11) ----------------- From 0977952bde30bf0a0d31a93d8dacb9b952df64b8 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Thu, 9 Jul 2020 00:21:21 +0200 Subject: [PATCH 16/37] Update manual. --- manual/tracy.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/manual/tracy.tex b/manual/tracy.tex index ec04eaa1..6acc448a 100644 --- a/manual/tracy.tex +++ b/manual/tracy.tex @@ -1231,6 +1231,8 @@ The macro \texttt{TracyD3D12NewFrame(ctx)} is used to mark a new frame, and shou Note that due to artifacts from dynamic frequency scaling, GPU profiling may be slightly inaccurate. To counter this, \texttt{ID3D12Device::SetStablePowerState()} can be used to enable accurate profiling, at the expense of some performance. If the machine is not in developer mode, the device will be removed upon calling. Do not use this in shipping code. +Direct3D 12 contexts are always calibrated. + \subsubsection{OpenCL} OpenCL support is achieved by including the \texttt{tracy/TracyOpenCL.hpp} header file. Tracing OpenCL requires the creation of a Tracy OpenCL context using the macro \texttt{TracyCLContext(context, device)}, which will return an instance of \texttt{TracyCLCtx} object that must be used when creating zones. The specified \texttt{device} must be part of the \texttt{context}. Cleanup is performed using the \texttt{TracyCLDestroy(ctx)} macro. Although not common, it is possible to create multiple OpenCL contexts for the same application. From 1fd1d3bd6b6458faacc0f1a41040c0ca6e0e873d Mon Sep 17 00:00:00 2001 From: kubouch Date: Sat, 4 Jul 2020 21:47:49 +0300 Subject: [PATCH 17/37] Add CSV export of basic zone statistics --- csvexport/build/unix/Makefile | 12 + csvexport/build/unix/build.mk | 60 + csvexport/build/unix/debug.mk | 11 + csvexport/build/unix/release.mk | 7 + csvexport/build/win32/csvexport.vcxproj | 37 + csvexport/src/csvexport.cpp | 275 +++ csvexport/src/cxxopts.hpp | 2197 +++++++++++++++++++++++ 7 files changed, 2599 insertions(+) create mode 100644 csvexport/build/unix/Makefile create mode 100644 csvexport/build/unix/build.mk create mode 100644 csvexport/build/unix/debug.mk create mode 100644 csvexport/build/unix/release.mk create mode 100644 csvexport/build/win32/csvexport.vcxproj create mode 100644 csvexport/src/csvexport.cpp create mode 100644 csvexport/src/cxxopts.hpp diff --git a/csvexport/build/unix/Makefile b/csvexport/build/unix/Makefile new file mode 100644 index 00000000..3b50301c --- /dev/null +++ b/csvexport/build/unix/Makefile @@ -0,0 +1,12 @@ +all: debug + +debug: + @+make -f debug.mk all + +release: + @+make -f release.mk all + +clean: + @+make -f build.mk clean + +.PHONY: all clean debug release diff --git a/csvexport/build/unix/build.mk b/csvexport/build/unix/build.mk new file mode 100644 index 00000000..a7a67460 --- /dev/null +++ b/csvexport/build/unix/build.mk @@ -0,0 +1,60 @@ +CFLAGS += +CXXFLAGS := $(CFLAGS) -std=gnu++17 +# DEFINES += -DTRACY_NO_STATISTICS +INCLUDES := $(shell pkg-config --cflags capstone) +LIBS := $(shell pkg-config --libs capstone) -lpthread +PROJECT := csvexport +IMAGE := $(PROJECT)-$(BUILD) + +FILTER := + +BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g') +BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g') + +SRC := $(filter-out $(FILTER),$(BASE)) +SRC2 := $(filter-out $(FILTER),$(BASE2)) + +TBB := $(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?) +ifeq ($(TBB),0) + LIBS += -ltbb +endif + +OBJDIRBASE := obj/$(BUILD) +OBJDIR := $(OBJDIRBASE)/o/o/o + +OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o)) +OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o)) + +all: $(IMAGE) + +$(OBJDIR)/%.o: %.cpp + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@ + +$(OBJDIR)/%.d : %.cpp + @echo Resolving dependencies of $< + @mkdir -p $(@D) + @$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \ + sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +$(OBJDIR)/%.o: %.c + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + +$(OBJDIR)/%.d : %.c + @echo Resolving dependencies of $< + @mkdir -p $(@D) + @$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \ + sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +$(IMAGE): $(OBJ) $(OBJ2) + $(CXX) $(CXXFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(LIBS) -o $@ + +ifneq "$(MAKECMDGOALS)" "clean" +-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) $(addprefix $(OBJDIR)/,$(SRC2:.c=.d)) +endif + +clean: + rm -rf $(OBJDIRBASE) $(IMAGE)* + +.PHONY: clean all diff --git a/csvexport/build/unix/debug.mk b/csvexport/build/unix/debug.mk new file mode 100644 index 00000000..04d925a6 --- /dev/null +++ b/csvexport/build/unix/debug.mk @@ -0,0 +1,11 @@ +ARCH := $(shell uname -m) + +CFLAGS := -g3 -Wall +DEFINES := -DDEBUG +BUILD := debug + +ifeq ($(ARCH),x86_64) +CFLAGS += -msse4.1 +endif + +include build.mk diff --git a/csvexport/build/unix/release.mk b/csvexport/build/unix/release.mk new file mode 100644 index 00000000..b59abd5c --- /dev/null +++ b/csvexport/build/unix/release.mk @@ -0,0 +1,7 @@ +ARCH := $(shell uname -m) + +CFLAGS := -O3 -s -march=native +DEFINES := -DNDEBUG +BUILD := release + +include build.mk diff --git a/csvexport/build/win32/csvexport.vcxproj b/csvexport/build/win32/csvexport.vcxproj new file mode 100644 index 00000000..87c3cfe2 --- /dev/null +++ b/csvexport/build/win32/csvexport.vcxproj @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/csvexport/src/csvexport.cpp b/csvexport/src/csvexport.cpp new file mode 100644 index 00000000..8b07f485 --- /dev/null +++ b/csvexport/src/csvexport.cpp @@ -0,0 +1,275 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../../server/TracyFileRead.hpp" +#include "../../server/TracyWorker.hpp" +#include "cxxopts.hpp" + +struct Args { + std::string filter; + std::string separator; + std::string trace_file; + bool case_sensitive; + bool self_time; + bool unwrap; +}; + +Args parse_args(int argc, char** argv) +{ + cxxopts::Options options( + "extract", + "Extract statistics from a trace to a CSV format" + ); + + std::string filter; + std::string separator; + std::string trace_file; + bool case_sensitive = false; + bool self_time = false; + bool unwrap = false; + + options.add_options() + ("h,help", "Print usage") + ("f,filter", "Filter zone names", + cxxopts::value(filter)->default_value("")) + ("s,separator", "CSV separator", + cxxopts::value(separator)->default_value(",")) + ("t,trace", "same as ", + cxxopts::value(trace_file)) + ("case", "Case sensitive filtering", + cxxopts::value(case_sensitive)) + ("self", "Get self times", + cxxopts::value(self_time)) + ("unwrap", "Report each zone event", + cxxopts::value(unwrap)) + ; + + options.positional_help(""); + options.parse_positional("trace"); + auto result = options.parse(argc, argv); + if (result.count("help")) + { + fprintf(stderr, "%s\n", options.help().data()); + exit(0); + } + + if (result.count("trace") == 0) + { + fprintf(stderr, "Requires a trace file"); + exit(1); + } + + return Args { + filter, separator, trace_file, case_sensitive, self_time, unwrap + }; +} + +bool is_substring( + const std::string term, + const std::string s, + bool case_sensitive = false +){ + std::string new_term = term; + std::string new_s = s; + + if (!case_sensitive) { + std::transform( + new_term.begin(), + new_term.end(), + new_term.begin(), + [](unsigned char c){ return std::tolower(c); } + ); + + std::transform( + new_s.begin(), + new_s.end(), + new_s.begin(), + [](unsigned char c){ return std::tolower(c); } + ); + } + + return new_s.find(new_term) != std::string::npos; +} + +const char* get_name(int32_t id, const tracy::Worker& worker) +{ + auto& srcloc = worker.GetSourceLocation(id); + return worker.GetString(srcloc.name.active ? srcloc.name : srcloc.function); +} + +template +std::string join(const T& v, std::string sep) { + std::ostringstream s; + for (const auto& i : v) { + if (&i != &v[0]) { + s << sep; + } + s << i; + } + return s.str(); +} + +// From TracyView.cpp +int64_t GetZoneChildTimeFast( + const tracy::Worker& worker, + const tracy::ZoneEvent& zone +){ + int64_t time = 0; + if( zone.HasChildren() ) + { + auto& children = worker.GetZoneChildren( zone.Child() ); + if( children.is_magic() ) + { + auto& vec = *(tracy::Vector*)&children; + for( auto& v : vec ) + { + assert( v.IsEndValid() ); + time += v.End() - v.Start(); + } + } + else + { + for( auto& v : children ) + { + assert( v->IsEndValid() ); + time += v->End() - v->Start(); + } + } + } + return time; +} + +int main(int argc, char** argv) +{ + Args args = parse_args(argc, argv); + + auto f = std::unique_ptr( + tracy::FileRead::Open(args.trace_file.data()) + ); + if (!f) + { + fprintf(stderr, "Could not open file %s\n", args.trace_file.data()); + return 1; + } + + auto worker = tracy::Worker(*f); + + while (!worker.AreSourceLocationZonesReady()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + auto& slz = worker.GetSourceLocationZones(); + tracy::Vector slz_selected; + slz_selected.reserve(slz.size()); + + uint32_t total_cnt = 0; + for(auto it = slz.begin(); it != slz.end(); ++it) + { + if(it->second.total != 0) + { + ++total_cnt; + if(args.filter.empty()) + { + slz_selected.push_back_no_space_check(it); + } + else + { + auto name = get_name(it->first, worker); + if(is_substring(args.filter, name, args.case_sensitive)) + { + slz_selected.push_back_no_space_check(it); + } + } + } + } + + std::vector columns; + if (args.unwrap) + { + columns = { + "name", "src_file", "src_line", "ns_since_start", "exec_time_ns" + }; + } + else + { + columns = { + "name", "src_file", "src_line", "total_ns", "total_perc", + "counts", "mean_ns", "min_ns", "max_ns", "std_ns" + }; + } + std::string header = join(columns, args.separator); + printf("%s\n", header.data()); + + const auto last_time = worker.GetLastTime(); + for(auto& it : slz_selected) + { + std::vector values(columns.size()); + + values[0] = get_name(it->first, worker); + + const auto& srcloc = worker.GetSourceLocation(it->first); + values[1] = worker.GetString(srcloc.file); + values[2] = std::to_string(srcloc.line); + + const auto& zone_data = it->second; + + if (args.unwrap) + { + int i = 0; + for (const auto& zone_thread_data : zone_data.zones) { + const auto zone_event = zone_thread_data.Zone(); + const auto start = zone_event->Start(); + const auto end = zone_event->End(); + + values[3] = std::to_string(start); + + auto timespan = end - start; + if (args.self_time) { + timespan -= GetZoneChildTimeFast(worker, *zone_event); + } + values[4] = std::to_string(timespan); + + std::string row = join(values, args.separator); + printf("%s\n", row.data()); + } + } + else + { + const auto time = args.self_time ? zone_data.selfTotal : zone_data.total; + values[3] = std::to_string(time); + values[4] = std::to_string(100. * time / last_time); + + values[5] = std::to_string(zone_data.zones.size()); + + const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total) + / zone_data.zones.size(); + values[6] = std::to_string(avg); + + const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min; + const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max; + values[7] = std::to_string(tmin); + values[8] = std::to_string(tmax); + + const auto sz = zone_data.zones.size(); + const auto ss = zone_data.sumSq + - 2. * zone_data.total * avg + + avg * avg * sz; + const auto std = sqrt(ss / (sz - 1)); + values[9] = std::to_string(std); + + std::string row = join(values, args.separator); + printf("%s\n", row.data()); + } + } + + return 0; +} diff --git a/csvexport/src/cxxopts.hpp b/csvexport/src/cxxopts.hpp new file mode 100644 index 00000000..97381a96 --- /dev/null +++ b/csvexport/src/cxxopts.hpp @@ -0,0 +1,2197 @@ +/* + +Copyright (c) 2014, 2015, 2016, 2017 Jarryd Beck + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifndef CXXOPTS_HPP_INCLUDED +#define CXXOPTS_HPP_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cpp_lib_optional +#include +#define CXXOPTS_HAS_OPTIONAL +#endif + +#ifndef CXXOPTS_VECTOR_DELIMITER +#define CXXOPTS_VECTOR_DELIMITER ',' +#endif + +#define CXXOPTS__VERSION_MAJOR 2 +#define CXXOPTS__VERSION_MINOR 2 +#define CXXOPTS__VERSION_PATCH 0 + +namespace cxxopts +{ + static constexpr struct { + uint8_t major, minor, patch; + } version = { + CXXOPTS__VERSION_MAJOR, + CXXOPTS__VERSION_MINOR, + CXXOPTS__VERSION_PATCH + }; +} // namespace cxxopts + +//when we ask cxxopts to use Unicode, help strings are processed using ICU, +//which results in the correct lengths being computed for strings when they +//are formatted for the help output +//it is necessary to make sure that can be found by the +//compiler, and that icu-uc is linked in to the binary. + +#ifdef CXXOPTS_USE_UNICODE +#include + +namespace cxxopts +{ + typedef icu::UnicodeString String; + + inline + String + toLocalString(std::string s) + { + return icu::UnicodeString::fromUTF8(std::move(s)); + } + + class UnicodeStringIterator : public + std::iterator + { + public: + + UnicodeStringIterator(const icu::UnicodeString* string, int32_t pos) + : s(string) + , i(pos) + { + } + + value_type + operator*() const + { + return s->char32At(i); + } + + bool + operator==(const UnicodeStringIterator& rhs) const + { + return s == rhs.s && i == rhs.i; + } + + bool + operator!=(const UnicodeStringIterator& rhs) const + { + return !(*this == rhs); + } + + UnicodeStringIterator& + operator++() + { + ++i; + return *this; + } + + UnicodeStringIterator + operator+(int32_t v) + { + return UnicodeStringIterator(s, i + v); + } + + private: + const icu::UnicodeString* s; + int32_t i; + }; + + inline + String& + stringAppend(String&s, String a) + { + return s.append(std::move(a)); + } + + inline + String& + stringAppend(String& s, int n, UChar32 c) + { + for (int i = 0; i != n; ++i) + { + s.append(c); + } + + return s; + } + + template + String& + stringAppend(String& s, Iterator begin, Iterator end) + { + while (begin != end) + { + s.append(*begin); + ++begin; + } + + return s; + } + + inline + size_t + stringLength(const String& s) + { + return s.length(); + } + + inline + std::string + toUTF8String(const String& s) + { + std::string result; + s.toUTF8String(result); + + return result; + } + + inline + bool + empty(const String& s) + { + return s.isEmpty(); + } +} + +namespace std +{ + inline + cxxopts::UnicodeStringIterator + begin(const icu::UnicodeString& s) + { + return cxxopts::UnicodeStringIterator(&s, 0); + } + + inline + cxxopts::UnicodeStringIterator + end(const icu::UnicodeString& s) + { + return cxxopts::UnicodeStringIterator(&s, s.length()); + } +} + +//ifdef CXXOPTS_USE_UNICODE +#else + +namespace cxxopts +{ + typedef std::string String; + + template + T + toLocalString(T&& t) + { + return std::forward(t); + } + + inline + size_t + stringLength(const String& s) + { + return s.length(); + } + + inline + String& + stringAppend(String&s, const String& a) + { + return s.append(a); + } + + inline + String& + stringAppend(String& s, size_t n, char c) + { + return s.append(n, c); + } + + template + String& + stringAppend(String& s, Iterator begin, Iterator end) + { + return s.append(begin, end); + } + + template + std::string + toUTF8String(T&& t) + { + return std::forward(t); + } + + inline + bool + empty(const std::string& s) + { + return s.empty(); + } +} // namespace cxxopts + +//ifdef CXXOPTS_USE_UNICODE +#endif + +namespace cxxopts +{ + namespace + { +#ifdef _WIN32 + const std::string LQUOTE("\'"); + const std::string RQUOTE("\'"); +#else + const std::string LQUOTE("‘"); + const std::string RQUOTE("’"); +#endif + } // namespace + + class Value : public std::enable_shared_from_this + { + public: + + virtual ~Value() = default; + + virtual + std::shared_ptr + clone() const = 0; + + virtual void + parse(const std::string& text) const = 0; + + virtual void + parse() const = 0; + + virtual bool + has_default() const = 0; + + virtual bool + is_container() const = 0; + + virtual bool + has_implicit() const = 0; + + virtual std::string + get_default_value() const = 0; + + virtual std::string + get_implicit_value() const = 0; + + virtual std::shared_ptr + default_value(const std::string& value) = 0; + + virtual std::shared_ptr + implicit_value(const std::string& value) = 0; + + virtual std::shared_ptr + no_implicit_value() = 0; + + virtual bool + is_boolean() const = 0; + }; + + class OptionException : public std::exception + { + public: + explicit OptionException(std::string message) + : m_message(std::move(message)) + { + } + + const char* + what() const noexcept override + { + return m_message.c_str(); + } + + private: + std::string m_message; + }; + + class OptionSpecException : public OptionException + { + public: + + explicit OptionSpecException(const std::string& message) + : OptionException(message) + { + } + }; + + class OptionParseException : public OptionException + { + public: + explicit OptionParseException(const std::string& message) + : OptionException(message) + { + } + }; + + class option_exists_error : public OptionSpecException + { + public: + explicit option_exists_error(const std::string& option) + : OptionSpecException("Option " + LQUOTE + option + RQUOTE + " already exists") + { + } + }; + + class invalid_option_format_error : public OptionSpecException + { + public: + explicit invalid_option_format_error(const std::string& format) + : OptionSpecException("Invalid option format " + LQUOTE + format + RQUOTE) + { + } + }; + + class option_syntax_exception : public OptionParseException { + public: + explicit option_syntax_exception(const std::string& text) + : OptionParseException("Argument " + LQUOTE + text + RQUOTE + + " starts with a - but has incorrect syntax") + { + } + }; + + class option_not_exists_exception : public OptionParseException + { + public: + explicit option_not_exists_exception(const std::string& option) + : OptionParseException("Option " + LQUOTE + option + RQUOTE + " does not exist") + { + } + }; + + class missing_argument_exception : public OptionParseException + { + public: + explicit missing_argument_exception(const std::string& option) + : OptionParseException( + "Option " + LQUOTE + option + RQUOTE + " is missing an argument" + ) + { + } + }; + + class option_requires_argument_exception : public OptionParseException + { + public: + explicit option_requires_argument_exception(const std::string& option) + : OptionParseException( + "Option " + LQUOTE + option + RQUOTE + " requires an argument" + ) + { + } + }; + + class option_not_has_argument_exception : public OptionParseException + { + public: + option_not_has_argument_exception + ( + const std::string& option, + const std::string& arg + ) + : OptionParseException( + "Option " + LQUOTE + option + RQUOTE + + " does not take an argument, but argument " + + LQUOTE + arg + RQUOTE + " given" + ) + { + } + }; + + class option_not_present_exception : public OptionParseException + { + public: + explicit option_not_present_exception(const std::string& option) + : OptionParseException("Option " + LQUOTE + option + RQUOTE + " not present") + { + } + }; + + class argument_incorrect_type : public OptionParseException + { + public: + explicit argument_incorrect_type + ( + const std::string& arg + ) + : OptionParseException( + "Argument " + LQUOTE + arg + RQUOTE + " failed to parse" + ) + { + } + }; + + class option_required_exception : public OptionParseException + { + public: + explicit option_required_exception(const std::string& option) + : OptionParseException( + "Option " + LQUOTE + option + RQUOTE + " is required but not present" + ) + { + } + }; + + template + void throw_or_mimic(const std::string& text) + { + static_assert(std::is_base_of::value, + "throw_or_mimic only works on std::exception and " + "deriving classes"); + +#ifndef CXXOPTS_NO_EXCEPTIONS + // If CXXOPTS_NO_EXCEPTIONS is not defined, just throw + throw T{text}; +#else + // Otherwise manually instantiate the exception, print what() to stderr, + // and exit + T exception{text}; + std::cerr << exception.what() << std::endl; + std::exit(EXIT_FAILURE); +#endif + } + + namespace values + { + namespace + { + std::basic_regex integer_pattern + ("(-)?(0x)?([0-9a-zA-Z]+)|((0x)?0)"); + std::basic_regex truthy_pattern + ("(t|T)(rue)?|1"); + std::basic_regex falsy_pattern + ("(f|F)(alse)?|0"); + } // namespace + + namespace detail + { + template + struct SignedCheck; + + template + struct SignedCheck + { + template + void + operator()(bool negative, U u, const std::string& text) + { + if (negative) + { + if (u > static_cast((std::numeric_limits::min)())) + { + throw_or_mimic(text); + } + } + else + { + if (u > static_cast((std::numeric_limits::max)())) + { + throw_or_mimic(text); + } + } + } + }; + + template + struct SignedCheck + { + template + void + operator()(bool, U, const std::string&) {} + }; + + template + void + check_signed_range(bool negative, U value, const std::string& text) + { + SignedCheck::is_signed>()(negative, value, text); + } + } // namespace detail + + template + R + checked_negate(T&& t, const std::string&, std::true_type) + { + // if we got to here, then `t` is a positive number that fits into + // `R`. So to avoid MSVC C4146, we first cast it to `R`. + // See https://github.com/jarro2783/cxxopts/issues/62 for more details. + return static_cast(-static_cast(t-1)-1); + } + + template + T + checked_negate(T&& t, const std::string& text, std::false_type) + { + throw_or_mimic(text); + return t; + } + + template + void + integer_parser(const std::string& text, T& value) + { + std::smatch match; + std::regex_match(text, match, integer_pattern); + + if (match.length() == 0) + { + throw_or_mimic(text); + } + + if (match.length(4) > 0) + { + value = 0; + return; + } + + using US = typename std::make_unsigned::type; + + constexpr bool is_signed = std::numeric_limits::is_signed; + const bool negative = match.length(1) > 0; + const uint8_t base = match.length(2) > 0 ? 16 : 10; + + auto value_match = match[3]; + + US result = 0; + + for (auto iter = value_match.first; iter != value_match.second; ++iter) + { + US digit = 0; + + if (*iter >= '0' && *iter <= '9') + { + digit = static_cast(*iter - '0'); + } + else if (base == 16 && *iter >= 'a' && *iter <= 'f') + { + digit = static_cast(*iter - 'a' + 10); + } + else if (base == 16 && *iter >= 'A' && *iter <= 'F') + { + digit = static_cast(*iter - 'A' + 10); + } + else + { + throw_or_mimic(text); + } + + const US next = static_cast(result * base + digit); + if (result > next) + { + throw_or_mimic(text); + } + + result = next; + } + + detail::check_signed_range(negative, result, text); + + if (negative) + { + value = checked_negate(result, + text, + std::integral_constant()); + } + else + { + value = static_cast(result); + } + } + + template + void stringstream_parser(const std::string& text, T& value) + { + std::stringstream in(text); + in >> value; + if (!in) { + throw_or_mimic(text); + } + } + + inline + void + parse_value(const std::string& text, uint8_t& value) + { + integer_parser(text, value); + } + + inline + void + parse_value(const std::string& text, int8_t& value) + { + integer_parser(text, value); + } + + inline + void + parse_value(const std::string& text, uint16_t& value) + { + integer_parser(text, value); + } + + inline + void + parse_value(const std::string& text, int16_t& value) + { + integer_parser(text, value); + } + + inline + void + parse_value(const std::string& text, uint32_t& value) + { + integer_parser(text, value); + } + + inline + void + parse_value(const std::string& text, int32_t& value) + { + integer_parser(text, value); + } + + inline + void + parse_value(const std::string& text, uint64_t& value) + { + integer_parser(text, value); + } + + inline + void + parse_value(const std::string& text, int64_t& value) + { + integer_parser(text, value); + } + + inline + void + parse_value(const std::string& text, bool& value) + { + std::smatch result; + std::regex_match(text, result, truthy_pattern); + + if (!result.empty()) + { + value = true; + return; + } + + std::regex_match(text, result, falsy_pattern); + if (!result.empty()) + { + value = false; + return; + } + + throw_or_mimic(text); + } + + inline + void + parse_value(const std::string& text, std::string& value) + { + value = text; + } + + // The fallback parser. It uses the stringstream parser to parse all types + // that have not been overloaded explicitly. It has to be placed in the + // source code before all other more specialized templates. + template + void + parse_value(const std::string& text, T& value) { + stringstream_parser(text, value); + } + + template + void + parse_value(const std::string& text, std::vector& value) + { + std::stringstream in(text); + std::string token; + while(!in.eof() && std::getline(in, token, CXXOPTS_VECTOR_DELIMITER)) { + T v; + parse_value(token, v); + value.emplace_back(std::move(v)); + } + } + +#ifdef CXXOPTS_HAS_OPTIONAL + template + void + parse_value(const std::string& text, std::optional& value) + { + T result; + parse_value(text, result); + value = std::move(result); + } +#endif + + inline + void parse_value(const std::string& text, char& c) + { + if (text.length() != 1) + { + throw_or_mimic(text); + } + + c = text[0]; + } + + template + struct type_is_container + { + static constexpr bool value = false; + }; + + template + struct type_is_container> + { + static constexpr bool value = true; + }; + + template + class abstract_value : public Value + { + using Self = abstract_value; + + public: + abstract_value() + : m_result(std::make_shared()) + , m_store(m_result.get()) + { + } + + explicit abstract_value(T* t) + : m_store(t) + { + } + + ~abstract_value() override = default; + + abstract_value(const abstract_value& rhs) + { + if (rhs.m_result) + { + m_result = std::make_shared(); + m_store = m_result.get(); + } + else + { + m_store = rhs.m_store; + } + + m_default = rhs.m_default; + m_implicit = rhs.m_implicit; + m_default_value = rhs.m_default_value; + m_implicit_value = rhs.m_implicit_value; + } + + void + parse(const std::string& text) const override + { + parse_value(text, *m_store); + } + + bool + is_container() const override + { + return type_is_container::value; + } + + void + parse() const override + { + parse_value(m_default_value, *m_store); + } + + bool + has_default() const override + { + return m_default; + } + + bool + has_implicit() const override + { + return m_implicit; + } + + std::shared_ptr + default_value(const std::string& value) override + { + m_default = true; + m_default_value = value; + return shared_from_this(); + } + + std::shared_ptr + implicit_value(const std::string& value) override + { + m_implicit = true; + m_implicit_value = value; + return shared_from_this(); + } + + std::shared_ptr + no_implicit_value() override + { + m_implicit = false; + return shared_from_this(); + } + + std::string + get_default_value() const override + { + return m_default_value; + } + + std::string + get_implicit_value() const override + { + return m_implicit_value; + } + + bool + is_boolean() const override + { + return std::is_same::value; + } + + const T& + get() const + { + if (m_store == nullptr) + { + return *m_result; + } + return *m_store; + } + + protected: + std::shared_ptr m_result; + T* m_store; + + bool m_default = false; + bool m_implicit = false; + + std::string m_default_value; + std::string m_implicit_value; + }; + + template + class standard_value : public abstract_value + { + public: + using abstract_value::abstract_value; + + std::shared_ptr + clone() const + { + return std::make_shared>(*this); + } + }; + + template <> + class standard_value : public abstract_value + { + public: + ~standard_value() override = default; + + standard_value() + { + set_default_and_implicit(); + } + + explicit standard_value(bool* b) + : abstract_value(b) + { + set_default_and_implicit(); + } + + std::shared_ptr + clone() const override + { + return std::make_shared>(*this); + } + + private: + + void + set_default_and_implicit() + { + m_default = true; + m_default_value = "false"; + m_implicit = true; + m_implicit_value = "true"; + } + }; + } // namespace values + + template + std::shared_ptr + value() + { + return std::make_shared>(); + } + + template + std::shared_ptr + value(T& t) + { + return std::make_shared>(&t); + } + + class OptionAdder; + + class OptionDetails + { + public: + OptionDetails + ( + std::string short_, + std::string long_, + String desc, + std::shared_ptr val + ) + : m_short(std::move(short_)) + , m_long(std::move(long_)) + , m_desc(std::move(desc)) + , m_value(std::move(val)) + , m_count(0) + { + } + + OptionDetails(const OptionDetails& rhs) + : m_desc(rhs.m_desc) + , m_count(rhs.m_count) + { + m_value = rhs.m_value->clone(); + } + + OptionDetails(OptionDetails&& rhs) = default; + + const String& + description() const + { + return m_desc; + } + + const Value& value() const { + return *m_value; + } + + std::shared_ptr + make_storage() const + { + return m_value->clone(); + } + + const std::string& + short_name() const + { + return m_short; + } + + const std::string& + long_name() const + { + return m_long; + } + + private: + std::string m_short; + std::string m_long; + String m_desc; + std::shared_ptr m_value; + int m_count; + }; + + struct HelpOptionDetails + { + std::string s; + std::string l; + String desc; + bool has_default; + std::string default_value; + bool has_implicit; + std::string implicit_value; + std::string arg_help; + bool is_container; + bool is_boolean; + }; + + struct HelpGroupDetails + { + std::string name; + std::string description; + std::vector options; + }; + + class OptionValue + { + public: + void + parse + ( + const std::shared_ptr& details, + const std::string& text + ) + { + ensure_value(details); + ++m_count; + m_value->parse(text); + } + + void + parse_default(const std::shared_ptr& details) + { + ensure_value(details); + m_default = true; + m_value->parse(); + } + + size_t + count() const noexcept + { + return m_count; + } + + // TODO: maybe default options should count towards the number of arguments + bool + has_default() const noexcept + { + return m_default; + } + + template + const T& + as() const + { + if (m_value == nullptr) { + throw_or_mimic("No value"); + } + +#ifdef CXXOPTS_NO_RTTI + return static_cast&>(*m_value).get(); +#else + return dynamic_cast&>(*m_value).get(); +#endif + } + + private: + void + ensure_value(const std::shared_ptr& details) + { + if (m_value == nullptr) + { + m_value = details->make_storage(); + } + } + + std::shared_ptr m_value; + size_t m_count = 0; + bool m_default = false; + }; + + class KeyValue + { + public: + KeyValue(std::string key_, std::string value_) + : m_key(std::move(key_)) + , m_value(std::move(value_)) + { + } + + const + std::string& + key() const + { + return m_key; + } + + const + std::string& + value() const + { + return m_value; + } + + template + T + as() const + { + T result; + values::parse_value(m_value, result); + return result; + } + + private: + std::string m_key; + std::string m_value; + }; + + class ParseResult + { + public: + + ParseResult( + std::shared_ptr< + std::unordered_map> + >, + std::vector, + bool allow_unrecognised, + int&, char**&); + + size_t + count(const std::string& o) const + { + auto iter = m_options->find(o); + if (iter == m_options->end()) + { + return 0; + } + + auto riter = m_results.find(iter->second); + + return riter->second.count(); + } + + const OptionValue& + operator[](const std::string& option) const + { + auto iter = m_options->find(option); + + if (iter == m_options->end()) + { + throw_or_mimic(option); + } + + auto riter = m_results.find(iter->second); + + return riter->second; + } + + const std::vector& + arguments() const + { + return m_sequential; + } + + private: + + void + parse(int& argc, char**& argv); + + void + add_to_option(const std::string& option, const std::string& arg); + + bool + consume_positional(const std::string& a); + + void + parse_option + ( + const std::shared_ptr& value, + const std::string& name, + const std::string& arg = "" + ); + + void + parse_default(const std::shared_ptr& details); + + void + checked_parse_arg + ( + int argc, + char* argv[], + int& current, + const std::shared_ptr& value, + const std::string& name + ); + + const std::shared_ptr< + std::unordered_map> + > m_options; + std::vector m_positional; + std::vector::iterator m_next_positional; + std::unordered_set m_positional_set; + std::unordered_map, OptionValue> m_results; + + bool m_allow_unrecognised; + + std::vector m_sequential; + }; + + struct Option + { + Option + ( + std::string opts, + std::string desc, + std::shared_ptr value = ::cxxopts::value(), + std::string arg_help = "" + ) + : opts_(std::move(opts)) + , desc_(std::move(desc)) + , value_(std::move(value)) + , arg_help_(std::move(arg_help)) + { + } + + std::string opts_; + std::string desc_; + std::shared_ptr value_; + std::string arg_help_; + }; + + class Options + { + using OptionMap = std::unordered_map>; + public: + + explicit Options(std::string program, std::string help_string = "") + : m_program(std::move(program)) + , m_help_string(toLocalString(std::move(help_string))) + , m_custom_help("[OPTION...]") + , m_positional_help("positional parameters") + , m_show_positional(false) + , m_allow_unrecognised(false) + , m_options(std::make_shared()) + , m_next_positional(m_positional.end()) + { + } + + Options& + positional_help(std::string help_text) + { + m_positional_help = std::move(help_text); + return *this; + } + + Options& + custom_help(std::string help_text) + { + m_custom_help = std::move(help_text); + return *this; + } + + Options& + show_positional_help() + { + m_show_positional = true; + return *this; + } + + Options& + allow_unrecognised_options() + { + m_allow_unrecognised = true; + return *this; + } + + ParseResult + parse(int& argc, char**& argv); + + OptionAdder + add_options(std::string group = ""); + + void + add_options + ( + const std::string& group, + std::initializer_list