From e606c75695aa4c27328f3b466f7ccd4c27003601 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Sat, 29 May 2021 12:01:03 +0200 Subject: [PATCH 01/11] Don't require GL headers if tracing is disabled. --- TracyOpenGL.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TracyOpenGL.hpp b/TracyOpenGL.hpp index b30457e9..8a487a33 100644 --- a/TracyOpenGL.hpp +++ b/TracyOpenGL.hpp @@ -1,10 +1,6 @@ #ifndef __TRACYOPENGL_HPP__ #define __TRACYOPENGL_HPP__ -#if !defined GL_TIMESTAMP && !defined GL_TIMESTAMP_EXT -# error "You must include OpenGL 3.2 headers before including TracyOpenGL.hpp" -#endif - #if !defined TRACY_ENABLE || defined __APPLE__ #define TracyGpuContext @@ -35,6 +31,10 @@ public: #else +#if !defined GL_TIMESTAMP && !defined GL_TIMESTAMP_EXT +# error "You must include OpenGL 3.2 headers before including TracyOpenGL.hpp" +#endif + #include #include #include From 629b6d88bb91e00c25c1e2b50642d4bd2b4f7c11 Mon Sep 17 00:00:00 2001 From: Timo Suoranta Date: Thu, 15 Apr 2021 09:58:25 +0300 Subject: [PATCH 02/11] Provide CMake TracyClient header only library --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..2189e11d --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,4 @@ +cmake_minimum_required(VERSION 3.10) +project(TracyClient LANGUAGES CXX) +add_library(TracyClient INTERFACE) +target_include_directories(TracyClient INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) From f6eb909152cc1a236b3914a2909dbd8070691aaf Mon Sep 17 00:00:00 2001 From: Timo Suoranta Date: Sat, 29 May 2021 17:09:01 +0300 Subject: [PATCH 03/11] Documentation for CMake FetchContent support --- manual/tracy.tex | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/manual/tracy.tex b/manual/tracy.tex index 202f77c6..d2eaaf95 100644 --- a/manual/tracy.tex +++ b/manual/tracy.tex @@ -423,6 +423,33 @@ The application you want to profile should be compiled with all the usual optimi Finally, on Unix make sure that the application is linked with libraries \texttt{libpthread} and \texttt{libdl}. BSD systems will also need to be linked with \texttt{libexecinfo}. + +\begin{bclogo}[ +noborder=true, +couleur=black!5, +logo=\bclampe +]{CMake FetchContent} +When using CMake 3.11 or newer, you can use Tracy via CMake FetchContent. In this case, you do not need to manually add a git submodule for Tracy. Add this to your CMakeLists.txt: + +\begin{lstlisting} +FetchContent_Declare( + tracy + GIT_REPOSITORY https://github.com/wolfpld/tracy.git + GIT_TAG master + GIT_SHALLOW TRUE + GIT_PROGRESS TRUE +) + +FetchContent_MakeAvailable(tracy) +\end{lstlisting} + +Then add this to any target where you use tracy for profiling: + +\begin{lstlisting} +target_link_libraries(${_target} PUBLIC TracyClient) +\end{lstlisting} +\end{bclogo} + \subsubsection{Short-lived applications} In case you want to profile a short-lived program (for example, a compression utility that finishes its work in one second), set the \texttt{TRACY\_NO\_EXIT} environment variable to $1$. With this option enabled, Tracy will not exit until an incoming connection is made, even if the application has already finished executing. If your platform doesn't support easy setup of environment variables, you may also add the \texttt{TRACY\_NO\_EXIT} define to your build configuration, which has the same effect. From 3feb2473a2042337db65d3ff4c0dd17c8e69430f Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Sun, 30 May 2021 13:38:29 +0200 Subject: [PATCH 04/11] Fix rpmalloc on ios. https://github.com/mjansson/rpmalloc/issues/146 --- client/tracy_rpmalloc.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/client/tracy_rpmalloc.cpp b/client/tracy_rpmalloc.cpp index 8aae78e0..c2628d79 100644 --- a/client/tracy_rpmalloc.cpp +++ b/client/tracy_rpmalloc.cpp @@ -130,7 +130,9 @@ # include # include # if defined(__APPLE__) -# include +# if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR +# include +# endif # include # include # endif From c41473b4459cd3c29e4e8e1ffdd6fcac067d54eb Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Mon, 31 May 2021 02:12:16 +0200 Subject: [PATCH 05/11] Cosmetics. --- TracyD3D11.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TracyD3D11.hpp b/TracyD3D11.hpp index 712e1a17..da2c5004 100644 --- a/TracyD3D11.hpp +++ b/TracyD3D11.hpp @@ -271,7 +271,7 @@ public: MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); - + Profiler::QueueSerialFinish(); } @@ -376,7 +376,7 @@ public: MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() ); - + Profiler::QueueSerialFinish(); } From 94ec6a0d9d0880824fd54a7aeef8924e65abaee7 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Mon, 31 May 2021 02:19:35 +0200 Subject: [PATCH 06/11] Move TracyYield.hpp to common. --- {server => common}/TracyYield.hpp | 2 +- profiler/build/win32/Tracy.vcxproj | 2 +- profiler/build/win32/Tracy.vcxproj.filters | 6 +++--- server/TracyFileRead.hpp | 2 +- server/TracyWorker.cpp | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) rename {server => common}/TracyYield.hpp (89%) diff --git a/server/TracyYield.hpp b/common/TracyYield.hpp similarity index 89% rename from server/TracyYield.hpp rename to common/TracyYield.hpp index 4eb3851a..403ca29c 100644 --- a/server/TracyYield.hpp +++ b/common/TracyYield.hpp @@ -7,7 +7,7 @@ # include #endif -#include "../common/TracyForceInline.hpp" +#include "TracyForceInline.hpp" namespace tracy { diff --git a/profiler/build/win32/Tracy.vcxproj b/profiler/build/win32/Tracy.vcxproj index b6dac3d5..e059b5d0 100644 --- a/profiler/build/win32/Tracy.vcxproj +++ b/profiler/build/win32/Tracy.vcxproj @@ -185,6 +185,7 @@ + @@ -236,7 +237,6 @@ - diff --git a/profiler/build/win32/Tracy.vcxproj.filters b/profiler/build/win32/Tracy.vcxproj.filters index 2f97b54e..62d599e4 100644 --- a/profiler/build/win32/Tracy.vcxproj.filters +++ b/profiler/build/win32/Tracy.vcxproj.filters @@ -425,9 +425,6 @@ server - - server - server @@ -578,6 +575,9 @@ zstd\dictBuilder + + common + diff --git a/server/TracyFileRead.hpp b/server/TracyFileRead.hpp index 1579e205..609731b7 100644 --- a/server/TracyFileRead.hpp +++ b/server/TracyFileRead.hpp @@ -22,7 +22,7 @@ #include "TracyFileHeader.hpp" #include "TracyMmap.hpp" -#include "TracyYield.hpp" +#include "../common/TracyYield.hpp" #include "../common/tracy_lz4.hpp" #include "../common/TracyForceInline.hpp" #include "../zstd/zstd.h" diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index d4ce97cb..02161adb 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -26,13 +26,13 @@ #include "../common/TracyProtocol.hpp" #include "../common/TracySystem.hpp" +#include "../common/TracyYield.hpp" #include "TracyFileRead.hpp" #include "TracyFileWrite.hpp" #include "TracySort.hpp" #include "TracyTaskDispatch.hpp" #include "TracyVersion.hpp" #include "TracyWorker.hpp" -#include "TracyYield.hpp" namespace tracy { From 92fb197aac5174ddd4347aa89383da727c7d58d6 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Mon, 31 May 2021 02:22:13 +0200 Subject: [PATCH 07/11] Use weak compare, yield thread. --- client/TracyProfiler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index bad57ef5..3de17c07 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -64,6 +64,7 @@ #include "../common/TracyAlign.hpp" #include "../common/TracySocket.hpp" #include "../common/TracySystem.hpp" +#include "../common/TracyYield.hpp" #include "../common/tracy_lz4.hpp" #include "tracy_rpmalloc.hpp" #include "TracyCallstack.hpp" @@ -1030,7 +1031,7 @@ static ProfilerData& GetProfilerData() if( !ptr ) { int expected = 0; - while( !profilerDataLock.compare_exchange_strong( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; } + while( !profilerDataLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); } ptr = profilerData.load( std::memory_order_acquire ); if( !ptr ) { From b0fc0d5dcc921e3647763c9e9f7c333dd922885f Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Mon, 31 May 2021 02:30:59 +0200 Subject: [PATCH 08/11] Check if rpmalloc has to be initialized before each operation. The C++11 spec states in [basic.stc.thread] thread storage duration: 2. A variable with thread storage duration shall be initialized before its first odr-use (3.2) and, if constructed, shall be destroyed on thread exit. Previously Tracy relied on the TLS data being initialized: - During thread creation (MSVC). - Or during first use in a thread, but the initialization was performed for the whole TLS block. It seems that new compilers are more granular with how they perform the initialization, hence rpmalloc init has to be checked before each allocation, as it cannot be "folded" into, for example, initialization of the profiler itself. --- TracyC.h | 1 - TracyD3D11.hpp | 1 - TracyD3D12.hpp | 2 -- TracyOpenCL.hpp | 1 - TracyOpenGL.hpp | 2 +- TracyVulkan.hpp | 1 - client/TracyProfiler.cpp | 73 +++------------------------------------- client/TracyProfiler.hpp | 10 ------ common/TracyAlloc.hpp | 34 +++++++++++++++++++ common/TracySystem.cpp | 2 -- 10 files changed, 39 insertions(+), 88 deletions(-) diff --git a/TracyC.h b/TracyC.h index 3e0c3f69..ea247399 100644 --- a/TracyC.h +++ b/TracyC.h @@ -101,7 +101,6 @@ struct ___tracy_c_zone_context // This struct, as visible to user, is immutable, so treat it as if const was declared here. typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; -TRACY_API void ___tracy_init_thread(void); TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ); TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ); diff --git a/TracyD3D11.hpp b/TracyD3D11.hpp index da2c5004..8552f36f 100644 --- a/TracyD3D11.hpp +++ b/TracyD3D11.hpp @@ -389,7 +389,6 @@ private: static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx ) { - InitRPMallocThread(); auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) ); new(ctx) D3D11Ctx( device, devicectx ); return ctx; diff --git a/TracyD3D12.hpp b/TracyD3D12.hpp index 2de349f3..1c1d97dc 100644 --- a/TracyD3D12.hpp +++ b/TracyD3D12.hpp @@ -451,8 +451,6 @@ namespace tracy static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue) { - InitRPMallocThread(); - auto* ctx = static_cast(tracy_malloc(sizeof(D3D12QueueCtx))); new (ctx) D3D12QueueCtx{ device, queue }; diff --git a/TracyOpenCL.hpp b/TracyOpenCL.hpp index 393ada9a..1fd3e741 100644 --- a/TracyOpenCL.hpp +++ b/TracyOpenCL.hpp @@ -286,7 +286,6 @@ namespace tracy { static inline OpenCLCtx* CreateCLContext(cl_context context, cl_device_id device) { - InitRPMallocThread(); auto ctx = (OpenCLCtx*)tracy_malloc(sizeof(OpenCLCtx)); new (ctx) OpenCLCtx(context, device); return ctx; diff --git a/TracyOpenGL.hpp b/TracyOpenGL.hpp index 8a487a33..2dba3aa8 100644 --- a/TracyOpenGL.hpp +++ b/TracyOpenGL.hpp @@ -53,7 +53,7 @@ public: # define glQueryCounter glQueryCounterEXT #endif -#define TracyGpuContext tracy::InitRPMallocThread(); tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx; +#define TracyGpuContext tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx; #define TracyGpuContextName( name, size ) tracy::GetGpuCtx().ptr->Name( name, size ); #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK # define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active ); diff --git a/TracyVulkan.hpp b/TracyVulkan.hpp index 6885fdba..7c1dabb3 100644 --- a/TracyVulkan.hpp +++ b/TracyVulkan.hpp @@ -456,7 +456,6 @@ private: static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) { - InitRPMallocThread(); auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct ); return ctx; diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index 3de17c07..128d42d6 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -119,45 +119,6 @@ extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PR namespace tracy { -namespace -{ -# if ( defined _WIN32 || defined __CYGWIN__ ) && _WIN32_WINNT >= _WIN32_WINNT_VISTA - BOOL CALLBACK InitOnceCallback( PINIT_ONCE /*initOnce*/, PVOID /*Parameter*/, PVOID* /*Context*/) - { - rpmalloc_initialize(); - return TRUE; - } - INIT_ONCE InitOnce = INIT_ONCE_STATIC_INIT; -# elif defined __linux__ - void InitOnceCallback() - { - rpmalloc_initialize(); - } - pthread_once_t once_control = PTHREAD_ONCE_INIT; -# else - void InitOnceCallback() - { - rpmalloc_initialize(); - } - std::once_flag once_flag; -# endif -} - -struct RPMallocInit -{ - RPMallocInit() - { -# if ( defined _WIN32 || defined __CYGWIN__ ) && _WIN32_WINNT >= _WIN32_WINNT_VISTA - InitOnceExecuteOnce( &InitOnce, InitOnceCallback, nullptr, nullptr ); -# elif defined __linux__ - pthread_once( &once_control, InitOnceCallback ); -# else - std::call_once( once_flag, InitOnceCallback ); -# endif - rpmalloc_thread_initialize(); - } -}; - #ifndef TRACY_DELAYED_INIT struct InitTimeWrapper @@ -965,12 +926,6 @@ TRACY_API int64_t GetFrequencyQpc() #ifdef TRACY_DELAYED_INIT struct ThreadNameData; TRACY_API moodycamel::ConcurrentQueue& GetQueue(); -TRACY_API void InitRPMallocThread(); - -void InitRPMallocThread() -{ - RPMallocInit rpinit; -} struct ProfilerData { @@ -992,7 +947,6 @@ struct ProducerWrapper struct ProfilerThreadData { ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {} - RPMallocInit rpmalloc_init; ProducerWrapper token; GpuCtxWrapper gpuCtx; # ifdef TRACY_ON_DEMAND @@ -1004,7 +958,6 @@ struct ProfilerThreadData ProfilerData* s_profilerData = nullptr; TRACY_API void StartupProfiler() { - RPMallocInit init; s_profilerData = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); new (s_profilerData) ProfilerData(); s_profilerData->profiler.SpawnWorkerThreads(); @@ -1022,6 +975,8 @@ TRACY_API void ShutdownProfiler() rpmalloc_finalize(); } # else +std::atomic RpInitDone { 0 }; +std::atomic RpInitLock { 0 }; static std::atomic profilerDataLock { 0 }; static std::atomic profilerData { nullptr }; @@ -1035,7 +990,6 @@ static ProfilerData& GetProfilerData() ptr = profilerData.load( std::memory_order_acquire ); if( !ptr ) { - RPMallocInit init; ptr = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); new (ptr) ProfilerData(); profilerData.store( ptr, std::memory_order_release ); @@ -1072,7 +1026,6 @@ public: void* p = pthread_getspecific(m_key); if (!p) { - RPMallocInit init; p = (ProfilerThreadData*)tracy_malloc( sizeof( ProfilerThreadData ) ); new (p) ProfilerThreadData(GetProfilerData()); pthread_setspecific(m_key, p); @@ -1124,18 +1077,12 @@ namespace # endif #else -TRACY_API void InitRPMallocThread() -{ - rpmalloc_thread_initialize(); -} // MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this. // 1a. But s_queue is needed for initialization of variables in point 2. extern moodycamel::ConcurrentQueue s_queue; -thread_local RPMallocInit init_order(106) s_rpmalloc_thread_init; - // 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread. thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue ); thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) }; @@ -1148,7 +1095,8 @@ thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThr # endif static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() }; -static RPMallocInit init_order(102) s_rpmalloc_init; +std::atomic init_order(102) RpInitDone( 0 ); +std::atomic init_order(102) RpInitLock( 0 ); moodycamel::ConcurrentQueue init_order(103) s_queue( QueuePrealloc ); std::atomic init_order(104) s_lockCounter( 0 ); std::atomic init_order(104) s_gpuCtxCounter( 0 ); @@ -3614,19 +3562,6 @@ TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); } -// thread_locals are not initialized on thread creation. At least on GNU/Linux. Instead they are -// initialized on their first ODR-use. This means that the allocator is not automagically -// initialized every time a thread is created. As thus, expose to the C API users a simple API to -// call every time they create a thread. Here we can then put all sorts of per-thread -// initialization. -TRACY_API void ___tracy_init_thread(void) { -#ifdef TRACY_DELAYED_INIT - (void)tracy::GetProfilerThreadData(); -#else - (void)tracy::s_rpmalloc_thread_init; -#endif -} - #ifdef __cplusplus } #endif diff --git a/client/TracyProfiler.hpp b/client/TracyProfiler.hpp index 230835ca..99c62fd7 100644 --- a/client/TracyProfiler.hpp +++ b/client/TracyProfiler.hpp @@ -63,7 +63,6 @@ TRACY_API std::atomic& GetLockCounter(); TRACY_API std::atomic& GetGpuCtxCounter(); TRACY_API GpuCtxWrapper& GetGpuCtx(); TRACY_API uint64_t GetThreadHandle(); -TRACY_API void InitRPMallocThread(); TRACY_API bool ProfilerAvailable(); TRACY_API int64_t GetFrequencyQpc(); @@ -295,7 +294,6 @@ public: #endif if( callstack != 0 ) { - InitRPMallocThread(); tracy::GetProfiler().SendCallstack( callstack ); } @@ -315,7 +313,6 @@ public: #endif if( callstack != 0 ) { - InitRPMallocThread(); tracy::GetProfiler().SendCallstack( callstack ); } @@ -333,7 +330,6 @@ public: #endif if( callstack != 0 ) { - InitRPMallocThread(); tracy::GetProfiler().SendCallstack( callstack ); } @@ -356,7 +352,6 @@ public: #endif if( callstack != 0 ) { - InitRPMallocThread(); tracy::GetProfiler().SendCallstack( callstack ); } @@ -372,7 +367,6 @@ public: static tracy_force_inline void MessageAppInfo( const char* txt, size_t size ) { assert( size < std::numeric_limits::max() ); - InitRPMallocThread(); auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); TracyLfqPrepare( QueueType::MessageAppInfo ); @@ -423,7 +417,6 @@ public: # endif const auto thread = GetThreadHandle(); - InitRPMallocThread(); auto callstack = Callstack( depth ); profiler.m_serialLock.lock(); @@ -445,7 +438,6 @@ public: # endif const auto thread = GetThreadHandle(); - InitRPMallocThread(); auto callstack = Callstack( depth ); profiler.m_serialLock.lock(); @@ -495,7 +487,6 @@ public: # endif const auto thread = GetThreadHandle(); - InitRPMallocThread(); auto callstack = Callstack( depth ); profiler.m_serialLock.lock(); @@ -518,7 +509,6 @@ public: # endif const auto thread = GetThreadHandle(); - InitRPMallocThread(); auto callstack = Callstack( depth ); profiler.m_serialLock.lock(); diff --git a/common/TracyAlloc.hpp b/common/TracyAlloc.hpp index a3cbec05..1981c09a 100644 --- a/common/TracyAlloc.hpp +++ b/common/TracyAlloc.hpp @@ -4,15 +4,47 @@ #include #ifdef TRACY_ENABLE +# include +# include "TracyForceInline.hpp" +# include "TracyYield.hpp" # include "../client/tracy_rpmalloc.hpp" #endif namespace tracy { +#ifdef TRACY_ENABLE +extern std::atomic RpInitDone; +extern std::atomic RpInitLock; + +namespace +{ +static inline void InitRpmallocPlumbing() +{ + int expected = 0; + while( !RpInitLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); } + const auto done = RpInitDone.load( std::memory_order_acquire ); + if( !done ) + { + rpmalloc_initialize(); + RpInitDone.store( 1, std::memory_order_release ); + } + RpInitLock.store( 0, std::memory_order_release ); +} + +static tracy_force_inline void InitRpmalloc() +{ + const auto done = RpInitDone.load( std::memory_order_acquire ); + if( !done ) InitRpmallocPlumbing(); + rpmalloc_thread_initialize(); +} +} +#endif + static inline void* tracy_malloc( size_t size ) { #ifdef TRACY_ENABLE + InitRpmalloc(); return rpmalloc( size ); #else return malloc( size ); @@ -22,6 +54,7 @@ static inline void* tracy_malloc( size_t size ) static inline void tracy_free( void* ptr ) { #ifdef TRACY_ENABLE + InitRpmalloc(); rpfree( ptr ); #else free( ptr ); @@ -31,6 +64,7 @@ static inline void tracy_free( void* ptr ) static inline void* tracy_realloc( void* ptr, size_t size ) { #ifdef TRACY_ENABLE + InitRpmalloc(); return rprealloc( ptr, size ); #else return realloc( ptr, size ); diff --git a/common/TracySystem.cpp b/common/TracySystem.cpp index 3cda186c..03078fca 100644 --- a/common/TracySystem.cpp +++ b/common/TracySystem.cpp @@ -96,7 +96,6 @@ struct ThreadNameData ThreadNameData* next; }; std::atomic& GetThreadNameData(); -TRACY_API void InitRPMallocThread(); #endif #ifdef _MSC_VER @@ -161,7 +160,6 @@ TRACY_API void SetThreadName( const char* name ) #endif #ifdef TRACY_ENABLE { - InitRPMallocThread(); const auto sz = strlen( name ); char* buf = (char*)tracy_malloc( sz+1 ); memcpy( buf, name, sz ); From 3da84d1579722e77aacedf95276f78874e14d41f Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Mon, 31 May 2021 22:38:22 +0200 Subject: [PATCH 09/11] Hide rpmalloc init behind thread local boolean. --- TracyClient.cpp | 1 + client/TracyAlloc.cpp | 35 +++++++++++++++++++++++++++++++++++ client/TracyProfiler.cpp | 2 ++ common/TracyAlloc.hpp | 27 ++++----------------------- 4 files changed, 42 insertions(+), 23 deletions(-) create mode 100644 client/TracyAlloc.cpp diff --git a/TracyClient.cpp b/TracyClient.cpp index 838c3bd8..67f99cd1 100644 --- a/TracyClient.cpp +++ b/TracyClient.cpp @@ -27,6 +27,7 @@ #include "common/TracySocket.cpp" #include "client/tracy_rpmalloc.cpp" #include "client/TracyDxt1.cpp" +#include "client/TracyAlloc.cpp" #if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 # include "libbacktrace/alloc.cpp" diff --git a/client/TracyAlloc.cpp b/client/TracyAlloc.cpp new file mode 100644 index 00000000..eb94dd8d --- /dev/null +++ b/client/TracyAlloc.cpp @@ -0,0 +1,35 @@ +#ifdef TRACY_ENABLE + +#include + +#include "../common/TracyAlloc.hpp" +#include "../common/TracyYield.hpp" + +namespace tracy +{ + +extern std::atomic RpInitDone; +extern std::atomic RpInitLock; + +TRACY_API void InitRpmallocPlumbing() +{ + const auto done = RpInitDone.load( std::memory_order_acquire ); + if( !done ) + { + int expected = 0; + while( !RpInitLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); } + const auto done = RpInitDone.load( std::memory_order_acquire ); + if( !done ) + { + rpmalloc_initialize(); + RpInitDone.store( 1, std::memory_order_release ); + } + RpInitLock.store( 0, std::memory_order_release ); + } + rpmalloc_thread_initialize(); + RpThreadInitDone = true; +} + +} + +#endif diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index 128d42d6..6a722cd1 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -977,6 +977,7 @@ TRACY_API void ShutdownProfiler() # else std::atomic RpInitDone { 0 }; std::atomic RpInitLock { 0 }; +thread_local bool RpThreadInitDone = false; static std::atomic profilerDataLock { 0 }; static std::atomic profilerData { nullptr }; @@ -1097,6 +1098,7 @@ thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThr static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() }; std::atomic init_order(102) RpInitDone( 0 ); std::atomic init_order(102) RpInitLock( 0 ); +thread_local bool RpThreadInitDone = false; moodycamel::ConcurrentQueue init_order(103) s_queue( QueuePrealloc ); std::atomic init_order(104) s_lockCounter( 0 ); std::atomic init_order(104) s_gpuCtxCounter( 0 ); diff --git a/common/TracyAlloc.hpp b/common/TracyAlloc.hpp index 1981c09a..27861c97 100644 --- a/common/TracyAlloc.hpp +++ b/common/TracyAlloc.hpp @@ -4,9 +4,8 @@ #include #ifdef TRACY_ENABLE -# include +# include "TracyApi.h" # include "TracyForceInline.hpp" -# include "TracyYield.hpp" # include "../client/tracy_rpmalloc.hpp" #endif @@ -14,30 +13,12 @@ namespace tracy { #ifdef TRACY_ENABLE -extern std::atomic RpInitDone; -extern std::atomic RpInitLock; - -namespace -{ -static inline void InitRpmallocPlumbing() -{ - int expected = 0; - while( !RpInitLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); } - const auto done = RpInitDone.load( std::memory_order_acquire ); - if( !done ) - { - rpmalloc_initialize(); - RpInitDone.store( 1, std::memory_order_release ); - } - RpInitLock.store( 0, std::memory_order_release ); -} +extern thread_local bool RpThreadInitDone; +TRACY_API void InitRpmallocPlumbing(); static tracy_force_inline void InitRpmalloc() { - const auto done = RpInitDone.load( std::memory_order_acquire ); - if( !done ) InitRpmallocPlumbing(); - rpmalloc_thread_initialize(); -} + if( !RpThreadInitDone ) InitRpmallocPlumbing(); } #endif From 1badc53e716aa1314f9571d2490a76d470a9eec9 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Mon, 31 May 2021 22:40:57 +0200 Subject: [PATCH 10/11] Update NEWS. --- NEWS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS b/NEWS index f9ac3f86..0b97bb43 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,9 @@ v0.x.x (xxxx-xx-xx) - Added TRACY_NO_CALLSTACK_INLINES macro to disable inline functions resolution in call stacks on Windows. - Limited client query response rate. +- Improved function matching algorithm in compare traces view. +- Added minimal CMake integration layer. +- Reworked rpmalloc initialization. v0.7.8 (2021-05-19) From f4d80a4f5ff303862d43172fa0b23e6c1f619d9c Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Mon, 31 May 2021 22:55:30 +0200 Subject: [PATCH 11/11] Fix rpmalloc init for TRACY_MANUAL_LIFETIME path. --- client/TracyProfiler.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index 6a722cd1..e3da7563 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -954,6 +954,10 @@ struct ProfilerThreadData # endif }; +std::atomic RpInitDone { 0 }; +std::atomic RpInitLock { 0 }; +thread_local bool RpThreadInitDone = false; + # ifdef TRACY_MANUAL_LIFETIME ProfilerData* s_profilerData = nullptr; TRACY_API void StartupProfiler() @@ -975,9 +979,6 @@ TRACY_API void ShutdownProfiler() rpmalloc_finalize(); } # else -std::atomic RpInitDone { 0 }; -std::atomic RpInitLock { 0 }; -thread_local bool RpThreadInitDone = false; static std::atomic profilerDataLock { 0 }; static std::atomic profilerData { nullptr };