mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-22 14:44:34 +00:00
Check if rpmalloc has to be initialized before each operation.
The C++11 spec states in [basic.stc.thread] (thread storage duration), paragraph 2: "A variable with thread storage duration shall be initialized before its first odr-use (3.2) and, if constructed, shall be destroyed on thread exit." Previously Tracy relied on the TLS data being initialized in one of two ways: (a) during thread creation (MSVC), or (b) during first use in a thread, with the initialization performed for the whole TLS block at once. It seems that newer compilers are more granular in how they perform the initialization, hence the rpmalloc init has to be checked before each allocation, as it can no longer be "folded" into, for example, the initialization of the profiler itself.
This commit is contained in:
parent
92fb197aac
commit
b0fc0d5dcc
1
TracyC.h
1
TracyC.h
@ -101,7 +101,6 @@ struct ___tracy_c_zone_context
|
||||
// This struct, as visible to user, is immutable, so treat it as if const was declared here.
|
||||
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
|
||||
|
||||
TRACY_API void ___tracy_init_thread(void);
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz );
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz );
|
||||
|
||||
|
@ -389,7 +389,6 @@ private:
|
||||
|
||||
static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) );
|
||||
new(ctx) D3D11Ctx( device, devicectx );
|
||||
return ctx;
|
||||
|
@ -451,8 +451,6 @@ namespace tracy
|
||||
|
||||
static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||
{
|
||||
InitRPMallocThread();
|
||||
|
||||
auto* ctx = static_cast<D3D12QueueCtx*>(tracy_malloc(sizeof(D3D12QueueCtx)));
|
||||
new (ctx) D3D12QueueCtx{ device, queue };
|
||||
|
||||
|
@ -286,7 +286,6 @@ namespace tracy {
|
||||
|
||||
static inline OpenCLCtx* CreateCLContext(cl_context context, cl_device_id device)
|
||||
{
|
||||
InitRPMallocThread();
|
||||
auto ctx = (OpenCLCtx*)tracy_malloc(sizeof(OpenCLCtx));
|
||||
new (ctx) OpenCLCtx(context, device);
|
||||
return ctx;
|
||||
|
@ -53,7 +53,7 @@ public:
|
||||
# define glQueryCounter glQueryCounterEXT
|
||||
#endif
|
||||
|
||||
#define TracyGpuContext tracy::InitRPMallocThread(); tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
|
||||
#define TracyGpuContext tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
|
||||
#define TracyGpuContextName( name, size ) tracy::GetGpuCtx().ptr->Name( name, size );
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
|
@ -456,7 +456,6 @@ private:
|
||||
|
||||
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
|
||||
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
return ctx;
|
||||
|
@ -119,45 +119,6 @@ extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PR
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
# if ( defined _WIN32 || defined __CYGWIN__ ) && _WIN32_WINNT >= _WIN32_WINNT_VISTA
|
||||
BOOL CALLBACK InitOnceCallback( PINIT_ONCE /*initOnce*/, PVOID /*Parameter*/, PVOID* /*Context*/)
|
||||
{
|
||||
rpmalloc_initialize();
|
||||
return TRUE;
|
||||
}
|
||||
INIT_ONCE InitOnce = INIT_ONCE_STATIC_INIT;
|
||||
# elif defined __linux__
|
||||
void InitOnceCallback()
|
||||
{
|
||||
rpmalloc_initialize();
|
||||
}
|
||||
pthread_once_t once_control = PTHREAD_ONCE_INIT;
|
||||
# else
|
||||
void InitOnceCallback()
|
||||
{
|
||||
rpmalloc_initialize();
|
||||
}
|
||||
std::once_flag once_flag;
|
||||
# endif
|
||||
}
|
||||
|
||||
struct RPMallocInit
|
||||
{
|
||||
RPMallocInit()
|
||||
{
|
||||
# if ( defined _WIN32 || defined __CYGWIN__ ) && _WIN32_WINNT >= _WIN32_WINNT_VISTA
|
||||
InitOnceExecuteOnce( &InitOnce, InitOnceCallback, nullptr, nullptr );
|
||||
# elif defined __linux__
|
||||
pthread_once( &once_control, InitOnceCallback );
|
||||
# else
|
||||
std::call_once( once_flag, InitOnceCallback );
|
||||
# endif
|
||||
rpmalloc_thread_initialize();
|
||||
}
|
||||
};
|
||||
|
||||
#ifndef TRACY_DELAYED_INIT
|
||||
|
||||
struct InitTimeWrapper
|
||||
@ -965,12 +926,6 @@ TRACY_API int64_t GetFrequencyQpc()
|
||||
#ifdef TRACY_DELAYED_INIT
|
||||
struct ThreadNameData;
|
||||
TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue();
|
||||
TRACY_API void InitRPMallocThread();
|
||||
|
||||
void InitRPMallocThread()
|
||||
{
|
||||
RPMallocInit rpinit;
|
||||
}
|
||||
|
||||
struct ProfilerData
|
||||
{
|
||||
@ -992,7 +947,6 @@ struct ProducerWrapper
|
||||
struct ProfilerThreadData
|
||||
{
|
||||
ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {}
|
||||
RPMallocInit rpmalloc_init;
|
||||
ProducerWrapper token;
|
||||
GpuCtxWrapper gpuCtx;
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
@ -1004,7 +958,6 @@ struct ProfilerThreadData
|
||||
ProfilerData* s_profilerData = nullptr;
|
||||
TRACY_API void StartupProfiler()
|
||||
{
|
||||
RPMallocInit init;
|
||||
s_profilerData = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) );
|
||||
new (s_profilerData) ProfilerData();
|
||||
s_profilerData->profiler.SpawnWorkerThreads();
|
||||
@ -1022,6 +975,8 @@ TRACY_API void ShutdownProfiler()
|
||||
rpmalloc_finalize();
|
||||
}
|
||||
# else
|
||||
std::atomic<int> RpInitDone { 0 };
|
||||
std::atomic<int> RpInitLock { 0 };
|
||||
static std::atomic<int> profilerDataLock { 0 };
|
||||
static std::atomic<ProfilerData*> profilerData { nullptr };
|
||||
|
||||
@ -1035,7 +990,6 @@ static ProfilerData& GetProfilerData()
|
||||
ptr = profilerData.load( std::memory_order_acquire );
|
||||
if( !ptr )
|
||||
{
|
||||
RPMallocInit init;
|
||||
ptr = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) );
|
||||
new (ptr) ProfilerData();
|
||||
profilerData.store( ptr, std::memory_order_release );
|
||||
@ -1072,7 +1026,6 @@ public:
|
||||
void* p = pthread_getspecific(m_key);
|
||||
if (!p)
|
||||
{
|
||||
RPMallocInit init;
|
||||
p = (ProfilerThreadData*)tracy_malloc( sizeof( ProfilerThreadData ) );
|
||||
new (p) ProfilerThreadData(GetProfilerData());
|
||||
pthread_setspecific(m_key, p);
|
||||
@ -1124,18 +1077,12 @@ namespace
|
||||
# endif
|
||||
|
||||
#else
|
||||
// Exported helper that forwards straight to rpmalloc_thread_initialize().
TRACY_API void InitRPMallocThread() { rpmalloc_thread_initialize(); }
|
||||
|
||||
// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this.
|
||||
|
||||
// 1a. But s_queue is needed for initialization of variables in point 2.
|
||||
extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
|
||||
|
||||
thread_local RPMallocInit init_order(106) s_rpmalloc_thread_init;
|
||||
|
||||
// 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread.
|
||||
thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue );
|
||||
thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) };
|
||||
@ -1148,7 +1095,8 @@ thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThr
|
||||
# endif
|
||||
|
||||
static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() };
|
||||
static RPMallocInit init_order(102) s_rpmalloc_init;
|
||||
std::atomic<int> init_order(102) RpInitDone( 0 );
|
||||
std::atomic<int> init_order(102) RpInitLock( 0 );
|
||||
moodycamel::ConcurrentQueue<QueueItem> init_order(103) s_queue( QueuePrealloc );
|
||||
std::atomic<uint32_t> init_order(104) s_lockCounter( 0 );
|
||||
std::atomic<uint8_t> init_order(104) s_gpuCtxCounter( 0 );
|
||||
@ -3614,19 +3562,6 @@ TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source
|
||||
return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
}
|
||||
|
||||
// thread_locals are not initialized on thread creation. At least on GNU/Linux. Instead they are
|
||||
// initialized on their first ODR-use. This means that the allocator is not automagically
|
||||
// initialized every time a thread is created. As thus, expose to the C API users a simple API to
|
||||
// call every time they create a thread. Here we can then put all sorts of per-thread
|
||||
// initialization.
|
||||
TRACY_API void ___tracy_init_thread(void) {
|
||||
#ifdef TRACY_DELAYED_INIT
|
||||
(void)tracy::GetProfilerThreadData();
|
||||
#else
|
||||
(void)tracy::s_rpmalloc_thread_init;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -63,7 +63,6 @@ TRACY_API std::atomic<uint32_t>& GetLockCounter();
|
||||
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter();
|
||||
TRACY_API GpuCtxWrapper& GetGpuCtx();
|
||||
TRACY_API uint64_t GetThreadHandle();
|
||||
TRACY_API void InitRPMallocThread();
|
||||
TRACY_API bool ProfilerAvailable();
|
||||
TRACY_API int64_t GetFrequencyQpc();
|
||||
|
||||
@ -295,7 +294,6 @@ public:
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
@ -315,7 +313,6 @@ public:
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
@ -333,7 +330,6 @@ public:
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
@ -356,7 +352,6 @@ public:
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
@ -372,7 +367,6 @@ public:
|
||||
static tracy_force_inline void MessageAppInfo( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
InitRPMallocThread();
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
TracyLfqPrepare( QueueType::MessageAppInfo );
|
||||
@ -423,7 +417,6 @@ public:
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
InitRPMallocThread();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
@ -445,7 +438,6 @@ public:
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
InitRPMallocThread();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
@ -495,7 +487,6 @@ public:
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
InitRPMallocThread();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
@ -518,7 +509,6 @@ public:
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
InitRPMallocThread();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
|
@ -4,15 +4,47 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# include <atomic>
|
||||
# include "TracyForceInline.hpp"
|
||||
# include "TracyYield.hpp"
|
||||
# include "../client/tracy_rpmalloc.hpp"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
extern std::atomic<int> RpInitDone;
|
||||
extern std::atomic<int> RpInitLock;
|
||||
|
||||
namespace
|
||||
{
|
||||
static inline void InitRpmallocPlumbing()
|
||||
{
|
||||
int expected = 0;
|
||||
while( !RpInitLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); }
|
||||
const auto done = RpInitDone.load( std::memory_order_acquire );
|
||||
if( !done )
|
||||
{
|
||||
rpmalloc_initialize();
|
||||
RpInitDone.store( 1, std::memory_order_release );
|
||||
}
|
||||
RpInitLock.store( 0, std::memory_order_release );
|
||||
}
|
||||
|
||||
// Prepares rpmalloc for use on the calling thread. While RpInitDone still reads zero the
// process-wide setup is delegated to InitRpmallocPlumbing(); rpmalloc_thread_initialize()
// is then called unconditionally.
static tracy_force_inline void InitRpmalloc()
{
    if( RpInitDone.load( std::memory_order_acquire ) == 0 )
    {
        InitRpmallocPlumbing();
    }
    rpmalloc_thread_initialize();
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void* tracy_malloc( size_t size )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
InitRpmalloc();
|
||||
return rpmalloc( size );
|
||||
#else
|
||||
return malloc( size );
|
||||
@ -22,6 +54,7 @@ static inline void* tracy_malloc( size_t size )
|
||||
static inline void tracy_free( void* ptr )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
InitRpmalloc();
|
||||
rpfree( ptr );
|
||||
#else
|
||||
free( ptr );
|
||||
@ -31,6 +64,7 @@ static inline void tracy_free( void* ptr )
|
||||
static inline void* tracy_realloc( void* ptr, size_t size )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
InitRpmalloc();
|
||||
return rprealloc( ptr, size );
|
||||
#else
|
||||
return realloc( ptr, size );
|
||||
|
@ -96,7 +96,6 @@ struct ThreadNameData
|
||||
ThreadNameData* next;
|
||||
};
|
||||
std::atomic<ThreadNameData*>& GetThreadNameData();
|
||||
TRACY_API void InitRPMallocThread();
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
@ -161,7 +160,6 @@ TRACY_API void SetThreadName( const char* name )
|
||||
#endif
|
||||
#ifdef TRACY_ENABLE
|
||||
{
|
||||
InitRPMallocThread();
|
||||
const auto sz = strlen( name );
|
||||
char* buf = (char*)tracy_malloc( sz+1 );
|
||||
memcpy( buf, name, sz );
|
||||
|
Loading…
Reference in New Issue
Block a user