mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-23 06:44:35 +00:00
Implement delayed initialization of the profiler.
Enabled on osx, ios.
This commit is contained in:
parent
d560f7a203
commit
ef5e30056e
@ -47,11 +47,15 @@
|
|||||||
#include "TracyThread.hpp"
|
#include "TracyThread.hpp"
|
||||||
#include "../TracyC.h"
|
#include "../TracyC.h"
|
||||||
|
|
||||||
|
#if __APPLE__
|
||||||
|
# define TRACY_DELAYED_INIT
|
||||||
|
#else
|
||||||
# ifdef __GNUC__
|
# ifdef __GNUC__
|
||||||
# define init_order( val ) __attribute__ ((init_priority(val)))
|
# define init_order( val ) __attribute__ ((init_priority(val)))
|
||||||
# else
|
# else
|
||||||
# define init_order(x)
|
# define init_order(x)
|
||||||
# endif
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined TRACY_HW_TIMER && __ARM_ARCH >= 6
|
#if defined TRACY_HW_TIMER && __ARM_ARCH >= 6
|
||||||
# include <signal.h>
|
# include <signal.h>
|
||||||
@ -79,6 +83,7 @@ extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW );
|
|||||||
namespace tracy
|
namespace tracy
|
||||||
{
|
{
|
||||||
|
|
||||||
|
#ifndef TRACY_DELAYED_INIT
|
||||||
# if defined TRACY_USE_INIT_ONCE
|
# if defined TRACY_USE_INIT_ONCE
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -110,8 +115,6 @@ struct RPMallocInit
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct RPMallocThreadInit
|
struct RPMallocThreadInit
|
||||||
{
|
{
|
||||||
RPMallocThreadInit()
|
RPMallocThreadInit()
|
||||||
@ -128,6 +131,13 @@ struct InitTimeWrapper
|
|||||||
int64_t val;
|
int64_t val;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ProducerWrapper
|
||||||
|
{
|
||||||
|
tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined TRACY_HW_TIMER && __ARM_ARCH >= 6
|
#if defined TRACY_HW_TIMER && __ARM_ARCH >= 6
|
||||||
int64_t (*GetTimeImpl)();
|
int64_t (*GetTimeImpl)();
|
||||||
|
|
||||||
@ -713,18 +723,99 @@ static void CrashHandler( int signal, siginfo_t* info, void* ucontext )
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct ProducerWrapper
|
|
||||||
{
|
|
||||||
tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
enum { QueuePrealloc = 256 * 1024 };
|
enum { QueuePrealloc = 256 * 1024 };
|
||||||
|
|
||||||
// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this.
|
static Profiler* s_instance;
|
||||||
|
static Thread* s_thread;
|
||||||
|
|
||||||
static Profiler* s_instance = nullptr;
|
#ifdef TRACY_DELAYED_INIT
|
||||||
static Thread* s_thread = nullptr;
|
struct ThreadNameData;
|
||||||
|
moodycamel::ConcurrentQueue<QueueItem>& GetQueue();
|
||||||
|
|
||||||
|
// Calls rpmalloc_initialize() each time an instance is constructed.
struct RPMallocInit
{
    RPMallocInit()
    {
        rpmalloc_initialize();
    }
};
|
||||||
|
// Calls rpmalloc_thread_initialize() each time an instance is constructed.
struct RPMallocThreadInit
{
    RPMallocThreadInit()
    {
        rpmalloc_thread_initialize();
    }
};
|
||||||
|
|
||||||
|
void InitRPMallocThread()
|
||||||
|
{
|
||||||
|
rpmalloc_initialize();
|
||||||
|
rpmalloc_thread_initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ProfilerData
|
||||||
|
{
|
||||||
|
int64_t initTime = SetupHwTimer();
|
||||||
|
RPMallocInit rpmalloc_init;
|
||||||
|
moodycamel::ConcurrentQueue<QueueItem> queue;
|
||||||
|
Profiler profiler;
|
||||||
|
std::atomic<uint32_t> lockCounter = 0;
|
||||||
|
std::atomic<uint8_t> gpuCtxCounter = 0;
|
||||||
|
# ifdef TRACY_COLLECT_THREAD_NAMES
|
||||||
|
std::atomic<ThreadNameData*> threadNameData = nullptr;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ProducerWrapper
|
||||||
|
{
|
||||||
|
ProducerWrapper( ProfilerData& data ) : detail( data.queue ), ptr( data.queue.get_explicit_producer( detail ) ) {}
|
||||||
|
moodycamel::ProducerToken detail;
|
||||||
|
tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ProfilerThreadData
|
||||||
|
{
|
||||||
|
ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {}
|
||||||
|
RPMallocInit rpmalloc_init;
|
||||||
|
RPMallocThreadInit rpmalloc_thread_init;
|
||||||
|
ProducerWrapper token;
|
||||||
|
GpuCtxWrapper gpuCtx;
|
||||||
|
# ifdef TRACY_ON_DEMAND
|
||||||
|
LuaZoneState luaZoneState;
|
||||||
|
# endif
|
||||||
|
};
|
||||||
|
|
||||||
|
static ProfilerData* profilerData;
|
||||||
|
|
||||||
|
// Returns the ProfilerData instance, creating it on the first call.
//
// The storage is obtained with malloc() and the object is placement-constructed
// into it; nothing in this function releases it again.
//
// The pointer is stored in `profilerData` BEFORE the constructor runs, exactly
// as in the original code. NOTE(review): presumably this lets code reached from
// the ProfilerData constructor call back into this accessor without recursing --
// confirm before reordering.
//
// NOTE(review): there is no synchronization around the first-call path, so two
// threads making the first call at the same time would race -- verify that the
// first call always happens on a single thread.
static ProfilerData& GetProfilerData()
{
    // Cannot use magic statics here.
    if( !profilerData )
    {
        profilerData = (ProfilerData*)malloc( sizeof( ProfilerData ) );
        // Placement new into a null pointer is undefined behavior; fail in a
        // well-defined way when the allocation did not succeed.
        if( !profilerData ) abort();
        new (profilerData) ProfilerData();
    }
    return *profilerData;
}
|
||||||
|
|
||||||
|
// Returns the calling thread's ProfilerThreadData. The thread_local instance is
// constructed from GetProfilerData() the first time a thread reaches this point.
static ProfilerThreadData& GetProfilerThreadData()
{
    thread_local ProfilerThreadData threadData( GetProfilerData() );
    return threadData;
}
|
||||||
|
|
||||||
|
// Explicit producer pointer held by the calling thread's ProfilerThreadData token.
moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken()
{
    auto& perThread = GetProfilerThreadData();
    return perThread.token.ptr;
}
|
||||||
|
// Profiler object stored inside ProfilerData.
Profiler& GetProfiler()
{
    auto& shared = GetProfilerData();
    return shared.profiler;
}
|
||||||
|
// QueueItem queue stored inside ProfilerData.
moodycamel::ConcurrentQueue<QueueItem>& GetQueue()
{
    auto& shared = GetProfilerData();
    return shared.queue;
}
|
||||||
|
// Value that SetupHwTimer() returned when ProfilerData was constructed.
int64_t GetInitTime()
{
    const auto& shared = GetProfilerData();
    return shared.initTime;
}
|
||||||
|
// Atomic lock counter stored inside ProfilerData.
std::atomic<uint32_t>& GetLockCounter()
{
    auto& shared = GetProfilerData();
    return shared.lockCounter;
}
|
||||||
|
// Atomic GPU context counter stored inside ProfilerData.
std::atomic<uint8_t>& GetGpuCtxCounter()
{
    auto& shared = GetProfilerData();
    return shared.gpuCtxCounter;
}
|
||||||
|
// GPU context wrapper held by the calling thread's ProfilerThreadData.
GpuCtxWrapper& GetGpuCtx()
{
    auto& perThread = GetProfilerThreadData();
    return perThread.gpuCtx;
}
|
||||||
|
|
||||||
|
# ifdef TRACY_COLLECT_THREAD_NAMES
|
||||||
|
// Atomic ThreadNameData pointer stored inside ProfilerData.
std::atomic<ThreadNameData*>& GetThreadNameData()
{
    auto& shared = GetProfilerData();
    return shared.threadNameData;
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifdef TRACY_ON_DEMAND
|
||||||
|
// Lua zone state held by the calling thread's ProfilerThreadData.
LuaZoneState& GetLuaZoneState()
{
    auto& perThread = GetProfilerThreadData();
    return perThread.luaZoneState;
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
void InitRPMallocThread()
|
||||||
|
{
|
||||||
|
rpmalloc_thread_initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this.
|
||||||
|
|
||||||
// 1a. But s_queue is needed for initialization of variables in point 2.
|
// 1a. But s_queue is needed for initialization of variables in point 2.
|
||||||
extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
|
extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
|
||||||
@ -761,17 +852,10 @@ thread_local LuaZoneState init_order(104) s_luaZoneState { 0, false };
|
|||||||
|
|
||||||
static Profiler init_order(105) s_profiler;
|
static Profiler init_order(105) s_profiler;
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
# define DLL_EXPORT __declspec(dllexport)
|
|
||||||
#else
|
|
||||||
# define DLL_EXPORT __attribute__((visibility("default")))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return s_token.ptr; }
|
moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return s_token.ptr; }
|
||||||
Profiler& GetProfiler() { return s_profiler; }
|
Profiler& GetProfiler() { return s_profiler; }
|
||||||
moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return s_queue; }
|
moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return s_queue; }
|
||||||
InitTimeWrapper& GetInitTime() { return s_initTime; }
|
int64_t GetInitTime() { return s_initTime.val; }
|
||||||
std::atomic<uint32_t>& GetLockCounter() { return s_lockCounter; }
|
std::atomic<uint32_t>& GetLockCounter() { return s_lockCounter; }
|
||||||
std::atomic<uint8_t>& GetGpuCtxCounter() { return s_gpuCtxCounter; }
|
std::atomic<uint8_t>& GetGpuCtxCounter() { return s_gpuCtxCounter; }
|
||||||
GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; }
|
GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; }
|
||||||
@ -783,8 +867,15 @@ std::atomic<ThreadNameData*>& GetThreadNameData() { return s_threadNameData; }
|
|||||||
# ifdef TRACY_ON_DEMAND
|
# ifdef TRACY_ON_DEMAND
|
||||||
LuaZoneState& GetLuaZoneState() { return s_luaZoneState; }
|
LuaZoneState& GetLuaZoneState() { return s_luaZoneState; }
|
||||||
# endif
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
// DLL exports to enable TracyClientDLL.cpp to retrieve the instances of Tracy objects and functions
|
// DLL exports to enable TracyClientDLL.cpp to retrieve the instances of Tracy objects and functions
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define DLL_EXPORT __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
# define DLL_EXPORT __attribute__((visibility("default")))
|
||||||
|
#endif
|
||||||
|
|
||||||
DLL_EXPORT void*(*get_rpmalloc())(size_t size) { return rpmalloc; }
|
DLL_EXPORT void*(*get_rpmalloc())(size_t size) { return rpmalloc; }
|
||||||
DLL_EXPORT void(*get_rpfree())(void* ptr) { return rpfree; }
|
DLL_EXPORT void(*get_rpfree())(void* ptr) { return rpfree; }
|
||||||
DLL_EXPORT moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer*(*get_token())() { return GetToken; }
|
DLL_EXPORT moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer*(*get_token())() { return GetToken; }
|
||||||
@ -835,10 +926,12 @@ Profiler::Profiler()
|
|||||||
assert( !s_instance );
|
assert( !s_instance );
|
||||||
s_instance = this;
|
s_instance = this;
|
||||||
|
|
||||||
|
#ifndef TRACY_DELAYED_INIT
|
||||||
# ifdef _MSC_VER
|
# ifdef _MSC_VER
|
||||||
// 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here.
|
// 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here.
|
||||||
s_token_detail = moodycamel::ProducerToken( s_queue );
|
s_token_detail = moodycamel::ProducerToken( s_queue );
|
||||||
s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) };
|
s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) };
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
CalibrateTimer();
|
CalibrateTimer();
|
||||||
@ -939,7 +1032,7 @@ void Profiler::Worker()
|
|||||||
|
|
||||||
WelcomeMessage welcome;
|
WelcomeMessage welcome;
|
||||||
MemWrite( &welcome.timerMul, m_timerMul );
|
MemWrite( &welcome.timerMul, m_timerMul );
|
||||||
MemWrite( &welcome.initBegin, GetInitTime().val );
|
MemWrite( &welcome.initBegin, GetInitTime() );
|
||||||
MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) );
|
MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) );
|
||||||
MemWrite( &welcome.delay, m_delay );
|
MemWrite( &welcome.delay, m_delay );
|
||||||
MemWrite( &welcome.resolution, m_resolution );
|
MemWrite( &welcome.resolution, m_resolution );
|
||||||
|
@ -55,6 +55,8 @@ std::atomic<uint32_t>& GetLockCounter();
|
|||||||
std::atomic<uint8_t>& GetGpuCtxCounter();
|
std::atomic<uint8_t>& GetGpuCtxCounter();
|
||||||
GpuCtxWrapper& GetGpuCtx();
|
GpuCtxWrapper& GetGpuCtx();
|
||||||
|
|
||||||
|
void InitRPMallocThread();
|
||||||
|
|
||||||
struct SourceLocationData
|
struct SourceLocationData
|
||||||
{
|
{
|
||||||
const char* name;
|
const char* name;
|
||||||
@ -273,19 +275,20 @@ public:
|
|||||||
|
|
||||||
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth )
|
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth )
|
||||||
{
|
{
|
||||||
|
auto& profiler = GetProfiler();
|
||||||
#ifdef TRACY_HAS_CALLSTACK
|
#ifdef TRACY_HAS_CALLSTACK
|
||||||
# ifdef TRACY_ON_DEMAND
|
# ifdef TRACY_ON_DEMAND
|
||||||
if( !GetProfiler().IsConnected() ) return;
|
if( !profiler.IsConnected() ) return;
|
||||||
# endif
|
# endif
|
||||||
const auto thread = GetThreadHandle();
|
const auto thread = GetThreadHandle();
|
||||||
|
|
||||||
rpmalloc_thread_initialize();
|
rpmalloc_thread_initialize();
|
||||||
auto callstack = Callstack( depth );
|
auto callstack = Callstack( depth );
|
||||||
|
|
||||||
GetProfiler().m_serialLock.lock();
|
profiler.m_serialLock.lock();
|
||||||
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
|
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
|
||||||
SendCallstackMemory( callstack );
|
SendCallstackMemory( callstack );
|
||||||
GetProfiler().m_serialLock.unlock();
|
profiler.m_serialLock.unlock();
|
||||||
#else
|
#else
|
||||||
MemAlloc( ptr, size );
|
MemAlloc( ptr, size );
|
||||||
#endif
|
#endif
|
||||||
@ -293,19 +296,20 @@ public:
|
|||||||
|
|
||||||
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth )
|
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth )
|
||||||
{
|
{
|
||||||
|
auto& profiler = GetProfiler();
|
||||||
#ifdef TRACY_HAS_CALLSTACK
|
#ifdef TRACY_HAS_CALLSTACK
|
||||||
# ifdef TRACY_ON_DEMAND
|
# ifdef TRACY_ON_DEMAND
|
||||||
if( !GetProfiler().IsConnected() ) return;
|
if( !profiler.IsConnected() ) return;
|
||||||
# endif
|
# endif
|
||||||
const auto thread = GetThreadHandle();
|
const auto thread = GetThreadHandle();
|
||||||
|
|
||||||
rpmalloc_thread_initialize();
|
rpmalloc_thread_initialize();
|
||||||
auto callstack = Callstack( depth );
|
auto callstack = Callstack( depth );
|
||||||
|
|
||||||
GetProfiler().m_serialLock.lock();
|
profiler.m_serialLock.lock();
|
||||||
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
|
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
|
||||||
SendCallstackMemory( callstack );
|
SendCallstackMemory( callstack );
|
||||||
GetProfiler().m_serialLock.unlock();
|
profiler.m_serialLock.unlock();
|
||||||
#else
|
#else
|
||||||
MemFree( ptr );
|
MemFree( ptr );
|
||||||
#endif
|
#endif
|
||||||
|
@ -45,6 +45,7 @@ struct ThreadNameData
|
|||||||
ThreadNameData* next;
|
ThreadNameData* next;
|
||||||
};
|
};
|
||||||
std::atomic<ThreadNameData*>& GetThreadNameData();
|
std::atomic<ThreadNameData*>& GetThreadNameData();
|
||||||
|
void InitRPMallocThread();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void SetThreadName( std::thread& thread, const char* name )
|
void SetThreadName( std::thread& thread, const char* name )
|
||||||
@ -104,7 +105,7 @@ void SetThreadName( std::thread::native_handle_type handle, const char* name )
|
|||||||
#endif
|
#endif
|
||||||
#ifdef TRACY_COLLECT_THREAD_NAMES
|
#ifdef TRACY_COLLECT_THREAD_NAMES
|
||||||
{
|
{
|
||||||
rpmalloc_thread_initialize();
|
InitRPMallocThread();
|
||||||
const auto sz = strlen( name );
|
const auto sz = strlen( name );
|
||||||
char* buf = (char*)tracy_malloc( sz+1 );
|
char* buf = (char*)tracy_malloc( sz+1 );
|
||||||
memcpy( buf, name, sz );
|
memcpy( buf, name, sz );
|
||||||
|
Loading…
Reference in New Issue
Block a user