Reduce memory requirements of allocated call stacks.

This commit is contained in:
Bartosz Taudul 2020-07-05 17:33:29 +02:00
parent f718761905
commit 384e2e3fa1
4 changed files with 25 additions and 22 deletions

View File

@@ -150,9 +150,9 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
const char* func[64]; const char* func[64];
uint32_t fsz[64]; uint32_t fsz[64];
uint32_t ssz[64]; uint32_t ssz[64];
uint32_t spaceNeeded = 4; // cnt uint16_t spaceNeeded = 2; // cnt
uint32_t cnt; uint8_t cnt;
for( cnt=0; cnt<depth; cnt++ ) for( cnt=0; cnt<depth; cnt++ )
{ {
if( lua_getstack( L, cnt+1, dbg+cnt ) == 0 ) break; if( lua_getstack( L, cnt+1, dbg+cnt ) == 0 ) break;
@@ -162,22 +162,24 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
ssz[cnt] = uint32_t( strlen( dbg[cnt].source ) ); ssz[cnt] = uint32_t( strlen( dbg[cnt].source ) );
spaceNeeded += fsz[cnt] + ssz[cnt]; spaceNeeded += fsz[cnt] + ssz[cnt];
} }
spaceNeeded += cnt * ( 4 + 4 + 4 ); // source line, function string length, source string length spaceNeeded += cnt * ( 4 + 2 + 2 ); // source line, function string length, source string length
auto ptr = (char*)tracy_malloc( spaceNeeded + 4 ); auto ptr = (char*)tracy_malloc( spaceNeeded + 2 );
auto dst = ptr; auto dst = ptr;
memcpy( dst, &spaceNeeded, 4 ); dst += 4; memcpy( dst, &spaceNeeded, 2 ); dst += 2;
memcpy( dst, &cnt, 4 ); dst += 4; memcpy( dst, &cnt, 1 ); dst++;
for( uint32_t i=0; i<cnt; i++ ) for( uint8_t i=0; i<cnt; i++ )
{ {
const uint32_t line = dbg[i].currentline; const uint32_t line = dbg[i].currentline;
memcpy( dst, &line, 4 ); dst += 4; memcpy( dst, &line, 4 ); dst += 4;
memcpy( dst, fsz+i, 4 ); dst += 4; assert( fsz[i] <= std::numeric_limits<uint16_t>::max() );
memcpy( dst, fsz+i, 2 ); dst += 2;
memcpy( dst, func[i], fsz[i] ); dst += fsz[i]; memcpy( dst, func[i], fsz[i] ); dst += fsz[i];
memcpy( dst, ssz+i, 4 ); dst += 4; assert( ssz[i] <= std::numeric_limits<uint16_t>::max() );
memcpy( dst, ssz+i, 2 ); dst += 2;
memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i]; memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i];
} }
assert( dst - ptr == spaceNeeded + 4 ); assert( dst - ptr == spaceNeeded + 2 );
TracyLfqPrepare( QueueType::CallstackAlloc ); TracyLfqPrepare( QueueType::CallstackAlloc );
MemWrite( &item->callstackAlloc.ptr, (uint64_t)ptr ); MemWrite( &item->callstackAlloc.ptr, (uint64_t)ptr );

View File

@@ -2307,15 +2307,15 @@ void Profiler::SendCallstackAlloc( uint64_t _ptr )
MemWrite( &item.hdr.type, QueueType::CallstackAllocPayload ); MemWrite( &item.hdr.type, QueueType::CallstackAllocPayload );
MemWrite( &item.stringTransfer.ptr, _ptr ); MemWrite( &item.stringTransfer.ptr, _ptr );
const auto len = *((uint32_t*)ptr); uint16_t len;
assert( len <= std::numeric_limits<uint16_t>::max() ); memcpy( &len, ptr, 2 );
const auto l16 = uint16_t( len ); ptr += 2;
NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( l16 ) + l16 ); NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( len ) + len );
AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackAllocPayload] ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackAllocPayload] );
AppendDataUnsafe( &l16, sizeof( l16 ) ); AppendDataUnsafe( &len, sizeof( len ) );
AppendDataUnsafe( ptr + 4, l16 ); AppendDataUnsafe( ptr, len );
} }
void Profiler::SendCallstackFrame( uint64_t ptr ) void Profiler::SendCallstackFrame( uint64_t ptr )

View File

@@ -9,7 +9,7 @@ namespace tracy
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
enum : uint32_t { ProtocolVersion = 35 }; enum : uint32_t { ProtocolVersion = 36 };
enum : uint32_t { BroadcastVersion = 1 }; enum : uint32_t { BroadcastVersion = 1 };
using lz4sz_t = uint32_t; using lz4sz_t = uint32_t;

View File

@@ -3487,16 +3487,17 @@ void Worker::AddCallstackPayload( uint64_t ptr, const char* _data, size_t _sz )
void Worker::AddCallstackAllocPayload( uint64_t ptr, const char* data, size_t _sz ) void Worker::AddCallstackAllocPayload( uint64_t ptr, const char* data, size_t _sz )
{ {
CallstackFrameId stack[64]; CallstackFrameId stack[64];
const auto sz = *(uint32_t*)data; data += 4; uint8_t sz;
memcpy( &sz, data, 1 ); data++;
assert( sz <= 64 ); assert( sz <= 64 );
for( uint32_t i=0; i<sz; i++ ) for( uint8_t i=0; i<sz; i++ )
{ {
uint32_t sz; uint16_t sz;
CallstackFrame cf; CallstackFrame cf;
memcpy( &cf.line, data, 4 ); data += 4; memcpy( &cf.line, data, 4 ); data += 4;
memcpy( &sz, data, 4 ); data += 4; memcpy( &sz, data, 2 ); data += 2;
cf.name = StoreString( data, sz ).idx; data += sz; cf.name = StoreString( data, sz ).idx; data += sz;
memcpy( &sz, data, 4 ); data += 4; memcpy( &sz, data, 2 ); data += 2;
cf.file = StoreString( data, sz ).idx; data += sz; cf.file = StoreString( data, sz ).idx; data += sz;
cf.symAddr = 0; cf.symAddr = 0;
CallstackFrameData cfd = { &cf, 1 }; CallstackFrameData cfd = { &cf, 1 };