More compact GpuEvent struct (save 4 bytes).

Memory usage reduction of various traces:

big         9011 -> 9007
frameimages 561  -> 552
fi-big      4144 -> 4139
long        5253 -> 5125
This commit is contained in:
Bartosz Taudul 2019-10-13 14:36:59 +02:00
parent c044df6324
commit 215dc8a804
3 changed files with 95 additions and 63 deletions

View File

@ -188,13 +188,20 @@ static_assert( std::numeric_limits<decltype(LockEventPtr::lockCount)>::max() >=
// Record of a single GPU zone.
//
// The CPU-side timestamps are packed together with a 16-bit field each:
//   _cpuStart_srcloc: bits 63..16 = CPU start time, bits 15..0 = source location id
//   _cpuEnd_thread:   bits 63..16 = CPU end time,   bits 15..0 = thread id
// Use the accessor pairs below rather than touching the packed words directly.
//
// NOTE(review): this listing looks like a diff with its +/- markers stripped.
// The plain cpuStart / cpuEnd / srcloc / thread members appear to be the
// pre-change fields that the packed words replace -- confirm against the real header.
struct GpuEvent
{
int64_t cpuStart;
int64_t cpuEnd;
// Returns the CPU start time held in the upper 48 bits. The shift is done on a
// signed value so that negative times come back negative; this relies on
// arithmetic right shift (implementation-defined before C++20).
int64_t CpuStart() const { return int64_t( _cpuStart_srcloc ) >> 16; }
// Stores the CPU start time in the upper 48 bits, keeping the low 16 bits (source location).
// The assert only bounds the value from above (< 2^47); there is no lower-bound check.
void SetCpuStart( int64_t cpuStart ) { assert( cpuStart < (int64_t)( 1ull << 47 ) ); _cpuStart_srcloc = ( _cpuStart_srcloc & 0xFFFF ) | ( uint64_t( cpuStart ) << 16 ); }
// Returns the CPU end time held in the upper 48 bits (same signed-shift technique as CpuStart).
int64_t CpuEnd() const { return int64_t( _cpuEnd_thread ) >> 16; }
// Stores the CPU end time in the upper 48 bits, keeping the low 16 bits (thread id).
// The assert only bounds the value from above (< 2^47); there is no lower-bound check.
void SetCpuEnd( int64_t cpuEnd ) { assert( cpuEnd < (int64_t)( 1ull << 47 ) ); _cpuEnd_thread = ( _cpuEnd_thread & 0xFFFF ) | ( uint64_t( cpuEnd ) << 16 ); }
// Returns the source location id from the low 16 bits, reinterpreted as signed.
int16_t SrcLoc() const { return int16_t( _cpuStart_srcloc & 0xFFFF ); }
// Stores the source location id in the low 16 bits, keeping the upper 48 bits (CPU start time).
// The uint16_t cast stops a negative srcloc from sign-extending into the timestamp bits.
void SetSrcLoc( int16_t srcloc ) { _cpuStart_srcloc = ( _cpuStart_srcloc & 0xFFFFFFFFFFFF0000 ) | uint16_t( srcloc ); }
// Returns the thread id from the low 16 bits.
uint16_t Thread() const { return uint16_t( _cpuEnd_thread & 0xFFFF ); }
// Stores the thread id in the low 16 bits, keeping the upper 48 bits (CPU end time).
void SetThread( uint16_t thread ) { _cpuEnd_thread = ( _cpuEnd_thread & 0xFFFFFFFFFFFF0000 ) | thread; }
// Packed word: CPU start time (upper 48 bits) | source location id (lower 16 bits).
uint64_t _cpuStart_srcloc;
// Packed word: CPU end time (upper 48 bits) | thread id (lower 16 bits).
uint64_t _cpuEnd_thread;
int64_t gpuStart;
int64_t gpuEnd;
int16_t srcloc;
Int24 callstack;
uint16_t thread;
int32_t child;
};

View File

@ -2146,7 +2146,7 @@ void View::DrawZones()
{
if( !it->second.timeline.empty() )
{
tid = m_worker.DecompressThread( (*it->second.timeline.begin())->thread );
tid = m_worker.DecompressThread( (*it->second.timeline.begin())->Thread() );
}
}
TextFocused( "Thread:", m_worker.GetThreadName( tid ) );
@ -2552,8 +2552,8 @@ void View::DrawZones()
}
if( m_gpuInfoWindow )
{
const auto px0 = ( m_gpuInfoWindow->cpuStart - m_vd.zvStart ) * pxns;
const auto px1 = std::max( px0 + std::max( 1.0, pxns * 0.5 ), ( m_gpuInfoWindow->cpuEnd - m_vd.zvStart ) * pxns );
const auto px0 = ( m_gpuInfoWindow->CpuStart() - m_vd.zvStart ) * pxns;
const auto px1 = std::max( px0 + std::max( 1.0, pxns * 0.5 ), ( m_gpuInfoWindow->CpuEnd() - m_vd.zvStart ) * pxns );
draw->AddRectFilled( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x2288DD88 );
draw->AddRect( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x4488DD88 );
}
@ -3276,8 +3276,8 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
}
m_gpuThread = thread;
m_gpuStart = ev.cpuStart;
m_gpuEnd = ev.cpuEnd;
m_gpuStart = ev.CpuStart();
m_gpuEnd = ev.CpuEnd();
}
}
char tmp[64];
@ -3345,8 +3345,8 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
}
m_gpuThread = thread;
m_gpuStart = ev.cpuStart;
m_gpuEnd = ev.cpuEnd;
m_gpuStart = ev.CpuStart();
m_gpuEnd = ev.CpuEnd();
}
++it;
@ -6216,7 +6216,7 @@ void View::DrawZoneInfoWindow()
void View::DrawGpuInfoWindow()
{
auto& ev = *m_gpuInfoWindow;
const auto& srcloc = m_worker.GetSourceLocation( ev.srcloc );
const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() );
ImGui::SetNextWindowSize( ImVec2( 500, 400 ), ImGuiCond_FirstUseEver );
bool show = true;
@ -6331,19 +6331,19 @@ void View::DrawGpuInfoWindow()
ImGui::SameLine();
ImGui::TextDisabled( "(%.2f%%)", 100.f * selftime / ztime );
}
TextFocused( "CPU command setup time:", TimeToString( ev.cpuEnd - ev.cpuStart ) );
TextFocused( "CPU command setup time:", TimeToString( ev.CpuEnd() - ev.CpuStart() ) );
auto ctx = GetZoneCtx( ev );
if( !ctx )
{
TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.cpuStart ) );
TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.CpuStart() ) );
}
else
{
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) );
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) );
assert( td != ctx->threadData.end() );
const auto begin = td->second.timeline.front()->gpuStart;
const auto drift = GpuDrift( ctx );
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.cpuStart ) );
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.CpuStart() ) );
}
ImGui::Separator();
@ -6358,7 +6358,7 @@ void View::DrawGpuInfoWindow()
DrawZoneTrace<const GpuEvent*>( &ev, zoneTrace, m_worker, m_zoneinfoBuzzAnim, *this, m_showUnknownFrames, [&idx, this] ( const GpuEvent* v, int& fidx ) {
ImGui::TextDisabled( "%i.", fidx++ );
ImGui::SameLine();
const auto& srcloc = m_worker.GetSourceLocation( v->srcloc );
const auto& srcloc = m_worker.GetSourceLocation( v->SrcLoc() );
const auto txt = m_worker.GetZoneName( *v, srcloc );
ImGui::PushID( idx++ );
auto sel = ImGui::Selectable( txt, false );
@ -6432,7 +6432,7 @@ void View::DrawGpuInfoWindow()
const auto& child = *children[i];
const auto cend = m_worker.GetZoneEnd( child );
const auto ct = cend - child.gpuStart;
const auto srcloc = child.srcloc;
const auto srcloc = child.SrcLoc();
ctime += ct;
auto it = cmap.find( srcloc );
@ -6742,7 +6742,7 @@ void View::DrawOptions()
const auto p1 = dist( gen );
if( p0 != p1 )
{
slopes[idx++] = float( 1.0 - double( timeline[p1]->gpuStart - timeline[p0]->gpuStart ) / double( timeline[p1]->cpuStart - timeline[p0]->cpuStart ) );
slopes[idx++] = float( 1.0 - double( timeline[p1]->gpuStart - timeline[p0]->gpuStart ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) );
}
}
while( idx < NumSlopes );
@ -12533,7 +12533,7 @@ uint32_t View::GetRawZoneColor( const ZoneEvent& ev, uint64_t thread, int depth
// Returns the color for a GPU zone: the color of the zone's source location with
// the top byte forced to 0xFF, or 0xFF222288 when that color is 0.
uint32_t View::GetZoneColor( const GpuEvent& ev )
{
// NOTE(review): the next two lines declare the same name twice; they look like the
// before/after pair of a diff (direct member access vs. the SrcLoc() accessor).
const auto& srcloc = m_worker.GetSourceLocation( ev.srcloc );
const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() );
const auto color = srcloc.color;
return color != 0 ? ( color | 0xFF000000 ) : 0xFF222288;
}
@ -12633,7 +12633,7 @@ void View::ZoomToZone( const GpuEvent& ev )
}
else
{
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) );
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) );
assert( td != ctx->threadData.end() );
const auto begin = td->second.timeline.front()->gpuStart;
const auto drift = GpuDrift( ctx );
@ -12816,7 +12816,7 @@ void View::ZoneTooltip( const ZoneEvent& ev )
void View::ZoneTooltip( const GpuEvent& ev )
{
const auto tid = GetZoneThread( ev );
const auto& srcloc = m_worker.GetSourceLocation( ev.srcloc );
const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() );
const auto end = m_worker.GetZoneEnd( ev );
const auto ztime = end - ev.gpuStart;
const auto selftime = GetZoneSelfTime( ev );
@ -12839,19 +12839,19 @@ void View::ZoneTooltip( const GpuEvent& ev )
ImGui::SameLine();
ImGui::TextDisabled( "(%.2f%%)", 100.f * selftime / ztime );
}
TextFocused( "CPU command setup time:", TimeToString( ev.cpuEnd - ev.cpuStart ) );
TextFocused( "CPU command setup time:", TimeToString( ev.CpuEnd() - ev.CpuStart() ) );
auto ctx = GetZoneCtx( ev );
if( !ctx )
{
TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.cpuStart ) );
TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.CpuStart() ) );
}
else
{
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) );
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) );
assert( td != ctx->threadData.end() );
const auto begin = td->second.timeline.front()->gpuStart;
const auto drift = GpuDrift( ctx );
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.cpuStart ) );
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.CpuStart() ) );
}
ImGui::EndTooltip();
@ -13010,7 +13010,7 @@ uint64_t View::GetZoneThread( const ZoneEvent& zone ) const
uint64_t View::GetZoneThread( const GpuEvent& zone ) const
{
if( zone.thread == 0 )
if( zone.Thread() == 0 )
{
for( const auto& ctx : m_worker.GetGpuData() )
{
@ -13031,7 +13031,7 @@ uint64_t View::GetZoneThread( const GpuEvent& zone ) const
}
else
{
return m_worker.DecompressThread( zone.thread );
return m_worker.DecompressThread( zone.Thread() );
}
}

View File

@ -2152,7 +2152,7 @@ const char* Worker::GetZoneName( const ZoneEvent& ev, const SourceLocation& srcl
// Convenience overload: looks up the GPU zone's source location and forwards to
// GetZoneName( ev, srcloc ).
const char* Worker::GetZoneName( const GpuEvent& ev ) const
{
// NOTE(review): the next two lines declare the same name twice; they look like the
// before/after pair of a diff (direct member access vs. the SrcLoc() accessor).
auto& srcloc = GetSourceLocation( ev.srcloc );
auto& srcloc = GetSourceLocation( ev.SrcLoc() );
return GetZoneName( ev, srcloc );
}
@ -3985,11 +3985,11 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
CheckSourceLocation( ev.srcloc );
zone->cpuStart = TscTime( ev.cpuTime - m_data.baseTime );
zone->cpuEnd = -1;
zone->SetCpuStart( TscTime( ev.cpuTime - m_data.baseTime ) );
zone->SetCpuEnd( -1 );
zone->gpuStart = std::numeric_limits<int64_t>::max();
zone->gpuEnd = -1;
zone->srcloc = ShrinkSourceLocation( ev.srcloc );
zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) );
zone->callstack.SetVal( 0 );
zone->child = -1;
@ -3997,18 +3997,18 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
if( ctx->thread == 0 )
{
// Vulkan context is not bound to any single thread.
zone->thread = CompressThread( ev.thread );
zone->SetThread( CompressThread( ev.thread ) );
ztid = ev.thread;
}
else
{
// OpenGL doesn't need per-zone thread id. It still can be sent,
// because it may be needed for callstack collection purposes.
zone->thread = 0;
zone->SetThread( 0 );
ztid = 0;
}
m_data.lastTime = std::max( m_data.lastTime, zone->cpuStart );
m_data.lastTime = std::max( m_data.lastTime, zone->CpuStart() );
auto td = ctx->threadData.find( ztid );
if( td == ctx->threadData.end() )
@ -4065,8 +4065,8 @@ void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )
assert( !ctx->query[ev.queryId] );
ctx->query[ev.queryId] = zone;
zone->cpuEnd = TscTime( ev.cpuTime - m_data.baseTime );
m_data.lastTime = std::max( m_data.lastTime, zone->cpuEnd );
zone->SetCpuEnd( TscTime( ev.cpuTime - m_data.baseTime ) );
m_data.lastTime = std::max( m_data.lastTime, zone->CpuEnd() );
}
void Worker::ProcessGpuTime( const QueueGpuTime& ev )
@ -4922,17 +4922,22 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, i
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) );
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
int16_t srcloc;
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
f.Read( &zone->callstack, sizeof( zone->callstack ) );
f.Read( &zone->thread, sizeof( zone->thread ) );
uint16_t thread;
f.Read( thread );
zone->SetThread( thread );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
zone->cpuStart = refTime;
zone->SetCpuStart( refTime );
zone->gpuStart = refGpuTime;
ReadTimeline( f, zone, refTime, refGpuTime );
zone->cpuEnd = ReadTimeOffset( f, refTime );
zone->SetCpuEnd( ReadTimeOffset( f, refTime ) );
zone->gpuEnd = ReadTimeOffset( f, refGpuTime );
}
while( ++zone != zptr );
@ -4952,12 +4957,18 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
if( fileVer <= FileVersion( 0, 4, 1 ) )
{
f.Read( zone, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) );
zone->cpuStart -= m_data.baseTime;
if( zone->cpuEnd >= 0 ) zone->cpuEnd -= m_data.baseTime;
int64_t cpuStart, cpuEnd;
f.Read2( cpuStart, cpuEnd );
cpuStart -= m_data.baseTime;
if( cpuEnd >= 0 ) cpuEnd -= m_data.baseTime;
zone->SetCpuStart( cpuStart );
zone->SetCpuEnd( cpuEnd );
f.Read( &zone->gpuStart, sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) );
if( zone->gpuStart != std::numeric_limits<int64_t>::max() ) zone->gpuStart -= m_data.baseTime;
if( zone->gpuEnd >= 0 ) zone->gpuEnd -= m_data.baseTime;
f.Read( zone->srcloc );
int16_t srcloc;
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
f.Skip( 2 );
f.Read( zone->callstack );
f.Skip( 1 );
@ -4965,23 +4976,25 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
f.Read( thread );
if( thread == 0 )
{
zone->thread = 0;
zone->SetThread( 0 );
}
else
{
zone->thread = CompressThread( thread );
zone->SetThread( CompressThread( thread ) );
}
}
else if( fileVer <= FileVersion( 0, 4, 3 ) )
{
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
f.Read( zone->srcloc );
int16_t srcloc;
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
f.Skip( 2 );
f.Read( zone->callstack );
f.Skip( 1 );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
zone->cpuStart = refTime - m_data.baseTime;
zone->SetCpuStart( refTime - m_data.baseTime );
zone->gpuStart = refGpuTime;
if( zone->gpuStart != std::numeric_limits<int64_t>::max() ) zone->gpuStart -= m_data.baseTime;
@ -4989,11 +5002,11 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
f.Read( thread );
if( thread == 0 )
{
zone->thread = 0;
zone->SetThread( 0 );
}
else
{
zone->thread = CompressThread( thread );
zone->SetThread( CompressThread( thread ) );
}
}
else if( fileVer <= FileVersion( 0, 5, 1 ) )
@ -5001,14 +5014,18 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
f.Read( zone->srcloc );
int16_t srcloc;
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
f.Skip( 2 );
f.Read( zone->callstack );
f.Skip( 1 );
f.Read( zone->thread );
uint16_t thread;
f.Read( thread );
zone->SetThread( thread );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
zone->cpuStart = refTime - m_data.baseTime;
zone->SetCpuStart( refTime - m_data.baseTime );
zone->gpuStart = refGpuTime;
if( zone->gpuStart != std::numeric_limits<int64_t>::max() ) zone->gpuStart -= m_data.baseTime;
}
@ -5016,21 +5033,27 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
{
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) );
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
int16_t srcloc;
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
f.Read( &zone->callstack, sizeof( zone->callstack ) );
f.Skip( 1 );
f.Read( &zone->thread, sizeof( zone->thread ) );
uint16_t thread;
f.Read( thread );
zone->SetThread( thread );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
zone->cpuStart = refTime;
zone->SetCpuStart( refTime );
zone->gpuStart = refGpuTime;
}
ReadTimelinePre059( f, zone, refTime, refGpuTime, fileVer );
if( fileVer > FileVersion( 0, 4, 1 ) )
{
zone->cpuEnd = ReadTimeOffset( f, refTime );
int64_t cpuEnd = ReadTimeOffset( f, refTime );
if( cpuEnd > 0 ) cpuEnd -= m_data.baseTime;
zone->SetCpuEnd( cpuEnd );
zone->gpuEnd = ReadTimeOffset( f, refGpuTime );
if( zone->cpuEnd > 0 ) zone->cpuEnd -= m_data.baseTime;
if( zone->gpuEnd > 0 ) zone->gpuEnd -= m_data.baseTime;
}
}
@ -5455,11 +5478,13 @@ void Worker::WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec, int64_t&
for( auto& v : vec )
{
WriteTimeOffset( f, refTime, v->cpuStart );
WriteTimeOffset( f, refTime, v->CpuStart() );
WriteTimeOffset( f, refGpuTime, v->gpuStart );
f.Write( &v->srcloc, sizeof( v->srcloc ) );
const int16_t srcloc = v->SrcLoc();
f.Write( &srcloc, sizeof( srcloc ) );
f.Write( &v->callstack, sizeof( v->callstack ) );
f.Write( &v->thread, sizeof( v->thread ) );
const uint16_t thread = v->Thread();
f.Write( &thread, sizeof( thread ) );
if( v->child < 0 )
{
@ -5471,7 +5496,7 @@ void Worker::WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec, int64_t&
WriteTimeline( f, GetGpuChildren( v->child ), refTime, refGpuTime );
}
WriteTimeOffset( f, refTime, v->cpuEnd );
WriteTimeOffset( f, refTime, v->CpuEnd() );
WriteTimeOffset( f, refGpuTime, v->gpuEnd );
}
}