Multithreaded frame image compression.

This commit is contained in:
Bartosz Taudul 2019-09-20 23:03:12 +02:00
parent 6f5a23a198
commit d8e0853cd8

View File

@ -36,6 +36,7 @@
#include "../common/TracySystem.hpp" #include "../common/TracySystem.hpp"
#include "TracyFileRead.hpp" #include "TracyFileRead.hpp"
#include "TracyFileWrite.hpp" #include "TracyFileWrite.hpp"
#include "TracyTaskDispatch.hpp"
#include "TracyVersion.hpp" #include "TracyVersion.hpp"
#include "TracyWorker.hpp" #include "TracyWorker.hpp"
@ -1347,11 +1348,28 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
if( eventMask & EventType::FrameImages ) if( eventMask & EventType::FrameImages )
{ {
size_t tmpbufsz = 0;
char* tmpbuf = nullptr;
f.Read( sz ); f.Read( sz );
m_data.frameImage.reserve_exact( sz, m_slab ); m_data.frameImage.reserve_exact( sz, m_slab );
s_loadProgress.subTotal.store( sz, std::memory_order_relaxed ); s_loadProgress.subTotal.store( sz, std::memory_order_relaxed );
if( sz != 0 )
{
struct JobData
{
enum State : int { InProgress, Available, DataReady };
FrameImage* fi;
char* buf = nullptr;
size_t bufsz = 0;
char* outbuf = nullptr;
size_t outsz = 0;
std::atomic<State> state = Available;
};
// Leave one thread for file reader, second thread for dispatch (this thread)
// Minimum 2 threads to have at least two buffers (one in use, second one filling up)
const auto jobs = std::max<int>( std::thread::hardware_concurrency() - 2, 2 );
auto td = std::make_unique<TaskDispatch>( jobs );
auto data = std::make_unique<JobData[]>( jobs );
for( uint64_t i=0; i<sz; i++ ) for( uint64_t i=0; i<sz; i++ )
{ {
s_loadProgress.subProgress.store( i, std::memory_order_relaxed ); s_loadProgress.subProgress.store( i, std::memory_order_relaxed );
@ -1359,17 +1377,59 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
f.Read2( fi->w, fi->h ); f.Read2( fi->w, fi->h );
f.Read( fi->flip ); f.Read( fi->flip );
const auto sz = size_t( fi->w * fi->h / 2 ); const auto sz = size_t( fi->w * fi->h / 2 );
if( tmpbufsz < sz )
int idx = -1;
for(;;)
{ {
tmpbufsz = sz; for( int j=0; j<jobs; j++ )
delete[] tmpbuf; {
tmpbuf = new char[sz]; const auto state = data[j].state.load( std::memory_order_acquire );
if( state != JobData::InProgress )
{
if( state == JobData::DataReady )
{
char* tmp = (char*)m_slab.AllocBig( data[j].fi->csz );
memcpy( tmp, data[j].outbuf, data[j].fi->csz );
data[j].fi->ptr = tmp;
} }
f.Read( tmpbuf, sz ); idx = j;
fi->ptr = PackFrameImage( tmpbuf, fi->w, fi->h, fi->csz ); break;
}
}
if( idx >= 0 ) break;
std::this_thread::yield();
}
if( data[idx].bufsz < sz )
{
data[idx].bufsz = sz;
delete[] data[idx].buf;
data[idx].buf = new char[sz];
}
f.Read( data[idx].buf, sz );
data[idx].fi = fi;
data[idx].state.store( JobData::InProgress, std::memory_order_release );
td->Queue( [this, &data, idx, fi] {
PackFrameImage( data[idx].outbuf, data[idx].outsz, data[idx].buf, fi->w, fi->h, fi->csz );
data[idx].state.store( JobData::DataReady, std::memory_order_release );
} );
m_data.frameImage[i] = fi; m_data.frameImage[i] = fi;
} }
delete[] tmpbuf; td->Sync();
td.reset();
for( size_t i=0; i<jobs; i++ )
{
if( data[i].state.load( std::memory_order_acquire ) == JobData::DataReady )
{
char* tmp = (char*)m_slab.AllocBig( data[i].fi->csz );
memcpy( tmp, data[i].outbuf, data[i].fi->csz );
data[i].fi->ptr = tmp;
}
delete[] data[i].buf;
delete[] data[i].outbuf;
}
const auto& frames = GetFramesBase()->frames; const auto& frames = GetFramesBase()->frames;
const auto fsz = uint32_t( frames.size() ); const auto fsz = uint32_t( frames.size() );
@ -1382,6 +1442,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
} }
} }
} }
}
else else
{ {
f.Read( sz ); f.Read( sz );