Compress frame image data right as it arrives.

This removes the need to store temporary uncompressed image buffers,
which required constantly allocating and freeing memory. Instead, a single
persistent buffer is reused, and it is needed only because the incoming data
buffer must not be modified in place (it is used as the LZ4 dictionary).
This commit is contained in:
Bartosz Taudul 2019-11-06 23:29:59 +01:00
parent 46d33f45bf
commit dfad9695d2
2 changed files with 39 additions and 28 deletions

View File

@ -1836,6 +1836,7 @@ Worker::~Worker()
LZ4_freeStreamDecode( (LZ4_streamDecode_t*)m_stream ); LZ4_freeStreamDecode( (LZ4_streamDecode_t*)m_stream );
delete[] m_frameImageBuffer; delete[] m_frameImageBuffer;
delete[] m_frameImageCompressedBuffer;
for( auto& v : m_data.threads ) for( auto& v : m_data.threads )
{ {
@ -1867,10 +1868,6 @@ Worker::~Worker()
{ {
v.second->~LockMap(); v.second->~LockMap();
} }
for( auto& v : m_pendingFrameImageData )
{
delete[] (char*)v.second;
}
} }
uint64_t Worker::GetLockCount() const uint64_t Worker::GetLockCount() const
@ -3117,18 +3114,26 @@ static const uint8_t DxtcIndexTable[256] = {
void Worker::AddFrameImageData( uint64_t ptr, char* data, size_t sz ) void Worker::AddFrameImageData( uint64_t ptr, char* data, size_t sz )
{ {
assert( m_pendingFrameImageData.find( ptr ) == m_pendingFrameImageData.end() ); assert( m_pendingFrameImageData.find( ptr ) == m_pendingFrameImageData.end() );
auto image = new char[sz];
auto src = (uint8_t*)data;
auto dst = (uint8_t*)image;
assert( sz % 8 == 0 ); assert( sz % 8 == 0 );
// Input data buffer cannot be changed, as it is used as LZ4 dictionary.
if( m_frameImageBufferSize < sz )
{
m_frameImageBufferSize = sz;
delete[] m_frameImageBuffer;
m_frameImageBuffer = new char[sz];
}
auto src = (uint8_t*)data;
auto dst = (uint8_t*)m_frameImageBuffer;
for( size_t i=0; i<sz; i+=8 ) for( size_t i=0; i<sz; i+=8 )
{ {
memcpy( dst, src, 4 ); memcpy( dst, src, 4 );
dst += 4; for( int j=4; j<8; j++ ) dst[j] = DxtcIndexTable[src[j]];
src += 4; src += 8;
for( int j=0; j<4; j++ ) *dst++ = DxtcIndexTable[*src++]; dst += 8;
} }
m_pendingFrameImageData.emplace( ptr, image ); uint32_t csz;
auto image = PackFrameImage( m_frameImageBuffer, sz, csz );
m_pendingFrameImageData.emplace( ptr, FrameImagePending { image, csz } );
} }
uint64_t Worker::GetCanonicalPointer( const CallstackFrameId& id ) const uint64_t Worker::GetCanonicalPointer( const CallstackFrameId& id ) const
@ -3846,19 +3851,18 @@ void Worker::ProcessFrameImage( const QueueFrameImage& ev )
const auto fidx = int64_t( ev.frame ) - int64_t( m_data.frameOffset ) + 1; const auto fidx = int64_t( ev.frame ) - int64_t( m_data.frameOffset ) + 1;
if( m_onDemand && fidx <= 1 ) if( m_onDemand && fidx <= 1 )
{ {
delete[] (char*)it->second;
m_pendingFrameImageData.erase( it ); m_pendingFrameImageData.erase( it );
return; return;
} }
else if( fidx <= 0 ) else if( fidx <= 0 )
{ {
delete[] (char*)it->second;
FrameImageIndexFailure(); FrameImageIndexFailure();
return; return;
} }
auto fi = m_slab.Alloc<FrameImage>(); auto fi = m_slab.Alloc<FrameImage>();
fi->ptr = PackFrameImage( (const char*)it->second, ev.w * ev.h / 2, fi->csz ); fi->ptr = it->second.image;
fi->csz = it->second.csz;
fi->w = ev.w; fi->w = ev.w;
fi->h = ev.h; fi->h = ev.h;
fi->frameRef = uint32_t( fidx ); fi->frameRef = uint32_t( fidx );
@ -3866,7 +3870,6 @@ void Worker::ProcessFrameImage( const QueueFrameImage& ev )
const auto idx = m_data.frameImage.size(); const auto idx = m_data.frameImage.size();
m_data.frameImage.push_back( fi ); m_data.frameImage.push_back( fi );
delete[] it->second;
m_pendingFrameImageData.erase( it ); m_pendingFrameImageData.erase( it );
if( fidx >= frames.size() ) if( fidx >= frames.size() )
@ -5974,30 +5977,30 @@ void Worker::PackFrameImage( char*& buf, size_t& bufsz, const char* image, uint3
const char* Worker::PackFrameImage( const char* image, uint32_t inBytes, uint32_t& csz ) const char* Worker::PackFrameImage( const char* image, uint32_t inBytes, uint32_t& csz )
{ {
const auto maxout = LZ4_COMPRESSBOUND( inBytes ); const auto maxout = LZ4_COMPRESSBOUND( inBytes );
if( m_frameImageBufferSize < maxout ) if( m_frameImageCompressedBufferSize < maxout )
{ {
m_frameImageBufferSize = maxout; m_frameImageCompressedBufferSize = maxout;
delete[] m_frameImageBuffer; delete[] m_frameImageCompressedBuffer;
m_frameImageBuffer = new char[maxout]; m_frameImageCompressedBuffer = new char[maxout];
} }
const auto outsz = LZ4_compress_default( image, m_frameImageBuffer, inBytes, maxout ); const auto outsz = LZ4_compress_default( image, m_frameImageCompressedBuffer, inBytes, maxout );
csz = uint32_t( outsz ); csz = uint32_t( outsz );
auto ptr = (char*)m_slab.AllocBig( outsz ); auto ptr = (char*)m_slab.AllocBig( outsz );
memcpy( ptr, m_frameImageBuffer, outsz ); memcpy( ptr, m_frameImageCompressedBuffer, outsz );
return ptr; return ptr;
} }
const char* Worker::UnpackFrameImage( const FrameImage& image ) const char* Worker::UnpackFrameImage( const FrameImage& image )
{ {
const auto outsz = size_t( image.w ) * size_t( image.h ) / 2; const auto outsz = size_t( image.w ) * size_t( image.h ) / 2;
if( m_frameImageBufferSize < outsz ) if( m_frameImageCompressedBufferSize < outsz )
{ {
m_frameImageBufferSize = outsz; m_frameImageCompressedBufferSize = outsz;
delete[] m_frameImageBuffer; delete[] m_frameImageCompressedBuffer;
m_frameImageBuffer = new char[outsz]; m_frameImageCompressedBuffer = new char[outsz];
} }
LZ4_decompress_safe( image.ptr, m_frameImageBuffer, image.csz, outsz ); LZ4_decompress_safe( image.ptr, m_frameImageCompressedBuffer, image.csz, outsz );
return m_frameImageBuffer; return m_frameImageCompressedBuffer;
} }
} }

View File

@ -264,6 +264,12 @@ private:
int16_t srcloc; int16_t srcloc;
}; };
// Already-compressed frame image that is waiting to be processed.
// AddFrameImageData() stores one entry per image in m_pendingFrameImageData,
// keyed by the `ptr` value it receives; ProcessFrameImage() later copies both
// fields into the final FrameImage record and erases the map entry.
struct FrameImagePending
{
// Compressed image data, as returned by PackFrameImage().
const char* image;
// Size of the compressed data in bytes, as reported by PackFrameImage().
uint32_t csz;
};
public: public:
enum class Failure enum class Failure
{ {
@ -624,7 +630,7 @@ private:
flat_hash_map<uint64_t, int16_t, nohash<uint64_t>> m_sourceLocationShrink; flat_hash_map<uint64_t, int16_t, nohash<uint64_t>> m_sourceLocationShrink;
flat_hash_map<uint64_t, ThreadData*, nohash<uint64_t>> m_threadMap; flat_hash_map<uint64_t, ThreadData*, nohash<uint64_t>> m_threadMap;
flat_hash_map<uint64_t, NextCallstack, nohash<uint64_t>> m_nextCallstack; flat_hash_map<uint64_t, NextCallstack, nohash<uint64_t>> m_nextCallstack;
flat_hash_map<uint64_t, void*, nohash<uint64_t>> m_pendingFrameImageData; flat_hash_map<uint64_t, FrameImagePending, nohash<uint64_t>> m_pendingFrameImageData;
uint32_t m_pendingStrings; uint32_t m_pendingStrings;
uint32_t m_pendingThreads; uint32_t m_pendingThreads;
@ -662,6 +668,8 @@ private:
flat_hash_map<uint64_t, int32_t> m_frameImageStaging; flat_hash_map<uint64_t, int32_t> m_frameImageStaging;
char* m_frameImageBuffer = nullptr; char* m_frameImageBuffer = nullptr;
size_t m_frameImageBufferSize = 0; size_t m_frameImageBufferSize = 0;
char* m_frameImageCompressedBuffer = nullptr;
size_t m_frameImageCompressedBufferSize = 0;
uint64_t m_threadCtx = 0; uint64_t m_threadCtx = 0;
int64_t m_refTimeThread = 0; int64_t m_refTimeThread = 0;