From dfad9695d2b9da2ca719e81a971f2a296d12eea8 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Wed, 6 Nov 2019 23:29:59 +0100 Subject: [PATCH] Compress frame image data right as it arrives. This removes the need to store temporary uncompressed image buffers, which involves constant memory allocation and freeing. Instead, just one permanent buffer is used, and only because the input data cannot change during processing. --- server/TracyWorker.cpp | 57 ++++++++++++++++++++++-------------------- server/TracyWorker.hpp | 10 +++++++- 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index be20c0bd..3c24720e 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -1836,6 +1836,7 @@ Worker::~Worker() LZ4_freeStreamDecode( (LZ4_streamDecode_t*)m_stream ); delete[] m_frameImageBuffer; + delete[] m_frameImageCompressedBuffer; for( auto& v : m_data.threads ) { @@ -1867,10 +1868,6 @@ Worker::~Worker() { v.second->~LockMap(); } - for( auto& v : m_pendingFrameImageData ) - { - delete[] (char*)v.second; - } } uint64_t Worker::GetLockCount() const @@ -3117,18 +3114,26 @@ static const uint8_t DxtcIndexTable[256] = { void Worker::AddFrameImageData( uint64_t ptr, char* data, size_t sz ) { assert( m_pendingFrameImageData.find( ptr ) == m_pendingFrameImageData.end() ); - auto image = new char[sz]; - auto src = (uint8_t*)data; - auto dst = (uint8_t*)image; assert( sz % 8 == 0 ); + // Input data buffer cannot be changed, as it is used as LZ4 dictionary. + if( m_frameImageBufferSize < sz ) + { + m_frameImageBufferSize = sz; + delete[] m_frameImageBuffer; + m_frameImageBuffer = new char[sz]; + } + auto src = (uint8_t*)data; + auto dst = (uint8_t*)m_frameImageBuffer; for( size_t i=0; isecond; m_pendingFrameImageData.erase( it ); return; } else if( fidx <= 0 ) { - delete[] (char*)it->second; FrameImageIndexFailure(); return; } auto fi = m_slab.Alloc(); - fi->ptr = PackFrameImage( (const char*)it->second, ev.w * ev.h / 2, fi->csz ); + fi->ptr = it->second.image; + fi->csz = it->second.csz; fi->w = ev.w; fi->h = ev.h; fi->frameRef = uint32_t( fidx ); @@ -3866,7 +3870,6 @@ void Worker::ProcessFrameImage( const QueueFrameImage& ev ) const auto idx = m_data.frameImage.size(); m_data.frameImage.push_back( fi ); - delete[] it->second; m_pendingFrameImageData.erase( it ); if( fidx >= frames.size() ) @@ -5974,30 +5977,30 @@ void Worker::PackFrameImage( char*& buf, size_t& bufsz, const char* image, uint3 const char* Worker::PackFrameImage( const char* image, uint32_t inBytes, uint32_t& csz ) { const auto maxout = LZ4_COMPRESSBOUND( inBytes ); - if( m_frameImageBufferSize < maxout ) + if( m_frameImageCompressedBufferSize < maxout ) { - m_frameImageBufferSize = maxout; - delete[] m_frameImageBuffer; - m_frameImageBuffer = new char[maxout]; + m_frameImageCompressedBufferSize = maxout; + delete[] m_frameImageCompressedBuffer; + m_frameImageCompressedBuffer = new char[maxout]; } - const auto outsz = LZ4_compress_default( image, m_frameImageBuffer, inBytes, maxout ); + const auto outsz = LZ4_compress_default( image, m_frameImageCompressedBuffer, inBytes, maxout ); csz = uint32_t( outsz ); auto ptr = (char*)m_slab.AllocBig( outsz ); - memcpy( ptr, m_frameImageBuffer, outsz ); + memcpy( ptr, m_frameImageCompressedBuffer, outsz ); return ptr; } const char* Worker::UnpackFrameImage( const FrameImage& image ) { const auto outsz = size_t( image.w ) * size_t( image.h ) / 2; - if( m_frameImageBufferSize < outsz ) + if( m_frameImageCompressedBufferSize < outsz ) { - m_frameImageBufferSize = outsz; - delete[] m_frameImageBuffer; - m_frameImageBuffer = new char[outsz]; + m_frameImageCompressedBufferSize = outsz; + delete[] m_frameImageCompressedBuffer; + m_frameImageCompressedBuffer = new char[outsz]; } - LZ4_decompress_safe( image.ptr, m_frameImageBuffer, image.csz, outsz ); - return m_frameImageBuffer; + LZ4_decompress_safe( image.ptr, m_frameImageCompressedBuffer, image.csz, outsz ); + return m_frameImageCompressedBuffer; } } diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 3dbe21ac..b192997e 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -264,6 +264,12 @@ private: int16_t srcloc; }; + struct FrameImagePending + { + const char* image; + uint32_t csz; + }; + public: enum class Failure { @@ -624,7 +630,7 @@ private: flat_hash_map> m_sourceLocationShrink; flat_hash_map> m_threadMap; flat_hash_map> m_nextCallstack; - flat_hash_map> m_pendingFrameImageData; + flat_hash_map> m_pendingFrameImageData; uint32_t m_pendingStrings; uint32_t m_pendingThreads; @@ -662,6 +668,8 @@ private: flat_hash_map m_frameImageStaging; char* m_frameImageBuffer = nullptr; size_t m_frameImageBufferSize = 0; + char* m_frameImageCompressedBuffer = nullptr; + size_t m_frameImageCompressedBufferSize = 0; uint64_t m_threadCtx = 0; int64_t m_refTimeThread = 0;