Compress frame images using zstd instead of LZ4.

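Memory usage and trace load times, before (LZ4) -> after (zstd); the percentages in parentheses are the after/before ratios for memory and load time, respectively: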

!comp         587 MB,  439 ms  ->    541 MB,  523 ms    (92%, 119%)
android-vk    197 MB,  136 ms  ->    188 MB,  178 ms    (95%, 130%)
big2         4463 MB,  2.93 s  ->   4198 MB,  3.65 s    (94%, 124%)
fi            483 MB,  346 ms  ->    416 MB,  409 ms    (86%, 118%)
fi-big       3307 MB,  3.15 s  ->   2985 MB,  3.53 s    (90%, 112%)
large       19.74 GB, 10.05 s  ->  19.28 GB, 11.16 s    (97%, 110%)
This commit is contained in:
Bartosz Taudul 2020-02-09 21:22:12 +01:00
parent 99f2734d28
commit 53e5eb749d
2 changed files with 27 additions and 8 deletions

View File

@ -13,6 +13,8 @@
#include <string.h> #include <string.h>
#include <inttypes.h> #include <inttypes.h>
#include "../zstd/zstd.h"
#include "../common/TracyProtocol.hpp" #include "../common/TracyProtocol.hpp"
#include "../common/TracySystem.hpp" #include "../common/TracySystem.hpp"
#include "TracyFileRead.hpp" #include "TracyFileRead.hpp"
@ -230,6 +232,8 @@ Worker::Worker( const char* addr, int port )
, m_callstackFrameStaging( nullptr ) , m_callstackFrameStaging( nullptr )
, m_traceVersion( CurrentVersion ) , m_traceVersion( CurrentVersion )
, m_loadTime( 0 ) , m_loadTime( 0 )
, m_fiCctx( ZSTD_createCCtx() )
, m_fiDctx( ZSTD_createDCtx() )
{ {
m_data.sourceLocationExpand.push_back( 0 ); m_data.sourceLocationExpand.push_back( 0 );
m_data.localThreadCompress.InitZero(); m_data.localThreadCompress.InitZero();
@ -375,6 +379,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
: m_hasData( true ) : m_hasData( true )
, m_stream( nullptr ) , m_stream( nullptr )
, m_buffer( nullptr ) , m_buffer( nullptr )
, m_fiDctx( ZSTD_createDCtx() )
{ {
auto loadStart = std::chrono::high_resolution_clock::now(); auto loadStart = std::chrono::high_resolution_clock::now();
@ -1364,6 +1369,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
size_t bufsz = 0; size_t bufsz = 0;
char* outbuf = nullptr; char* outbuf = nullptr;
size_t outsz = 0; size_t outsz = 0;
ZSTD_CCtx* ctx = ZSTD_createCCtx();
alignas(64) std::atomic<State> state = Available; alignas(64) std::atomic<State> state = Available;
}; };
@ -1413,7 +1419,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
data[idx].state.store( JobData::InProgress, std::memory_order_release ); data[idx].state.store( JobData::InProgress, std::memory_order_release );
td->Queue( [this, &data, idx, fi] { td->Queue( [this, &data, idx, fi] {
PackFrameImage( data[idx].outbuf, data[idx].outsz, data[idx].buf, fi->w * fi->h / 2, fi->csz ); PackFrameImage( data[idx].ctx, data[idx].outbuf, data[idx].outsz, data[idx].buf, fi->w * fi->h / 2, fi->csz );
data[idx].state.store( JobData::DataReady, std::memory_order_release ); data[idx].state.store( JobData::DataReady, std::memory_order_release );
} ); } );
@ -1429,6 +1435,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
memcpy( tmp, data[i].outbuf, data[i].fi->csz ); memcpy( tmp, data[i].outbuf, data[i].fi->csz );
data[i].fi->ptr = tmp; data[i].fi->ptr = tmp;
} }
ZSTD_freeCCtx( data[i].ctx );
delete[] data[i].buf; delete[] data[i].buf;
delete[] data[i].outbuf; delete[] data[i].outbuf;
} }
@ -1680,6 +1687,9 @@ Worker::~Worker()
{ {
v.second->~LockMap(); v.second->~LockMap();
} }
if( m_fiCctx ) ZSTD_freeCCtx( m_fiCctx );
if( m_fiDctx ) ZSTD_freeDCtx( m_fiDctx );
} }
uint64_t Worker::GetLockCount() const uint64_t Worker::GetLockCount() const
@ -5875,29 +5885,31 @@ const char* Worker::GetFailureString( Worker::Failure failure )
return s_failureReasons[(int)failure]; return s_failureReasons[(int)failure];
} }
void Worker::PackFrameImage( char*& buf, size_t& bufsz, const char* image, uint32_t inBytes, uint32_t& csz ) const void Worker::PackFrameImage( struct ZSTD_CCtx_s* ctx, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes, uint32_t& csz ) const
{ {
const auto maxout = LZ4_COMPRESSBOUND( inBytes ); const auto maxout = ZSTD_COMPRESSBOUND( inBytes );
if( bufsz < maxout ) if( bufsz < maxout )
{ {
bufsz = maxout; bufsz = maxout;
delete[] buf; delete[] buf;
buf = new char[maxout]; buf = new char[maxout];
} }
const auto outsz = LZ4_compress_default( image, buf, inBytes, maxout ); assert( ctx );
const auto outsz = ZSTD_compressCCtx( ctx, buf, maxout, image, inBytes, 3 );
csz = uint32_t( outsz ); csz = uint32_t( outsz );
} }
const char* Worker::PackFrameImage( const char* image, uint32_t inBytes, uint32_t& csz ) const char* Worker::PackFrameImage( const char* image, uint32_t inBytes, uint32_t& csz )
{ {
const auto maxout = LZ4_COMPRESSBOUND( inBytes ); const auto maxout = ZSTD_COMPRESSBOUND( inBytes );
if( m_frameImageCompressedBufferSize < maxout ) if( m_frameImageCompressedBufferSize < maxout )
{ {
m_frameImageCompressedBufferSize = maxout; m_frameImageCompressedBufferSize = maxout;
delete[] m_frameImageCompressedBuffer; delete[] m_frameImageCompressedBuffer;
m_frameImageCompressedBuffer = new char[maxout]; m_frameImageCompressedBuffer = new char[maxout];
} }
const auto outsz = LZ4_compress_default( image, m_frameImageCompressedBuffer, inBytes, maxout ); assert( m_fiCctx );
const auto outsz = ZSTD_compressCCtx( m_fiCctx, m_frameImageCompressedBuffer, maxout, image, inBytes, 1 );
csz = uint32_t( outsz ); csz = uint32_t( outsz );
auto ptr = (char*)m_slab.AllocBig( outsz ); auto ptr = (char*)m_slab.AllocBig( outsz );
memcpy( ptr, m_frameImageCompressedBuffer, outsz ); memcpy( ptr, m_frameImageCompressedBuffer, outsz );
@ -5913,7 +5925,8 @@ const char* Worker::UnpackFrameImage( const FrameImage& image )
delete[] m_frameImageCompressedBuffer; delete[] m_frameImageCompressedBuffer;
m_frameImageCompressedBuffer = new char[outsz]; m_frameImageCompressedBuffer = new char[outsz];
} }
LZ4_decompress_safe( image.ptr, m_frameImageCompressedBuffer, image.csz, outsz ); assert( m_fiDctx );
ZSTD_decompressDCtx( m_fiDctx, m_frameImageCompressedBuffer, outsz, image.ptr, image.csz );
return m_frameImageCompressedBuffer; return m_frameImageCompressedBuffer;
} }

View File

@ -24,6 +24,10 @@
#include "TracyThreadCompress.hpp" #include "TracyThreadCompress.hpp"
#include "TracyVarArray.hpp" #include "TracyVarArray.hpp"
struct ZSTD_CCtx_s;
struct ZSTD_DCtx_s;
namespace tracy namespace tracy
{ {
@ -444,7 +448,7 @@ public:
const FailureData& GetFailureData() const { return m_failureData; } const FailureData& GetFailureData() const { return m_failureData; }
static const char* GetFailureString( Failure failure ); static const char* GetFailureString( Failure failure );
void PackFrameImage( char*& buf, size_t& bufsz, const char* image, uint32_t inBytes, uint32_t& csz ) const; void PackFrameImage( struct ZSTD_CCtx_s* ctx, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes, uint32_t& csz ) const;
const char* PackFrameImage( const char* image, uint32_t inBytes, uint32_t& csz ); const char* PackFrameImage( const char* image, uint32_t inBytes, uint32_t& csz );
const char* UnpackFrameImage( const FrameImage& image ); const char* UnpackFrameImage( const FrameImage& image );
@ -720,6 +724,8 @@ private:
size_t m_frameImageBufferSize = 0; size_t m_frameImageBufferSize = 0;
char* m_frameImageCompressedBuffer = nullptr; char* m_frameImageCompressedBuffer = nullptr;
size_t m_frameImageCompressedBufferSize = 0; size_t m_frameImageCompressedBufferSize = 0;
struct ZSTD_CCtx_s* m_fiCctx = nullptr;
struct ZSTD_DCtx_s* m_fiDctx = nullptr;
uint64_t m_threadCtx = 0; uint64_t m_threadCtx = 0;
ThreadData* m_threadCtxData = nullptr; ThreadData* m_threadCtxData = nullptr;