Use zstd dict for packing/unpacking frame images.

This only affects run-time memory usage and requires the dictionary to be
calculated offline. Results vary depending on the similarity of image blocks.

agora        34.96 MB ->  28.21 MB
agora2       40.75 MB ->  34.14 MB
android-vk   36.21 MB ->  18.44 MB
astar3       44.72 MB ->  43.38 MB
clipper1    134.36 MB ->  52.16 MB
fi           50.82 MB ->  40.79 MB
fi-big      537.74 MB -> 469.54 MB
test         23.26 MB ->   1.87 MB
This commit is contained in:
Bartosz Taudul 2021-05-15 17:02:25 +02:00
parent d555256546
commit c91c7a7fd5
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3
3 changed files with 43 additions and 3 deletions

View File

@ -11,6 +11,7 @@ TextureCompression::TextureCompression()
, m_bufSize( 0 ) , m_bufSize( 0 )
, m_cctx( ZSTD_createCCtx() ) , m_cctx( ZSTD_createCCtx() )
, m_dctx( ZSTD_createDCtx() ) , m_dctx( ZSTD_createDCtx() )
, m_dict( nullptr )
{ {
} }
@ -19,6 +20,7 @@ TextureCompression::~TextureCompression()
delete[] m_buf; delete[] m_buf;
ZSTD_freeCCtx( m_cctx ); ZSTD_freeCCtx( m_cctx );
ZSTD_freeDCtx( m_dctx ); ZSTD_freeDCtx( m_dctx );
ZSTD_freeDDict( m_dict );
} }
uint32_t TextureCompression::Pack( struct ZSTD_CCtx_s* ctx, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes ) uint32_t TextureCompression::Pack( struct ZSTD_CCtx_s* ctx, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes )
@ -67,7 +69,14 @@ const char* TextureCompression::Unpack( const FrameImage& image )
m_buf = new char[outsz]; m_buf = new char[outsz];
} }
assert( m_dctx ); assert( m_dctx );
if( m_dict )
{
ZSTD_decompress_usingDDict( m_dctx, m_buf, outsz, image.ptr, image.csz, m_dict );
}
else
{
ZSTD_decompressDCtx( m_dctx, m_buf, outsz, image.ptr, image.csz ); ZSTD_decompressDCtx( m_dctx, m_buf, outsz, image.ptr, image.csz );
}
return m_buf; return m_buf;
} }

View File

@ -11,6 +11,7 @@
struct ZSTD_CCtx_s; struct ZSTD_CCtx_s;
struct ZSTD_DCtx_s; struct ZSTD_DCtx_s;
struct ZSTD_CDict_s; struct ZSTD_CDict_s;
struct ZSTD_DDict_s;
namespace tracy namespace tracy
{ {
@ -23,6 +24,8 @@ public:
TextureCompression(); TextureCompression();
~TextureCompression(); ~TextureCompression();
// Stores the zstd decompression dictionary that Unpack() passes to
// ZSTD_decompress_usingDDict() when it is non-null.
// The destructor calls ZSTD_freeDDict() on the stored pointer, so the caller
// must not free it. A previously stored pointer is overwritten here without
// being freed.
void SetDict( struct ZSTD_DDict_s* dict ) { m_dict = dict; }
uint32_t Pack( struct ZSTD_CCtx_s* ctx, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes ); uint32_t Pack( struct ZSTD_CCtx_s* ctx, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes );
uint32_t Pack( struct ZSTD_CCtx_s* ctx, const struct ZSTD_CDict_s* dict, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes ); uint32_t Pack( struct ZSTD_CCtx_s* ctx, const struct ZSTD_CDict_s* dict, char*& buf, size_t& bufsz, const char* image, uint32_t inBytes );
@ -49,6 +52,7 @@ private:
size_t m_bufSize; size_t m_bufSize;
struct ZSTD_CCtx_s* m_cctx; struct ZSTD_CCtx_s* m_cctx;
struct ZSTD_DCtx_s* m_dctx; struct ZSTD_DCtx_s* m_dctx;
struct ZSTD_DDict_s* m_dict;
std::atomic<uint64_t> m_inputBytes { 0 }; std::atomic<uint64_t> m_inputBytes { 0 };
std::atomic<uint64_t> m_outputBytes { 0 }; std::atomic<uint64_t> m_outputBytes { 0 };

View File

@ -1291,6 +1291,18 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
if( eventMask & EventType::FrameImages ) if( eventMask & EventType::FrameImages )
{ {
ZSTD_CDict* cdict = nullptr;
if( fileVer >= FileVersion( 0, 7, 8 ) )
{
uint32_t dsz;
f.Read( dsz );
auto dict = new char[dsz];
f.Read( dict, dsz );
cdict = ZSTD_createCDict( dict, dsz, 3 );
m_texcomp.SetDict( ZSTD_createDDict( dict, dsz ) );
delete[] dict;
}
f.Read( sz ); f.Read( sz );
m_data.frameImage.reserve_exact( sz, m_slab ); m_data.frameImage.reserve_exact( sz, m_slab );
s_loadProgress.subTotal.store( sz, std::memory_order_relaxed ); s_loadProgress.subTotal.store( sz, std::memory_order_relaxed );
@ -1353,9 +1365,16 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
data[idx].fi = fi; data[idx].fi = fi;
data[idx].state.store( JobData::InProgress, std::memory_order_release ); data[idx].state.store( JobData::InProgress, std::memory_order_release );
td->Queue( [this, &data, idx, fi, fileVer] { td->Queue( [this, &data, idx, fi, fileVer, cdict] {
if( fileVer <= FileVersion( 0, 6, 9 ) ) m_texcomp.Rdo( data[idx].buf, fi->w * fi->h / 16 ); if( fileVer <= FileVersion( 0, 6, 9 ) ) m_texcomp.Rdo( data[idx].buf, fi->w * fi->h / 16 );
if( cdict )
{
fi->csz = m_texcomp.Pack( data[idx].ctx, cdict, data[idx].outbuf, data[idx].outsz, data[idx].buf, fi->w * fi->h / 2 );
}
else
{
fi->csz = m_texcomp.Pack( data[idx].ctx, data[idx].outbuf, data[idx].outsz, data[idx].buf, fi->w * fi->h / 2 ); fi->csz = m_texcomp.Pack( data[idx].ctx, data[idx].outbuf, data[idx].outsz, data[idx].buf, fi->w * fi->h / 2 );
}
data[idx].state.store( JobData::DataReady, std::memory_order_release ); data[idx].state.store( JobData::DataReady, std::memory_order_release );
} ); } );
@ -1387,9 +1406,17 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
} }
} }
} }
ZSTD_freeCDict( cdict );
} }
else else
{ {
if( fileVer >= FileVersion( 0, 7, 8 ) )
{
uint32_t dsz;
f.Read( dsz );
f.Skip( dsz );
}
f.Read( sz ); f.Read( sz );
s_loadProgress.subTotal.store( sz, std::memory_order_relaxed ); s_loadProgress.subTotal.store( sz, std::memory_order_relaxed );
for( uint64_t i=0; i<sz; i++ ) for( uint64_t i=0; i<sz; i++ )