mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-22 14:44:34 +00:00
Add zstd dict builder.
This commit is contained in:
parent
959ddc3501
commit
859b8e4193
@ -123,6 +123,10 @@
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\src\capture.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
@ -182,6 +186,9 @@
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
|
||||
</ItemGroup>
|
||||
|
@ -25,6 +25,9 @@
|
||||
<Filter Include="zstd\decompress">
|
||||
<UniqueIdentifier>{c1f99170-d904-4af1-8010-0a3ded5736c8}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\dictBuilder">
|
||||
<UniqueIdentifier>{456e6786-ea57-42b8-ae38-829cd2d918bd}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
|
||||
@ -144,6 +147,18 @@
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
|
||||
@ -320,5 +335,14 @@
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -122,6 +122,10 @@
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\src\csvexport.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
@ -180,6 +184,9 @@
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
|
||||
</ItemGroup>
|
||||
|
@ -25,6 +25,9 @@
|
||||
<Filter Include="zstd\decompress">
|
||||
<UniqueIdentifier>{d4181058-2198-4931-ae31-b7eda0312458}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\dictBuilder">
|
||||
<UniqueIdentifier>{873c22fe-b4d7-480d-ad67-48271296f4c1}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
|
||||
@ -141,6 +144,18 @@
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstdmt_compress.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
|
||||
@ -314,5 +329,14 @@
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstdmt_compress.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -120,6 +120,10 @@
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\src\import-chrome.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
@ -176,6 +180,9 @@
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
|
||||
</ItemGroup>
|
||||
|
@ -22,6 +22,9 @@
|
||||
<Filter Include="zstd\decompress">
|
||||
<UniqueIdentifier>{438fff23-197c-4b6f-91f0-74f8b3878571}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\dictBuilder">
|
||||
<UniqueIdentifier>{e5c7021a-e0e4-45c2-b461-e806bc036d5f}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
|
||||
@ -132,6 +135,18 @@
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
|
||||
@ -299,5 +314,14 @@
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -160,6 +160,10 @@
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\libs\gl3w\GL\gl3w.c" />
|
||||
<ClCompile Include="..\..\src\HttpRequest.cpp" />
|
||||
<ClCompile Include="..\..\src\imgui_impl_glfw.cpp" />
|
||||
@ -266,6 +270,9 @@
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
|
||||
<ClInclude Include="..\..\libs\gl3w\GL\gl3w.h" />
|
||||
|
@ -34,6 +34,9 @@
|
||||
<Filter Include="zstd\decompress">
|
||||
<UniqueIdentifier>{c93ef7c5-f1df-40a6-a4e3-81441e6df174}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\dictBuilder">
|
||||
<UniqueIdentifier>{200ab875-91e5-4c7a-8b98-ddbae19d2f98}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
|
||||
@ -234,6 +237,18 @@
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstdmt_compress.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
|
||||
@ -554,6 +569,15 @@
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstdmt_compress.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Natvis Include="DebugVis.natvis" />
|
||||
|
@ -122,6 +122,10 @@
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\src\update.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
@ -180,6 +184,9 @@
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
|
||||
</ItemGroup>
|
||||
|
@ -25,6 +25,9 @@
|
||||
<Filter Include="zstd\decompress">
|
||||
<UniqueIdentifier>{aeb60a40-d098-408e-a8c6-3de1c75cd9b4}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\dictBuilder">
|
||||
<UniqueIdentifier>{375ceb06-6b2f-4a00-af80-64d17bcadaac}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
|
||||
@ -141,6 +144,18 @@
|
||||
<ClCompile Include="..\..\..\zstd\common\zstd_common.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
|
||||
@ -314,5 +329,14 @@
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_trace.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
1246
zstd/dictBuilder/cover.c
Normal file
1246
zstd/dictBuilder/cover.c
Normal file
File diff suppressed because it is too large
Load Diff
158
zstd/dictBuilder/cover.h
Normal file
158
zstd/dictBuilder/cover.h
Normal file
@ -0,0 +1,158 @@
|
||||
/*
|
||||
* Copyright (c) Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
# define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "../common/mem.h" /* read */
|
||||
#include "../common/pool.h"
|
||||
#include "../common/threading.h"
|
||||
#include "../common/zstd_internal.h" /* includes zstd.h */
|
||||
#include "../zdict.h"
|
||||
|
||||
/**
|
||||
* COVER_best_t is used for two purposes:
|
||||
* 1. Synchronizing threads.
|
||||
* 2. Saving the best parameters and dictionary.
|
||||
*
|
||||
* All of the methods except COVER_best_init() are thread safe if zstd is
|
||||
* compiled with multithreaded support.
|
||||
*/
|
||||
typedef struct COVER_best_s {
|
||||
ZSTD_pthread_mutex_t mutex;
|
||||
ZSTD_pthread_cond_t cond;
|
||||
size_t liveJobs;
|
||||
void *dict;
|
||||
size_t dictSize;
|
||||
ZDICT_cover_params_t parameters;
|
||||
size_t compressedSize;
|
||||
} COVER_best_t;
|
||||
|
||||
/**
|
||||
* A segment is a range in the source as well as the score of the segment.
|
||||
*/
|
||||
typedef struct {
|
||||
U32 begin;
|
||||
U32 end;
|
||||
U32 score;
|
||||
} COVER_segment_t;
|
||||
|
||||
/**
|
||||
* Number of epochs and size of each epoch.
|
||||
*/
|
||||
typedef struct {
|
||||
U32 num;
|
||||
U32 size;
|
||||
} COVER_epoch_info_t;
|
||||
|
||||
/**
|
||||
* Struct used for the dictionary selection function.
|
||||
*/
|
||||
typedef struct COVER_dictSelection {
|
||||
BYTE* dictContent;
|
||||
size_t dictSize;
|
||||
size_t totalCompressedSize;
|
||||
} COVER_dictSelection_t;
|
||||
|
||||
/**
|
||||
* Computes the number of epochs and the size of each epoch.
|
||||
* We will make sure that each epoch gets at least 10 * k bytes.
|
||||
*
|
||||
* The COVER algorithms divide the data up into epochs of equal size and
|
||||
* select one segment from each epoch.
|
||||
*
|
||||
* @param maxDictSize The maximum allowed dictionary size.
|
||||
* @param nbDmers The number of dmers we are training on.
|
||||
* @param k The parameter k (segment size).
|
||||
* @param passes The target number of passes over the dmer corpus.
|
||||
* More passes means a better dictionary.
|
||||
*/
|
||||
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
|
||||
U32 k, U32 passes);
|
||||
|
||||
/**
|
||||
* Warns the user when their corpus is too small.
|
||||
*/
|
||||
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);
|
||||
|
||||
/**
|
||||
* Checks total compressed size of a dictionary
|
||||
*/
|
||||
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
|
||||
const size_t *samplesSizes, const BYTE *samples,
|
||||
size_t *offsets,
|
||||
size_t nbTrainSamples, size_t nbSamples,
|
||||
BYTE *const dict, size_t dictBufferCapacity);
|
||||
|
||||
/**
|
||||
* Returns the sum of the sample sizes.
|
||||
*/
|
||||
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ;
|
||||
|
||||
/**
|
||||
* Initialize the `COVER_best_t`.
|
||||
*/
|
||||
void COVER_best_init(COVER_best_t *best);
|
||||
|
||||
/**
|
||||
* Wait until liveJobs == 0.
|
||||
*/
|
||||
void COVER_best_wait(COVER_best_t *best);
|
||||
|
||||
/**
|
||||
* Call COVER_best_wait() and then destroy the COVER_best_t.
|
||||
*/
|
||||
void COVER_best_destroy(COVER_best_t *best);
|
||||
|
||||
/**
|
||||
* Called when a thread is about to be launched.
|
||||
* Increments liveJobs.
|
||||
*/
|
||||
void COVER_best_start(COVER_best_t *best);
|
||||
|
||||
/**
|
||||
* Called when a thread finishes executing, on either error or success.
|
||||
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
||||
* If this dictionary is the best so far save it and its parameters.
|
||||
*/
|
||||
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
||||
COVER_dictSelection_t selection);
|
||||
/**
|
||||
* Error function for COVER_selectDict function. Checks if the return
|
||||
* value is an error.
|
||||
*/
|
||||
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
|
||||
|
||||
/**
|
||||
* Error function for COVER_selectDict function. Returns a struct where
|
||||
* return.totalCompressedSize is a ZSTD error.
|
||||
*/
|
||||
COVER_dictSelection_t COVER_dictSelectionError(size_t error);
|
||||
|
||||
/**
|
||||
* Always call after selectDict is called to free up used memory from
|
||||
* newly created dictionary.
|
||||
*/
|
||||
void COVER_dictSelectionFree(COVER_dictSelection_t selection);
|
||||
|
||||
/**
|
||||
* Called to finalize the dictionary and select one based on whether or not
|
||||
* the shrink-dict flag was enabled. If enabled the dictionary used is the
|
||||
* smallest dictionary within a specified regression of the compressed size
|
||||
* from the largest dictionary.
|
||||
*/
|
||||
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
||||
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
||||
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
|
1913
zstd/dictBuilder/divsufsort.c
Normal file
1913
zstd/dictBuilder/divsufsort.c
Normal file
File diff suppressed because it is too large
Load Diff
67
zstd/dictBuilder/divsufsort.h
Normal file
67
zstd/dictBuilder/divsufsort.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* divsufsort.h for libdivsufsort-lite
|
||||
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _DIVSUFSORT_H
|
||||
#define _DIVSUFSORT_H 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
|
||||
/*- Prototypes -*/
|
||||
|
||||
/**
|
||||
* Constructs the suffix array of a given string.
|
||||
* @param T [0..n-1] The input string.
|
||||
* @param SA [0..n-1] The output array of suffixes.
|
||||
* @param n The length of the given string.
|
||||
* @param openMP enables OpenMP optimization.
|
||||
* @return 0 if no error occurred, -1 or -2 otherwise.
|
||||
*/
|
||||
int
|
||||
divsufsort(const unsigned char *T, int *SA, int n, int openMP);
|
||||
|
||||
/**
|
||||
* Constructs the burrows-wheeler transformed string of a given string.
|
||||
* @param T [0..n-1] The input string.
|
||||
* @param U [0..n-1] The output string. (can be T)
|
||||
* @param A [0..n-1] The temporary array. (can be NULL)
|
||||
* @param n The length of the given string.
|
||||
* @param num_indexes The length of secondary indexes array. (can be NULL)
|
||||
* @param indexes The secondary indexes array. (can be NULL)
|
||||
* @param openMP enables OpenMP optimization.
|
||||
* @return The primary index if no error occurred, -1 or -2 otherwise.
|
||||
*/
|
||||
int
|
||||
divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* _DIVSUFSORT_H */
|
759
zstd/dictBuilder/fastcover.c
Normal file
759
zstd/dictBuilder/fastcover.c
Normal file
@ -0,0 +1,759 @@
|
||||
/*
|
||||
* Copyright (c) Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
# define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
|
||||
#include "../common/mem.h" /* read */
|
||||
#include "../common/pool.h"
|
||||
#include "../common/threading.h"
|
||||
#include "../common/zstd_internal.h" /* includes zstd.h */
|
||||
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
|
||||
#include "../zdict.h"
|
||||
#include "cover.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
||||
#define FASTCOVER_MAX_F 31
|
||||
#define FASTCOVER_MAX_ACCEL 10
|
||||
#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
|
||||
#define DEFAULT_F 20
|
||||
#define DEFAULT_ACCEL 1
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
/* The display level global is only defined when LOCALDISPLAYLEVEL has not
 * already been defined earlier in the translation unit. */
#ifndef LOCALDISPLAYLEVEL
static int g_displayLevel = 2;
#endif

/* DISPLAY: print to stderr and flush immediately. */
#undef  DISPLAY
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush(stderr); }

/* LOCALDISPLAYLEVEL: print only when `displayLevel` is at least `l`.
 * Levels: 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
#undef  LOCALDISPLAYLEVEL
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
    if (displayLevel >= l) { DISPLAY(__VA_ARGS__); }

/* DISPLAYLEVEL: same as LOCALDISPLAYLEVEL, using the file-level g_displayLevel. */
#undef  DISPLAYLEVEL
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)

/* Refresh-rate state, likewise only defined when LOCALDISPLAYUPDATE has not
 * already been defined. g_refreshRate is 15/100 of a second in clock ticks. */
#ifndef LOCALDISPLAYUPDATE
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
#endif

/* LOCALDISPLAYUPDATE: rate-limited print. When `displayLevel` is at least `l`,
 * the message is emitted only if more than g_refreshRate ticks have elapsed
 * since the last emission, or unconditionally when `displayLevel` >= 4. */
#undef  LOCALDISPLAYUPDATE
#define LOCALDISPLAYUPDATE(displayLevel, l, ...)                            \
    if (displayLevel >= l) {                                                \
        if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) {    \
            g_time = clock();                                               \
            DISPLAY(__VA_ARGS__);                                           \
        }                                                                   \
    }

/* DISPLAYUPDATE: same as LOCALDISPLAYUPDATE, using the file-level g_displayLevel. */
#undef  DISPLAYUPDATE
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Hash Functions
|
||||
***************************************/
|
||||
/**
|
||||
* Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
|
||||
*/
|
||||
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
    /* d == 6 uses the 6-byte hash; every other value of d falls back to the
     * 8-byte hash. `f` is forwarded unchanged to the hash function. */
    return (d == 6) ? ZSTD_hash6Ptr(p, f) : ZSTD_hash8Ptr(p, f);
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Acceleration
|
||||
***************************************/
|
||||
typedef struct {
|
||||
unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */
|
||||
unsigned skip; /* Number of dmer skipped between each dmer counted in computeFrequency */
|
||||
} FASTCOVER_accel_t;
|
||||
|
||||
|
||||
static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = {
|
||||
{ 100, 0 }, /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */
|
||||
{ 100, 0 }, /* accel = 1 */
|
||||
{ 50, 1 }, /* accel = 2 */
|
||||
{ 34, 2 }, /* accel = 3 */
|
||||
{ 25, 3 }, /* accel = 4 */
|
||||
{ 20, 4 }, /* accel = 5 */
|
||||
{ 17, 5 }, /* accel = 6 */
|
||||
{ 14, 6 }, /* accel = 7 */
|
||||
{ 13, 7 }, /* accel = 8 */
|
||||
{ 11, 8 }, /* accel = 9 */
|
||||
{ 10, 9 }, /* accel = 10 */
|
||||
};
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Context
|
||||
***************************************/
|
||||
typedef struct {
|
||||
const BYTE *samples;
|
||||
size_t *offsets;
|
||||
const size_t *samplesSizes;
|
||||
size_t nbSamples;
|
||||
size_t nbTrainSamples;
|
||||
size_t nbTestSamples;
|
||||
size_t nbDmers;
|
||||
U32 *freqs;
|
||||
unsigned d;
|
||||
unsigned f;
|
||||
FASTCOVER_accel_t accelParams;
|
||||
} FASTCOVER_ctx_t;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Helper functions
|
||||
***************************************/
|
||||
/**
|
||||
* Selects the best segment in an epoch.
|
||||
* Segments are scored according to the function:
|
||||
*
|
||||
* Let F(d) be the frequency of all dmers with hash value d.
|
||||
* Let S_i be hash value of the dmer at position i of segment S which has length k.
|
||||
*
|
||||
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
|
||||
*
|
||||
* Once the dmer with hash value d is in the dictionary we set F(d) = 0.
|
||||
*/
|
||||
static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
|
||||
U32 *freqs, U32 begin, U32 end,
|
||||
ZDICT_cover_params_t parameters,
|
||||
U16* segmentFreqs) {
|
||||
/* Constants */
|
||||
const U32 k = parameters.k;
|
||||
const U32 d = parameters.d;
|
||||
const U32 f = ctx->f;
|
||||
const U32 dmersInK = k - d + 1;
|
||||
|
||||
/* Try each segment (activeSegment) and save the best (bestSegment) */
|
||||
COVER_segment_t bestSegment = {0, 0, 0};
|
||||
COVER_segment_t activeSegment;
|
||||
|
||||
/* Reset the activeDmers in the segment */
|
||||
/* The activeSegment starts at the beginning of the epoch. */
|
||||
activeSegment.begin = begin;
|
||||
activeSegment.end = begin;
|
||||
activeSegment.score = 0;
|
||||
|
||||
/* Slide the activeSegment through the whole epoch.
|
||||
* Save the best segment in bestSegment.
|
||||
*/
|
||||
while (activeSegment.end < end) {
|
||||
/* Get hash value of current dmer */
|
||||
const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
|
||||
|
||||
/* Add frequency of this index to score if this is the first occurrence of index in active segment */
|
||||
if (segmentFreqs[idx] == 0) {
|
||||
activeSegment.score += freqs[idx];
|
||||
}
|
||||
/* Increment end of segment and segmentFreqs*/
|
||||
activeSegment.end += 1;
|
||||
segmentFreqs[idx] += 1;
|
||||
/* If the window is now too large, drop the first position */
|
||||
if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
|
||||
/* Get hash value of the dmer to be eliminated from active segment */
|
||||
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
|
||||
segmentFreqs[delIndex] -= 1;
|
||||
/* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
|
||||
if (segmentFreqs[delIndex] == 0) {
|
||||
activeSegment.score -= freqs[delIndex];
|
||||
}
|
||||
/* Increment start of segment */
|
||||
activeSegment.begin += 1;
|
||||
}
|
||||
|
||||
/* If this segment is the best so far save it */
|
||||
if (activeSegment.score > bestSegment.score) {
|
||||
bestSegment = activeSegment;
|
||||
}
|
||||
}
|
||||
|
||||
/* Zero out rest of segmentFreqs array */
|
||||
while (activeSegment.begin < end) {
|
||||
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
|
||||
segmentFreqs[delIndex] -= 1;
|
||||
activeSegment.begin += 1;
|
||||
}
|
||||
|
||||
{
|
||||
/* Zero the frequency of hash value of each dmer covered by the chosen segment. */
|
||||
U32 pos;
|
||||
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
|
||||
const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d);
|
||||
freqs[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return bestSegment;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Validate a parameter set for the fastCover builder.
 *
 * @param parameters   cover parameters; `d`, `k` and `splitPoint` are checked
 * @param maxDictSize  upper bound for the segment size `k`
 * @param f            log2 of the frequency-table size
 * @param accel        acceleration level
 * @return 1 when every constraint holds, 0 otherwise
 */
static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters,
                                     size_t maxDictSize, unsigned f,
                                     unsigned accel)
{
    /* d is required and only the values 6 and 8 are accepted */
    const int dmerSizeOk = (parameters.d == 6) || (parameters.d == 8);
    /* k is required, and must satisfy d <= k <= maxDictSize */
    const int segmentSizeOk = (parameters.k != 0)
                           && (parameters.k <= maxDictSize)
                           && (parameters.d <= parameters.k);
    /* 0 < f <= FASTCOVER_MAX_F */
    const int tableLogOk = (f != 0) && (f <= FASTCOVER_MAX_F);
    /* 0 < splitPoint <= 1, expressed as the negation of the rejection test */
    const int splitPointOk = !(parameters.splitPoint <= 0 || parameters.splitPoint > 1);
    /* 0 < accel <= 10 */
    const int accelOk = (accel != 0) && (accel <= 10);

    return dmerSizeOk && segmentSizeOk && tableLogOk && splitPointOk && accelOk;
}
|
||||
|
||||
|
||||
/**
|
||||
* Clean up a context initialized with `FASTCOVER_ctx_init()`.
|
||||
*/
|
||||
static void
|
||||
FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx)
|
||||
{
|
||||
if (!ctx) return;
|
||||
|
||||
free(ctx->freqs);
|
||||
ctx->freqs = NULL;
|
||||
|
||||
free(ctx->offsets);
|
||||
ctx->offsets = NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculate for frequency of hash value of each dmer in ctx->samples
|
||||
*/
|
||||
static void
|
||||
FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
||||
{
|
||||
const unsigned f = ctx->f;
|
||||
const unsigned d = ctx->d;
|
||||
const unsigned skip = ctx->accelParams.skip;
|
||||
const unsigned readLength = MAX(d, 8);
|
||||
size_t i;
|
||||
assert(ctx->nbTrainSamples >= 5);
|
||||
assert(ctx->nbTrainSamples <= ctx->nbSamples);
|
||||
for (i = 0; i < ctx->nbTrainSamples; i++) {
|
||||
size_t start = ctx->offsets[i]; /* start of current dmer */
|
||||
size_t const currSampleEnd = ctx->offsets[i+1];
|
||||
while (start + readLength <= currSampleEnd) {
|
||||
const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d);
|
||||
freqs[dmerIndex]++;
|
||||
start = start + skip + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Prepare a context for dictionary building.
|
||||
* The context is only dependent on the parameter `d` and can used multiple
|
||||
* times.
|
||||
* Returns 0 on success or error code on error.
|
||||
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
||||
*/
|
||||
static size_t
|
||||
FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
||||
const void* samplesBuffer,
|
||||
const size_t* samplesSizes, unsigned nbSamples,
|
||||
unsigned d, double splitPoint, unsigned f,
|
||||
FASTCOVER_accel_t accelParams)
|
||||
{
|
||||
const BYTE* const samples = (const BYTE*)samplesBuffer;
|
||||
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
|
||||
/* Split samples into testing and training sets */
|
||||
const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
|
||||
const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
|
||||
const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
|
||||
const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
|
||||
|
||||
/* Checks */
|
||||
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
||||
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
|
||||
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
||||
(unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
|
||||
return ERROR(srcSize_wrong);
|
||||
}
|
||||
|
||||
/* Check if there are at least 5 training samples */
|
||||
if (nbTrainSamples < 5) {
|
||||
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
|
||||
return ERROR(srcSize_wrong);
|
||||
}
|
||||
|
||||
/* Check if there's testing sample */
|
||||
if (nbTestSamples < 1) {
|
||||
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
|
||||
return ERROR(srcSize_wrong);
|
||||
}
|
||||
|
||||
/* Zero the context */
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
|
||||
(unsigned)trainingSamplesSize);
|
||||
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
|
||||
(unsigned)testSamplesSize);
|
||||
|
||||
ctx->samples = samples;
|
||||
ctx->samplesSizes = samplesSizes;
|
||||
ctx->nbSamples = nbSamples;
|
||||
ctx->nbTrainSamples = nbTrainSamples;
|
||||
ctx->nbTestSamples = nbTestSamples;
|
||||
ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
|
||||
ctx->d = d;
|
||||
ctx->f = f;
|
||||
ctx->accelParams = accelParams;
|
||||
|
||||
/* The offsets of each file */
|
||||
ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t));
|
||||
if (ctx->offsets == NULL) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
|
||||
FASTCOVER_ctx_destroy(ctx);
|
||||
return ERROR(memory_allocation);
|
||||
}
|
||||
|
||||
/* Fill offsets from the samplesSizes */
|
||||
{ U32 i;
|
||||
ctx->offsets[0] = 0;
|
||||
assert(nbSamples >= 5);
|
||||
for (i = 1; i <= nbSamples; ++i) {
|
||||
ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize frequency array of size 2^f */
|
||||
ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32));
|
||||
if (ctx->freqs == NULL) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
|
||||
FASTCOVER_ctx_destroy(ctx);
|
||||
return ERROR(memory_allocation);
|
||||
}
|
||||
|
||||
DISPLAYLEVEL(2, "Computing frequencies\n");
|
||||
FASTCOVER_computeFrequency(ctx->freqs, ctx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given the prepared context build the dictionary.
|
||||
*/
|
||||
static size_t
|
||||
FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
|
||||
U32* freqs,
|
||||
void* dictBuffer, size_t dictBufferCapacity,
|
||||
ZDICT_cover_params_t parameters,
|
||||
U16* segmentFreqs)
|
||||
{
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
/* Divide the data into epochs. We will select one segment from each epoch. */
|
||||
const COVER_epoch_info_t epochs = COVER_computeEpochs(
|
||||
(U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1);
|
||||
const size_t maxZeroScoreRun = 10;
|
||||
size_t zeroScoreRun = 0;
|
||||
size_t epoch;
|
||||
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
|
||||
(U32)epochs.num, (U32)epochs.size);
|
||||
/* Loop through the epochs until there are no more segments or the dictionary
|
||||
* is full.
|
||||
*/
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
|
||||
const U32 epochBegin = (U32)(epoch * epochs.size);
|
||||
const U32 epochEnd = epochBegin + epochs.size;
|
||||
size_t segmentSize;
|
||||
/* Select a segment */
|
||||
COVER_segment_t segment = FASTCOVER_selectSegment(
|
||||
ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);
|
||||
|
||||
/* If the segment covers no dmers, then we are out of content.
|
||||
* There may be new content in other epochs, for continue for some time.
|
||||
*/
|
||||
if (segment.score == 0) {
|
||||
if (++zeroScoreRun >= maxZeroScoreRun) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
zeroScoreRun = 0;
|
||||
|
||||
/* Trim the segment if necessary and if it is too small then we are done */
|
||||
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
|
||||
if (segmentSize < parameters.d) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* We fill the dictionary from the back to allow the best segments to be
|
||||
* referenced with the smallest offsets.
|
||||
*/
|
||||
tail -= segmentSize;
|
||||
memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
|
||||
DISPLAYUPDATE(
|
||||
2, "\r%u%% ",
|
||||
(unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
|
||||
}
|
||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||
return tail;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parameters for FASTCOVER_tryParameters().
|
||||
*/
|
||||
typedef struct FASTCOVER_tryParameters_data_s {
|
||||
const FASTCOVER_ctx_t* ctx;
|
||||
COVER_best_t* best;
|
||||
size_t dictBufferCapacity;
|
||||
ZDICT_cover_params_t parameters;
|
||||
} FASTCOVER_tryParameters_data_t;
|
||||
|
||||
|
||||
/**
|
||||
* Tries a set of parameters and updates the COVER_best_t with the results.
|
||||
* This function is thread safe if zstd is compiled with multithreaded support.
|
||||
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
||||
*/
|
||||
static void FASTCOVER_tryParameters(void* opaque)
|
||||
{
|
||||
/* Save parameters as local variables */
|
||||
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
|
||||
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
||||
const ZDICT_cover_params_t parameters = data->parameters;
|
||||
size_t dictBufferCapacity = data->dictBufferCapacity;
|
||||
size_t totalCompressedSize = ERROR(GENERIC);
|
||||
/* Initialize array to keep track of frequency of dmer within activeSegment */
|
||||
U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
|
||||
/* Allocate space for hash table, dict, and freqs */
|
||||
BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
|
||||
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
||||
U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
||||
if (!segmentFreqs || !dict || !freqs) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
||||
goto _cleanup;
|
||||
}
|
||||
/* Copy the frequencies because we need to modify them */
|
||||
memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
|
||||
/* Build the dictionary */
|
||||
{ const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
|
||||
parameters, segmentFreqs);
|
||||
|
||||
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
|
||||
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
||||
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
||||
totalCompressedSize);
|
||||
|
||||
if (COVER_dictSelectionIsError(selection)) {
|
||||
DISPLAYLEVEL(1, "Failed to select dictionary\n");
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
_cleanup:
|
||||
free(dict);
|
||||
COVER_best_finish(data->best, parameters, selection);
|
||||
free(data);
|
||||
free(segmentFreqs);
|
||||
COVER_dictSelectionFree(selection);
|
||||
free(freqs);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
|
||||
ZDICT_cover_params_t* coverParams)
|
||||
{
|
||||
coverParams->k = fastCoverParams.k;
|
||||
coverParams->d = fastCoverParams.d;
|
||||
coverParams->steps = fastCoverParams.steps;
|
||||
coverParams->nbThreads = fastCoverParams.nbThreads;
|
||||
coverParams->splitPoint = fastCoverParams.splitPoint;
|
||||
coverParams->zParams = fastCoverParams.zParams;
|
||||
coverParams->shrinkDict = fastCoverParams.shrinkDict;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
|
||||
ZDICT_fastCover_params_t* fastCoverParams,
|
||||
unsigned f, unsigned accel)
|
||||
{
|
||||
fastCoverParams->k = coverParams.k;
|
||||
fastCoverParams->d = coverParams.d;
|
||||
fastCoverParams->steps = coverParams.steps;
|
||||
fastCoverParams->nbThreads = coverParams.nbThreads;
|
||||
fastCoverParams->splitPoint = coverParams.splitPoint;
|
||||
fastCoverParams->f = f;
|
||||
fastCoverParams->accel = accel;
|
||||
fastCoverParams->zParams = coverParams.zParams;
|
||||
fastCoverParams->shrinkDict = coverParams.shrinkDict;
|
||||
}
|
||||
|
||||
|
||||
ZDICTLIB_API size_t
|
||||
ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer,
|
||||
const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_fastCover_params_t parameters)
|
||||
{
|
||||
BYTE* const dict = (BYTE*)dictBuffer;
|
||||
FASTCOVER_ctx_t ctx;
|
||||
ZDICT_cover_params_t coverParams;
|
||||
FASTCOVER_accel_t accelParams;
|
||||
/* Initialize global data */
|
||||
g_displayLevel = parameters.zParams.notificationLevel;
|
||||
/* Assign splitPoint and f if not provided */
|
||||
parameters.splitPoint = 1.0;
|
||||
parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
|
||||
parameters.accel = parameters.accel == 0 ? DEFAULT_ACCEL : parameters.accel;
|
||||
/* Convert to cover parameter */
|
||||
memset(&coverParams, 0 , sizeof(coverParams));
|
||||
FASTCOVER_convertToCoverParams(parameters, &coverParams);
|
||||
/* Checks */
|
||||
if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
|
||||
parameters.accel)) {
|
||||
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
|
||||
return ERROR(parameter_outOfBound);
|
||||
}
|
||||
if (nbSamples == 0) {
|
||||
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
|
||||
return ERROR(srcSize_wrong);
|
||||
}
|
||||
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
||||
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
||||
ZDICT_DICTSIZE_MIN);
|
||||
return ERROR(dstSize_tooSmall);
|
||||
}
|
||||
/* Assign corresponding FASTCOVER_accel_t to accelParams*/
|
||||
accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
|
||||
/* Initialize context */
|
||||
{
|
||||
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
||||
coverParams.d, parameters.splitPoint, parameters.f,
|
||||
accelParams);
|
||||
if (ZSTD_isError(initVal)) {
|
||||
DISPLAYLEVEL(1, "Failed to initialize context\n");
|
||||
return initVal;
|
||||
}
|
||||
}
|
||||
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
|
||||
/* Build the dictionary */
|
||||
DISPLAYLEVEL(2, "Building dictionary\n");
|
||||
{
|
||||
/* Initialize array to keep track of frequency of dmer within activeSegment */
|
||||
U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16));
|
||||
const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
|
||||
dictBufferCapacity, coverParams, segmentFreqs);
|
||||
const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100);
|
||||
const size_t dictionarySize = ZDICT_finalizeDictionary(
|
||||
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
||||
samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);
|
||||
if (!ZSTD_isError(dictionarySize)) {
|
||||
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
||||
(unsigned)dictionarySize);
|
||||
}
|
||||
FASTCOVER_ctx_destroy(&ctx);
|
||||
free(segmentFreqs);
|
||||
return dictionarySize;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ZDICTLIB_API size_t
|
||||
ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer,
|
||||
const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_fastCover_params_t* parameters)
|
||||
{
|
||||
ZDICT_cover_params_t coverParams;
|
||||
FASTCOVER_accel_t accelParams;
|
||||
/* constants */
|
||||
const unsigned nbThreads = parameters->nbThreads;
|
||||
const double splitPoint =
|
||||
parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
||||
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
||||
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
||||
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
||||
const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
|
||||
const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
|
||||
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
|
||||
const unsigned kIterations =
|
||||
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
||||
const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
|
||||
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
||||
const unsigned shrinkDict = 0;
|
||||
/* Local variables */
|
||||
const int displayLevel = parameters->zParams.notificationLevel;
|
||||
unsigned iteration = 1;
|
||||
unsigned d;
|
||||
unsigned k;
|
||||
COVER_best_t best;
|
||||
POOL_ctx *pool = NULL;
|
||||
int warned = 0;
|
||||
/* Checks */
|
||||
if (splitPoint <= 0 || splitPoint > 1) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
|
||||
return ERROR(parameter_outOfBound);
|
||||
}
|
||||
if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
|
||||
return ERROR(parameter_outOfBound);
|
||||
}
|
||||
if (kMinK < kMaxD || kMaxK < kMinK) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
|
||||
return ERROR(parameter_outOfBound);
|
||||
}
|
||||
if (nbSamples == 0) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
|
||||
return ERROR(srcSize_wrong);
|
||||
}
|
||||
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
|
||||
ZDICT_DICTSIZE_MIN);
|
||||
return ERROR(dstSize_tooSmall);
|
||||
}
|
||||
if (nbThreads > 1) {
|
||||
pool = POOL_create(nbThreads, 1);
|
||||
if (!pool) {
|
||||
return ERROR(memory_allocation);
|
||||
}
|
||||
}
|
||||
/* Initialization */
|
||||
COVER_best_init(&best);
|
||||
memset(&coverParams, 0 , sizeof(coverParams));
|
||||
FASTCOVER_convertToCoverParams(*parameters, &coverParams);
|
||||
accelParams = FASTCOVER_defaultAccelParameters[accel];
|
||||
/* Turn down global display level to clean up display at level 2 and below */
|
||||
g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
|
||||
/* Loop through d first because each new value needs a new context */
|
||||
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
|
||||
kIterations);
|
||||
for (d = kMinD; d <= kMaxD; d += 2) {
|
||||
/* Initialize the context for this value of d */
|
||||
FASTCOVER_ctx_t ctx;
|
||||
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
||||
{
|
||||
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
|
||||
if (ZSTD_isError(initVal)) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
||||
COVER_best_destroy(&best);
|
||||
POOL_free(pool);
|
||||
return initVal;
|
||||
}
|
||||
}
|
||||
if (!warned) {
|
||||
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
|
||||
warned = 1;
|
||||
}
|
||||
/* Loop through k reusing the same context */
|
||||
for (k = kMinK; k <= kMaxK; k += kStepSize) {
|
||||
/* Prepare the arguments */
|
||||
FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(
|
||||
sizeof(FASTCOVER_tryParameters_data_t));
|
||||
LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
|
||||
if (!data) {
|
||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
|
||||
COVER_best_destroy(&best);
|
||||
FASTCOVER_ctx_destroy(&ctx);
|
||||
POOL_free(pool);
|
||||
return ERROR(memory_allocation);
|
||||
}
|
||||
data->ctx = &ctx;
|
||||
data->best = &best;
|
||||
data->dictBufferCapacity = dictBufferCapacity;
|
||||
data->parameters = coverParams;
|
||||
data->parameters.k = k;
|
||||
data->parameters.d = d;
|
||||
data->parameters.splitPoint = splitPoint;
|
||||
data->parameters.steps = kSteps;
|
||||
data->parameters.shrinkDict = shrinkDict;
|
||||
data->parameters.zParams.notificationLevel = g_displayLevel;
|
||||
/* Check the parameters */
|
||||
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
||||
data->ctx->f, accel)) {
|
||||
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
|
||||
free(data);
|
||||
continue;
|
||||
}
|
||||
/* Call the function and pass ownership of data to it */
|
||||
COVER_best_start(&best);
|
||||
if (pool) {
|
||||
POOL_add(pool, &FASTCOVER_tryParameters, data);
|
||||
} else {
|
||||
FASTCOVER_tryParameters(data);
|
||||
}
|
||||
/* Print status */
|
||||
LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
|
||||
(unsigned)((iteration * 100) / kIterations));
|
||||
++iteration;
|
||||
}
|
||||
COVER_best_wait(&best);
|
||||
FASTCOVER_ctx_destroy(&ctx);
|
||||
}
|
||||
LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
|
||||
/* Fill the output buffer and parameters with output of the best parameters */
|
||||
{
|
||||
const size_t dictSize = best.dictSize;
|
||||
if (ZSTD_isError(best.compressedSize)) {
|
||||
const size_t compressedSize = best.compressedSize;
|
||||
COVER_best_destroy(&best);
|
||||
POOL_free(pool);
|
||||
return compressedSize;
|
||||
}
|
||||
FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel);
|
||||
memcpy(dictBuffer, best.dict, dictSize);
|
||||
COVER_best_destroy(&best);
|
||||
POOL_free(pool);
|
||||
return dictSize;
|
||||
}
|
||||
|
||||
}
|
1134
zstd/dictBuilder/zdict.c
Normal file
1134
zstd/dictBuilder/zdict.c
Normal file
File diff suppressed because it is too large
Load Diff
452
zstd/zdict.h
Normal file
452
zstd/zdict.h
Normal file
@ -0,0 +1,452 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef DICTBUILDER_H_001
|
||||
#define DICTBUILDER_H_001
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*====== Dependencies ======*/
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
/* ===== ZDICTLIB_API : controls the visibility of the library's symbols ===== */

/* ZDICTLIB_VISIBILITY may be predefined by the build; otherwise symbols get
 * default visibility on GCC-compatible compilers (version 4 and later) and
 * no attribute elsewhere. */
#if !defined(ZDICTLIB_VISIBILITY)
#  if defined(__GNUC__) && (__GNUC__ >= 4)
#    define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
#  else
#    define ZDICTLIB_VISIBILITY
#  endif
#endif

/* DLL builds: export when ZSTD_DLL_EXPORT is 1, import when ZSTD_DLL_IMPORT is 1.
 * The dllimport form isn't required, but it allows better code to be generated,
 * saving a function pointer load from the IAT and an indirect jump. */
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
#  define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
#  define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY
#else
#  define ZDICTLIB_API ZDICTLIB_VISIBILITY
#endif
|
||||
|
||||
/*******************************************************************************
|
||||
* Zstd dictionary builder
|
||||
*
|
||||
* FAQ
|
||||
* ===
|
||||
* Why should I use a dictionary?
|
||||
* ------------------------------
|
||||
*
|
||||
* Zstd can use dictionaries to improve compression ratio of small data.
|
||||
* Traditionally small files don't compress well because there is very little
|
||||
* repetition in a single sample, since it is small. But, if you are compressing
|
||||
* many similar files, like a bunch of JSON records that share the same
|
||||
* structure, you can train a dictionary ahead of time on some samples of
|
||||
* these files. Then, zstd can use the dictionary to find repetitions that are
|
||||
* present across samples. This can vastly improve compression ratio.
|
||||
*
|
||||
* When is a dictionary useful?
|
||||
* ----------------------------
|
||||
*
|
||||
* Dictionaries are useful when compressing many small files that are similar.
|
||||
* The larger a file is, the less benefit a dictionary will have. Generally,
|
||||
* we don't expect dictionary compression to be effective past 100KB. And the
|
||||
* smaller a file is, the more we would expect the dictionary to help.
|
||||
*
|
||||
* How do I use a dictionary?
|
||||
* --------------------------
|
||||
*
|
||||
* Simply pass the dictionary to the zstd compressor with
|
||||
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
|
||||
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
|
||||
* more advanced functions that allow selecting some options, see zstd.h for
|
||||
* complete documentation.
|
||||
*
|
||||
* What is a zstd dictionary?
|
||||
* --------------------------
|
||||
*
|
||||
* A zstd dictionary has two pieces: Its header, and its content. The header
|
||||
* contains a magic number, the dictionary ID, and entropy tables. These
|
||||
* entropy tables allow zstd to save on header costs in the compressed file,
|
||||
* which really matters for small data. The content is just bytes, which are
|
||||
* repeated content that is common across many samples.
|
||||
*
|
||||
* What is a raw content dictionary?
|
||||
* ---------------------------------
|
||||
*
|
||||
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
|
||||
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
|
||||
* content dictionary.
|
||||
*
|
||||
* How do I train a dictionary?
|
||||
* ----------------------------
|
||||
*
|
||||
* Gather samples from your use case. These samples should be similar to each
|
||||
* other. If you have several use cases, you could try to train one dictionary
|
||||
* per use case.
|
||||
*
|
||||
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
|
||||
* dictionary. There are a few advanced versions of this function, but this
|
||||
* is a great starting point. If you want to further tune your dictionary
|
||||
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
|
||||
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
|
||||
*
|
||||
* If the dictionary training function fails, that is likely because you
|
||||
* either passed too few samples, or a dictionary would not be effective
|
||||
* for your data. Look at the messages that the dictionary trainer printed,
|
||||
* if it doesn't say too few samples, then a dictionary would not be effective.
|
||||
*
|
||||
* How large should my dictionary be?
|
||||
* ----------------------------------
|
||||
*
|
||||
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
|
||||
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
|
||||
* dictionary larger than that. But, most use cases can get away with a
|
||||
* smaller dictionary. The advanced dictionary builders can automatically
|
||||
* shrink the dictionary for you, and select the smallest size that
|
||||
* doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
|
||||
* A smaller dictionary can save memory, and potentially speed up
|
||||
* compression.
|
||||
*
|
||||
* How many samples should I provide to the dictionary builder?
|
||||
* ------------------------------------------------------------
|
||||
*
|
||||
* We generally recommend passing ~100x the size of the dictionary
|
||||
* in samples. A few thousand should suffice. Having too few samples
|
||||
* can hurt the dictionary's effectiveness. Having more samples will
|
||||
* only improve the dictionary's effectiveness. But having too many
|
||||
* samples can slow down the dictionary builder.
|
||||
*
|
||||
* How do I determine if a dictionary will be effective?
|
||||
* -----------------------------------------------------
|
||||
*
|
||||
* Simply train a dictionary and try it out. You can use zstd's built in
|
||||
* benchmarking tool to test the dictionary effectiveness.
|
||||
*
|
||||
* # Benchmark levels 1-3 without a dictionary
|
||||
* zstd -b1e3 -r /path/to/my/files
|
||||
* # Benchmark levels 1-3 with a dictionary
|
||||
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
|
||||
*
|
||||
* When should I retrain a dictionary?
|
||||
* -----------------------------------
|
||||
*
|
||||
* You should retrain a dictionary when its effectiveness drops. Dictionary
|
||||
* effectiveness drops as the data you are compressing changes. Generally, we do
|
||||
* expect dictionaries to "decay" over time, as your data changes, but the rate
|
||||
* at which they decay depends on your use case. Internally, we regularly
|
||||
* retrain dictionaries, and if the new dictionary performs significantly
|
||||
* better than the old dictionary, we will ship the new dictionary.
|
||||
*
|
||||
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* If you have a raw content dictionary, e.g. by manually constructing it, or
|
||||
* using a third-party dictionary builder, you can turn it into a zstd
|
||||
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
|
||||
* provide some samples of the data. It will add the zstd header to the
|
||||
* raw content, which contains a dictionary ID and entropy tables, which
|
||||
* will improve compression ratio, and allow zstd to write the dictionary ID
|
||||
* into the frame, if you so choose.
|
||||
*
|
||||
* Do I have to use zstd's dictionary builder?
|
||||
* -------------------------------------------
|
||||
*
|
||||
* No! You can construct dictionary content however you please, it is just
|
||||
* bytes. It will always be valid as a raw content dictionary. If you want
|
||||
* a zstd dictionary, which can improve compression ratio, use
|
||||
* `ZDICT_finalizeDictionary()`.
|
||||
*
|
||||
* What is the attack surface of a zstd dictionary?
|
||||
* ------------------------------------------------
|
||||
*
|
||||
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
|
||||
* zstd should never crash, or access out-of-bounds memory no matter what
|
||||
* the dictionary is. However, if an attacker can control the dictionary
|
||||
* during decompression, they can cause zstd to generate arbitrary bytes,
|
||||
* just like if they controlled the compressed data.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
|
||||
/*! ZDICT_trainFromBuffer():
|
||||
* Train a dictionary from an array of samples.
|
||||
* Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
|
||||
* f=20, and accel=1.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* Note: Dictionary training will fail if there are not enough samples to construct a
|
||||
* dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
|
||||
* If dictionary training fails, you should use zstd without a dictionary, as the dictionary
|
||||
* would've been ineffective anyways. If you believe your samples would benefit from a dictionary
|
||||
* please open an issue with details, and we can look into it.
|
||||
* Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
||||
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||
*/
|
||||
ZDICTLIB_API size_t
ZDICT_trainFromBuffer(void *dictBuffer, size_t dictBufferCapacity,
                      const void *samplesBuffer,
                      const size_t *samplesSizes, unsigned nbSamples);
|
||||
|
||||
/*! ZDICT_params_t:
 *  Settings common to the dictionary builders: passed directly to
 *  ZDICT_finalizeDictionary(), and embedded as `zParams` in the cover,
 *  fastCover and legacy parameter structures.
 *  For every field, value 0 selects the default behavior.
 */
typedef struct {
|
||||
int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */
|
||||
unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
||||
unsigned dictID; /**< force dictID value; 0 means auto mode (32-bit random value)
|
||||
* NOTE: The zstd format reserves some dictionary IDs for future use.
|
||||
* You may use them in private settings, but be warned that they
|
||||
* may be used by zstd in a public dictionary registry in the future.
|
||||
* These dictionary IDs are:
|
||||
* - low range : <= 32767
|
||||
* - high range : >= (2^31)
|
||||
*/
|
||||
} ZDICT_params_t;
|
||||
|
||||
/*! ZDICT_finalizeDictionary():
|
||||
* Given a custom content as a basis for dictionary, and a set of samples,
|
||||
* finalize dictionary by adding headers and statistics according to the zstd
|
||||
* dictionary format.
|
||||
*
|
||||
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each
|
||||
* sample in order. The samples are used to construct the statistics, so they
|
||||
* should be representative of what you will compress with this dictionary.
|
||||
*
|
||||
* The compression level can be set in `parameters`. You should pass the
|
||||
* compression level you expect to use in production. The statistics for each
|
||||
* compression level differ, so tuning the dictionary for the compression level
|
||||
* can help quite a bit.
|
||||
*
|
||||
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
|
||||
* a random dictionary ID for you, but we can't guarantee no collisions.
|
||||
*
|
||||
* The dstDictBuffer and the dictContent may overlap, and the content will be
|
||||
* appended to the end of the header. If the header + the content doesn't fit in
|
||||
* maxDictSize the beginning of the content is truncated to make room, since it
|
||||
* is presumed that the most profitable content is at the end of the dictionary,
|
||||
* since that is the cheapest to reference.
|
||||
*
|
||||
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
||||
* `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
|
||||
*
|
||||
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
|
||||
* or an error code, which can be tested by ZDICT_isError().
|
||||
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
|
||||
* instructed to, using notificationLevel>0.
|
||||
* NOTE: This function currently may fail in several edge cases including:
|
||||
* * Not enough samples
|
||||
* * Samples are incompressible
|
||||
* * Samples are all exactly the same
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
|
||||
const void* dictContent, size_t dictContentSize,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_params_t parameters);
|
||||
|
||||
|
||||
/*====== Helper functions ======*/
|
||||
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
||||
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
|
||||
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
|
||||
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
||||
|
||||
|
||||
|
||||
#ifdef ZDICT_STATIC_LINKING_ONLY
|
||||
|
||||
/* ====================================================================================
|
||||
* The definitions in this section are considered experimental.
|
||||
* They should never be used with a dynamic library, as they may change in the future.
|
||||
* They are provided for advanced usages.
|
||||
* Use them only in association with static linking.
|
||||
* ==================================================================================== */
|
||||
|
||||
#define ZDICT_CONTENTSIZE_MIN 128
|
||||
#define ZDICT_DICTSIZE_MIN 256
|
||||
|
||||
/*! ZDICT_cover_params_t:
|
||||
* k and d are the only required parameters.
|
||||
* For others, value 0 means default.
|
||||
*/
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
|
||||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
||||
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
|
||||
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
|
||||
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
|
||||
ZDICT_params_t zParams; /* Common dictionary-builder settings (compression level, notification level, dictID) */
|
||||
} ZDICT_cover_params_t;
|
||||
|
||||
/*! ZDICT_fastCover_params_t:
 *  Parameters for ZDICT_trainFromBuffer_fastCover() and
 *  ZDICT_optimizeTrainFromBuffer_fastCover().
 *  k and d are required by ZDICT_trainFromBuffer_fastCover();
 *  for the other fields, value 0 means default.
 */
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||
unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default (20) */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
|
||||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
||||
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
|
||||
unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default (1) */
|
||||
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
|
||||
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
|
||||
|
||||
ZDICT_params_t zParams; /* Common dictionary-builder settings (compression level, notification level, dictID) */
|
||||
} ZDICT_fastCover_params_t;
|
||||
|
||||
/*! ZDICT_trainFromBuffer_cover():
|
||||
* Train a dictionary from an array of samples using the COVER algorithm.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
||||
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||
void *dictBuffer, size_t dictBufferCapacity,
|
||||
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_cover_params_t parameters);
|
||||
|
||||
/*! ZDICT_optimizeTrainFromBuffer_cover():
|
||||
* The same requirements as above hold for all the parameters except `parameters`.
|
||||
* This function tries many parameter combinations and picks the best parameters.
|
||||
* `*parameters` is filled with the best parameters found,
|
||||
* dictionary constructed with those parameters is stored in `dictBuffer`.
|
||||
*
|
||||
* All of the parameters d, k, steps are optional.
|
||||
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
|
||||
* If steps is zero it defaults to its default value.
|
||||
* If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||
void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_cover_params_t* parameters);
|
||||
|
||||
/*! ZDICT_trainFromBuffer_fastCover():
|
||||
* Train a dictionary from an array of samples using a modified version of COVER algorithm.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* d and k are required.
|
||||
* All other parameters are optional and will use default values if not provided.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
||||
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
||||
size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_fastCover_params_t parameters);
|
||||
|
||||
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
|
||||
* The same requirements as above hold for all the parameters except `parameters`.
|
||||
* This function tries many parameter combinations (specifically, k and d combinations)
|
||||
* and picks the best parameters. `*parameters` is filled with the best parameters found,
|
||||
* dictionary constructed with those parameters is stored in `dictBuffer`.
|
||||
* All of the parameters d, k, steps, f, and accel are optional.
|
||||
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
|
||||
* If steps is zero it defaults to its default value.
|
||||
* If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
|
||||
* If f is zero, default value of 20 is used.
|
||||
* If accel is zero, default value of 1 is used.
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
||||
size_t dictBufferCapacity, const void* samplesBuffer,
|
||||
const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_fastCover_params_t* parameters);
|
||||
|
||||
/*! ZDICT_legacy_params_t:
 *  Parameters for ZDICT_trainFromBuffer_legacy().
 *  Every value may be left at 0 to request the default.
 */
typedef struct {
|
||||
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
||||
ZDICT_params_t zParams; /* Common dictionary-builder settings (compression level, notification level, dictID) */
|
||||
} ZDICT_legacy_params_t;
|
||||
|
||||
/*! ZDICT_trainFromBuffer_legacy():
|
||||
* Train a dictionary from an array of samples.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* `parameters` is optional and can be provided with values set to 0 to mean "default".
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* See ZDICT_trainFromBuffer() for details on failure modes.
|
||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
||||
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
|
||||
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
||||
void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_legacy_params_t parameters);
|
||||
|
||||
|
||||
/* Deprecation warnings */
|
||||
/* It is generally possible to disable deprecation warnings from compiler,
|
||||
for example with -Wno-deprecated-declarations for gcc
|
||||
or _CRT_SECURE_NO_WARNINGS in Visual.
|
||||
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
||||
/* ZDICT_DEPRECATED(message): prefix for declarations of deprecated functions.
 * It always expands to include ZDICTLIB_API, plus the compiler-specific
 * deprecation marker selected below (first matching branch wins). */
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
|
||||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
|
||||
#else
|
||||
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) /* major*100 + minor, e.g. 405 for GCC 4.5 */
|
||||
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
||||
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
|
||||
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405) /* clang, or GCC >= 4.5: attribute carries the message */
|
||||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
|
||||
# elif (ZDICT_GCC_VERSION >= 301) /* GCC >= 3.1: attribute without a message */
|
||||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
|
||||
# elif defined(_MSC_VER) /* Visual C++ */
|
||||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
|
||||
# else /* unrecognized compiler: emit a build-time note and fall back to a plain declaration */
|
||||
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
|
||||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API
|
||||
# endif
|
||||
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
||||
|
||||
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
|
||||
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
||||
|
||||
|
||||
#endif /* ZDICT_STATIC_LINKING_ONLY */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* DICTBUILDER_H_001 */
|
Loading…
Reference in New Issue
Block a user