mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-22 14:44:34 +00:00
Update zstd to 1.4.7.
This commit is contained in:
parent
8f48d6e580
commit
993c631103
@ -17,7 +17,6 @@
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This API consists of small unitary functions, which must be inlined for best performance.
|
||||
* Since link-time-optimization is not available for all compilers,
|
||||
@ -36,10 +35,12 @@ extern "C" {
|
||||
/*=========================================
|
||||
* Target specific
|
||||
=========================================*/
|
||||
#if defined(__BMI__) && defined(__GNUC__)
|
||||
# include <immintrin.h> /* support for bextr (experimental) */
|
||||
#elif defined(__ICCARM__)
|
||||
# include <intrinsics.h>
|
||||
#ifndef ZSTD_NO_INTRINSICS
|
||||
# if defined(__BMI__) && defined(__GNUC__)
|
||||
# include <immintrin.h> /* support for bextr (experimental) */
|
||||
# elif defined(__ICCARM__)
|
||||
# include <intrinsics.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define STREAM_ACCUMULATOR_MIN_32 25
|
||||
@ -141,8 +142,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
|
||||
assert(val != 0);
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
unsigned long r=0;
|
||||
return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
|
||||
# if STATIC_BMI2 == 1
|
||||
return _lzcnt_u32(val) ^ 31;
|
||||
# else
|
||||
unsigned long r = 0;
|
||||
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
||||
return __builtin_clz (val) ^ 31;
|
||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||
@ -198,7 +203,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
|
||||
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
||||
size_t value, unsigned nbBits)
|
||||
{
|
||||
MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
|
||||
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
|
||||
assert(nbBits < BIT_MASK_SIZE);
|
||||
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
||||
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
|
||||
@ -271,7 +276,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
|
||||
*/
|
||||
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
|
||||
{
|
||||
if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
|
||||
if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
|
||||
|
||||
bitD->start = (const char*)srcBuffer;
|
||||
bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
|
||||
@ -317,12 +322,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
||||
return srcSize;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
||||
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
||||
{
|
||||
return bitContainer >> start;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
||||
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
||||
{
|
||||
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
||||
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
||||
@ -330,10 +335,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
|
||||
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
||||
}
|
||||
|
||||
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
||||
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
||||
{
|
||||
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
|
||||
return _bzhi_u64(bitContainer, nbBits);
|
||||
#else
|
||||
assert(nbBits < BIT_MASK_SIZE);
|
||||
return bitContainer & BIT_mask[nbBits];
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! BIT_lookBits() :
|
||||
@ -342,7 +351,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
||||
* On 32-bits, maxNbBits==24.
|
||||
* On 64-bits, maxNbBits==56.
|
||||
* @return : value extracted */
|
||||
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
||||
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
||||
{
|
||||
/* arbitrate between double-shift and shift+mask */
|
||||
#if 1
|
||||
@ -365,7 +374,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
|
||||
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
|
||||
}
|
||||
|
||||
MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||
MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||
{
|
||||
bitD->bitsConsumed += nbBits;
|
||||
}
|
||||
@ -374,7 +383,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||
* Read (consume) next n bits from local register and update.
|
||||
* Pay attention to not read more than nbBits contained into local register.
|
||||
* @return : extracted value. */
|
||||
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
||||
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
||||
{
|
||||
size_t const value = BIT_lookBits(bitD, nbBits);
|
||||
BIT_skipBits(bitD, nbBits);
|
||||
|
119
zstd/compiler.h
119
zstd/compiler.h
@ -38,6 +38,17 @@
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
|
||||
This explictly marks such functions as __cdecl so that the code will still compile
|
||||
if a CC other than __cdecl has been made the default.
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
# define WIN_CDECL __cdecl
|
||||
#else
|
||||
# define WIN_CDECL
|
||||
#endif
|
||||
|
||||
/**
|
||||
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
|
||||
* parameters. They must be inlined for the compiler to eliminate the constant
|
||||
@ -114,12 +125,12 @@
|
||||
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
||||
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
|
||||
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
|
||||
# elif defined(__aarch64__)
|
||||
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
|
||||
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
|
||||
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
||||
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
|
||||
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
|
||||
# elif defined(__aarch64__)
|
||||
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
|
||||
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
|
||||
# else
|
||||
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
|
||||
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
|
||||
@ -172,4 +183,106 @@
|
||||
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
|
||||
#endif
|
||||
|
||||
/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
|
||||
#ifndef STATIC_BMI2
|
||||
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
|
||||
# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
|
||||
# define STATIC_BMI2 1
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef STATIC_BMI2
|
||||
#define STATIC_BMI2 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_feature
|
||||
# define __has_feature(x) 0
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under msan */
|
||||
#ifndef ZSTD_MEMORY_SANITIZER
|
||||
# if __has_feature(memory_sanitizer)
|
||||
# define ZSTD_MEMORY_SANITIZER 1
|
||||
# else
|
||||
# define ZSTD_MEMORY_SANITIZER 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if ZSTD_MEMORY_SANITIZER
|
||||
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
|
||||
* We therefore declare the functions we need ourselves, rather than trying to
|
||||
* include the header file... */
|
||||
#include <stddef.h> /* size_t */
|
||||
#define ZSTD_DEPS_NEED_STDINT
|
||||
#include "zstd_deps.h" /* intptr_t */
|
||||
|
||||
/* Make memory region fully initialized (without changing its contents). */
|
||||
void __msan_unpoison(const volatile void *a, size_t size);
|
||||
|
||||
/* Make memory region fully uninitialized (without changing its contents).
|
||||
This is a legacy interface that does not update origin information. Use
|
||||
__msan_allocated_memory() instead. */
|
||||
void __msan_poison(const volatile void *a, size_t size);
|
||||
|
||||
/* Returns the offset of the first (at least partially) poisoned byte in the
|
||||
memory range, or -1 if the whole range is good. */
|
||||
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under asan */
|
||||
#ifndef ZSTD_ADDRESS_SANITIZER
|
||||
# if __has_feature(address_sanitizer)
|
||||
# define ZSTD_ADDRESS_SANITIZER 1
|
||||
# elif defined(__SANITIZE_ADDRESS__)
|
||||
# define ZSTD_ADDRESS_SANITIZER 1
|
||||
# else
|
||||
# define ZSTD_ADDRESS_SANITIZER 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if ZSTD_ADDRESS_SANITIZER
|
||||
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
|
||||
* We therefore declare the functions we need ourselves, rather than trying to
|
||||
* include the header file... */
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
/**
|
||||
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
|
||||
*
|
||||
* This memory must be previously allocated by your program. Instrumented
|
||||
* code is forbidden from accessing addresses in this region until it is
|
||||
* unpoisoned. This function is not guaranteed to poison the entire region -
|
||||
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
|
||||
* alignment restrictions.
|
||||
*
|
||||
* \note This function is not thread-safe because no two threads can poison or
|
||||
* unpoison memory in the same memory region simultaneously.
|
||||
*
|
||||
* \param addr Start of memory region.
|
||||
* \param size Size of memory region. */
|
||||
void __asan_poison_memory_region(void const volatile *addr, size_t size);
|
||||
|
||||
/**
|
||||
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
|
||||
*
|
||||
* This memory must be previously allocated by your program. Accessing
|
||||
* addresses in this region is allowed until this region is poisoned again.
|
||||
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
|
||||
* to ASan alignment restrictions.
|
||||
*
|
||||
* \note This function is not thread-safe because no two threads can
|
||||
* poison or unpoison memory in the same memory region simultaneously.
|
||||
*
|
||||
* \param addr Start of memory region.
|
||||
* \param size Size of memory region. */
|
||||
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
|
||||
#endif
|
||||
|
||||
#endif /* ZSTD_COMPILER_H */
|
||||
|
@ -16,8 +16,6 @@
|
||||
* https://github.com/facebook/folly/blob/master/folly/CpuId.h
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "mem.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
29
zstd/debug.h
29
zstd/debug.h
@ -51,15 +51,6 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* DEBUGFILE can be defined externally,
|
||||
* typically through compiler command line.
|
||||
* note : currently useless.
|
||||
* Value must be stderr or stdout */
|
||||
#ifndef DEBUGFILE
|
||||
# define DEBUGFILE stderr
|
||||
#endif
|
||||
|
||||
|
||||
/* recommended values for DEBUGLEVEL :
|
||||
* 0 : release mode, no debug, all run-time checks disabled
|
||||
* 1 : enables assert() only, no display
|
||||
@ -76,7 +67,8 @@ extern "C" {
|
||||
*/
|
||||
|
||||
#if (DEBUGLEVEL>=1)
|
||||
# include <assert.h>
|
||||
# define ZSTD_DEPS_NEED_ASSERT
|
||||
# include "zstd_deps.h"
|
||||
#else
|
||||
# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
|
||||
# define assert(condition) ((void)0) /* disable assert (default) */
|
||||
@ -84,7 +76,8 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#if (DEBUGLEVEL>=2)
|
||||
# include <stdio.h>
|
||||
# define ZSTD_DEPS_NEED_IO
|
||||
# include "zstd_deps.h"
|
||||
extern int g_debuglevel; /* the variable is only declared,
|
||||
it actually lives in debug.c,
|
||||
and is shared by the whole process.
|
||||
@ -92,14 +85,14 @@ extern int g_debuglevel; /* the variable is only declared,
|
||||
It's useful when enabling very verbose levels
|
||||
on selective conditions (such as position in src) */
|
||||
|
||||
# define RAWLOG(l, ...) { \
|
||||
if (l<=g_debuglevel) { \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
# define RAWLOG(l, ...) { \
|
||||
if (l<=g_debuglevel) { \
|
||||
ZSTD_DEBUG_PRINT(__VA_ARGS__); \
|
||||
} }
|
||||
# define DEBUGLOG(l, ...) { \
|
||||
if (l<=g_debuglevel) { \
|
||||
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
|
||||
fprintf(stderr, " \n"); \
|
||||
# define DEBUGLOG(l, ...) { \
|
||||
if (l<=g_debuglevel) { \
|
||||
ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
|
||||
ZSTD_DEBUG_PRINT(" \n"); \
|
||||
} }
|
||||
#else
|
||||
# define RAWLOG(l, ...) {} /* disabled */
|
||||
|
@ -38,8 +38,31 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
|
||||
/*-**************************************************************
|
||||
* FSE NCount encoding-decoding
|
||||
****************************************************************/
|
||||
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
static U32 FSE_ctz(U32 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
unsigned long r=0;
|
||||
return _BitScanForward(&r, val) ? (unsigned)r : 0;
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
||||
return __builtin_ctz(val);
|
||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||
return __CTZ(val);
|
||||
# else /* Software version */
|
||||
U32 count = 0;
|
||||
while ((val & 1) == 0) {
|
||||
val >>= 1;
|
||||
++count;
|
||||
}
|
||||
return count;
|
||||
# endif
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
const BYTE* const istart = (const BYTE*) headerBuffer;
|
||||
const BYTE* const iend = istart + hbSize;
|
||||
@ -50,23 +73,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
||||
U32 bitStream;
|
||||
int bitCount;
|
||||
unsigned charnum = 0;
|
||||
unsigned const maxSV1 = *maxSVPtr + 1;
|
||||
int previous0 = 0;
|
||||
|
||||
if (hbSize < 4) {
|
||||
/* This function only works when hbSize >= 4 */
|
||||
char buffer[4];
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
memcpy(buffer, headerBuffer, hbSize);
|
||||
if (hbSize < 8) {
|
||||
/* This function only works when hbSize >= 8 */
|
||||
char buffer[8] = {0};
|
||||
ZSTD_memcpy(buffer, headerBuffer, hbSize);
|
||||
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
|
||||
buffer, sizeof(buffer));
|
||||
if (FSE_isError(countSize)) return countSize;
|
||||
if (countSize > hbSize) return ERROR(corruption_detected);
|
||||
return countSize;
|
||||
} }
|
||||
assert(hbSize >= 4);
|
||||
assert(hbSize >= 8);
|
||||
|
||||
/* init */
|
||||
memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
|
||||
ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
|
||||
bitStream = MEM_readLE32(ip);
|
||||
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
|
||||
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
|
||||
@ -77,36 +100,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
||||
threshold = 1<<nbBits;
|
||||
nbBits++;
|
||||
|
||||
while ((remaining>1) & (charnum<=*maxSVPtr)) {
|
||||
for (;;) {
|
||||
if (previous0) {
|
||||
unsigned n0 = charnum;
|
||||
while ((bitStream & 0xFFFF) == 0xFFFF) {
|
||||
n0 += 24;
|
||||
if (ip < iend-5) {
|
||||
ip += 2;
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
/* Count the number of repeats. Each time the
|
||||
* 2-bit repeat code is 0b11 there is another
|
||||
* repeat.
|
||||
* Avoid UB by setting the high bit to 1.
|
||||
*/
|
||||
int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
|
||||
while (repeats >= 12) {
|
||||
charnum += 3 * 12;
|
||||
if (LIKELY(ip <= iend-7)) {
|
||||
ip += 3;
|
||||
} else {
|
||||
bitStream >>= 16;
|
||||
bitCount += 16;
|
||||
} }
|
||||
while ((bitStream & 3) == 3) {
|
||||
n0 += 3;
|
||||
bitStream >>= 2;
|
||||
bitCount += 2;
|
||||
bitCount -= (int)(8 * (iend - 7 - ip));
|
||||
bitCount &= 31;
|
||||
ip = iend - 4;
|
||||
}
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
|
||||
}
|
||||
n0 += bitStream & 3;
|
||||
charnum += 3 * repeats;
|
||||
bitStream >>= 2 * repeats;
|
||||
bitCount += 2 * repeats;
|
||||
|
||||
/* Add the final repeat which isn't 0b11. */
|
||||
assert((bitStream & 3) < 3);
|
||||
charnum += bitStream & 3;
|
||||
bitCount += 2;
|
||||
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
|
||||
while (charnum < n0) normalizedCounter[charnum++] = 0;
|
||||
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
|
||||
|
||||
/* This is an error, but break and return an error
|
||||
* at the end, because returning out of a loop makes
|
||||
* it harder for the compiler to optimize.
|
||||
*/
|
||||
if (charnum >= maxSV1) break;
|
||||
|
||||
/* We don't need to set the normalized count to 0
|
||||
* because we already memset the whole buffer to 0.
|
||||
*/
|
||||
|
||||
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
|
||||
assert((bitCount >> 3) <= 3); /* For first condition to work */
|
||||
ip += bitCount>>3;
|
||||
bitCount &= 7;
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
} else {
|
||||
bitStream >>= 2;
|
||||
} }
|
||||
{ int const max = (2*threshold-1) - remaining;
|
||||
bitCount -= (int)(8 * (iend - 4 - ip));
|
||||
bitCount &= 31;
|
||||
ip = iend - 4;
|
||||
}
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
}
|
||||
{
|
||||
int const max = (2*threshold-1) - remaining;
|
||||
int count;
|
||||
|
||||
if ((bitStream & (threshold-1)) < (U32)max) {
|
||||
@ -119,24 +164,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
||||
}
|
||||
|
||||
count--; /* extra accuracy */
|
||||
remaining -= count < 0 ? -count : count; /* -1 means +1 */
|
||||
/* When it matters (small blocks), this is a
|
||||
* predictable branch, because we don't use -1.
|
||||
*/
|
||||
if (count >= 0) {
|
||||
remaining -= count;
|
||||
} else {
|
||||
assert(count == -1);
|
||||
remaining += count;
|
||||
}
|
||||
normalizedCounter[charnum++] = (short)count;
|
||||
previous0 = !count;
|
||||
while (remaining < threshold) {
|
||||
nbBits--;
|
||||
threshold >>= 1;
|
||||
}
|
||||
|
||||
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
|
||||
assert(threshold > 1);
|
||||
if (remaining < threshold) {
|
||||
/* This branch can be folded into the
|
||||
* threshold update condition because we
|
||||
* know that threshold > 1.
|
||||
*/
|
||||
if (remaining <= 1) break;
|
||||
nbBits = BIT_highbit32(remaining) + 1;
|
||||
threshold = 1 << (nbBits - 1);
|
||||
}
|
||||
if (charnum >= maxSV1) break;
|
||||
|
||||
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
|
||||
ip += bitCount>>3;
|
||||
bitCount &= 7;
|
||||
} else {
|
||||
bitCount -= (int)(8 * (iend - 4 - ip));
|
||||
bitCount &= 31;
|
||||
ip = iend - 4;
|
||||
}
|
||||
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
|
||||
} } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
} }
|
||||
if (remaining != 1) return ERROR(corruption_detected);
|
||||
/* Only possible when there are too many zeros. */
|
||||
if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
|
||||
if (bitCount > 32) return ERROR(corruption_detected);
|
||||
*maxSVPtr = charnum-1;
|
||||
|
||||
@ -144,6 +208,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
||||
return ip-istart;
|
||||
}
|
||||
|
||||
/* Avoids the FORCE_INLINE of the _body() function. */
|
||||
static size_t FSE_readNCount_body_default(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t FSE_readNCount_bmi2(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize, int bmi2)
|
||||
{
|
||||
#if DYNAMIC_BMI2
|
||||
if (bmi2) {
|
||||
return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
|
||||
}
|
||||
#endif
|
||||
(void)bmi2;
|
||||
return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
|
||||
}
|
||||
|
||||
size_t FSE_readNCount(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
|
||||
}
|
||||
|
||||
|
||||
/*! HUF_readStats() :
|
||||
Read compact Huffman tree, saved by HUF_writeCTable().
|
||||
@ -155,6 +256,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
||||
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
||||
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize,
|
||||
int bmi2)
|
||||
{
|
||||
U32 weightTotal;
|
||||
const BYTE* ip = (const BYTE*) src;
|
||||
@ -163,7 +275,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
|
||||
if (!srcSize) return ERROR(srcSize_wrong);
|
||||
iSize = ip[0];
|
||||
/* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
|
||||
/* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
|
||||
|
||||
if (iSize >= 128) { /* special header */
|
||||
oSize = iSize - 127;
|
||||
@ -177,14 +289,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
huffWeight[n+1] = ip[n/2] & 15;
|
||||
} } }
|
||||
else { /* header compressed with FSE (normal case) */
|
||||
FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
|
||||
if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
|
||||
oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
|
||||
/* max (hwSize-1) values decoded, as last one is implied */
|
||||
oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
|
||||
if (FSE_isError(oSize)) return oSize;
|
||||
}
|
||||
|
||||
/* collect weight stats */
|
||||
memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
|
||||
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
|
||||
weightTotal = 0;
|
||||
{ U32 n; for (n=0; n<oSize; n++) {
|
||||
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
|
||||
@ -214,3 +326,37 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
*nbSymbolsPtr = (U32)(oSize+1);
|
||||
return iSize+1;
|
||||
}
|
||||
|
||||
/* Avoids the FORCE_INLINE of the _body() function. */
|
||||
static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize,
|
||||
int bmi2)
|
||||
{
|
||||
#if DYNAMIC_BMI2
|
||||
if (bmi2) {
|
||||
return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
|
||||
}
|
||||
#endif
|
||||
(void)bmi2;
|
||||
return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
|
||||
}
|
||||
|
@ -48,6 +48,7 @@ const char* ERR_getErrorString(ERR_enum code)
|
||||
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
|
||||
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
|
||||
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
|
||||
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
|
||||
case PREFIX(maxCode):
|
||||
default: return notErrorCode;
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ extern "C" {
|
||||
/* ****************************************
|
||||
* Dependencies
|
||||
******************************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#include "zstd_errors.h" /* enum list */
|
||||
|
||||
|
||||
|
49
zstd/fse.h
49
zstd/fse.h
@ -23,7 +23,7 @@ extern "C" {
|
||||
/*-*****************************************
|
||||
* Dependencies
|
||||
******************************************/
|
||||
#include <stddef.h> /* size_t, ptrdiff_t */
|
||||
#include "zstd_deps.h" /* size_t, ptrdiff_t */
|
||||
|
||||
|
||||
/*-*****************************************
|
||||
@ -137,10 +137,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
|
||||
/*! FSE_normalizeCount():
|
||||
normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
|
||||
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
|
||||
useLowProbCount is a boolean parameter which trades off compressed size for
|
||||
faster header decoding. When it is set to 1, the compressed data will be slightly
|
||||
smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
|
||||
faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
|
||||
is a good default, since header deserialization makes a big speed difference.
|
||||
Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
|
||||
@return : tableLog,
|
||||
or an errorCode, which can be tested using FSE_isError() */
|
||||
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
|
||||
const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
|
||||
const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
|
||||
|
||||
/*! FSE_NCountWriteBound():
|
||||
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
|
||||
@ -228,6 +234,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
|
||||
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
|
||||
const void* rBuffer, size_t rBuffSize);
|
||||
|
||||
/*! FSE_readNCount_bmi2():
|
||||
* Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
|
||||
*/
|
||||
FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
|
||||
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
|
||||
const void* rBuffer, size_t rBuffSize, int bmi2);
|
||||
|
||||
/*! Constructor and Destructor of FSE_DTable.
|
||||
Note that its size depends on 'tableLog' */
|
||||
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
|
||||
@ -288,12 +301,12 @@ If there is an error, the function will return an error code, which can be teste
|
||||
*******************************************/
|
||||
/* FSE buffer bounds */
|
||||
#define FSE_NCOUNTBOUND 512
|
||||
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
|
||||
#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
|
||||
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
|
||||
|
||||
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
|
||||
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
|
||||
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
|
||||
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
|
||||
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
|
||||
|
||||
/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
|
||||
#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
|
||||
@ -309,9 +322,9 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi
|
||||
|
||||
/* FSE_compress_wksp() :
|
||||
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
|
||||
* FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
|
||||
*/
|
||||
#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
|
||||
#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
|
||||
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
|
||||
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
|
||||
@ -322,18 +335,29 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
|
||||
|
||||
/* FSE_buildCTable_wksp() :
|
||||
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* `wkspSize` must be >= `(1<<tableLog)`.
|
||||
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog)`.
|
||||
*/
|
||||
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * (maxSymbolValue + 2) + (1ull << tableLog))
|
||||
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
|
||||
#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
|
||||
#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
|
||||
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
|
||||
|
||||
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
|
||||
/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
|
||||
|
||||
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
|
||||
/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
|
||||
|
||||
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
|
||||
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
|
||||
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue))
|
||||
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
|
||||
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
|
||||
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
|
||||
|
||||
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
|
||||
/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
|
||||
|
||||
typedef enum {
|
||||
FSE_repeat_none, /**< Cannot use the previous table */
|
||||
@ -644,6 +668,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
|
||||
#ifndef FSE_DEFAULT_MEMORY_USAGE
|
||||
# define FSE_DEFAULT_MEMORY_USAGE 13
|
||||
#endif
|
||||
#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
|
||||
# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
|
||||
#endif
|
||||
|
||||
/*!FSE_MAX_SYMBOL_VALUE :
|
||||
* Maximum symbol value authorized.
|
||||
@ -677,7 +704,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
|
||||
# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
|
||||
#endif
|
||||
|
||||
#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
|
||||
#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
|
||||
|
||||
|
||||
#endif /* FSE_STATIC_LINKING_ONLY */
|
||||
|
@ -15,8 +15,6 @@
|
||||
/* **************************************************************
|
||||
* Includes
|
||||
****************************************************************/
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include "compiler.h"
|
||||
#include "mem.h" /* U32, U16, etc. */
|
||||
#include "debug.h" /* assert, DEBUGLOG */
|
||||
@ -25,6 +23,9 @@
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
#include "error_private.h"
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#define ZSTD_DEPS_NEED_MATH64
|
||||
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
@ -74,13 +75,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
|
||||
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
|
||||
U32 const step = FSE_TABLESTEP(tableSize);
|
||||
U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
|
||||
|
||||
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
|
||||
U32* cumul = (U32*)workSpace;
|
||||
FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
|
||||
|
||||
U32 highThreshold = tableSize-1;
|
||||
|
||||
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
|
||||
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
/* CTable header */
|
||||
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
tableU16[-2] = (U16) tableLog;
|
||||
tableU16[-1] = (U16) maxSymbolValue;
|
||||
assert(tableLog < 16); /* required for threshold strategy to work */
|
||||
@ -89,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
|
||||
|
||||
#ifdef __clang_analyzer__
|
||||
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
|
||||
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
|
||||
#endif
|
||||
|
||||
/* symbol start positions */
|
||||
@ -168,12 +171,13 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
||||
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
|
||||
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@ -307,10 +311,10 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
|
||||
size_t size;
|
||||
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
|
||||
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
|
||||
return (FSE_CTable*)malloc(size);
|
||||
return (FSE_CTable*)ZSTD_malloc(size);
|
||||
}
|
||||
|
||||
void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
|
||||
void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
|
||||
|
||||
/* provides the minimum logSize to safely represent a distribution */
|
||||
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
|
||||
@ -341,11 +345,10 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
|
||||
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
|
||||
}
|
||||
|
||||
|
||||
/* Secondary normalization method.
|
||||
To be used when primary method fails. */
|
||||
|
||||
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
|
||||
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
|
||||
{
|
||||
short const NOT_YET_ASSIGNED = -2;
|
||||
U32 s;
|
||||
@ -362,7 +365,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
||||
continue;
|
||||
}
|
||||
if (count[s] <= lowThreshold) {
|
||||
norm[s] = -1;
|
||||
norm[s] = lowProbCount;
|
||||
distributed++;
|
||||
total -= count[s];
|
||||
continue;
|
||||
@ -414,7 +417,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
||||
|
||||
{ U64 const vStepLog = 62 - tableLog;
|
||||
U64 const mid = (1ULL << (vStepLog-1)) - 1;
|
||||
U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
|
||||
U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
|
||||
U64 tmpTotal = mid;
|
||||
for (s=0; s<=maxSymbolValue; s++) {
|
||||
if (norm[s]==NOT_YET_ASSIGNED) {
|
||||
@ -431,10 +434,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
||||
const unsigned* count, size_t total,
|
||||
unsigned maxSymbolValue)
|
||||
unsigned maxSymbolValue, unsigned useLowProbCount)
|
||||
{
|
||||
/* Sanity checks */
|
||||
if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
|
||||
@ -443,8 +445,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
||||
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
|
||||
|
||||
{ static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
|
||||
short const lowProbCount = useLowProbCount ? -1 : 1;
|
||||
U64 const scale = 62 - tableLog;
|
||||
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
|
||||
U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */
|
||||
U64 const vStep = 1ULL<<(scale-20);
|
||||
int stillToDistribute = 1<<tableLog;
|
||||
unsigned s;
|
||||
@ -456,7 +459,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
||||
if (count[s] == total) return 0; /* rle special case */
|
||||
if (count[s] == 0) { normalizedCounter[s]=0; continue; }
|
||||
if (count[s] <= lowThreshold) {
|
||||
normalizedCounter[s] = -1;
|
||||
normalizedCounter[s] = lowProbCount;
|
||||
stillToDistribute--;
|
||||
} else {
|
||||
short proba = (short)((count[s]*step) >> scale);
|
||||
@ -470,7 +473,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
||||
} }
|
||||
if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
|
||||
/* corner case, need another normalization method */
|
||||
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
|
||||
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
|
||||
if (FSE_isError(errorCode)) return errorCode;
|
||||
}
|
||||
else normalizedCounter[largest] += (short)stillToDistribute;
|
||||
@ -625,6 +628,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
|
||||
|
||||
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
|
||||
|
||||
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
||||
/* FSE_compress_wksp() :
|
||||
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* `wkspSize` size must be `(1<<tableLog)`.
|
||||
@ -643,7 +647,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
|
||||
size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
|
||||
|
||||
/* init conditions */
|
||||
if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
|
||||
if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
|
||||
if (srcSize <= 1) return 0; /* Not compressible */
|
||||
if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
|
||||
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
|
||||
@ -656,7 +660,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
|
||||
}
|
||||
|
||||
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
|
||||
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
|
||||
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
|
||||
|
||||
/* Write table description header */
|
||||
{ CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
|
||||
@ -678,13 +682,16 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
|
||||
|
||||
typedef struct {
|
||||
FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
|
||||
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
|
||||
union {
|
||||
U32 hist_wksp[HIST_WKSP_SIZE_U32];
|
||||
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
|
||||
} workspace;
|
||||
} fseWkspMax_t;
|
||||
|
||||
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
fseWkspMax_t scratchBuffer;
|
||||
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
|
||||
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
|
||||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
||||
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
|
||||
}
|
||||
@ -693,6 +700,6 @@ size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcS
|
||||
{
|
||||
return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* FSE_COMMONDEFS_ONLY */
|
||||
|
@ -16,13 +16,14 @@
|
||||
/* **************************************************************
|
||||
* Includes
|
||||
****************************************************************/
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include "debug.h" /* assert */
|
||||
#include "bitstream.h"
|
||||
#include "compiler.h"
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
#include "error_private.h"
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "zstd_deps.h"
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
@ -59,25 +60,27 @@
|
||||
FSE_DTable* FSE_createDTable (unsigned tableLog)
|
||||
{
|
||||
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
|
||||
return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
|
||||
return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
|
||||
}
|
||||
|
||||
void FSE_freeDTable (FSE_DTable* dt)
|
||||
{
|
||||
free(dt);
|
||||
ZSTD_free(dt);
|
||||
}
|
||||
|
||||
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
||||
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
|
||||
FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
|
||||
U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
|
||||
U16* symbolNext = (U16*)workSpace;
|
||||
BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
|
||||
|
||||
U32 const maxSV1 = maxSymbolValue + 1;
|
||||
U32 const tableSize = 1 << tableLog;
|
||||
U32 highThreshold = tableSize-1;
|
||||
|
||||
/* Sanity Checks */
|
||||
if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
|
||||
if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
|
||||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
||||
|
||||
@ -95,11 +98,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
|
||||
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
|
||||
symbolNext[s] = normalizedCounter[s];
|
||||
} } }
|
||||
memcpy(dt, &DTableH, sizeof(DTableH));
|
||||
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
|
||||
}
|
||||
|
||||
/* Spread symbols */
|
||||
{ U32 const tableMask = tableSize-1;
|
||||
if (highThreshold == tableSize - 1) {
|
||||
size_t const tableMask = tableSize-1;
|
||||
size_t const step = FSE_TABLESTEP(tableSize);
|
||||
/* First lay down the symbols in order.
|
||||
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
|
||||
* misses since small blocks generally have small table logs, so nearly
|
||||
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
|
||||
* our buffer to handle the over-write.
|
||||
*/
|
||||
{
|
||||
U64 const add = 0x0101010101010101ull;
|
||||
size_t pos = 0;
|
||||
U64 sv = 0;
|
||||
U32 s;
|
||||
for (s=0; s<maxSV1; ++s, sv += add) {
|
||||
int i;
|
||||
int const n = normalizedCounter[s];
|
||||
MEM_write64(spread + pos, sv);
|
||||
for (i = 8; i < n; i += 8) {
|
||||
MEM_write64(spread + pos + i, sv);
|
||||
}
|
||||
pos += n;
|
||||
}
|
||||
}
|
||||
/* Now we spread those positions across the table.
|
||||
* The benefit of doing it in two stages is that we avoid the the
|
||||
* variable size inner loop, which caused lots of branch misses.
|
||||
* Now we can run through all the positions without any branch misses.
|
||||
* We unroll the loop twice, since that is what emperically worked best.
|
||||
*/
|
||||
{
|
||||
size_t position = 0;
|
||||
size_t s;
|
||||
size_t const unroll = 2;
|
||||
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
||||
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
||||
size_t u;
|
||||
for (u = 0; u < unroll; ++u) {
|
||||
size_t const uPosition = (position + (u * step)) & tableMask;
|
||||
tableDecode[uPosition].symbol = spread[s + u];
|
||||
}
|
||||
position = (position + (unroll * step)) & tableMask;
|
||||
}
|
||||
assert(position == 0);
|
||||
}
|
||||
} else {
|
||||
U32 const tableMask = tableSize-1;
|
||||
U32 const step = FSE_TABLESTEP(tableSize);
|
||||
U32 s, position = 0;
|
||||
for (s=0; s<maxSV1; s++) {
|
||||
@ -124,6 +173,11 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
|
||||
}
|
||||
|
||||
|
||||
#ifndef FSE_COMMONDEFS_ONLY
|
||||
|
||||
@ -251,36 +305,89 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
|
||||
}
|
||||
|
||||
|
||||
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
|
||||
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
unsigned maxLog, void* workSpace, size_t wkspSize,
|
||||
int bmi2)
|
||||
{
|
||||
const BYTE* const istart = (const BYTE*)cSrc;
|
||||
const BYTE* ip = istart;
|
||||
short counting[FSE_MAX_SYMBOL_VALUE+1];
|
||||
unsigned tableLog;
|
||||
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
|
||||
FSE_DTable* const dtable = (FSE_DTable*)workSpace;
|
||||
|
||||
/* normal FSE decoding mode */
|
||||
size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
|
||||
size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
|
||||
if (FSE_isError(NCountLength)) return NCountLength;
|
||||
/* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
|
||||
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
|
||||
assert(NCountLength <= cSrcSize);
|
||||
ip += NCountLength;
|
||||
cSrcSize -= NCountLength;
|
||||
|
||||
CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
|
||||
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
|
||||
wkspSize -= FSE_DTABLE_SIZE(tableLog);
|
||||
|
||||
return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
|
||||
CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
|
||||
|
||||
{
|
||||
const void* ptr = dtable;
|
||||
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
|
||||
const U32 fastMode = DTableH->fastMode;
|
||||
|
||||
/* select fast mode (static) */
|
||||
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
|
||||
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Avoids the FORCE_INLINE of the _body() function. */
|
||||
static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
|
||||
{
|
||||
#if DYNAMIC_BMI2
|
||||
if (bmi2) {
|
||||
return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
|
||||
}
|
||||
#endif
|
||||
(void)bmi2;
|
||||
return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
|
||||
}
|
||||
|
||||
|
||||
typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
|
||||
|
||||
size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
|
||||
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
|
||||
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
||||
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
|
||||
U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
|
||||
return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
|
||||
}
|
||||
|
||||
size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
/* Static analyzer seems unable to understand this table will be properly initialized later */
|
||||
U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
|
||||
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* FSE_COMMONDEFS_ONLY */
|
||||
|
54
zstd/hist.c
54
zstd/hist.c
@ -34,7 +34,7 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
||||
unsigned largestCount=0;
|
||||
|
||||
memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
|
||||
ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
|
||||
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
|
||||
|
||||
while (ip<end) {
|
||||
@ -60,9 +60,9 @@ typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
|
||||
* this design makes better use of OoO cpus,
|
||||
* and is noticeably faster when some values are heavily repeated.
|
||||
* But it needs some additional workspace for intermediate tables.
|
||||
* `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
|
||||
* `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
|
||||
* @return : largest histogram frequency,
|
||||
* or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
|
||||
* or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
|
||||
static size_t HIST_count_parallel_wksp(
|
||||
unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
@ -71,22 +71,21 @@ static size_t HIST_count_parallel_wksp(
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)source;
|
||||
const BYTE* const iend = ip+sourceSize;
|
||||
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
||||
size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
|
||||
unsigned max=0;
|
||||
U32* const Counting1 = workSpace;
|
||||
U32* const Counting2 = Counting1 + 256;
|
||||
U32* const Counting3 = Counting2 + 256;
|
||||
U32* const Counting4 = Counting3 + 256;
|
||||
|
||||
memset(workSpace, 0, 4*256*sizeof(unsigned));
|
||||
|
||||
/* safety checks */
|
||||
assert(*maxSymbolValuePtr <= 255);
|
||||
if (!sourceSize) {
|
||||
memset(count, 0, maxSymbolValue + 1);
|
||||
ZSTD_memset(count, 0, countSize);
|
||||
*maxSymbolValuePtr = 0;
|
||||
return 0;
|
||||
}
|
||||
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
|
||||
ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
|
||||
|
||||
/* by stripes of 16 bytes */
|
||||
{ U32 cached = MEM_read32(ip); ip += 4;
|
||||
@ -118,21 +117,18 @@ static size_t HIST_count_parallel_wksp(
|
||||
/* finish last symbols */
|
||||
while (ip<iend) Counting1[*ip++]++;
|
||||
|
||||
if (check) { /* verify stats will fit into destination table */
|
||||
U32 s; for (s=255; s>maxSymbolValue; s--) {
|
||||
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
|
||||
} }
|
||||
|
||||
{ U32 s;
|
||||
if (maxSymbolValue > 255) maxSymbolValue = 255;
|
||||
for (s=0; s<=maxSymbolValue; s++) {
|
||||
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (count[s] > max) max = count[s];
|
||||
for (s=0; s<256; s++) {
|
||||
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (Counting1[s] > max) max = Counting1[s];
|
||||
} }
|
||||
|
||||
while (!count[maxSymbolValue]) maxSymbolValue--;
|
||||
*maxSymbolValuePtr = maxSymbolValue;
|
||||
{ unsigned maxSymbolValue = 255;
|
||||
while (!Counting1[maxSymbolValue]) maxSymbolValue--;
|
||||
if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
|
||||
*maxSymbolValuePtr = maxSymbolValue;
|
||||
ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */
|
||||
}
|
||||
return (size_t)max;
|
||||
}
|
||||
|
||||
@ -152,14 +148,6 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
|
||||
}
|
||||
|
||||
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
|
||||
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize)
|
||||
{
|
||||
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
|
||||
}
|
||||
|
||||
/* HIST_count_wksp() :
|
||||
* Same as HIST_count(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
|
||||
@ -175,9 +163,19 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
|
||||
}
|
||||
|
||||
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
||||
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
|
||||
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize)
|
||||
{
|
||||
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
|
||||
}
|
||||
|
||||
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
|
||||
return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
|
||||
}
|
||||
#endif
|
||||
|
@ -14,7 +14,7 @@
|
||||
****************************************************************** */
|
||||
|
||||
/* --- dependencies --- */
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
|
||||
|
||||
/* --- simple histogram functions --- */
|
||||
|
31
zstd/huf.h
31
zstd/huf.h
@ -20,7 +20,7 @@ extern "C" {
|
||||
#define HUF_H_298734234
|
||||
|
||||
/* *** Dependencies *** */
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
|
||||
|
||||
/* *** library symbols visibility *** */
|
||||
@ -111,6 +111,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
|
||||
|
||||
/* *** Dependencies *** */
|
||||
#include "mem.h" /* U32 */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
|
||||
|
||||
/* *** Constants *** */
|
||||
@ -133,12 +135,16 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
|
||||
#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
|
||||
|
||||
/* static allocation of HUF's Compression Table */
|
||||
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
|
||||
struct HUF_CElt_s {
|
||||
U16 val;
|
||||
BYTE nbBits;
|
||||
}; /* typedef'd to HUF_CElt */
|
||||
typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */
|
||||
#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
|
||||
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
|
||||
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
|
||||
U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
|
||||
void* name##hv = &(name##hb); \
|
||||
HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
|
||||
HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
|
||||
|
||||
/* static allocation of HUF's DTable */
|
||||
typedef U32 HUF_DTable;
|
||||
@ -184,7 +190,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
* or to save and regenerate 'CTable' using external methods.
|
||||
*/
|
||||
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
|
||||
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
|
||||
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
|
||||
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
|
||||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
||||
@ -226,6 +231,19 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
|
||||
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/*! HUF_readStats_wksp() :
|
||||
* Same as HUF_readStats() but takes an external workspace which must be
|
||||
* 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
|
||||
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
|
||||
*/
|
||||
#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
|
||||
#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
|
||||
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
|
||||
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workspace, size_t wkspSize,
|
||||
int bmi2);
|
||||
|
||||
/** HUF_readCTable() :
|
||||
* Loading a CTable saved with HUF_writeCTable() */
|
||||
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
|
||||
@ -332,6 +350,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
|
||||
#endif
|
||||
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
|
||||
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
#endif
|
||||
|
||||
#endif /* HUF_STATIC_LINKING_ONLY */
|
||||
|
||||
|
@ -23,8 +23,7 @@
|
||||
/* **************************************************************
|
||||
* Includes
|
||||
****************************************************************/
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include <stdio.h> /* printf (debug) */
|
||||
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
|
||||
#include "compiler.h"
|
||||
#include "bitstream.h"
|
||||
#include "hist.h"
|
||||
@ -70,7 +69,7 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
||||
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
|
||||
|
||||
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
|
||||
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
|
||||
BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
|
||||
|
||||
unsigned count[HUF_TABLELOG_MAX+1];
|
||||
S16 norm[HUF_TABLELOG_MAX+1];
|
||||
@ -85,7 +84,7 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
||||
}
|
||||
|
||||
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
|
||||
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
|
||||
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
|
||||
|
||||
/* Write table description header */
|
||||
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
|
||||
@ -103,11 +102,6 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
||||
}
|
||||
|
||||
|
||||
struct HUF_CElt_s {
|
||||
U16 val;
|
||||
BYTE nbBits;
|
||||
}; /* typedef'd to HUF_CElt within "huf.h" */
|
||||
|
||||
/*! HUF_writeCTable() :
|
||||
`CTable` : Huffman tree to save, using huf representation.
|
||||
@return : size of saved CTable */
|
||||
@ -156,6 +150,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
||||
|
||||
/* get symbol weights */
|
||||
CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
|
||||
*hasZeroWeights = (rankVal[0] > 0);
|
||||
|
||||
/* check result */
|
||||
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
||||
@ -164,16 +159,14 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
||||
/* Prepare base value per rank */
|
||||
{ U32 n, nextRankStart = 0;
|
||||
for (n=1; n<=tableLog; n++) {
|
||||
U32 current = nextRankStart;
|
||||
U32 curr = nextRankStart;
|
||||
nextRankStart += (rankVal[n] << (n-1));
|
||||
rankVal[n] = current;
|
||||
rankVal[n] = curr;
|
||||
} }
|
||||
|
||||
/* fill nbBits */
|
||||
*hasZeroWeights = 0;
|
||||
{ U32 n; for (n=0; n<nbSymbols; n++) {
|
||||
const U32 w = huffWeight[n];
|
||||
*hasZeroWeights |= (w == 0);
|
||||
CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
|
||||
} }
|
||||
|
||||
@ -212,32 +205,63 @@ typedef struct nodeElt_s {
|
||||
BYTE nbBits;
|
||||
} nodeElt;
|
||||
|
||||
/**
|
||||
* HUF_setMaxHeight():
|
||||
* Enforces maxNbBits on the Huffman tree described in huffNode.
|
||||
*
|
||||
* It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
|
||||
* the tree to so that it is a valid canonical Huffman tree.
|
||||
*
|
||||
* @pre The sum of the ranks of each symbol == 2^largestBits,
|
||||
* where largestBits == huffNode[lastNonNull].nbBits.
|
||||
* @post The sum of the ranks of each symbol == 2^largestBits,
|
||||
* where largestBits is the return value <= maxNbBits.
|
||||
*
|
||||
* @param huffNode The Huffman tree modified in place to enforce maxNbBits.
|
||||
* @param lastNonNull The symbol with the lowest count in the Huffman tree.
|
||||
* @param maxNbBits The maximum allowed number of bits, which the Huffman tree
|
||||
* may not respect. After this function the Huffman tree will
|
||||
* respect maxNbBits.
|
||||
* @return The maximum number of bits of the Huffman tree after adjustment,
|
||||
* necessarily no more than maxNbBits.
|
||||
*/
|
||||
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
||||
{
|
||||
const U32 largestBits = huffNode[lastNonNull].nbBits;
|
||||
if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
|
||||
/* early exit : no elt > maxNbBits, so the tree is already valid. */
|
||||
if (largestBits <= maxNbBits) return largestBits;
|
||||
|
||||
/* there are several too large elements (at least >= 2) */
|
||||
{ int totalCost = 0;
|
||||
const U32 baseCost = 1 << (largestBits - maxNbBits);
|
||||
int n = (int)lastNonNull;
|
||||
|
||||
/* Adjust any ranks > maxNbBits to maxNbBits.
|
||||
* Compute totalCost, which is how far the sum of the ranks is
|
||||
* we are over 2^largestBits after adjust the offending ranks.
|
||||
*/
|
||||
while (huffNode[n].nbBits > maxNbBits) {
|
||||
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
|
||||
huffNode[n].nbBits = (BYTE)maxNbBits;
|
||||
n --;
|
||||
} /* n stops at huffNode[n].nbBits <= maxNbBits */
|
||||
while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
|
||||
n--;
|
||||
}
|
||||
/* n stops at huffNode[n].nbBits <= maxNbBits */
|
||||
assert(huffNode[n].nbBits <= maxNbBits);
|
||||
/* n end at index of smallest symbol using < maxNbBits */
|
||||
while (huffNode[n].nbBits == maxNbBits) --n;
|
||||
|
||||
/* renorm totalCost */
|
||||
totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
|
||||
/* renorm totalCost from 2^largestBits to 2^maxNbBits
|
||||
* note : totalCost is necessarily a multiple of baseCost */
|
||||
assert((totalCost & (baseCost - 1)) == 0);
|
||||
totalCost >>= (largestBits - maxNbBits);
|
||||
assert(totalCost > 0);
|
||||
|
||||
/* repay normalized cost */
|
||||
{ U32 const noSymbol = 0xF0F0F0F0;
|
||||
U32 rankLast[HUF_TABLELOG_MAX+2];
|
||||
|
||||
/* Get pos of last (smallest) symbol per rank */
|
||||
memset(rankLast, 0xF0, sizeof(rankLast));
|
||||
/* Get pos of last (smallest = lowest cum. count) symbol per rank */
|
||||
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
|
||||
{ U32 currentNbBits = maxNbBits;
|
||||
int pos;
|
||||
for (pos=n ; pos >= 0; pos--) {
|
||||
@ -247,34 +271,65 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
||||
} }
|
||||
|
||||
while (totalCost > 0) {
|
||||
/* Try to reduce the next power of 2 above totalCost because we
|
||||
* gain back half the rank.
|
||||
*/
|
||||
U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
|
||||
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
|
||||
U32 const highPos = rankLast[nBitsToDecrease];
|
||||
U32 const lowPos = rankLast[nBitsToDecrease-1];
|
||||
if (highPos == noSymbol) continue;
|
||||
/* Decrease highPos if no symbols of lowPos or if it is
|
||||
* not cheaper to remove 2 lowPos than highPos.
|
||||
*/
|
||||
if (lowPos == noSymbol) break;
|
||||
{ U32 const highTotal = huffNode[highPos].count;
|
||||
U32 const lowTotal = 2 * huffNode[lowPos].count;
|
||||
if (highTotal <= lowTotal) break;
|
||||
} }
|
||||
/* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
|
||||
assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
|
||||
/* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
|
||||
while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
|
||||
nBitsToDecrease ++;
|
||||
nBitsToDecrease++;
|
||||
assert(rankLast[nBitsToDecrease] != noSymbol);
|
||||
/* Increase the number of bits to gain back half the rank cost. */
|
||||
totalCost -= 1 << (nBitsToDecrease-1);
|
||||
huffNode[rankLast[nBitsToDecrease]].nbBits++;
|
||||
|
||||
/* Fix up the new rank.
|
||||
* If the new rank was empty, this symbol is now its smallest.
|
||||
* Otherwise, this symbol will be the largest in the new rank so no adjustment.
|
||||
*/
|
||||
if (rankLast[nBitsToDecrease-1] == noSymbol)
|
||||
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
|
||||
huffNode[rankLast[nBitsToDecrease]].nbBits ++;
|
||||
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
|
||||
/* Fix up the old rank.
|
||||
* If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
|
||||
* it must be the only symbol in its rank, so the old rank now has no symbols.
|
||||
* Otherwise, since the Huffman nodes are sorted by count, the previous position is now
|
||||
* the smallest node in the rank. If the previous position belongs to a different rank,
|
||||
* then the rank is now empty.
|
||||
*/
|
||||
if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
|
||||
rankLast[nBitsToDecrease] = noSymbol;
|
||||
else {
|
||||
rankLast[nBitsToDecrease]--;
|
||||
if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
|
||||
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
|
||||
} } /* while (totalCost > 0) */
|
||||
}
|
||||
} /* while (totalCost > 0) */
|
||||
|
||||
/* If we've removed too much weight, then we have to add it back.
|
||||
* To avoid overshooting again, we only adjust the smallest rank.
|
||||
* We take the largest nodes from the lowest rank 0 and move them
|
||||
* to rank 1. There's guaranteed to be enough rank 0 symbols because
|
||||
* TODO.
|
||||
*/
|
||||
while (totalCost < 0) { /* Sometimes, cost correction overshoot */
|
||||
if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
|
||||
/* special case : no rank 1 symbol (using maxNbBits-1);
|
||||
* let's create one from largest rank 0 (using maxNbBits).
|
||||
*/
|
||||
if (rankLast[1] == noSymbol) {
|
||||
while (huffNode[n].nbBits == maxNbBits) n--;
|
||||
huffNode[n+1].nbBits--;
|
||||
assert(n >= 0);
|
||||
@ -285,14 +340,16 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
||||
huffNode[ rankLast[1] + 1 ].nbBits--;
|
||||
rankLast[1]++;
|
||||
totalCost ++;
|
||||
} } } /* there are several too large elements (at least >= 2) */
|
||||
}
|
||||
} /* repay normalized cost */
|
||||
} /* there are several too large elements (at least >= 2) */
|
||||
|
||||
return maxNbBits;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
U32 base;
|
||||
U32 current;
|
||||
U32 curr;
|
||||
} rankPos;
|
||||
|
||||
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
||||
@ -304,21 +361,45 @@ typedef struct {
|
||||
rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
|
||||
} HUF_buildCTable_wksp_tables;
|
||||
|
||||
/**
|
||||
* HUF_sort():
|
||||
* Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
|
||||
*
|
||||
* @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
|
||||
* Must have (maxSymbolValue + 1) entries.
|
||||
* @param[in] count Histogram of the symbols.
|
||||
* @param[in] maxSymbolValue Maximum symbol value.
|
||||
* @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
|
||||
*/
|
||||
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
|
||||
{
|
||||
U32 n;
|
||||
int n;
|
||||
int const maxSymbolValue1 = (int)maxSymbolValue + 1;
|
||||
|
||||
memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
|
||||
for (n=0; n<=maxSymbolValue; n++) {
|
||||
U32 r = BIT_highbit32(count[n] + 1);
|
||||
rankPosition[r].base ++;
|
||||
/* Compute base and set curr to base.
|
||||
* For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
|
||||
* Then 2^lowerRank <= count[n]+1 <= 2^rank.
|
||||
* We attribute each symbol to lowerRank's base value, because we want to know where
|
||||
* each rank begins in the output, so for rank R we want to count ranks R+1 and above.
|
||||
*/
|
||||
ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
|
||||
for (n = 0; n < maxSymbolValue1; ++n) {
|
||||
U32 lowerRank = BIT_highbit32(count[n] + 1);
|
||||
rankPosition[lowerRank].base++;
|
||||
}
|
||||
for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base;
|
||||
for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base;
|
||||
for (n=0; n<=maxSymbolValue; n++) {
|
||||
assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
|
||||
for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
|
||||
rankPosition[n-1].base += rankPosition[n].base;
|
||||
rankPosition[n-1].curr = rankPosition[n-1].base;
|
||||
}
|
||||
/* Sort */
|
||||
for (n = 0; n < maxSymbolValue1; ++n) {
|
||||
U32 const c = count[n];
|
||||
U32 const r = BIT_highbit32(c+1) + 1;
|
||||
U32 pos = rankPosition[r].current++;
|
||||
U32 pos = rankPosition[r].curr++;
|
||||
/* Insert into the correct position in the rank.
|
||||
* We have at most 256 symbols, so this insertion should be fine.
|
||||
*/
|
||||
while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
|
||||
huffNode[pos] = huffNode[pos-1];
|
||||
pos--;
|
||||
@ -335,28 +416,20 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu
|
||||
*/
|
||||
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
|
||||
|
||||
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
|
||||
/* HUF_buildTree():
|
||||
* Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
|
||||
*
|
||||
* @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
|
||||
* @param maxSymbolValue The maximum symbol value.
|
||||
* @return The smallest node in the Huffman tree (by count).
|
||||
*/
|
||||
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
|
||||
{
|
||||
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
|
||||
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
|
||||
nodeElt* const huffNode = huffNode0+1;
|
||||
nodeElt* const huffNode0 = huffNode - 1;
|
||||
int nonNullRank;
|
||||
int lowS, lowN;
|
||||
int nodeNb = STARTNODE;
|
||||
int n, nodeRoot;
|
||||
|
||||
/* safety checks */
|
||||
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
||||
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
|
||||
return ERROR(workSpace_tooSmall);
|
||||
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
||||
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
|
||||
return ERROR(maxSymbolValue_tooLarge);
|
||||
memset(huffNode0, 0, sizeof(huffNodeTable));
|
||||
|
||||
/* sort, decreasing order */
|
||||
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
|
||||
|
||||
/* init for parents */
|
||||
nonNullRank = (int)maxSymbolValue;
|
||||
while(huffNode[nonNullRank].count == 0) nonNullRank--;
|
||||
@ -383,40 +456,70 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
|
||||
for (n=0; n<=nonNullRank; n++)
|
||||
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
|
||||
|
||||
/* enforce maxTableLog */
|
||||
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
|
||||
|
||||
/* fill result into tree (val, nbBits) */
|
||||
{ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
|
||||
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
|
||||
int const alphabetSize = (int)(maxSymbolValue + 1);
|
||||
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
|
||||
for (n=0; n<=nonNullRank; n++)
|
||||
nbPerRank[huffNode[n].nbBits]++;
|
||||
/* determine stating value per rank */
|
||||
{ U16 min = 0;
|
||||
for (n=(int)maxNbBits; n>0; n--) {
|
||||
valPerRank[n] = min; /* get starting value within each rank */
|
||||
min += nbPerRank[n];
|
||||
min >>= 1;
|
||||
} }
|
||||
for (n=0; n<alphabetSize; n++)
|
||||
tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
|
||||
for (n=0; n<alphabetSize; n++)
|
||||
tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
|
||||
}
|
||||
|
||||
return maxNbBits;
|
||||
return nonNullRank;
|
||||
}
|
||||
|
||||
/** HUF_buildCTable() :
|
||||
* @return : maxNbBits
|
||||
* Note : count is used before tree is written, so they can safely overlap
|
||||
/**
|
||||
* HUF_buildCTableFromTree():
|
||||
* Build the CTable given the Huffman tree in huffNode.
|
||||
*
|
||||
* @param[out] CTable The output Huffman CTable.
|
||||
* @param huffNode The Huffman tree.
|
||||
* @param nonNullRank The last and smallest node in the Huffman tree.
|
||||
* @param maxSymbolValue The maximum symbol value.
|
||||
* @param maxNbBits The exact maximum number of bits used in the Huffman tree.
|
||||
*/
|
||||
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
|
||||
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
|
||||
{
|
||||
HUF_buildCTable_wksp_tables workspace;
|
||||
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
|
||||
/* fill result into ctable (val, nbBits) */
|
||||
int n;
|
||||
U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
|
||||
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
|
||||
int const alphabetSize = (int)(maxSymbolValue + 1);
|
||||
for (n=0; n<=nonNullRank; n++)
|
||||
nbPerRank[huffNode[n].nbBits]++;
|
||||
/* determine starting value per rank */
|
||||
{ U16 min = 0;
|
||||
for (n=(int)maxNbBits; n>0; n--) {
|
||||
valPerRank[n] = min; /* get starting value within each rank */
|
||||
min += nbPerRank[n];
|
||||
min >>= 1;
|
||||
} }
|
||||
for (n=0; n<alphabetSize; n++)
|
||||
CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
|
||||
for (n=0; n<alphabetSize; n++)
|
||||
CTable[n].val = valPerRank[CTable[n].nbBits]++; /* assign value within rank, symbol order */
|
||||
}
|
||||
|
||||
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
|
||||
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
|
||||
nodeElt* const huffNode = huffNode0+1;
|
||||
int nonNullRank;
|
||||
|
||||
/* safety checks */
|
||||
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
||||
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
|
||||
return ERROR(workSpace_tooSmall);
|
||||
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
||||
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
|
||||
return ERROR(maxSymbolValue_tooLarge);
|
||||
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
|
||||
|
||||
/* sort, decreasing order */
|
||||
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
|
||||
|
||||
/* build tree */
|
||||
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
|
||||
|
||||
/* enforce maxTableLog */
|
||||
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
|
||||
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
|
||||
|
||||
HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
|
||||
|
||||
return maxNbBits;
|
||||
}
|
||||
|
||||
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
|
||||
@ -695,7 +798,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
||||
CHECK_F(maxBits);
|
||||
huffLog = (U32)maxBits;
|
||||
/* Zero unused symbols in CTable, so we can check it for validity */
|
||||
memset(table->CTable + (maxSymbolValue + 1), 0,
|
||||
ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
|
||||
sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
|
||||
}
|
||||
|
||||
@ -716,7 +819,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
||||
op += hSize;
|
||||
if (repeat) { *repeat = HUF_repeat_none; }
|
||||
if (oldHufTable)
|
||||
memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
|
||||
ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
|
||||
}
|
||||
return HUF_compressCTable_internal(ostart, op, oend,
|
||||
src, srcSize,
|
||||
@ -747,14 +850,6 @@ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
|
||||
repeat, preferRepeat, bmi2);
|
||||
}
|
||||
|
||||
size_t HUF_compress1X (void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog)
|
||||
{
|
||||
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
||||
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
/* HUF_compress4X_repeat():
|
||||
* compress input using 4 streams.
|
||||
* provide workspace to generate compression tables */
|
||||
@ -784,6 +879,25 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
|
||||
hufTable, repeat, preferRepeat, bmi2);
|
||||
}
|
||||
|
||||
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
||||
/** HUF_buildCTable() :
|
||||
* @return : maxNbBits
|
||||
* Note : count is used before tree is written, so they can safely overlap
|
||||
*/
|
||||
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
|
||||
{
|
||||
HUF_buildCTable_wksp_tables workspace;
|
||||
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
|
||||
}
|
||||
|
||||
size_t HUF_compress1X (void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog)
|
||||
{
|
||||
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
||||
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
size_t HUF_compress2 (void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog)
|
||||
@ -796,3 +910,4 @@ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSi
|
||||
{
|
||||
return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
|
||||
}
|
||||
#endif
|
||||
|
@ -15,7 +15,7 @@
|
||||
/* **************************************************************
|
||||
* Dependencies
|
||||
****************************************************************/
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
|
||||
#include "compiler.h"
|
||||
#include "bitstream.h" /* BIT_* */
|
||||
#include "fse.h" /* to compress headers */
|
||||
@ -103,7 +103,7 @@ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved;
|
||||
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
||||
{
|
||||
DTableDesc dtd;
|
||||
memcpy(&dtd, table, sizeof(dtd));
|
||||
ZSTD_memcpy(&dtd, table, sizeof(dtd));
|
||||
return dtd;
|
||||
}
|
||||
|
||||
@ -115,29 +115,51 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
||||
/*-***************************/
|
||||
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
|
||||
|
||||
/**
|
||||
* Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
|
||||
* a time.
|
||||
*/
|
||||
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
|
||||
U64 D4;
|
||||
if (MEM_isLittleEndian()) {
|
||||
D4 = symbol + (nbBits << 8);
|
||||
} else {
|
||||
D4 = (symbol << 8) + nbBits;
|
||||
}
|
||||
D4 *= 0x0001000100010001ULL;
|
||||
return D4;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
||||
U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
||||
U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
||||
BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
|
||||
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
|
||||
} HUF_ReadDTableX1_Workspace;
|
||||
|
||||
|
||||
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
||||
}
|
||||
|
||||
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
|
||||
{
|
||||
U32 tableLog = 0;
|
||||
U32 nbSymbols = 0;
|
||||
size_t iSize;
|
||||
void* const dtPtr = DTable + 1;
|
||||
HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
|
||||
HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
|
||||
|
||||
U32* rankVal;
|
||||
BYTE* huffWeight;
|
||||
size_t spaceUsed32 = 0;
|
||||
|
||||
rankVal = (U32 *)workSpace + spaceUsed32;
|
||||
spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
|
||||
huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
||||
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
||||
|
||||
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
|
||||
if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
|
||||
DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
|
||||
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
|
||||
/* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
|
||||
|
||||
iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
|
||||
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
|
||||
if (HUF_isError(iSize)) return iSize;
|
||||
|
||||
/* Table header */
|
||||
@ -145,52 +167,117 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
|
||||
if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
|
||||
dtd.tableType = 0;
|
||||
dtd.tableLog = (BYTE)tableLog;
|
||||
memcpy(DTable, &dtd, sizeof(dtd));
|
||||
ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
|
||||
}
|
||||
|
||||
/* Calculate starting value for each rank */
|
||||
{ U32 n, nextRankStart = 0;
|
||||
for (n=1; n<tableLog+1; n++) {
|
||||
U32 const current = nextRankStart;
|
||||
nextRankStart += (rankVal[n] << (n-1));
|
||||
rankVal[n] = current;
|
||||
} }
|
||||
/* Compute symbols and rankStart given rankVal:
|
||||
*
|
||||
* rankVal already contains the number of values of each weight.
|
||||
*
|
||||
* symbols contains the symbols ordered by weight. First are the rankVal[0]
|
||||
* weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
|
||||
* symbols[0] is filled (but unused) to avoid a branch.
|
||||
*
|
||||
* rankStart contains the offset where each rank belongs in the DTable.
|
||||
* rankStart[0] is not filled because there are no entries in the table for
|
||||
* weight 0.
|
||||
*/
|
||||
{
|
||||
int n;
|
||||
int nextRankStart = 0;
|
||||
int const unroll = 4;
|
||||
int const nLimit = (int)nbSymbols - unroll + 1;
|
||||
for (n=0; n<(int)tableLog+1; n++) {
|
||||
U32 const curr = nextRankStart;
|
||||
nextRankStart += wksp->rankVal[n];
|
||||
wksp->rankStart[n] = curr;
|
||||
}
|
||||
for (n=0; n < nLimit; n += unroll) {
|
||||
int u;
|
||||
for (u=0; u < unroll; ++u) {
|
||||
size_t const w = wksp->huffWeight[n+u];
|
||||
wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
|
||||
}
|
||||
}
|
||||
for (; n < (int)nbSymbols; ++n) {
|
||||
size_t const w = wksp->huffWeight[n];
|
||||
wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
|
||||
}
|
||||
}
|
||||
|
||||
/* fill DTable */
|
||||
{ U32 n;
|
||||
size_t const nEnd = nbSymbols;
|
||||
for (n=0; n<nEnd; n++) {
|
||||
size_t const w = huffWeight[n];
|
||||
size_t const length = (1 << w) >> 1;
|
||||
size_t const uStart = rankVal[w];
|
||||
size_t const uEnd = uStart + length;
|
||||
size_t u;
|
||||
HUF_DEltX1 D;
|
||||
D.byte = (BYTE)n;
|
||||
D.nbBits = (BYTE)(tableLog + 1 - w);
|
||||
rankVal[w] = (U32)uEnd;
|
||||
if (length < 4) {
|
||||
/* Use length in the loop bound so the compiler knows it is short. */
|
||||
for (u = 0; u < length; ++u)
|
||||
dt[uStart + u] = D;
|
||||
} else {
|
||||
/* Unroll the loop 4 times, we know it is a power of 2. */
|
||||
for (u = uStart; u < uEnd; u += 4) {
|
||||
dt[u + 0] = D;
|
||||
dt[u + 1] = D;
|
||||
dt[u + 2] = D;
|
||||
dt[u + 3] = D;
|
||||
} } } }
|
||||
/* fill DTable
|
||||
* We fill all entries of each weight in order.
|
||||
* That way length is a constant for each iteration of the outter loop.
|
||||
* We can switch based on the length to a different inner loop which is
|
||||
* optimized for that particular case.
|
||||
*/
|
||||
{
|
||||
U32 w;
|
||||
int symbol=wksp->rankVal[0];
|
||||
int rankStart=0;
|
||||
for (w=1; w<tableLog+1; ++w) {
|
||||
int const symbolCount = wksp->rankVal[w];
|
||||
int const length = (1 << w) >> 1;
|
||||
int uStart = rankStart;
|
||||
BYTE const nbBits = (BYTE)(tableLog + 1 - w);
|
||||
int s;
|
||||
int u;
|
||||
switch (length) {
|
||||
case 1:
|
||||
for (s=0; s<symbolCount; ++s) {
|
||||
HUF_DEltX1 D;
|
||||
D.byte = wksp->symbols[symbol + s];
|
||||
D.nbBits = nbBits;
|
||||
dt[uStart] = D;
|
||||
uStart += 1;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (s=0; s<symbolCount; ++s) {
|
||||
HUF_DEltX1 D;
|
||||
D.byte = wksp->symbols[symbol + s];
|
||||
D.nbBits = nbBits;
|
||||
dt[uStart+0] = D;
|
||||
dt[uStart+1] = D;
|
||||
uStart += 2;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
for (s=0; s<symbolCount; ++s) {
|
||||
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
||||
MEM_write64(dt + uStart, D4);
|
||||
uStart += 4;
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
for (s=0; s<symbolCount; ++s) {
|
||||
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
||||
MEM_write64(dt + uStart, D4);
|
||||
MEM_write64(dt + uStart + 4, D4);
|
||||
uStart += 8;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
for (s=0; s<symbolCount; ++s) {
|
||||
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
||||
for (u=0; u < length; u += 16) {
|
||||
MEM_write64(dt + uStart + u + 0, D4);
|
||||
MEM_write64(dt + uStart + u + 4, D4);
|
||||
MEM_write64(dt + uStart + u + 8, D4);
|
||||
MEM_write64(dt + uStart + u + 12, D4);
|
||||
}
|
||||
assert(u == length);
|
||||
uStart += length;
|
||||
}
|
||||
break;
|
||||
}
|
||||
symbol += symbolCount;
|
||||
rankStart += symbolCount * length;
|
||||
}
|
||||
}
|
||||
return iSize;
|
||||
}
|
||||
|
||||
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_readDTableX1_wksp(DTable, src, srcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE BYTE
|
||||
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
|
||||
{
|
||||
@ -389,20 +476,6 @@ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
||||
}
|
||||
|
||||
|
||||
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
||||
return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
|
||||
size_t HUF_decompress4X1_usingDTable(
|
||||
void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
@ -419,8 +492,7 @@ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size
|
||||
{
|
||||
const BYTE* ip = (const BYTE*) cSrc;
|
||||
|
||||
size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
|
||||
workSpace, wkspSize);
|
||||
size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
||||
if (HUF_isError(hSize)) return hSize;
|
||||
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
||||
ip += hSize; cSrcSize -= hSize;
|
||||
@ -436,18 +508,6 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
}
|
||||
|
||||
|
||||
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
||||
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
|
||||
#endif /* HUF_FORCE_DECOMPRESS_X2 */
|
||||
|
||||
|
||||
@ -474,7 +534,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 co
|
||||
U32 rankVal[HUF_TABLELOG_MAX + 1];
|
||||
|
||||
/* get pre-calculated rankVal */
|
||||
memcpy(rankVal, rankValOrigin, sizeof(rankVal));
|
||||
ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
|
||||
|
||||
/* fill skipped values */
|
||||
if (minWeight>1) {
|
||||
@ -516,7 +576,7 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
|
||||
const U32 minBits = nbBitsBaseline - maxWeight;
|
||||
U32 s;
|
||||
|
||||
memcpy(rankVal, rankValOrigin, sizeof(rankVal));
|
||||
ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
|
||||
|
||||
/* fill DTable */
|
||||
for (s=0; s<sortedListSize; s++) {
|
||||
@ -581,11 +641,11 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
|
||||
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
|
||||
rankStart = rankStart0 + 1;
|
||||
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
|
||||
ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
|
||||
|
||||
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
|
||||
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
||||
/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
|
||||
/* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
|
||||
|
||||
iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
|
||||
if (HUF_isError(iSize)) return iSize;
|
||||
@ -599,9 +659,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
|
||||
/* Get start index of each weight */
|
||||
{ U32 w, nextRankStart = 0;
|
||||
for (w=1; w<maxW+1; w++) {
|
||||
U32 current = nextRankStart;
|
||||
U32 curr = nextRankStart;
|
||||
nextRankStart += rankStats[w];
|
||||
rankStart[w] = current;
|
||||
rankStart[w] = curr;
|
||||
}
|
||||
rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
|
||||
sizeOfSort = nextRankStart;
|
||||
@ -624,9 +684,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
|
||||
U32 nextRankVal = 0;
|
||||
U32 w;
|
||||
for (w=1; w<maxW+1; w++) {
|
||||
U32 current = nextRankVal;
|
||||
U32 curr = nextRankVal;
|
||||
nextRankVal += rankStats[w] << (w+rescale);
|
||||
rankVal0[w] = current;
|
||||
rankVal0[w] = curr;
|
||||
} }
|
||||
{ U32 const minBits = tableLog+1 - maxW;
|
||||
U32 consumed;
|
||||
@ -644,23 +704,16 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
|
||||
|
||||
dtd.tableLog = (BYTE)maxTableLog;
|
||||
dtd.tableType = 1;
|
||||
memcpy(DTable, &dtd, sizeof(dtd));
|
||||
ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
|
||||
return iSize;
|
||||
}
|
||||
|
||||
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_readDTableX2_wksp(DTable, src, srcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE U32
|
||||
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
||||
{
|
||||
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
||||
memcpy(op, dt+val, 2);
|
||||
ZSTD_memcpy(op, dt+val, 2);
|
||||
BIT_skipBits(DStream, dt[val].nbBits);
|
||||
return dt[val].length;
|
||||
}
|
||||
@ -669,7 +722,7 @@ FORCE_INLINE_TEMPLATE U32
|
||||
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
||||
{
|
||||
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
||||
memcpy(op, dt+val, 1);
|
||||
ZSTD_memcpy(op, dt+val, 1);
|
||||
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
|
||||
else {
|
||||
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
||||
@ -890,20 +943,6 @@ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
||||
}
|
||||
|
||||
|
||||
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
||||
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
|
||||
size_t HUF_decompress4X2_usingDTable(
|
||||
void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
@ -937,20 +976,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
}
|
||||
|
||||
|
||||
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
||||
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
|
||||
#endif /* HUF_FORCE_DECOMPRESS_X1 */
|
||||
|
||||
|
||||
@ -1051,67 +1076,6 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
||||
}
|
||||
|
||||
|
||||
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
||||
|
||||
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
|
||||
#endif
|
||||
|
||||
/* validation checks */
|
||||
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
||||
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
||||
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
||||
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
||||
|
||||
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 0);
|
||||
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 1);
|
||||
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
|
||||
#else
|
||||
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
/* validation checks */
|
||||
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
||||
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
||||
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
||||
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
||||
|
||||
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 0);
|
||||
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 1);
|
||||
return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
||||
#else
|
||||
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
|
||||
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
|
||||
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
|
||||
size_t dstSize, const void* cSrc,
|
||||
size_t cSrcSize, void* workSpace,
|
||||
@ -1145,8 +1109,8 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
/* validation checks */
|
||||
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
||||
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
||||
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
||||
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
||||
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
||||
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
||||
|
||||
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
@ -1168,14 +1132,6 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
}
|
||||
}
|
||||
|
||||
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
|
||||
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
|
||||
{
|
||||
@ -1199,7 +1155,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
|
||||
{
|
||||
const BYTE* ip = (const BYTE*) cSrc;
|
||||
|
||||
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
|
||||
size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
||||
if (HUF_isError(hSize)) return hSize;
|
||||
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
||||
ip += hSize; cSrcSize -= hSize;
|
||||
@ -1246,3 +1202,149 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_readDTableX1_wksp(DTable, src, srcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
||||
return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_readDTableX2_wksp(DTable, src, srcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
||||
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
||||
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
||||
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
#endif
|
||||
|
||||
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
||||
|
||||
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
|
||||
#endif
|
||||
|
||||
/* validation checks */
|
||||
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
||||
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
||||
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
||||
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
||||
|
||||
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 0);
|
||||
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 1);
|
||||
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
|
||||
#else
|
||||
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
/* validation checks */
|
||||
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
||||
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
||||
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
||||
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
||||
|
||||
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 0);
|
||||
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 1);
|
||||
return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
||||
#else
|
||||
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
|
||||
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
|
||||
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
||||
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
||||
workSpace, sizeof(workSpace));
|
||||
}
|
||||
#endif
|
||||
|
159
zstd/mem.h
159
zstd/mem.h
@ -18,8 +18,10 @@ extern "C" {
|
||||
/*-****************************************
|
||||
* Dependencies
|
||||
******************************************/
|
||||
#include <stddef.h> /* size_t, ptrdiff_t */
|
||||
#include <string.h> /* memcpy */
|
||||
#include <stddef.h> /* size_t, ptrdiff_t */
|
||||
#include "compiler.h" /* __has_builtin */
|
||||
#include "debug.h" /* DEBUG_STATIC_ASSERT */
|
||||
#include "zstd_deps.h" /* ZSTD_memcpy */
|
||||
|
||||
|
||||
/*-****************************************
|
||||
@ -39,93 +41,15 @@ extern "C" {
|
||||
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0 /* compat. with non-clang compilers */
|
||||
#endif
|
||||
|
||||
/* code only tested on 32 and 64 bits systems */
|
||||
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
|
||||
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
|
||||
|
||||
/* detects whether we are being compiled under msan */
|
||||
#if defined (__has_feature)
|
||||
# if __has_feature(memory_sanitizer)
|
||||
# define MEMORY_SANITIZER 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined (MEMORY_SANITIZER)
|
||||
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
|
||||
* We therefore declare the functions we need ourselves, rather than trying to
|
||||
* include the header file... */
|
||||
|
||||
#include <stdint.h> /* intptr_t */
|
||||
|
||||
/* Make memory region fully initialized (without changing its contents). */
|
||||
void __msan_unpoison(const volatile void *a, size_t size);
|
||||
|
||||
/* Make memory region fully uninitialized (without changing its contents).
|
||||
This is a legacy interface that does not update origin information. Use
|
||||
__msan_allocated_memory() instead. */
|
||||
void __msan_poison(const volatile void *a, size_t size);
|
||||
|
||||
/* Returns the offset of the first (at least partially) poisoned byte in the
|
||||
memory range, or -1 if the whole range is good. */
|
||||
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under asan */
|
||||
#if defined (__has_feature)
|
||||
# if __has_feature(address_sanitizer)
|
||||
# define ADDRESS_SANITIZER 1
|
||||
# endif
|
||||
#elif defined(__SANITIZE_ADDRESS__)
|
||||
# define ADDRESS_SANITIZER 1
|
||||
#endif
|
||||
|
||||
#if defined (ADDRESS_SANITIZER)
|
||||
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
|
||||
* We therefore declare the functions we need ourselves, rather than trying to
|
||||
* include the header file... */
|
||||
|
||||
/**
|
||||
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
|
||||
*
|
||||
* This memory must be previously allocated by your program. Instrumented
|
||||
* code is forbidden from accessing addresses in this region until it is
|
||||
* unpoisoned. This function is not guaranteed to poison the entire region -
|
||||
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
|
||||
* alignment restrictions.
|
||||
*
|
||||
* \note This function is not thread-safe because no two threads can poison or
|
||||
* unpoison memory in the same memory region simultaneously.
|
||||
*
|
||||
* \param addr Start of memory region.
|
||||
* \param size Size of memory region. */
|
||||
void __asan_poison_memory_region(void const volatile *addr, size_t size);
|
||||
|
||||
/**
|
||||
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
|
||||
*
|
||||
* This memory must be previously allocated by your program. Accessing
|
||||
* addresses in this region is allowed until this region is poisoned again.
|
||||
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
|
||||
* to ASan alignment restrictions.
|
||||
*
|
||||
* \note This function is not thread-safe because no two threads can
|
||||
* poison or unpoison memory in the same memory region simultaneously.
|
||||
*
|
||||
* \param addr Start of memory region.
|
||||
* \param size Size of memory region. */
|
||||
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
|
||||
#endif
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* Basic Types
|
||||
*****************************************************************/
|
||||
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
||||
# include <stdint.h>
|
||||
# if defined(_AIX)
|
||||
# include <inttypes.h>
|
||||
# else
|
||||
# include <stdint.h> /* intptr_t */
|
||||
# endif
|
||||
typedef uint8_t BYTE;
|
||||
typedef uint16_t U16;
|
||||
typedef int16_t S16;
|
||||
@ -157,7 +81,53 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* Memory I/O
|
||||
* Memory I/O API
|
||||
*****************************************************************/
|
||||
/*=== Static platform detection ===*/
|
||||
MEM_STATIC unsigned MEM_32bits(void);
|
||||
MEM_STATIC unsigned MEM_64bits(void);
|
||||
MEM_STATIC unsigned MEM_isLittleEndian(void);
|
||||
|
||||
/*=== Native unaligned read/write ===*/
|
||||
MEM_STATIC U16 MEM_read16(const void* memPtr);
|
||||
MEM_STATIC U32 MEM_read32(const void* memPtr);
|
||||
MEM_STATIC U64 MEM_read64(const void* memPtr);
|
||||
MEM_STATIC size_t MEM_readST(const void* memPtr);
|
||||
|
||||
MEM_STATIC void MEM_write16(void* memPtr, U16 value);
|
||||
MEM_STATIC void MEM_write32(void* memPtr, U32 value);
|
||||
MEM_STATIC void MEM_write64(void* memPtr, U64 value);
|
||||
|
||||
/*=== Little endian unaligned read/write ===*/
|
||||
MEM_STATIC U16 MEM_readLE16(const void* memPtr);
|
||||
MEM_STATIC U32 MEM_readLE24(const void* memPtr);
|
||||
MEM_STATIC U32 MEM_readLE32(const void* memPtr);
|
||||
MEM_STATIC U64 MEM_readLE64(const void* memPtr);
|
||||
MEM_STATIC size_t MEM_readLEST(const void* memPtr);
|
||||
|
||||
MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
|
||||
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
|
||||
MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
|
||||
MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
|
||||
MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
|
||||
|
||||
/*=== Big endian unaligned read/write ===*/
|
||||
MEM_STATIC U32 MEM_readBE32(const void* memPtr);
|
||||
MEM_STATIC U64 MEM_readBE64(const void* memPtr);
|
||||
MEM_STATIC size_t MEM_readBEST(const void* memPtr);
|
||||
|
||||
MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
|
||||
MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
|
||||
MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
|
||||
|
||||
/*=== Byteswap ===*/
|
||||
MEM_STATIC U32 MEM_swap32(U32 in);
|
||||
MEM_STATIC U64 MEM_swap64(U64 in);
|
||||
MEM_STATIC size_t MEM_swapST(size_t in);
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* Memory I/O Implementation
|
||||
*****************************************************************/
|
||||
/* MEM_FORCE_MEMORY_ACCESS :
|
||||
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
|
||||
@ -236,37 +206,37 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v =
|
||||
|
||||
MEM_STATIC U16 MEM_read16(const void* memPtr)
|
||||
{
|
||||
U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
MEM_STATIC U32 MEM_read32(const void* memPtr)
|
||||
{
|
||||
U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
MEM_STATIC U64 MEM_read64(const void* memPtr)
|
||||
{
|
||||
U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t MEM_readST(const void* memPtr)
|
||||
{
|
||||
size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
MEM_STATIC void MEM_write16(void* memPtr, U16 value)
|
||||
{
|
||||
memcpy(memPtr, &value, sizeof(value));
|
||||
ZSTD_memcpy(memPtr, &value, sizeof(value));
|
||||
}
|
||||
|
||||
MEM_STATIC void MEM_write32(void* memPtr, U32 value)
|
||||
{
|
||||
memcpy(memPtr, &value, sizeof(value));
|
||||
ZSTD_memcpy(memPtr, &value, sizeof(value));
|
||||
}
|
||||
|
||||
MEM_STATIC void MEM_write64(void* memPtr, U64 value)
|
||||
{
|
||||
memcpy(memPtr, &value, sizeof(value));
|
||||
ZSTD_memcpy(memPtr, &value, sizeof(value));
|
||||
}
|
||||
|
||||
#endif /* MEM_FORCE_MEMORY_ACCESS */
|
||||
@ -445,6 +415,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
|
||||
MEM_writeBE64(memPtr, (U64)val);
|
||||
}
|
||||
|
||||
/* code only tested on 32 and 64 bits systems */
|
||||
MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
38
zstd/pool.c
38
zstd/pool.c
@ -10,9 +10,9 @@
|
||||
|
||||
|
||||
/* ====== Dependencies ======= */
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#include "debug.h" /* assert */
|
||||
#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
|
||||
#include "zstd_internal.h" /* ZSTD_customMalloc, ZSTD_customFree */
|
||||
#include "pool.h"
|
||||
|
||||
/* ====== Compiler specifics ====== */
|
||||
@ -105,6 +105,10 @@ static void* POOL_thread(void* opaque) {
|
||||
assert(0); /* Unreachable */
|
||||
}
|
||||
|
||||
POOL_ctx* ZSTD_createThreadPool(size_t numThreads) {
|
||||
return POOL_create (numThreads, 0);
|
||||
}
|
||||
|
||||
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
|
||||
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
|
||||
}
|
||||
@ -115,14 +119,14 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
|
||||
/* Check parameters */
|
||||
if (!numThreads) { return NULL; }
|
||||
/* Allocate the context and zero initialize */
|
||||
ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
|
||||
ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem);
|
||||
if (!ctx) { return NULL; }
|
||||
/* Initialize the job queue.
|
||||
* It needs one extra space since one space is wasted to differentiate
|
||||
* empty and full queues.
|
||||
*/
|
||||
ctx->queueSize = queueSize + 1;
|
||||
ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
|
||||
ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem);
|
||||
ctx->queueHead = 0;
|
||||
ctx->queueTail = 0;
|
||||
ctx->numThreadsBusy = 0;
|
||||
@ -136,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
|
||||
}
|
||||
ctx->shutdown = 0;
|
||||
/* Allocate space for the thread handles */
|
||||
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
|
||||
ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
|
||||
ctx->threadCapacity = 0;
|
||||
ctx->customMem = customMem;
|
||||
/* Check for errors */
|
||||
@ -179,12 +183,14 @@ void POOL_free(POOL_ctx *ctx) {
|
||||
ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
|
||||
ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
|
||||
ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
|
||||
ZSTD_free(ctx->queue, ctx->customMem);
|
||||
ZSTD_free(ctx->threads, ctx->customMem);
|
||||
ZSTD_free(ctx, ctx->customMem);
|
||||
ZSTD_customFree(ctx->queue, ctx->customMem);
|
||||
ZSTD_customFree(ctx->threads, ctx->customMem);
|
||||
ZSTD_customFree(ctx, ctx->customMem);
|
||||
}
|
||||
|
||||
|
||||
void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
|
||||
POOL_free (pool);
|
||||
}
|
||||
|
||||
size_t POOL_sizeof(POOL_ctx *ctx) {
|
||||
if (ctx==NULL) return 0; /* supports sizeof NULL */
|
||||
@ -203,11 +209,11 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
|
||||
return 0;
|
||||
}
|
||||
/* numThreads > threadCapacity */
|
||||
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
|
||||
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
|
||||
if (!threadPool) return 1;
|
||||
/* replace existing thread pool */
|
||||
memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
|
||||
ZSTD_free(ctx->threads, ctx->customMem);
|
||||
ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
|
||||
ZSTD_customFree(ctx->threads, ctx->customMem);
|
||||
ctx->threads = threadPool;
|
||||
/* Initialize additional threads */
|
||||
{ size_t threadId;
|
||||
@ -301,7 +307,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
|
||||
struct POOL_ctx_s {
|
||||
int dummy;
|
||||
};
|
||||
static POOL_ctx g_ctx;
|
||||
static POOL_ctx g_poolCtx;
|
||||
|
||||
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
|
||||
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
|
||||
@ -311,11 +317,11 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM
|
||||
(void)numThreads;
|
||||
(void)queueSize;
|
||||
(void)customMem;
|
||||
return &g_ctx;
|
||||
return &g_poolCtx;
|
||||
}
|
||||
|
||||
void POOL_free(POOL_ctx* ctx) {
|
||||
assert(!ctx || ctx == &g_ctx);
|
||||
assert(!ctx || ctx == &g_poolCtx);
|
||||
(void)ctx;
|
||||
}
|
||||
|
||||
@ -337,7 +343,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
|
||||
|
||||
size_t POOL_sizeof(POOL_ctx* ctx) {
|
||||
if (ctx==NULL) return 0; /* supports sizeof NULL */
|
||||
assert(ctx == &g_ctx);
|
||||
assert(ctx == &g_poolCtx);
|
||||
return sizeof(*ctx);
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
|
||||
#include "zstd.h"
|
||||
|
||||
|
@ -78,11 +78,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
|
||||
|
||||
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
|
||||
|
||||
#include <stdlib.h>
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "zstd_deps.h"
|
||||
|
||||
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
|
||||
{
|
||||
*mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
|
||||
*mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
|
||||
if (!*mutex)
|
||||
return 1;
|
||||
return pthread_mutex_init(*mutex, attr);
|
||||
@ -94,14 +95,14 @@ int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
|
||||
return 0;
|
||||
{
|
||||
int const ret = pthread_mutex_destroy(*mutex);
|
||||
free(*mutex);
|
||||
ZSTD_free(*mutex);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
|
||||
{
|
||||
*cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
|
||||
*cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t));
|
||||
if (!*cond)
|
||||
return 1;
|
||||
return pthread_cond_init(*cond, attr);
|
||||
@ -113,7 +114,7 @@ int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
|
||||
return 0;
|
||||
{
|
||||
int const ret = pthread_cond_destroy(*cond);
|
||||
free(*cond);
|
||||
ZSTD_free(*cond);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
@ -77,14 +77,12 @@
|
||||
* Includes & Memory related functions
|
||||
***************************************/
|
||||
/* Modify the local functions below should you wish to use some other memory routines */
|
||||
/* for malloc(), free() */
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h> /* size_t */
|
||||
static void* XXH_malloc(size_t s) { return malloc(s); }
|
||||
static void XXH_free (void* p) { free(p); }
|
||||
/* for memcpy() */
|
||||
#include <string.h>
|
||||
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
|
||||
/* for ZSTD_malloc(), ZSTD_free() */
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "zstd_deps.h" /* size_t, ZSTD_malloc, ZSTD_free, ZSTD_memcpy */
|
||||
static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); }
|
||||
static void XXH_free (void* p) { ZSTD_free(p); }
|
||||
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); }
|
||||
|
||||
#ifndef XXH_STATIC_LINKING_ONLY
|
||||
# define XXH_STATIC_LINKING_ONLY
|
||||
@ -95,49 +93,13 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
|
||||
/* *************************************
|
||||
* Compiler Specific Options
|
||||
***************************************/
|
||||
#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
|
||||
# define INLINE_KEYWORD inline
|
||||
#else
|
||||
# define INLINE_KEYWORD
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__ICCARM__)
|
||||
# define FORCE_INLINE_ATTR __attribute__((always_inline))
|
||||
#elif defined(_MSC_VER)
|
||||
# define FORCE_INLINE_ATTR __forceinline
|
||||
#else
|
||||
# define FORCE_INLINE_ATTR
|
||||
#endif
|
||||
|
||||
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
|
||||
#endif
|
||||
#include "compiler.h"
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Basic Types
|
||||
***************************************/
|
||||
#ifndef MEM_MODULE
|
||||
# define MEM_MODULE
|
||||
# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
||||
# include <stdint.h>
|
||||
typedef uint8_t BYTE;
|
||||
typedef uint16_t U16;
|
||||
typedef uint32_t U32;
|
||||
typedef int32_t S32;
|
||||
typedef uint64_t U64;
|
||||
# else
|
||||
typedef unsigned char BYTE;
|
||||
typedef unsigned short U16;
|
||||
typedef unsigned int U32;
|
||||
typedef signed int S32;
|
||||
typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "mem.h" /* BYTE, U32, U64, size_t */
|
||||
|
||||
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
|
||||
|
||||
@ -163,14 +125,14 @@ static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
|
||||
static U32 XXH_read32(const void* memPtr)
|
||||
{
|
||||
U32 val;
|
||||
memcpy(&val, memPtr, sizeof(val));
|
||||
ZSTD_memcpy(&val, memPtr, sizeof(val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static U64 XXH_read64(const void* memPtr)
|
||||
{
|
||||
U64 val;
|
||||
memcpy(&val, memPtr, sizeof(val));
|
||||
ZSTD_memcpy(&val, memPtr, sizeof(val));
|
||||
return val;
|
||||
}
|
||||
|
||||
@ -307,12 +269,12 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
|
||||
****************************/
|
||||
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
|
||||
{
|
||||
memcpy(dstState, srcState, sizeof(*dstState));
|
||||
ZSTD_memcpy(dstState, srcState, sizeof(*dstState));
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
|
||||
{
|
||||
memcpy(dstState, srcState, sizeof(*dstState));
|
||||
ZSTD_memcpy(dstState, srcState, sizeof(*dstState));
|
||||
}
|
||||
|
||||
|
||||
@ -554,12 +516,12 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
|
||||
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
|
||||
{
|
||||
XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
|
||||
memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
|
||||
ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
|
||||
state.v1 = seed + PRIME32_1 + PRIME32_2;
|
||||
state.v2 = seed + PRIME32_2;
|
||||
state.v3 = seed + 0;
|
||||
state.v4 = seed - PRIME32_1;
|
||||
memcpy(statePtr, &state, sizeof(state));
|
||||
ZSTD_memcpy(statePtr, &state, sizeof(state));
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
@ -567,12 +529,12 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int s
|
||||
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
|
||||
{
|
||||
XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
|
||||
memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
|
||||
ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
|
||||
state.v1 = seed + PRIME64_1 + PRIME64_2;
|
||||
state.v2 = seed + PRIME64_2;
|
||||
state.v3 = seed + 0;
|
||||
state.v4 = seed - PRIME64_1;
|
||||
memcpy(statePtr, &state, sizeof(state));
|
||||
ZSTD_memcpy(statePtr, &state, sizeof(state));
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
@ -843,14 +805,14 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
|
||||
{
|
||||
XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
|
||||
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
|
||||
memcpy(dst, &hash, sizeof(*dst));
|
||||
ZSTD_memcpy(dst, &hash, sizeof(*dst));
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
|
||||
{
|
||||
XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
|
||||
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
|
||||
memcpy(dst, &hash, sizeof(*dst));
|
||||
ZSTD_memcpy(dst, &hash, sizeof(*dst));
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
|
||||
|
@ -55,7 +55,7 @@ extern "C" {
|
||||
/* ****************************
|
||||
* Definitions
|
||||
******************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h"
|
||||
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
|
||||
|
||||
|
||||
|
395
zstd/zstd.h
395
zstd/zstd.h
@ -72,16 +72,21 @@ extern "C" {
|
||||
/*------ Version ------*/
|
||||
#define ZSTD_VERSION_MAJOR 1
|
||||
#define ZSTD_VERSION_MINOR 4
|
||||
#define ZSTD_VERSION_RELEASE 5
|
||||
|
||||
#define ZSTD_VERSION_RELEASE 7
|
||||
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
|
||||
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
|
||||
|
||||
/*! ZSTD_versionNumber() :
|
||||
* Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */
|
||||
ZSTDLIB_API unsigned ZSTD_versionNumber(void);
|
||||
|
||||
#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
|
||||
#define ZSTD_QUOTE(str) #str
|
||||
#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
|
||||
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
|
||||
ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
|
||||
|
||||
/*! ZSTD_versionString() :
|
||||
* Return runtime library version, like "1.4.5". Requires v1.3.0+. */
|
||||
ZSTDLIB_API const char* ZSTD_versionString(void);
|
||||
|
||||
/* *************************************
|
||||
* Default constant
|
||||
@ -334,7 +339,9 @@ typedef enum {
|
||||
* for large inputs, by finding large matches at long distance.
|
||||
* It increases memory usage and window size.
|
||||
* Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
|
||||
* except when expressly set to a different value. */
|
||||
* except when expressly set to a different value.
|
||||
* Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and
|
||||
* compression strategy >= ZSTD_btopt (== compression level 16+) */
|
||||
ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2.
|
||||
* Larger values increase memory usage and compression ratio,
|
||||
* but decrease compression speed.
|
||||
@ -365,16 +372,20 @@ typedef enum {
|
||||
ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
|
||||
|
||||
/* multi-threading parameters */
|
||||
/* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
|
||||
* They return an error otherwise. */
|
||||
/* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
|
||||
* Otherwise, trying to set any other value than default (0) will be a no-op and return an error.
|
||||
* In a situation where it's unknown if the linked library supports multi-threading or not,
|
||||
* setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property.
|
||||
*/
|
||||
ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel.
|
||||
* When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() :
|
||||
* When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() :
|
||||
* ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
|
||||
* while compression work is performed in parallel, within worker threads.
|
||||
* while compression is performed in parallel, within worker thread(s).
|
||||
* (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
|
||||
* in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
|
||||
* More workers improve speed, but also increase memory usage.
|
||||
* Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
|
||||
* Default value is `0`, aka "single-threaded mode" : no worker is spawned,
|
||||
* compression is performed inside Caller's thread, and all invocations are blocking */
|
||||
ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
|
||||
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
|
||||
* 0 means default, which is dynamically determined based on compression parameters.
|
||||
@ -403,6 +414,11 @@ typedef enum {
|
||||
* ZSTD_c_literalCompressionMode
|
||||
* ZSTD_c_targetCBlockSize
|
||||
* ZSTD_c_srcSizeHint
|
||||
* ZSTD_c_enableDedicatedDictSearch
|
||||
* ZSTD_c_stableInBuffer
|
||||
* ZSTD_c_stableOutBuffer
|
||||
* ZSTD_c_blockDelimiters
|
||||
* ZSTD_c_validateSequences
|
||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||
* note : never ever use experimentalParam? names directly;
|
||||
* also, the enums values themselves are unstable and can still change.
|
||||
@ -413,7 +429,12 @@ typedef enum {
|
||||
ZSTD_c_experimentalParam4=1001,
|
||||
ZSTD_c_experimentalParam5=1002,
|
||||
ZSTD_c_experimentalParam6=1003,
|
||||
ZSTD_c_experimentalParam7=1004
|
||||
ZSTD_c_experimentalParam7=1004,
|
||||
ZSTD_c_experimentalParam8=1005,
|
||||
ZSTD_c_experimentalParam9=1006,
|
||||
ZSTD_c_experimentalParam10=1007,
|
||||
ZSTD_c_experimentalParam11=1008,
|
||||
ZSTD_c_experimentalParam12=1009
|
||||
} ZSTD_cParameter;
|
||||
|
||||
typedef struct {
|
||||
@ -524,11 +545,13 @@ typedef enum {
|
||||
* At the time of this writing, they include :
|
||||
* ZSTD_d_format
|
||||
* ZSTD_d_stableOutBuffer
|
||||
* ZSTD_d_forceIgnoreChecksum
|
||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||
* note : never ever use experimentalParam? names directly
|
||||
*/
|
||||
ZSTD_d_experimentalParam1=1000,
|
||||
ZSTD_d_experimentalParam2=1001
|
||||
ZSTD_d_experimentalParam2=1001,
|
||||
ZSTD_d_experimentalParam3=1002
|
||||
|
||||
} ZSTD_dParameter;
|
||||
|
||||
@ -664,8 +687,9 @@ typedef enum {
|
||||
* - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
|
||||
* - output->pos must be <= dstCapacity, input->pos must be <= srcSize
|
||||
* - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
|
||||
* - endOp must be a valid directive
|
||||
* - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
|
||||
* - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available,
|
||||
* - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available,
|
||||
* and then immediately returns, just indicating that there is some data remaining to be flushed.
|
||||
* The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte.
|
||||
* - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
|
||||
@ -1100,21 +1124,40 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
||||
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
|
||||
|
||||
typedef struct {
|
||||
unsigned int matchPos; /* Match pos in dst */
|
||||
/* If seqDef.offset > 3, then this is seqDef.offset - 3
|
||||
* If seqDef.offset < 3, then this is the corresponding repeat offset
|
||||
* But if seqDef.offset < 3 and litLength == 0, this is the
|
||||
* repeat offset before the corresponding repeat offset
|
||||
* And if seqDef.offset == 3 and litLength == 0, this is the
|
||||
* most recent repeat offset - 1
|
||||
*/
|
||||
unsigned int offset;
|
||||
unsigned int litLength; /* Literal length */
|
||||
unsigned int matchLength; /* Match length */
|
||||
/* 0 when seq not rep and seqDef.offset otherwise
|
||||
* when litLength == 0 this will be <= 4, otherwise <= 3 like normal
|
||||
*/
|
||||
unsigned int rep;
|
||||
unsigned int offset; /* The offset of the match. (NOT the same as the offset code)
|
||||
* If offset == 0 and matchLength == 0, this sequence represents the last
|
||||
* literals in the block of litLength size.
|
||||
*/
|
||||
|
||||
unsigned int litLength; /* Literal length of the sequence. */
|
||||
unsigned int matchLength; /* Match length of the sequence. */
|
||||
|
||||
/* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
|
||||
* In this case, we will treat the sequence as a marker for a block boundary.
|
||||
*/
|
||||
|
||||
unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'.
|
||||
* Ranges from [0, 3].
|
||||
*
|
||||
* Repeat offsets are essentially previous offsets from previous sequences sorted in
|
||||
* recency order. For more detail, see doc/zstd_compression_format.md
|
||||
*
|
||||
* If rep == 0, then 'offset' does not contain a repeat offset.
|
||||
* If rep > 0:
|
||||
* If litLength != 0:
|
||||
* rep == 1 --> offset == repeat_offset_1
|
||||
* rep == 2 --> offset == repeat_offset_2
|
||||
* rep == 3 --> offset == repeat_offset_3
|
||||
* If litLength == 0:
|
||||
* rep == 1 --> offset == repeat_offset_2
|
||||
* rep == 2 --> offset == repeat_offset_3
|
||||
* rep == 3 --> offset == repeat_offset_1 - 1
|
||||
*
|
||||
* Note: This field is optional. ZSTD_generateSequences() will calculate the value of
|
||||
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
|
||||
* sequence provider's perspective. For example, ZSTD_compressSequences() does not
|
||||
* use this 'rep' field at all (as of now).
|
||||
*/
|
||||
} ZSTD_Sequence;
|
||||
|
||||
typedef struct {
|
||||
@ -1156,6 +1199,12 @@ typedef enum {
|
||||
* Decoder cannot recognise automatically this format, requiring this instruction. */
|
||||
} ZSTD_format_e;
|
||||
|
||||
typedef enum {
|
||||
/* Note: this enum controls ZSTD_d_forceIgnoreChecksum */
|
||||
ZSTD_d_validateChecksum = 0,
|
||||
ZSTD_d_ignoreChecksum = 1
|
||||
} ZSTD_forceIgnoreChecksum_e;
|
||||
|
||||
typedef enum {
|
||||
/* Note: this enum and the behavior it controls are effectively internal
|
||||
* implementation details of the compressor. They are expected to continue
|
||||
@ -1253,14 +1302,74 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
|
||||
* or an error code (if srcSize is too small) */
|
||||
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
|
||||
|
||||
/*! ZSTD_getSequences() :
|
||||
* Extract sequences from the sequence store
|
||||
typedef enum {
|
||||
ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
|
||||
ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */
|
||||
} ZSTD_sequenceFormat_e;
|
||||
|
||||
/*! ZSTD_generateSequences() :
|
||||
* Generate sequences using ZSTD_compress2, given a source buffer.
|
||||
*
|
||||
* Each block will end with a dummy sequence
|
||||
* with offset == 0, matchLength == 0, and litLength == length of last literals.
|
||||
* litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
|
||||
* simply acts as a block delimiter.
|
||||
*
|
||||
* zc can be used to insert custom compression params.
|
||||
* This function invokes ZSTD_compress2
|
||||
* @return : number of sequences extracted
|
||||
*
|
||||
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
|
||||
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
|
||||
* @return : number of sequences generated
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||
size_t outSeqsSize, const void* src, size_t srcSize);
|
||||
|
||||
ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||
size_t outSeqsSize, const void* src, size_t srcSize);
|
||||
|
||||
/*! ZSTD_mergeBlockDelimiters() :
|
||||
* Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
|
||||
* by merging them into into the literals of the next sequence.
|
||||
*
|
||||
* As such, the final generated result has no explicit representation of block boundaries,
|
||||
* and the final last literals segment is not represented in the sequences.
|
||||
*
|
||||
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
|
||||
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
|
||||
* @return : number of sequences left after merging
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
|
||||
|
||||
/*! ZSTD_compressSequences() :
|
||||
* Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
|
||||
* If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
|
||||
* The entire source is compressed into a single frame.
|
||||
*
|
||||
* The compression behavior changes based on cctx params. In particular:
|
||||
* If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
|
||||
* no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
|
||||
* the block size derived from the cctx, and sequences may be split. This is the default setting.
|
||||
*
|
||||
* If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
|
||||
* block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
|
||||
*
|
||||
* If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
|
||||
* behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
|
||||
* specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
|
||||
*
|
||||
* In addition to the two adjustable experimental params, there are other important cctx params.
|
||||
* - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
|
||||
* - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
|
||||
* - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
|
||||
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
|
||||
*
|
||||
* Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
|
||||
* Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
|
||||
* and cannot emit an RLE block that disagrees with the repcode history
|
||||
* @return : final compressed size or a ZSTD error.
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
|
||||
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
|
||||
/***************************************
|
||||
@ -1372,7 +1481,11 @@ ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
|
||||
typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
|
||||
typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
|
||||
typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
|
||||
static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
|
||||
static
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__unused__))
|
||||
#endif
|
||||
ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
|
||||
|
||||
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
|
||||
ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
|
||||
@ -1385,13 +1498,36 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS
|
||||
ZSTD_compressionParameters cParams,
|
||||
ZSTD_customMem customMem);
|
||||
|
||||
/* ! Thread pool :
|
||||
* These prototypes make it possible to share a thread pool among multiple compression contexts.
|
||||
* This can limit resources for applications with multiple threads where each one uses
|
||||
* a threaded compression mode (via ZSTD_c_nbWorkers parameter).
|
||||
* ZSTD_createThreadPool creates a new thread pool with a given number of threads.
|
||||
* Note that the lifetime of such pool must exist while being used.
|
||||
* ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
|
||||
* to use an internal thread pool).
|
||||
* ZSTD_freeThreadPool frees a thread pool.
|
||||
*/
|
||||
typedef struct POOL_ctx_s ZSTD_threadPool;
|
||||
ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
|
||||
ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);
|
||||
ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
|
||||
|
||||
/*
|
||||
* This API is temporary and is expected to change or disappear in the future!
|
||||
*/
|
||||
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
|
||||
const void* dict, size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_customMem customMem);
|
||||
|
||||
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType,
|
||||
ZSTD_customMem customMem);
|
||||
|
||||
|
||||
|
||||
/***************************************
|
||||
* Advanced compression functions
|
||||
***************************************/
|
||||
@ -1404,6 +1540,12 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictS
|
||||
* note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
|
||||
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
|
||||
|
||||
/*! ZSTD_getDictID_fromCDict() :
|
||||
* Provides the dictID of the dictionary loaded into `cdict`.
|
||||
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
|
||||
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
|
||||
ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
|
||||
|
||||
/*! ZSTD_getCParams() :
|
||||
* @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
|
||||
* `estimatedSrcSize` value is optional, select 0 if not known */
|
||||
@ -1518,6 +1660,143 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
|
||||
* but compression ratio may regress significantly if guess considerably underestimates */
|
||||
#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
|
||||
|
||||
/* Controls whether the new and experimental "dedicated dictionary search
|
||||
* structure" can be used. This feature is still rough around the edges, be
|
||||
* prepared for surprising behavior!
|
||||
*
|
||||
* How to use it:
|
||||
*
|
||||
* When using a CDict, whether to use this feature or not is controlled at
|
||||
* CDict creation, and it must be set in a CCtxParams set passed into that
|
||||
* construction (via ZSTD_createCDict_advanced2()). A compression will then
|
||||
* use the feature or not based on how the CDict was constructed; the value of
|
||||
* this param, set in the CCtx, will have no effect.
|
||||
*
|
||||
* However, when a dictionary buffer is passed into a CCtx, such as via
|
||||
* ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
|
||||
* whether the CDict that is created internally can use the feature or not.
|
||||
*
|
||||
* What it does:
|
||||
*
|
||||
* Normally, the internal data structures of the CDict are analogous to what
|
||||
* would be stored in a CCtx after compressing the contents of a dictionary.
|
||||
* To an approximation, a compression using a dictionary can then use those
|
||||
* data structures to simply continue what is effectively a streaming
|
||||
* compression where the simulated compression of the dictionary left off.
|
||||
* Which is to say, the search structures in the CDict are normally the same
|
||||
* format as in the CCtx.
|
||||
*
|
||||
* It is possible to do better, since the CDict is not like a CCtx: the search
|
||||
* structures are written once during CDict creation, and then are only read
|
||||
* after that, while the search structures in the CCtx are both read and
|
||||
* written as the compression goes along. This means we can choose a search
|
||||
* structure for the dictionary that is read-optimized.
|
||||
*
|
||||
* This feature enables the use of that different structure.
|
||||
*
|
||||
* Note that some of the members of the ZSTD_compressionParameters struct have
|
||||
* different semantics and constraints in the dedicated search structure. It is
|
||||
* highly recommended that you simply set a compression level in the CCtxParams
|
||||
* you pass into the CDict creation call, and avoid messing with the cParams
|
||||
* directly.
|
||||
*
|
||||
* Effects:
|
||||
*
|
||||
* This will only have any effect when the selected ZSTD_strategy
|
||||
* implementation supports this feature. Currently, that's limited to
|
||||
* ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
|
||||
*
|
||||
* Note that this means that the CDict tables can no longer be copied into the
|
||||
* CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
|
||||
* useable. The dictionary can only be attached or reloaded.
|
||||
*
|
||||
* In general, you should expect compression to be faster--sometimes very much
|
||||
* so--and CDict creation to be slightly slower. Eventually, we will probably
|
||||
* make this mode the default.
|
||||
*/
|
||||
#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
|
||||
|
||||
/* ZSTD_c_stableInBuffer
|
||||
* Experimental parameter.
|
||||
* Default is 0 == disabled. Set to 1 to enable.
|
||||
*
|
||||
* Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
|
||||
* between calls, except for the modifications that zstd makes to pos (the
|
||||
* caller must not modify pos). This is checked by the compressor, and
|
||||
* compression will fail if it ever changes. This means the only flush
|
||||
* mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
|
||||
* is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
|
||||
* MUST not be modified during compression or you will get data corruption.
|
||||
*
|
||||
* When this flag is enabled zstd won't allocate an input window buffer,
|
||||
* because the user guarantees it can reference the ZSTD_inBuffer until
|
||||
* the frame is complete. But, it will still allocate an output buffer
|
||||
* large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
|
||||
* avoid the memcpy() from the input buffer to the input window buffer.
|
||||
*
|
||||
* NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
|
||||
* That means this flag cannot be used with ZSTD_compressStream().
|
||||
*
|
||||
* NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
|
||||
* this flag is ALWAYS memory safe, and will never access out-of-bounds
|
||||
* memory. However, compression WILL fail if you violate the preconditions.
|
||||
*
|
||||
* WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST
|
||||
* not be modified during compression or you will get data corruption. This
|
||||
* is because zstd needs to reference data in the ZSTD_inBuffer to find
|
||||
* matches. Normally zstd maintains its own window buffer for this purpose,
|
||||
* but passing this flag tells zstd to use the user provided buffer.
|
||||
*/
|
||||
#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
|
||||
|
||||
/* ZSTD_c_stableOutBuffer
|
||||
* Experimental parameter.
|
||||
* Default is 0 == disabled. Set to 1 to enable.
|
||||
*
|
||||
* Tells he compressor that the ZSTD_outBuffer will not be resized between
|
||||
* calls. Specifically: (out.size - out.pos) will never grow. This gives the
|
||||
* compressor the freedom to say: If the compressed data doesn't fit in the
|
||||
* output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
|
||||
* always decompress directly into the output buffer, instead of decompressing
|
||||
* into an internal buffer and copying to the output buffer.
|
||||
*
|
||||
* When this flag is enabled zstd won't allocate an output buffer, because
|
||||
* it can write directly to the ZSTD_outBuffer. It will still allocate the
|
||||
* input window buffer (see ZSTD_c_stableInBuffer).
|
||||
*
|
||||
* Zstd will check that (out.size - out.pos) never grows and return an error
|
||||
* if it does. While not strictly necessary, this should prevent surprises.
|
||||
*/
|
||||
#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
|
||||
|
||||
/* ZSTD_c_blockDelimiters
|
||||
* Default is 0 == ZSTD_sf_noBlockDelimiters.
|
||||
*
|
||||
* For use with sequence compression API: ZSTD_compressSequences().
|
||||
*
|
||||
* Designates whether or not the given array of ZSTD_Sequence contains block delimiters
|
||||
* and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
|
||||
* See the definition of ZSTD_Sequence for more specifics.
|
||||
*/
|
||||
#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
|
||||
|
||||
/* ZSTD_c_validateSequences
|
||||
* Default is 0 == disabled. Set to 1 to enable sequence validation.
|
||||
*
|
||||
* For use with sequence compression API: ZSTD_compressSequences().
|
||||
* Designates whether or not we validate sequences provided to ZSTD_compressSequences()
|
||||
* during function execution.
|
||||
*
|
||||
* Without validation, providing a sequence that does not conform to the zstd spec will cause
|
||||
* undefined behavior, and may produce a corrupted block.
|
||||
*
|
||||
* With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for
|
||||
* specifics regarding offset/matchlength requirements) then the function will bail out and
|
||||
* return an error.
|
||||
*
|
||||
*/
|
||||
#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
|
||||
|
||||
/*! ZSTD_CCtx_getParameter() :
|
||||
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
|
||||
* and store it into int* value.
|
||||
@ -1566,8 +1845,10 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, Z
|
||||
/*! ZSTD_CCtxParams_setParameter() :
|
||||
* Similar to ZSTD_CCtx_setParameter.
|
||||
* Set one compression parameter, selected by enum ZSTD_cParameter.
|
||||
* Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
|
||||
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
|
||||
* Parameters must be applied to a ZSTD_CCtx using
|
||||
* ZSTD_CCtx_setParametersUsingCCtxParams().
|
||||
* @result : a code representing success or failure (which can be tested with
|
||||
* ZSTD_isError()).
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
|
||||
|
||||
@ -1647,6 +1928,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* pre
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
|
||||
|
||||
/*! ZSTD_DCtx_getParameter() :
|
||||
* Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
|
||||
* and store it into int* value.
|
||||
* @return : 0, or an error code (which can be tested with ZSTD_isError()).
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
|
||||
|
||||
/* ZSTD_d_format
|
||||
* experimental parameter,
|
||||
* allowing selection between ZSTD_format_e input compression formats
|
||||
@ -1684,6 +1972,17 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS
|
||||
*/
|
||||
#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
|
||||
|
||||
/* ZSTD_d_forceIgnoreChecksum
|
||||
* Experimental parameter.
|
||||
* Default is 0 == disabled. Set to 1 to enable
|
||||
*
|
||||
* Tells the decompressor to skip checksum validation during decompression, regardless
|
||||
* of whether checksumming was specified during compression. This offers some
|
||||
* slight performance benefits, and may be useful for debugging.
|
||||
* Param has values of type ZSTD_forceIgnoreChecksum_e
|
||||
*/
|
||||
#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
|
||||
|
||||
/*! ZSTD_DCtx_setFormat() :
|
||||
* Instruct the decoder context about what kind of data to decode next.
|
||||
* This instruction is mandatory to decode data without a fully-formed header,
|
||||
@ -1711,7 +2010,8 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
|
||||
********************************************************************/
|
||||
|
||||
/*===== Advanced Streaming compression functions =====*/
|
||||
/**! ZSTD_initCStream_srcSize() :
|
||||
|
||||
/*! ZSTD_initCStream_srcSize() :
|
||||
* This function is deprecated, and equivalent to:
|
||||
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
|
||||
* ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
|
||||
@ -1728,7 +2028,7 @@ ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
|
||||
int compressionLevel,
|
||||
unsigned long long pledgedSrcSize);
|
||||
|
||||
/**! ZSTD_initCStream_usingDict() :
|
||||
/*! ZSTD_initCStream_usingDict() :
|
||||
* This function is deprecated, and is equivalent to:
|
||||
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
|
||||
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
|
||||
@ -1745,7 +2045,7 @@ ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
|
||||
const void* dict, size_t dictSize,
|
||||
int compressionLevel);
|
||||
|
||||
/**! ZSTD_initCStream_advanced() :
|
||||
/*! ZSTD_initCStream_advanced() :
|
||||
* This function is deprecated, and is approximately equivalent to:
|
||||
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
|
||||
* // Pseudocode: Set each zstd parameter and leave the rest as-is.
|
||||
@ -1766,7 +2066,7 @@ ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
|
||||
ZSTD_parameters params,
|
||||
unsigned long long pledgedSrcSize);
|
||||
|
||||
/**! ZSTD_initCStream_usingCDict() :
|
||||
/*! ZSTD_initCStream_usingCDict() :
|
||||
* This function is deprecated, and equivalent to:
|
||||
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
|
||||
* ZSTD_CCtx_refCDict(zcs, cdict);
|
||||
@ -1776,7 +2076,7 @@ ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
|
||||
|
||||
/**! ZSTD_initCStream_usingCDict_advanced() :
|
||||
/*! ZSTD_initCStream_usingCDict_advanced() :
|
||||
* This function is DEPRECATED, and is approximately equivalent to:
|
||||
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
|
||||
* // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
|
||||
@ -1849,7 +2149,8 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
|
||||
|
||||
|
||||
/*===== Advanced Streaming decompression functions =====*/
|
||||
/**
|
||||
|
||||
/*!
|
||||
* This function is deprecated, and is equivalent to:
|
||||
*
|
||||
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
|
||||
@ -1860,7 +2161,7 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
|
||||
|
||||
/**
|
||||
/*!
|
||||
* This function is deprecated, and is equivalent to:
|
||||
*
|
||||
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
|
||||
@ -1871,7 +2172,7 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dic
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
|
||||
|
||||
/**
|
||||
/*!
|
||||
* This function is deprecated, and is equivalent to:
|
||||
*
|
||||
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
|
||||
@ -1933,7 +2234,7 @@ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstC
|
||||
ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
||||
|
||||
|
||||
/*-
|
||||
/**
|
||||
Buffer-less streaming decompression (synchronous mode)
|
||||
|
||||
A ZSTD_DCtx object is required to track streaming operations.
|
||||
|
@ -13,8 +13,8 @@
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include <stdlib.h> /* malloc, calloc, free */
|
||||
#include <string.h> /* memset */
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
|
||||
#include "error_private.h"
|
||||
#include "zstd_internal.h"
|
||||
|
||||
@ -53,31 +53,31 @@ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString
|
||||
/*=**************************************************************
|
||||
* Custom allocator
|
||||
****************************************************************/
|
||||
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
|
||||
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
|
||||
{
|
||||
if (customMem.customAlloc)
|
||||
return customMem.customAlloc(customMem.opaque, size);
|
||||
return malloc(size);
|
||||
return ZSTD_malloc(size);
|
||||
}
|
||||
|
||||
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
|
||||
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
|
||||
{
|
||||
if (customMem.customAlloc) {
|
||||
/* calloc implemented as malloc+memset;
|
||||
* not as efficient as calloc, but next best guess for custom malloc */
|
||||
void* const ptr = customMem.customAlloc(customMem.opaque, size);
|
||||
memset(ptr, 0, size);
|
||||
ZSTD_memset(ptr, 0, size);
|
||||
return ptr;
|
||||
}
|
||||
return calloc(1, size);
|
||||
return ZSTD_calloc(1, size);
|
||||
}
|
||||
|
||||
void ZSTD_free(void* ptr, ZSTD_customMem customMem)
|
||||
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
|
||||
{
|
||||
if (ptr!=NULL) {
|
||||
if (customMem.customFree)
|
||||
customMem.customFree(customMem.opaque, ptr);
|
||||
else
|
||||
free(ptr);
|
||||
ZSTD_free(ptr);
|
||||
}
|
||||
}
|
||||
|
1754
zstd/zstd_compress.c
1754
zstd/zstd_compress.c
File diff suppressed because it is too large
Load Diff
@ -28,7 +28,6 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
@ -64,7 +63,7 @@ typedef struct {
|
||||
} ZSTD_localDict;
|
||||
|
||||
typedef struct {
|
||||
U32 CTable[HUF_CTABLE_SIZE_U32(255)];
|
||||
HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
|
||||
HUF_repeat repeatMode;
|
||||
} ZSTD_hufCTables_t;
|
||||
|
||||
@ -83,10 +82,27 @@ typedef struct {
|
||||
} ZSTD_entropyCTables_t;
|
||||
|
||||
typedef struct {
|
||||
U32 off;
|
||||
U32 len;
|
||||
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
|
||||
U32 len; /* Raw length of match */
|
||||
} ZSTD_match_t;
|
||||
|
||||
typedef struct {
|
||||
U32 offset; /* Offset of sequence */
|
||||
U32 litLength; /* Length of literals prior to match */
|
||||
U32 matchLength; /* Raw length of match */
|
||||
} rawSeq;
|
||||
|
||||
typedef struct {
|
||||
rawSeq* seq; /* The start of the sequences */
|
||||
size_t pos; /* The index in seq where reading stopped. pos <= size. */
|
||||
size_t posInSequence; /* The position within the sequence at seq[pos] where reading
|
||||
stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
|
||||
size_t size; /* The number of sequences. <= capacity. */
|
||||
size_t capacity; /* The capacity starting from `seq` pointer */
|
||||
} rawSeqStore_t;
|
||||
|
||||
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
|
||||
|
||||
typedef struct {
|
||||
int price;
|
||||
U32 off;
|
||||
@ -147,9 +163,13 @@ struct ZSTD_matchState_t {
|
||||
U32* hashTable;
|
||||
U32* hashTable3;
|
||||
U32* chainTable;
|
||||
int dedicatedDictSearch; /* Indicates whether this matchState is using the
|
||||
* dedicated dictionary search structure.
|
||||
*/
|
||||
optState_t opt; /* optimal parser state */
|
||||
const ZSTD_matchState_t* dictMatchState;
|
||||
ZSTD_compressionParameters cParams;
|
||||
const rawSeqStore_t* ldmSeqStore;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@ -181,19 +201,6 @@ typedef struct {
|
||||
U32 windowLog; /* Window log for the LDM */
|
||||
} ldmParams_t;
|
||||
|
||||
typedef struct {
|
||||
U32 offset;
|
||||
U32 litLength;
|
||||
U32 matchLength;
|
||||
} rawSeq;
|
||||
|
||||
typedef struct {
|
||||
rawSeq* seq; /* The start of the sequences */
|
||||
size_t pos; /* The position where reading stopped. <= size. */
|
||||
size_t size; /* The number of sequences. <= capacity. */
|
||||
size_t capacity; /* The capacity starting from `seq` pointer */
|
||||
} rawSeqStore_t;
|
||||
|
||||
typedef struct {
|
||||
int collectSequences;
|
||||
ZSTD_Sequence* seqStart;
|
||||
@ -228,10 +235,34 @@ struct ZSTD_CCtx_params_s {
|
||||
/* Long distance matching parameters */
|
||||
ldmParams_t ldmParams;
|
||||
|
||||
/* Dedicated dict search algorithm trigger */
|
||||
int enableDedicatedDictSearch;
|
||||
|
||||
/* Input/output buffer modes */
|
||||
ZSTD_bufferMode_e inBufferMode;
|
||||
ZSTD_bufferMode_e outBufferMode;
|
||||
|
||||
/* Sequence compression API */
|
||||
ZSTD_sequenceFormat_e blockDelimiters;
|
||||
int validateSequences;
|
||||
|
||||
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
|
||||
ZSTD_customMem customMem;
|
||||
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
|
||||
|
||||
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
|
||||
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
|
||||
|
||||
/**
|
||||
* Indicates whether this compression proceeds directly from user-provided
|
||||
* source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
|
||||
* whether the context needs to buffer the input/output (ZSTDb_buffered).
|
||||
*/
|
||||
typedef enum {
|
||||
ZSTDb_not_buffered,
|
||||
ZSTDb_buffered
|
||||
} ZSTD_buffered_policy_e;
|
||||
|
||||
struct ZSTD_CCtx_s {
|
||||
ZSTD_compressionStage_e stage;
|
||||
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
|
||||
@ -247,6 +278,7 @@ struct ZSTD_CCtx_s {
|
||||
unsigned long long producedCSize;
|
||||
XXH64_state_t xxhState;
|
||||
ZSTD_customMem customMem;
|
||||
ZSTD_threadPool* pool;
|
||||
size_t staticSize;
|
||||
SeqCollector seqCollector;
|
||||
int isFirstBlock;
|
||||
@ -258,7 +290,10 @@ struct ZSTD_CCtx_s {
|
||||
size_t maxNbLdmSequences;
|
||||
rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
|
||||
ZSTD_blockState_t blockState;
|
||||
U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
|
||||
U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
|
||||
|
||||
/* Wether we are streaming or not */
|
||||
ZSTD_buffered_policy_e bufferedPolicy;
|
||||
|
||||
/* streaming */
|
||||
char* inBuff;
|
||||
@ -273,6 +308,10 @@ struct ZSTD_CCtx_s {
|
||||
ZSTD_cStreamStage streamStage;
|
||||
U32 frameEnded;
|
||||
|
||||
/* Stable in/out buffer verification */
|
||||
ZSTD_inBuffer expectedInBuffer;
|
||||
size_t expectedOutBufferSize;
|
||||
|
||||
/* Dictionary */
|
||||
ZSTD_localDict localDict;
|
||||
const ZSTD_CDict* cdict;
|
||||
@ -286,8 +325,32 @@ struct ZSTD_CCtx_s {
|
||||
|
||||
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
|
||||
|
||||
typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
|
||||
typedef enum {
|
||||
ZSTD_noDict = 0,
|
||||
ZSTD_extDict = 1,
|
||||
ZSTD_dictMatchState = 2,
|
||||
ZSTD_dedicatedDictSearch = 3
|
||||
} ZSTD_dictMode_e;
|
||||
|
||||
typedef enum {
|
||||
ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict.
|
||||
* In this mode we use both the srcSize and the dictSize
|
||||
* when selecting and adjusting parameters.
|
||||
*/
|
||||
ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
|
||||
* In this mode we only take the srcSize into account when selecting
|
||||
* and adjusting parameters.
|
||||
*/
|
||||
ZSTD_cpm_createCDict = 2, /* Creating a CDict.
|
||||
* In this mode we take both the source size and the dictionary size
|
||||
* into account when selecting and adjusting the parameters.
|
||||
*/
|
||||
ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
|
||||
* We don't know what these parameters are for. We default to the legacy
|
||||
* behavior of taking both the source size and the dict size into account
|
||||
* when selecting and adjusting parameters.
|
||||
*/
|
||||
} ZSTD_cParamMode_e;
|
||||
|
||||
typedef size_t (*ZSTD_blockCompressor) (
|
||||
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@ -345,7 +408,7 @@ MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 con
|
||||
newReps.rep[1] = rep[0];
|
||||
newReps.rep[0] = currentOffset;
|
||||
} else { /* repCode == 0 */
|
||||
memcpy(&newReps, rep, sizeof(newReps));
|
||||
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
|
||||
}
|
||||
}
|
||||
return newReps;
|
||||
@ -372,7 +435,7 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
|
||||
RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
|
||||
dstSize_tooSmall, "dst buf too small for uncompressed block");
|
||||
MEM_writeLE24(dst, cBlockHeader24);
|
||||
memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
|
||||
ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
|
||||
return ZSTD_blockHeaderSize + srcSize;
|
||||
}
|
||||
|
||||
@ -498,8 +561,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
||||
if (MEM_isLittleEndian()) {
|
||||
if (MEM_64bits()) {
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
unsigned long r = 0;
|
||||
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
|
||||
# if STATIC_BMI2
|
||||
return _tzcnt_u64(val) >> 3;
|
||||
# else
|
||||
unsigned long r = 0;
|
||||
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
return (__builtin_ctzll((U64)val) >> 3);
|
||||
# else
|
||||
@ -530,8 +597,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
||||
} else { /* Big Endian CPU */
|
||||
if (MEM_64bits()) {
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
unsigned long r = 0;
|
||||
return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0;
|
||||
# if STATIC_BMI2
|
||||
return _lzcnt_u64(val) >> 3;
|
||||
# else
|
||||
unsigned long r = 0;
|
||||
return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
return (__builtin_clzll(val) >> 3);
|
||||
# else
|
||||
@ -626,7 +697,8 @@ static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
||||
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
||||
|
||||
MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
|
||||
MEM_STATIC FORCE_INLINE_ATTR
|
||||
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
|
||||
{
|
||||
switch(mls)
|
||||
{
|
||||
@ -742,7 +814,7 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
|
||||
return ZSTD_window_hasExtDict(ms->window) ?
|
||||
ZSTD_extDict :
|
||||
ms->dictMatchState != NULL ?
|
||||
ZSTD_dictMatchState :
|
||||
(ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
|
||||
ZSTD_noDict;
|
||||
}
|
||||
|
||||
@ -754,8 +826,8 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
|
||||
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
|
||||
void const* srcEnd)
|
||||
{
|
||||
U32 const current = (U32)((BYTE const*)srcEnd - window.base);
|
||||
return current > ZSTD_CURRENT_MAX;
|
||||
U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
|
||||
return curr > ZSTD_CURRENT_MAX;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -791,14 +863,14 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
||||
* windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
|
||||
*/
|
||||
U32 const cycleMask = (1U << cycleLog) - 1;
|
||||
U32 const current = (U32)((BYTE const*)src - window->base);
|
||||
U32 const currentCycle0 = current & cycleMask;
|
||||
U32 const curr = (U32)((BYTE const*)src - window->base);
|
||||
U32 const currentCycle0 = curr & cycleMask;
|
||||
/* Exclude zero so that newCurrent - maxDist >= 1. */
|
||||
U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
|
||||
U32 const newCurrent = currentCycle1 + maxDist;
|
||||
U32 const correction = current - newCurrent;
|
||||
U32 const correction = curr - newCurrent;
|
||||
assert((maxDist & cycleMask) == 0);
|
||||
assert(current > newCurrent);
|
||||
assert(curr > newCurrent);
|
||||
/* Loose bound, should be around 1<<29 (see above) */
|
||||
assert(correction > 1<<28);
|
||||
|
||||
@ -919,7 +991,7 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
|
||||
memset(window, 0, sizeof(*window));
|
||||
ZSTD_memset(window, 0, sizeof(*window));
|
||||
window->base = (BYTE const*)"";
|
||||
window->dictBase = (BYTE const*)"";
|
||||
window->dictLimit = 1; /* start from 1, so that 1st position is valid */
|
||||
@ -973,12 +1045,16 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
||||
/**
|
||||
* Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
|
||||
*/
|
||||
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
|
||||
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
|
||||
{
|
||||
U32 const maxDistance = 1U << windowLog;
|
||||
U32 const lowestValid = ms->window.lowLimit;
|
||||
U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
|
||||
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
||||
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
||||
/* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
|
||||
* is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
|
||||
* valid for the entire block. So this check is sufficient to find the lowest valid match index.
|
||||
*/
|
||||
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
||||
return matchLowest;
|
||||
}
|
||||
@ -986,12 +1062,15 @@ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current
|
||||
/**
|
||||
* Returns the lowest allowed match index in the prefix.
|
||||
*/
|
||||
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
|
||||
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
|
||||
{
|
||||
U32 const maxDistance = 1U << windowLog;
|
||||
U32 const lowestValid = ms->window.dictLimit;
|
||||
U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
|
||||
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
||||
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
||||
/* When computing the lowest prefix index we need to take the dictionary into account to handle
|
||||
* the edge case where the dictionary and the source are contiguous in memory.
|
||||
*/
|
||||
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
||||
return matchLowest;
|
||||
}
|
||||
@ -1045,7 +1124,6 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
||||
* assumptions : magic number supposed already checked
|
||||
* and dictSize >= 8 */
|
||||
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
||||
short* offcodeNCount, unsigned* offcodeMaxValue,
|
||||
const void* const dict, size_t dictSize);
|
||||
|
||||
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
|
||||
@ -1061,7 +1139,7 @@ void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
|
||||
* Note: srcSizeHint == 0 means 0!
|
||||
*/
|
||||
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
||||
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
|
||||
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
|
||||
|
||||
/*! ZSTD_initCStream_internal() :
|
||||
* Private use only. Init streaming operation.
|
||||
|
@ -35,7 +35,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
|
||||
assert(0);
|
||||
}
|
||||
|
||||
memcpy(ostart + flSize, src, srcSize);
|
||||
ZSTD_memcpy(ostart + flSize, src, srcSize);
|
||||
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
|
||||
return srcSize + flSize;
|
||||
}
|
||||
@ -86,7 +86,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
disableLiteralCompression, (U32)srcSize);
|
||||
|
||||
/* Prepare nextEntropy assuming reusing the existing table */
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
|
||||
if (disableLiteralCompression)
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
@ -118,11 +118,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
}
|
||||
|
||||
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
if (cLitSize==1) {
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
|
||||
|
@ -50,6 +50,19 @@ static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
|
||||
return maxSymbolValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if we should use ncount=-1 else we should
|
||||
* use ncount=1 for low probability symbols instead.
|
||||
*/
|
||||
static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
|
||||
{
|
||||
/* Heuristic: This should cover most blocks <= 16K and
|
||||
* start to fade out after 16K to about 32K depending on
|
||||
* comprssibility.
|
||||
*/
|
||||
return nbSeq >= 2048;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the cost in bytes of encoding the normalized count header.
|
||||
* Returns an error if any of the helper functions return an error.
|
||||
@ -60,7 +73,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
|
||||
BYTE wksp[FSE_NCOUNTBOUND];
|
||||
S16 norm[MaxSeq + 1];
|
||||
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), "");
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
|
||||
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
|
||||
}
|
||||
|
||||
@ -239,7 +252,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
||||
*op = codeTable[0];
|
||||
return 1;
|
||||
case set_repeat:
|
||||
memcpy(nextCTable, prevCTable, prevCTableSize);
|
||||
ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
|
||||
return 0;
|
||||
case set_basic:
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
|
||||
@ -253,7 +266,8 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
||||
nbSeq_1--;
|
||||
}
|
||||
assert(nbSeq_1 > 1);
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), "");
|
||||
assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog));
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
|
||||
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
|
||||
|
@ -29,7 +29,7 @@
|
||||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
|
||||
typedef struct {
|
||||
symbolEncodingType_e hType;
|
||||
BYTE hufDesBuffer[500]; /* TODO give name to this value */
|
||||
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
|
||||
size_t hufDesSize;
|
||||
} ZSTD_hufCTablesMetadata_t;
|
||||
|
||||
@ -42,7 +42,7 @@ typedef struct {
|
||||
symbolEncodingType_e llType;
|
||||
symbolEncodingType_e ofType;
|
||||
symbolEncodingType_e mlType;
|
||||
BYTE fseTablesBuffer[500]; /* TODO give name to this value */
|
||||
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
|
||||
size_t fseTablesSize;
|
||||
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
|
||||
} ZSTD_fseCTablesMetadata_t;
|
||||
@ -79,7 +79,7 @@ static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSiz
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
|
||||
|
||||
/* Prepare nextEntropy assuming reusing the existing table */
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
|
||||
if (disableLiteralsCompression) {
|
||||
DEBUGLOG(5, "set_basic - disabled");
|
||||
@ -118,7 +118,7 @@ static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSiz
|
||||
}
|
||||
|
||||
/* Build Huffman Tree */
|
||||
memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
||||
ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
||||
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
||||
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
|
||||
maxSymbolValue, huffLog,
|
||||
@ -137,14 +137,14 @@ static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSiz
|
||||
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
|
||||
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
|
||||
DEBUGLOG(5, "set_repeat - smaller");
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
hufMetadata->hType = set_repeat;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (newCSize + hSize >= srcSize) {
|
||||
DEBUGLOG(5, "set_basic - no gains");
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
hufMetadata->hType = set_basic;
|
||||
return 0;
|
||||
}
|
||||
@ -188,7 +188,7 @@ static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
|
||||
|
||||
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
|
||||
memset(workspace, 0, wkspSize);
|
||||
ZSTD_memset(workspace, 0, wkspSize);
|
||||
|
||||
fseMetadata->lastCountSize = 0;
|
||||
/* convert length/distances into codes */
|
||||
@ -348,7 +348,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
||||
assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
|
||||
|
||||
if (writeEntropy && hufMetadata->hType == set_compressed) {
|
||||
memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
|
||||
ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
|
||||
op += hufMetadata->hufDesSize;
|
||||
cLitSize += hufMetadata->hufDesSize;
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
|
||||
@ -474,7 +474,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
|
||||
const U32 MLtype = fseMetadata->mlType;
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
|
||||
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
|
||||
memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
|
||||
ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
|
||||
op += fseMetadata->fseTablesSize;
|
||||
} else {
|
||||
const U32 repeat = set_repeat;
|
||||
@ -603,7 +603,7 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
||||
const BYTE* codeTable, unsigned maxCode,
|
||||
size_t nbSeq, const FSE_CTable* fseCTable,
|
||||
const U32* additionalBits,
|
||||
short const* defaultNorm, U32 defaultNormLog,
|
||||
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
unsigned* const countWksp = (unsigned*)workspace;
|
||||
@ -615,7 +615,11 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
||||
|
||||
HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
if (type == set_basic) {
|
||||
cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
|
||||
/* We selected this encoding type, so it must be valid. */
|
||||
assert(max <= defaultMax);
|
||||
cSymbolTypeSizeEstimateInBits = max <= defaultMax
|
||||
? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
|
||||
: ERROR(GENERIC);
|
||||
} else if (type == set_rle) {
|
||||
cSymbolTypeSizeEstimateInBits = 0;
|
||||
} else if (type == set_compressed || type == set_repeat) {
|
||||
@ -643,15 +647,15 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
||||
size_t cSeqSizeEstimate = 0;
|
||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
|
||||
nbSeq, fseTables->offcodeCTable, NULL,
|
||||
OF_defaultNorm, OF_defaultNormLog,
|
||||
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
||||
workspace, wkspSize);
|
||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
|
||||
nbSeq, fseTables->litlengthCTable, LL_bits,
|
||||
LL_defaultNorm, LL_defaultNormLog,
|
||||
LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
||||
workspace, wkspSize);
|
||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
|
||||
nbSeq, fseTables->matchlengthCTable, ML_bits,
|
||||
ML_defaultNorm, ML_defaultNormLog,
|
||||
ML_defaultNorm, ML_defaultNormLog, MaxML,
|
||||
workspace, wkspSize);
|
||||
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
|
||||
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
||||
@ -790,7 +794,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
||||
} while (!lastSequence);
|
||||
if (writeLitEntropy) {
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
|
||||
memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
|
||||
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
|
||||
}
|
||||
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
|
||||
/* If we haven't written our entropy tables, then we've violated our contract and
|
||||
@ -809,11 +813,11 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
||||
if (sp < send) {
|
||||
seqDef const* seq;
|
||||
repcodes_t rep;
|
||||
memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
||||
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
||||
for (seq = sstart; seq < sp; ++seq) {
|
||||
rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
||||
}
|
||||
memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
||||
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
||||
}
|
||||
}
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
|
||||
@ -831,7 +835,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
||||
&zc->blockState.nextCBlock->entropy,
|
||||
&zc->appliedParams,
|
||||
&entropyMetadata,
|
||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
|
||||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
|
||||
|
||||
return ZSTD_compressSubBlock_multi(&zc->seqStore,
|
||||
zc->blockState.prevCBlock,
|
||||
@ -841,5 +845,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
||||
dst, dstCapacity,
|
||||
src, srcSize,
|
||||
zc->bmi2, lastBlock,
|
||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
|
||||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
|
||||
}
|
||||
|
@ -44,6 +44,16 @@ typedef enum {
|
||||
ZSTD_cwksp_alloc_aligned
|
||||
} ZSTD_cwksp_alloc_phase_e;
|
||||
|
||||
/**
|
||||
* Used to describe whether the workspace is statically allocated (and will not
|
||||
* necessarily ever be freed), or if it's dynamically allocated and we can
|
||||
* expect a well-formed caller to free this.
|
||||
*/
|
||||
typedef enum {
|
||||
ZSTD_cwksp_dynamic_alloc,
|
||||
ZSTD_cwksp_static_alloc
|
||||
} ZSTD_cwksp_static_alloc_e;
|
||||
|
||||
/**
|
||||
* Zstd fits all its internal datastructures into a single continuous buffer,
|
||||
* so that it only needs to perform a single OS allocation (or so that a buffer
|
||||
@ -92,7 +102,7 @@ typedef enum {
|
||||
*
|
||||
* - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
|
||||
* so that literally everything fits in a single buffer. Note: if present,
|
||||
* this must be the first object in the workspace, since ZSTD_free{CCtx,
|
||||
* this must be the first object in the workspace, since ZSTD_customFree{CCtx,
|
||||
* CDict}() rely on a pointer comparison to see whether one or two frees are
|
||||
* required.
|
||||
*
|
||||
@ -137,9 +147,10 @@ typedef struct {
|
||||
void* tableValidEnd;
|
||||
void* allocStart;
|
||||
|
||||
int allocFailed;
|
||||
BYTE allocFailed;
|
||||
int workspaceOversizedDuration;
|
||||
ZSTD_cwksp_alloc_phase_e phase;
|
||||
ZSTD_cwksp_static_alloc_e isStatic;
|
||||
} ZSTD_cwksp;
|
||||
|
||||
/*-*************************************
|
||||
@ -178,7 +189,9 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
|
||||
* else is though.
|
||||
*/
|
||||
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
if (size == 0)
|
||||
return 0;
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
#else
|
||||
return size;
|
||||
@ -228,7 +241,10 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
||||
ZSTD_cwksp_internal_advance_phase(ws, phase);
|
||||
alloc = (BYTE *)ws->allocStart - bytes;
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
if (bytes == 0)
|
||||
return NULL;
|
||||
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* over-reserve space */
|
||||
alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
#endif
|
||||
@ -247,11 +263,13 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
||||
}
|
||||
ws->allocStart = alloc;
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
|
||||
* either size. */
|
||||
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
}
|
||||
#endif
|
||||
|
||||
return alloc;
|
||||
@ -296,8 +314,10 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
|
||||
}
|
||||
ws->tableEnd = end;
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
}
|
||||
#endif
|
||||
|
||||
return alloc;
|
||||
@ -311,7 +331,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
||||
void* alloc = ws->objectEnd;
|
||||
void* end = (BYTE*)alloc + roundedBytes;
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* over-reserve space */
|
||||
end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
#endif
|
||||
@ -332,11 +352,13 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
||||
ws->tableEnd = end;
|
||||
ws->tableValidEnd = end;
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
|
||||
* either size. */
|
||||
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
}
|
||||
#endif
|
||||
|
||||
return alloc;
|
||||
@ -345,7 +367,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
||||
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
|
||||
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
|
||||
|
||||
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
/* To validate that the table re-use logic is sound, and that we don't
|
||||
* access table space that we haven't cleaned, we re-"poison" the table
|
||||
* space every time we mark it dirty. */
|
||||
@ -380,7 +402,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
|
||||
assert(ws->tableValidEnd >= ws->objectEnd);
|
||||
assert(ws->tableValidEnd <= ws->allocStart);
|
||||
if (ws->tableValidEnd < ws->tableEnd) {
|
||||
memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
|
||||
ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
|
||||
}
|
||||
ZSTD_cwksp_mark_tables_clean(ws);
|
||||
}
|
||||
@ -392,8 +414,12 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
|
||||
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
|
||||
DEBUGLOG(4, "cwksp: clearing tables!");
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
{
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* We don't do this when the workspace is statically allocated, because
|
||||
* when that is the case, we have no capability to hook into the end of the
|
||||
* workspace's lifecycle to unpoison the memory.
|
||||
*/
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
|
||||
__asan_poison_memory_region(ws->objectEnd, size);
|
||||
}
|
||||
@ -410,7 +436,7 @@ MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
|
||||
MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
|
||||
DEBUGLOG(4, "cwksp: clearing!");
|
||||
|
||||
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
/* To validate that the context re-use logic is sound, and that we don't
|
||||
* access stuff that this compression hasn't initialized, we re-"poison"
|
||||
* the workspace (or at least the non-static, non-table parts of it)
|
||||
@ -421,8 +447,12 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
{
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* We don't do this when the workspace is statically allocated, because
|
||||
* when that is the case, we have no capability to hook into the end of the
|
||||
* workspace's lifecycle to unpoison the memory.
|
||||
*/
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
|
||||
__asan_poison_memory_region(ws->objectEnd, size);
|
||||
}
|
||||
@ -442,7 +472,7 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
|
||||
* Any existing values in the workspace are ignored (the previously managed
|
||||
* buffer, if present, must be separately freed).
|
||||
*/
|
||||
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
|
||||
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) {
|
||||
DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
|
||||
assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
|
||||
ws->workspace = start;
|
||||
@ -450,24 +480,25 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
|
||||
ws->objectEnd = ws->workspace;
|
||||
ws->tableValidEnd = ws->objectEnd;
|
||||
ws->phase = ZSTD_cwksp_alloc_objects;
|
||||
ws->isStatic = isStatic;
|
||||
ZSTD_cwksp_clear(ws);
|
||||
ws->workspaceOversizedDuration = 0;
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
|
||||
void* workspace = ZSTD_malloc(size, customMem);
|
||||
void* workspace = ZSTD_customMalloc(size, customMem);
|
||||
DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
|
||||
RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
|
||||
ZSTD_cwksp_init(ws, workspace, size);
|
||||
ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
|
||||
void *ptr = ws->workspace;
|
||||
DEBUGLOG(4, "cwksp: freeing workspace");
|
||||
memset(ws, 0, sizeof(ZSTD_cwksp));
|
||||
ZSTD_free(ptr, customMem);
|
||||
ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
|
||||
ZSTD_customFree(ptr, customMem);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -476,13 +507,18 @@ MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
|
||||
*/
|
||||
MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
|
||||
*dst = *src;
|
||||
memset(src, 0, sizeof(ZSTD_cwksp));
|
||||
ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
|
||||
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
|
||||
return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
|
||||
+ (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
|
||||
}
|
||||
|
||||
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
|
||||
return ws->allocFailed;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <string.h> /* memcpy, memmove, memset */
|
||||
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
||||
#include "cpu.h" /* bmi2 */
|
||||
#include "mem.h" /* low level memory routines */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
@ -127,11 +127,11 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
|
||||
ddict->dictContent = dict;
|
||||
if (!dict) dictSize = 0;
|
||||
} else {
|
||||
void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
|
||||
void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
|
||||
ddict->dictBuffer = internalBuffer;
|
||||
ddict->dictContent = internalBuffer;
|
||||
if (!internalBuffer) return ERROR(memory_allocation);
|
||||
memcpy(internalBuffer, dict, dictSize);
|
||||
ZSTD_memcpy(internalBuffer, dict, dictSize);
|
||||
}
|
||||
ddict->dictSize = dictSize;
|
||||
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
|
||||
@ -147,9 +147,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
|
||||
ZSTD_dictContentType_e dictContentType,
|
||||
ZSTD_customMem customMem)
|
||||
{
|
||||
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
||||
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
|
||||
|
||||
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
|
||||
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
|
||||
if (ddict == NULL) return NULL;
|
||||
ddict->cMem = customMem;
|
||||
{ size_t const initResult = ZSTD_initDDict_internal(ddict,
|
||||
@ -198,7 +198,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict(
|
||||
if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
|
||||
if (sBufferSize < neededSpace) return NULL;
|
||||
if (dictLoadMethod == ZSTD_dlm_byCopy) {
|
||||
memcpy(ddict+1, dict, dictSize); /* local copy */
|
||||
ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
|
||||
dict = ddict+1;
|
||||
}
|
||||
if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
|
||||
@ -213,8 +213,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
|
||||
{
|
||||
if (ddict==NULL) return 0; /* support free on NULL */
|
||||
{ ZSTD_customMem const cMem = ddict->cMem;
|
||||
ZSTD_free(ddict->dictBuffer, cMem);
|
||||
ZSTD_free(ddict, cMem);
|
||||
ZSTD_customFree(ddict->dictBuffer, cMem);
|
||||
ZSTD_customFree(ddict, cMem);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,7 @@
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#include "zstd.h" /* ZSTD_DDict, and several public functions */
|
||||
|
||||
|
||||
|
@ -55,7 +55,7 @@
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <string.h> /* memcpy, memmove, memset */
|
||||
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
||||
#include "cpu.h" /* bmi2 */
|
||||
#include "mem.h" /* low level memory routines */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
@ -94,11 +94,18 @@ static size_t ZSTD_startingInputLength(ZSTD_format_e format)
|
||||
return startingInputLength;
|
||||
}
|
||||
|
||||
static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
|
||||
{
|
||||
assert(dctx->streamStage == zdss_init);
|
||||
dctx->format = ZSTD_f_zstd1;
|
||||
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
|
||||
dctx->outBufferMode = ZSTD_bm_buffered;
|
||||
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
|
||||
}
|
||||
|
||||
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
|
||||
{
|
||||
dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */
|
||||
dctx->staticSize = 0;
|
||||
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
|
||||
dctx->ddict = NULL;
|
||||
dctx->ddictLocal = NULL;
|
||||
dctx->dictEnd = NULL;
|
||||
@ -113,7 +120,8 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
|
||||
dctx->noForwardProgress = 0;
|
||||
dctx->oversizedDuration = 0;
|
||||
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
|
||||
dctx->outBufferMode = ZSTD_obm_buffered;
|
||||
ZSTD_DCtx_resetParameters(dctx);
|
||||
dctx->validateChecksum = 1;
|
||||
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
dctx->dictContentEndForFuzzing = NULL;
|
||||
#endif
|
||||
@ -134,9 +142,9 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
|
||||
|
||||
ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
|
||||
{
|
||||
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
||||
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
|
||||
|
||||
{ ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
|
||||
{ ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
|
||||
if (!dctx) return NULL;
|
||||
dctx->customMem = customMem;
|
||||
ZSTD_initDCtx_internal(dctx);
|
||||
@ -164,13 +172,13 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
|
||||
RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
|
||||
{ ZSTD_customMem const cMem = dctx->customMem;
|
||||
ZSTD_clearDict(dctx);
|
||||
ZSTD_free(dctx->inBuff, cMem);
|
||||
ZSTD_customFree(dctx->inBuff, cMem);
|
||||
dctx->inBuff = NULL;
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
|
||||
if (dctx->legacyContext)
|
||||
ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
|
||||
#endif
|
||||
ZSTD_free(dctx, cMem);
|
||||
ZSTD_customFree(dctx, cMem);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -179,7 +187,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
|
||||
void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
|
||||
{
|
||||
size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
|
||||
memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */
|
||||
ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */
|
||||
}
|
||||
|
||||
|
||||
@ -246,7 +254,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t const minInputSize = ZSTD_startingInputLength(format);
|
||||
|
||||
memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
|
||||
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
|
||||
if (srcSize < minInputSize) return minInputSize;
|
||||
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
|
||||
|
||||
@ -256,7 +264,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
|
||||
/* skippable frame */
|
||||
if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
|
||||
return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
|
||||
memset(zfhPtr, 0, sizeof(*zfhPtr));
|
||||
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
|
||||
zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
|
||||
zfhPtr->frameType = ZSTD_skippableFrame;
|
||||
return 0;
|
||||
@ -446,7 +454,8 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
|
||||
RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
|
||||
dictionary_wrong, "");
|
||||
#endif
|
||||
if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
|
||||
dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0;
|
||||
if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -461,7 +470,7 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
|
||||
static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_frameSizeInfo frameSizeInfo;
|
||||
memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
|
||||
ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
|
||||
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
|
||||
if (ZSTD_isLegacy(src, srcSize))
|
||||
@ -516,7 +525,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
|
||||
ip += 4;
|
||||
}
|
||||
|
||||
frameSizeInfo.compressedSize = ip - ipstart;
|
||||
frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
|
||||
frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
|
||||
? zfh.frameContentSize
|
||||
: nbBlocks * zfh.blockSizeMax;
|
||||
@ -579,12 +588,12 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
DEBUGLOG(5, "ZSTD_copyRawBlock");
|
||||
RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
|
||||
if (dst == NULL) {
|
||||
if (srcSize == 0) return 0;
|
||||
RETURN_ERROR(dstBuffer_null, "");
|
||||
}
|
||||
RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
|
||||
memcpy(dst, src, srcSize);
|
||||
ZSTD_memcpy(dst, src, srcSize);
|
||||
return srcSize;
|
||||
}
|
||||
|
||||
@ -592,12 +601,12 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
|
||||
BYTE b,
|
||||
size_t regenSize)
|
||||
{
|
||||
RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
|
||||
if (dst == NULL) {
|
||||
if (regenSize == 0) return 0;
|
||||
RETURN_ERROR(dstBuffer_null, "");
|
||||
}
|
||||
RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
|
||||
memset(dst, b, regenSize);
|
||||
ZSTD_memset(dst, b, regenSize);
|
||||
return regenSize;
|
||||
}
|
||||
|
||||
@ -647,13 +656,13 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
switch(blockProperties.blockType)
|
||||
{
|
||||
case bt_compressed:
|
||||
decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1);
|
||||
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
|
||||
break;
|
||||
case bt_raw :
|
||||
decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
|
||||
decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
|
||||
break;
|
||||
case bt_rle :
|
||||
decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize);
|
||||
decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
|
||||
break;
|
||||
case bt_reserved :
|
||||
default:
|
||||
@ -661,7 +670,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
}
|
||||
|
||||
if (ZSTD_isError(decodedSize)) return decodedSize;
|
||||
if (dctx->fParams.checksumFlag)
|
||||
if (dctx->validateChecksum)
|
||||
XXH64_update(&dctx->xxhState, op, decodedSize);
|
||||
if (decodedSize != 0)
|
||||
op += decodedSize;
|
||||
@ -676,11 +685,13 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
corruption_detected, "");
|
||||
}
|
||||
if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
|
||||
U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
|
||||
U32 checkRead;
|
||||
RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
|
||||
checkRead = MEM_readLE32(ip);
|
||||
RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
|
||||
if (!dctx->forceIgnoreChecksum) {
|
||||
U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
|
||||
U32 checkRead;
|
||||
checkRead = MEM_readLE32(ip);
|
||||
RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
|
||||
}
|
||||
ip += 4;
|
||||
remainingSrcSize -= 4;
|
||||
}
|
||||
@ -688,7 +699,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
/* Allow caller to get size read */
|
||||
*srcPtr = ip;
|
||||
*srcSizePtr = remainingSrcSize;
|
||||
return op-ostart;
|
||||
return (size_t)(op-ostart);
|
||||
}
|
||||
|
||||
static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
@ -721,7 +732,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
|
||||
if (ZSTD_isError(decodedSize)) return decodedSize;
|
||||
|
||||
assert(decodedSize <=- dstCapacity);
|
||||
assert(decodedSize <= dstCapacity);
|
||||
dst = (BYTE*)dst + decodedSize;
|
||||
dstCapacity -= decodedSize;
|
||||
|
||||
@ -761,15 +772,13 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
(ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
|
||||
&& (moreThan1Frame==1),
|
||||
srcSize_wrong,
|
||||
"at least one frame successfully completed, but following "
|
||||
"bytes are garbage: it's more likely to be a srcSize error, "
|
||||
"specifying more bytes than compressed size of frame(s). This "
|
||||
"error message replaces ERROR(prefix_unknown), which would be "
|
||||
"confusing, as the first header is actually correct. Note that "
|
||||
"one could be unlucky, it might be a corruption error instead, "
|
||||
"happening right at the place where we expect zstd magic "
|
||||
"bytes. But this is _much_ less likely than a srcSize field "
|
||||
"error.");
|
||||
"At least one frame successfully completed, "
|
||||
"but following bytes are garbage: "
|
||||
"it's more likely to be a srcSize error, "
|
||||
"specifying more input bytes than size of frame(s). "
|
||||
"Note: one could be unlucky, it might be a corruption error instead, "
|
||||
"happening right at the place where we expect zstd magic bytes. "
|
||||
"But this is _much_ less likely than a srcSize field error.");
|
||||
if (ZSTD_isError(res)) return res;
|
||||
assert(res <= dstCapacity);
|
||||
if (res != 0)
|
||||
@ -781,7 +790,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
|
||||
RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
|
||||
|
||||
return (BYTE*)dst - (BYTE*)dststart;
|
||||
return (size_t)((BYTE*)dst - (BYTE*)dststart);
|
||||
}
|
||||
|
||||
size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
|
||||
@ -899,21 +908,21 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
if (dctx->format == ZSTD_f_zstd1) { /* allows header */
|
||||
assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */
|
||||
if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
|
||||
memcpy(dctx->headerBuffer, src, srcSize);
|
||||
ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
|
||||
dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */
|
||||
dctx->stage = ZSTDds_decodeSkippableHeader;
|
||||
return 0;
|
||||
} }
|
||||
dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
|
||||
if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
|
||||
memcpy(dctx->headerBuffer, src, srcSize);
|
||||
ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
|
||||
dctx->expected = dctx->headerSize - srcSize;
|
||||
dctx->stage = ZSTDds_decodeFrameHeader;
|
||||
return 0;
|
||||
|
||||
case ZSTDds_decodeFrameHeader:
|
||||
assert(src != NULL);
|
||||
memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
|
||||
ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
|
||||
FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
|
||||
dctx->expected = ZSTD_blockHeaderSize;
|
||||
dctx->stage = ZSTDds_decodeBlockHeader;
|
||||
@ -977,7 +986,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
|
||||
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
|
||||
dctx->decodedSize += rSize;
|
||||
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
|
||||
if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize);
|
||||
dctx->previousDstEnd = (char*)dst + rSize;
|
||||
|
||||
/* Stay on the same stage until we are finished streaming the block. */
|
||||
@ -1007,10 +1016,13 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
|
||||
case ZSTDds_checkChecksum:
|
||||
assert(srcSize == 4); /* guaranteed by dctx->expected */
|
||||
{ U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
|
||||
U32 const check32 = MEM_readLE32(src);
|
||||
DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
|
||||
RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
|
||||
{
|
||||
if (dctx->validateChecksum) {
|
||||
U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
|
||||
U32 const check32 = MEM_readLE32(src);
|
||||
DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
|
||||
RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
|
||||
}
|
||||
dctx->expected = 0;
|
||||
dctx->stage = ZSTDds_getFrameHeaderSize;
|
||||
return 0;
|
||||
@ -1019,7 +1031,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
case ZSTDds_decodeSkippableHeader:
|
||||
assert(src != NULL);
|
||||
assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
|
||||
memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
|
||||
ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
|
||||
dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
|
||||
dctx->stage = ZSTDds_skipFrame;
|
||||
return 0;
|
||||
@ -1075,7 +1087,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
workspace, workspaceSize);
|
||||
#else
|
||||
size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
|
||||
dictPtr, dictEnd - dictPtr,
|
||||
dictPtr, (size_t)(dictEnd - dictPtr),
|
||||
workspace, workspaceSize);
|
||||
#endif
|
||||
RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
|
||||
@ -1084,40 +1096,46 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
|
||||
{ short offcodeNCount[MaxOff+1];
|
||||
unsigned offcodeMaxValue = MaxOff, offcodeLog;
|
||||
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
||||
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
|
||||
RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
|
||||
RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
|
||||
RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
|
||||
ZSTD_buildFSETable( entropy->OFTable,
|
||||
offcodeNCount, offcodeMaxValue,
|
||||
OF_base, OF_bits,
|
||||
offcodeLog);
|
||||
offcodeLog,
|
||||
entropy->workspace, sizeof(entropy->workspace),
|
||||
/* bmi2 */0);
|
||||
dictPtr += offcodeHeaderSize;
|
||||
}
|
||||
|
||||
{ short matchlengthNCount[MaxML+1];
|
||||
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
|
||||
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
|
||||
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
|
||||
RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
|
||||
RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
|
||||
RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
|
||||
ZSTD_buildFSETable( entropy->MLTable,
|
||||
matchlengthNCount, matchlengthMaxValue,
|
||||
ML_base, ML_bits,
|
||||
matchlengthLog);
|
||||
matchlengthLog,
|
||||
entropy->workspace, sizeof(entropy->workspace),
|
||||
/* bmi2 */ 0);
|
||||
dictPtr += matchlengthHeaderSize;
|
||||
}
|
||||
|
||||
{ short litlengthNCount[MaxLL+1];
|
||||
unsigned litlengthMaxValue = MaxLL, litlengthLog;
|
||||
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
|
||||
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
|
||||
RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
|
||||
RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
|
||||
RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
|
||||
ZSTD_buildFSETable( entropy->LLTable,
|
||||
litlengthNCount, litlengthMaxValue,
|
||||
LL_base, LL_bits,
|
||||
litlengthLog);
|
||||
litlengthLog,
|
||||
entropy->workspace, sizeof(entropy->workspace),
|
||||
/* bmi2 */ 0);
|
||||
dictPtr += litlengthHeaderSize;
|
||||
}
|
||||
|
||||
@ -1131,7 +1149,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
entropy->rep[i] = rep;
|
||||
} }
|
||||
|
||||
return dictPtr - (const BYTE*)dict;
|
||||
return (size_t)(dictPtr - (const BYTE*)dict);
|
||||
}
|
||||
|
||||
static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
|
||||
@ -1170,7 +1188,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
|
||||
dctx->dictID = 0;
|
||||
dctx->bType = bt_reserved;
|
||||
ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
|
||||
memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
|
||||
ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
|
||||
dctx->LLTptr = dctx->entropy.LLTable;
|
||||
dctx->MLTptr = dctx->entropy.MLTable;
|
||||
dctx->OFTptr = dctx->entropy.OFTable;
|
||||
@ -1394,7 +1412,7 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
|
||||
|
||||
size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
|
||||
{
|
||||
return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format);
|
||||
return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format);
|
||||
}
|
||||
|
||||
ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
|
||||
@ -1411,8 +1429,12 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
|
||||
ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
|
||||
return bounds;
|
||||
case ZSTD_d_stableOutBuffer:
|
||||
bounds.lowerBound = (int)ZSTD_obm_buffered;
|
||||
bounds.upperBound = (int)ZSTD_obm_stable;
|
||||
bounds.lowerBound = (int)ZSTD_bm_buffered;
|
||||
bounds.upperBound = (int)ZSTD_bm_stable;
|
||||
return bounds;
|
||||
case ZSTD_d_forceIgnoreChecksum:
|
||||
bounds.lowerBound = (int)ZSTD_d_validateChecksum;
|
||||
bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
|
||||
return bounds;
|
||||
default:;
|
||||
}
|
||||
@ -1436,6 +1458,26 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
|
||||
RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
|
||||
}
|
||||
|
||||
size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value)
|
||||
{
|
||||
switch (param) {
|
||||
case ZSTD_d_windowLogMax:
|
||||
*value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize);
|
||||
return 0;
|
||||
case ZSTD_d_format:
|
||||
*value = (int)dctx->format;
|
||||
return 0;
|
||||
case ZSTD_d_stableOutBuffer:
|
||||
*value = (int)dctx->outBufferMode;
|
||||
return 0;
|
||||
case ZSTD_d_forceIgnoreChecksum:
|
||||
*value = (int)dctx->forceIgnoreChecksum;
|
||||
return 0;
|
||||
default:;
|
||||
}
|
||||
RETURN_ERROR(parameter_unsupported, "");
|
||||
}
|
||||
|
||||
size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
|
||||
{
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
|
||||
@ -1451,7 +1493,11 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
|
||||
return 0;
|
||||
case ZSTD_d_stableOutBuffer:
|
||||
CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
|
||||
dctx->outBufferMode = (ZSTD_outBufferMode_e)value;
|
||||
dctx->outBufferMode = (ZSTD_bufferMode_e)value;
|
||||
return 0;
|
||||
case ZSTD_d_forceIgnoreChecksum:
|
||||
CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
|
||||
dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
|
||||
return 0;
|
||||
default:;
|
||||
}
|
||||
@ -1469,8 +1515,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
|
||||
|| (reset == ZSTD_reset_session_and_parameters) ) {
|
||||
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
|
||||
ZSTD_clearDict(dctx);
|
||||
dctx->format = ZSTD_f_zstd1;
|
||||
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
|
||||
ZSTD_DCtx_resetParameters(dctx);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -1524,7 +1569,7 @@ static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const ne
|
||||
{
|
||||
if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
|
||||
zds->oversizedDuration++;
|
||||
else
|
||||
else
|
||||
zds->oversizedDuration = 0;
|
||||
}
|
||||
|
||||
@ -1538,7 +1583,7 @@ static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const*
|
||||
{
|
||||
ZSTD_outBuffer const expect = zds->expectedOutBuffer;
|
||||
/* No requirement when ZSTD_obm_stable is not enabled. */
|
||||
if (zds->outBufferMode != ZSTD_obm_stable)
|
||||
if (zds->outBufferMode != ZSTD_bm_stable)
|
||||
return 0;
|
||||
/* Any buffer is allowed in zdss_init, this must be the same for every other call until
|
||||
* the context is reset.
|
||||
@ -1548,7 +1593,7 @@ static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const*
|
||||
/* The buffer must match our expectation exactly. */
|
||||
if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
|
||||
return 0;
|
||||
RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!");
|
||||
RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
|
||||
}
|
||||
|
||||
/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
|
||||
@ -1560,7 +1605,7 @@ static size_t ZSTD_decompressContinueStream(
|
||||
ZSTD_DStream* zds, char** op, char* oend,
|
||||
void const* src, size_t srcSize) {
|
||||
int const isSkipFrame = ZSTD_isSkipFrame(zds);
|
||||
if (zds->outBufferMode == ZSTD_obm_buffered) {
|
||||
if (zds->outBufferMode == ZSTD_bm_buffered) {
|
||||
size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
|
||||
size_t const decodedSize = ZSTD_decompressContinue(zds,
|
||||
zds->outBuff + zds->outStart, dstSize, src, srcSize);
|
||||
@ -1573,14 +1618,14 @@ static size_t ZSTD_decompressContinueStream(
|
||||
}
|
||||
} else {
|
||||
/* Write directly into the output buffer */
|
||||
size_t const dstSize = isSkipFrame ? 0 : oend - *op;
|
||||
size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
|
||||
size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
|
||||
FORWARD_IF_ERROR(decodedSize, "");
|
||||
*op += decodedSize;
|
||||
/* Flushing is not needed. */
|
||||
zds->streamStage = zdss_read;
|
||||
assert(*op <= oend);
|
||||
assert(zds->outBufferMode == ZSTD_obm_stable);
|
||||
assert(zds->outBufferMode == ZSTD_bm_stable);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -1663,14 +1708,14 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
assert(iend >= ip);
|
||||
if (toLoad > remainingInput) { /* not enough input to load full header */
|
||||
if (remainingInput > 0) {
|
||||
memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
|
||||
ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
|
||||
zds->lhSize += remainingInput;
|
||||
}
|
||||
input->pos = input->size;
|
||||
return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
|
||||
}
|
||||
assert(ip != NULL);
|
||||
memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
|
||||
ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
|
||||
break;
|
||||
} }
|
||||
|
||||
@ -1678,10 +1723,10 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
|
||||
&& zds->fParams.frameType != ZSTD_skippableFrame
|
||||
&& (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
|
||||
size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
|
||||
size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
|
||||
if (cSize <= (size_t)(iend-istart)) {
|
||||
/* shortcut : using single-pass mode */
|
||||
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds));
|
||||
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
|
||||
if (ZSTD_isError(decompressedSize)) return decompressedSize;
|
||||
DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
|
||||
ip = istart + cSize;
|
||||
@ -1693,7 +1738,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
} }
|
||||
|
||||
/* Check output buffer is large enough for ZSTD_odm_stable. */
|
||||
if (zds->outBufferMode == ZSTD_obm_stable
|
||||
if (zds->outBufferMode == ZSTD_bm_stable
|
||||
&& zds->fParams.frameType != ZSTD_skippableFrame
|
||||
&& zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
|
||||
&& (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
|
||||
@ -1723,7 +1768,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
|
||||
/* Adapt buffer sizes to frame header instructions */
|
||||
{ size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
|
||||
size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered
|
||||
size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
|
||||
? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
|
||||
: 0;
|
||||
|
||||
@ -1731,7 +1776,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
|
||||
{ int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
|
||||
int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
|
||||
|
||||
|
||||
if (tooSmall || tooLarge) {
|
||||
size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
|
||||
DEBUGLOG(4, "inBuff : from %u to %u",
|
||||
@ -1745,10 +1790,10 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
|
||||
memory_allocation, "");
|
||||
} else {
|
||||
ZSTD_free(zds->inBuff, zds->customMem);
|
||||
ZSTD_customFree(zds->inBuff, zds->customMem);
|
||||
zds->inBuffSize = 0;
|
||||
zds->outBuffSize = 0;
|
||||
zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
|
||||
zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem);
|
||||
RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
|
||||
}
|
||||
zds->inBuffSize = neededInBuffSize;
|
||||
@ -1760,7 +1805,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
|
||||
case zdss_read:
|
||||
DEBUGLOG(5, "stage zdss_read");
|
||||
{ size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip);
|
||||
{ size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip));
|
||||
DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
|
||||
if (neededInSize==0) { /* end of frame */
|
||||
zds->streamStage = zdss_init;
|
||||
@ -1790,7 +1835,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
|
||||
corruption_detected,
|
||||
"should never happen");
|
||||
loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip);
|
||||
loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
|
||||
}
|
||||
ip += loadedSize;
|
||||
zds->inPos += loadedSize;
|
||||
@ -1804,7 +1849,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
}
|
||||
case zdss_flush:
|
||||
{ size_t const toFlushSize = zds->outEnd - zds->outStart;
|
||||
size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
|
||||
size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
|
||||
op += flushedSize;
|
||||
zds->outStart += flushedSize;
|
||||
if (flushedSize == toFlushSize) { /* flush completed */
|
||||
|
@ -14,7 +14,7 @@
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <string.h> /* memcpy, memmove, memset */
|
||||
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
||||
#include "compiler.h" /* prefetch */
|
||||
#include "cpu.h" /* bmi2 */
|
||||
#include "mem.h" /* low level memory routines */
|
||||
@ -44,7 +44,7 @@
|
||||
/*_*******************************************************
|
||||
* Memory operations
|
||||
**********************************************************/
|
||||
static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
|
||||
static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
|
||||
|
||||
|
||||
/*-*************************************************************
|
||||
@ -166,7 +166,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
dctx->litSize = litSize;
|
||||
dctx->litEntropy = 1;
|
||||
if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
|
||||
memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
||||
ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
||||
return litCSize + lhSize;
|
||||
}
|
||||
|
||||
@ -191,10 +191,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
|
||||
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
||||
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
|
||||
memcpy(dctx->litBuffer, istart+lhSize, litSize);
|
||||
ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
|
||||
dctx->litPtr = dctx->litBuffer;
|
||||
dctx->litSize = litSize;
|
||||
memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
||||
ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
||||
return lhSize+litSize;
|
||||
}
|
||||
/* direct reference into compressed stream */
|
||||
@ -223,7 +223,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
break;
|
||||
}
|
||||
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
||||
memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
|
||||
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
|
||||
dctx->litPtr = dctx->litBuffer;
|
||||
dctx->litSize = litSize;
|
||||
return lhSize+1;
|
||||
@ -364,23 +364,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
||||
* generate FSE decoding table for one symbol (ll, ml or off)
|
||||
* cannot fail if input is valid =>
|
||||
* all inputs are presumed validated at this stage */
|
||||
void
|
||||
ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
||||
FORCE_INLINE_TEMPLATE
|
||||
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
||||
const short* normalizedCounter, unsigned maxSymbolValue,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
unsigned tableLog)
|
||||
unsigned tableLog, void* wksp, size_t wkspSize)
|
||||
{
|
||||
ZSTD_seqSymbol* const tableDecode = dt+1;
|
||||
U16 symbolNext[MaxSeq+1];
|
||||
|
||||
U32 const maxSV1 = maxSymbolValue + 1;
|
||||
U32 const tableSize = 1 << tableLog;
|
||||
U32 highThreshold = tableSize-1;
|
||||
|
||||
U16* symbolNext = (U16*)wksp;
|
||||
BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
|
||||
U32 highThreshold = tableSize - 1;
|
||||
|
||||
|
||||
/* Sanity Checks */
|
||||
assert(maxSymbolValue <= MaxSeq);
|
||||
assert(tableLog <= MaxFSELog);
|
||||
|
||||
assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
|
||||
(void)wkspSize;
|
||||
/* Init, lay down lowprob symbols */
|
||||
{ ZSTD_seqSymbol_header DTableH;
|
||||
DTableH.tableLog = tableLog;
|
||||
@ -396,16 +399,69 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
||||
assert(normalizedCounter[s]>=0);
|
||||
symbolNext[s] = (U16)normalizedCounter[s];
|
||||
} } }
|
||||
memcpy(dt, &DTableH, sizeof(DTableH));
|
||||
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
|
||||
}
|
||||
|
||||
/* Spread symbols */
|
||||
{ U32 const tableMask = tableSize-1;
|
||||
assert(tableSize <= 512);
|
||||
/* Specialized symbol spreading for the case when there are
|
||||
* no low probability (-1 count) symbols. When compressing
|
||||
* small blocks we avoid low probability symbols to hit this
|
||||
* case, since header decoding speed matters more.
|
||||
*/
|
||||
if (highThreshold == tableSize - 1) {
|
||||
size_t const tableMask = tableSize-1;
|
||||
size_t const step = FSE_TABLESTEP(tableSize);
|
||||
/* First lay down the symbols in order.
|
||||
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
|
||||
* misses since small blocks generally have small table logs, so nearly
|
||||
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
|
||||
* our buffer to handle the over-write.
|
||||
*/
|
||||
{
|
||||
U64 const add = 0x0101010101010101ull;
|
||||
size_t pos = 0;
|
||||
U64 sv = 0;
|
||||
U32 s;
|
||||
for (s=0; s<maxSV1; ++s, sv += add) {
|
||||
int i;
|
||||
int const n = normalizedCounter[s];
|
||||
MEM_write64(spread + pos, sv);
|
||||
for (i = 8; i < n; i += 8) {
|
||||
MEM_write64(spread + pos + i, sv);
|
||||
}
|
||||
pos += n;
|
||||
}
|
||||
}
|
||||
/* Now we spread those positions across the table.
|
||||
* The benefit of doing it in two stages is that we avoid the the
|
||||
* variable size inner loop, which caused lots of branch misses.
|
||||
* Now we can run through all the positions without any branch misses.
|
||||
* We unroll the loop twice, since that is what emperically worked best.
|
||||
*/
|
||||
{
|
||||
size_t position = 0;
|
||||
size_t s;
|
||||
size_t const unroll = 2;
|
||||
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
||||
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
||||
size_t u;
|
||||
for (u = 0; u < unroll; ++u) {
|
||||
size_t const uPosition = (position + (u * step)) & tableMask;
|
||||
tableDecode[uPosition].baseValue = spread[s + u];
|
||||
}
|
||||
position = (position + (unroll * step)) & tableMask;
|
||||
}
|
||||
assert(position == 0);
|
||||
}
|
||||
} else {
|
||||
U32 const tableMask = tableSize-1;
|
||||
U32 const step = FSE_TABLESTEP(tableSize);
|
||||
U32 s, position = 0;
|
||||
for (s=0; s<maxSV1; s++) {
|
||||
int i;
|
||||
for (i=0; i<normalizedCounter[s]; i++) {
|
||||
int const n = normalizedCounter[s];
|
||||
for (i=0; i<n; i++) {
|
||||
tableDecode[position].baseValue = s;
|
||||
position = (position + step) & tableMask;
|
||||
while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
|
||||
@ -414,7 +470,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
||||
}
|
||||
|
||||
/* Build Decoding table */
|
||||
{ U32 u;
|
||||
{
|
||||
U32 u;
|
||||
for (u=0; u<tableSize; u++) {
|
||||
U32 const symbol = tableDecode[u].baseValue;
|
||||
U32 const nextState = symbolNext[symbol]++;
|
||||
@ -423,7 +480,46 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
||||
assert(nbAdditionalBits[symbol] < 255);
|
||||
tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
|
||||
tableDecode[u].baseValue = baseValue[symbol];
|
||||
} }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Avoids the FORCE_INLINE of the _body() function. */
|
||||
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
||||
const short* normalizedCounter, unsigned maxSymbolValue,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
unsigned tableLog, void* wksp, size_t wkspSize)
|
||||
{
|
||||
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
||||
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
||||
const short* normalizedCounter, unsigned maxSymbolValue,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
unsigned tableLog, void* wksp, size_t wkspSize)
|
||||
{
|
||||
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
||||
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
||||
}
|
||||
#endif
|
||||
|
||||
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
||||
const short* normalizedCounter, unsigned maxSymbolValue,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
|
||||
{
|
||||
#if DYNAMIC_BMI2
|
||||
if (bmi2) {
|
||||
ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
|
||||
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
(void)bmi2;
|
||||
ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
|
||||
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
||||
}
|
||||
|
||||
|
||||
@ -435,7 +531,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
||||
const void* src, size_t srcSize,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
||||
int ddictIsCold, int nbSeq)
|
||||
int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
|
||||
int bmi2)
|
||||
{
|
||||
switch(type)
|
||||
{
|
||||
@ -467,7 +564,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
||||
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
|
||||
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
|
||||
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
|
||||
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
|
||||
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
|
||||
*DTablePtr = DTableSpace;
|
||||
return headerSize;
|
||||
}
|
||||
@ -499,7 +596,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
if (nbSeq > 0x7F) {
|
||||
if (nbSeq == 0xFF) {
|
||||
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
|
||||
nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
|
||||
nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
|
||||
ip+=2;
|
||||
} else {
|
||||
RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
|
||||
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
||||
@ -520,7 +618,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
ip, iend-ip,
|
||||
LL_base, LL_bits,
|
||||
LL_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
dctx->ddictIsCold, nbSeq,
|
||||
dctx->workspace, sizeof(dctx->workspace),
|
||||
dctx->bmi2);
|
||||
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
||||
ip += llhSize;
|
||||
}
|
||||
@ -530,7 +630,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
ip, iend-ip,
|
||||
OF_base, OF_bits,
|
||||
OF_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
dctx->ddictIsCold, nbSeq,
|
||||
dctx->workspace, sizeof(dctx->workspace),
|
||||
dctx->bmi2);
|
||||
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
||||
ip += ofhSize;
|
||||
}
|
||||
@ -540,7 +642,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
ip, iend-ip,
|
||||
ML_base, ML_bits,
|
||||
ML_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
dctx->ddictIsCold, nbSeq,
|
||||
dctx->workspace, sizeof(dctx->workspace),
|
||||
dctx->bmi2);
|
||||
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
||||
ip += mlhSize;
|
||||
}
|
||||
@ -686,12 +790,12 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
|
||||
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
||||
match = dictEnd - (prefixStart-match);
|
||||
if (match + sequence.matchLength <= dictEnd) {
|
||||
memmove(oLitEnd, match, sequence.matchLength);
|
||||
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
||||
return sequenceLength;
|
||||
}
|
||||
/* span extDict & currentPrefixSegment */
|
||||
{ size_t const length1 = dictEnd - match;
|
||||
memmove(oLitEnd, match, length1);
|
||||
ZSTD_memmove(oLitEnd, match, length1);
|
||||
op = oLitEnd + length1;
|
||||
sequence.matchLength -= length1;
|
||||
match = prefixStart;
|
||||
@ -752,12 +856,12 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
||||
match = dictEnd + (match - prefixStart);
|
||||
if (match + sequence.matchLength <= dictEnd) {
|
||||
memmove(oLitEnd, match, sequence.matchLength);
|
||||
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
||||
return sequenceLength;
|
||||
}
|
||||
/* span extDict & currentPrefixSegment */
|
||||
{ size_t const length1 = dictEnd - match;
|
||||
memmove(oLitEnd, match, length1);
|
||||
ZSTD_memmove(oLitEnd, match, length1);
|
||||
op = oLitEnd + length1;
|
||||
sequence.matchLength -= length1;
|
||||
match = prefixStart;
|
||||
@ -948,7 +1052,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
|
||||
}
|
||||
|
||||
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
|
||||
MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
|
||||
{
|
||||
size_t const windowSize = dctx->fParams.windowSize;
|
||||
/* No dictionary used. */
|
||||
@ -969,6 +1073,7 @@ MEM_STATIC void ZSTD_assertValidSequence(
|
||||
seq_t const seq,
|
||||
BYTE const* prefixStart, BYTE const* virtualStart)
|
||||
{
|
||||
#if DEBUGLEVEL >= 1
|
||||
size_t const windowSize = dctx->fParams.windowSize;
|
||||
size_t const sequenceSize = seq.litLength + seq.matchLength;
|
||||
BYTE const* const oLitEnd = op + seq.litLength;
|
||||
@ -986,6 +1091,9 @@ MEM_STATIC void ZSTD_assertValidSequence(
|
||||
/* Offset must be within our window. */
|
||||
assert(seq.offset <= windowSize);
|
||||
}
|
||||
#else
|
||||
(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1080,14 +1188,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
#endif
|
||||
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
||||
BIT_reloadDStream(&(seqState.DStream));
|
||||
op += oneSeqSize;
|
||||
/* gcc and clang both don't like early returns in this loop.
|
||||
* gcc doesn't like early breaks either.
|
||||
* Instead save an error and report it at the end.
|
||||
* When there is an error, don't increment op, so we don't
|
||||
* overwrite.
|
||||
* Instead break and check for an error at the end of the loop.
|
||||
*/
|
||||
if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
|
||||
else op += oneSeqSize;
|
||||
if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
|
||||
error = oneSeqSize;
|
||||
break;
|
||||
}
|
||||
if (UNLIKELY(!--nbSeq)) break;
|
||||
}
|
||||
|
||||
@ -1104,7 +1212,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
{ size_t const lastLLSize = litEnd - litPtr;
|
||||
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
||||
if (op != NULL) {
|
||||
memcpy(op, litPtr, lastLLSize);
|
||||
ZSTD_memcpy(op, litPtr, lastLLSize);
|
||||
op += lastLLSize;
|
||||
}
|
||||
}
|
||||
@ -1209,7 +1317,7 @@ ZSTD_decompressSequencesLong_body(
|
||||
{ size_t const lastLLSize = litEnd - litPtr;
|
||||
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
||||
if (op != NULL) {
|
||||
memcpy(op, litPtr, lastLLSize);
|
||||
ZSTD_memcpy(op, litPtr, lastLLSize);
|
||||
op += lastLLSize;
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,7 @@
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#include "zstd.h" /* DCtx, and some public functions */
|
||||
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
||||
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
||||
@ -48,12 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
||||
* this function must be called with valid parameters only
|
||||
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
|
||||
* in which case it cannot fail.
|
||||
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
|
||||
* defined in zstd_decompress_internal.h.
|
||||
* Internal use only.
|
||||
*/
|
||||
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
||||
const short* normalizedCounter, unsigned maxSymbolValue,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
unsigned tableLog);
|
||||
unsigned tableLog, void* wksp, size_t wkspSize,
|
||||
int bmi2);
|
||||
|
||||
|
||||
#endif /* ZSTD_DEC_BLOCK_H */
|
||||
|
@ -27,26 +27,26 @@
|
||||
/*-*******************************************************
|
||||
* Constants
|
||||
*********************************************************/
|
||||
static const U32 LL_base[MaxLL+1] = {
|
||||
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 18, 20, 22, 24, 28, 32, 40,
|
||||
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
||||
0x2000, 0x4000, 0x8000, 0x10000 };
|
||||
|
||||
static const U32 OF_base[MaxOff+1] = {
|
||||
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
||||
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
||||
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
||||
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
||||
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
||||
|
||||
static const U32 OF_bits[MaxOff+1] = {
|
||||
static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
static const U32 ML_base[MaxML+1] = {
|
||||
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
||||
3, 4, 5, 6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15, 16, 17, 18,
|
||||
19, 20, 21, 22, 23, 24, 25, 26,
|
||||
@ -73,12 +73,16 @@ static const U32 ML_base[MaxML+1] = {
|
||||
|
||||
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
||||
|
||||
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
||||
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
||||
|
||||
typedef struct {
|
||||
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
||||
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
||||
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
||||
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
||||
U32 rep[ZSTD_REP_NUM];
|
||||
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
||||
} ZSTD_entropyDTables_t;
|
||||
|
||||
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
||||
@ -95,11 +99,6 @@ typedef enum {
|
||||
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
||||
} ZSTD_dictUses_e;
|
||||
|
||||
typedef enum {
|
||||
ZSTD_obm_buffered = 0, /* Buffer the output */
|
||||
ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
|
||||
} ZSTD_outBufferMode_e;
|
||||
|
||||
struct ZSTD_DCtx_s
|
||||
{
|
||||
const ZSTD_seqSymbol* LLTptr;
|
||||
@ -122,6 +121,8 @@ struct ZSTD_DCtx_s
|
||||
XXH64_state_t xxhState;
|
||||
size_t headerSize;
|
||||
ZSTD_format_e format;
|
||||
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
|
||||
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
|
||||
const BYTE* litPtr;
|
||||
ZSTD_customMem customMem;
|
||||
size_t litSize;
|
||||
@ -152,7 +153,7 @@ struct ZSTD_DCtx_s
|
||||
U32 legacyVersion;
|
||||
U32 hostageByte;
|
||||
int noForwardProgress;
|
||||
ZSTD_outBufferMode_e outBufferMode;
|
||||
ZSTD_bufferMode_e outBufferMode;
|
||||
ZSTD_outBuffer expectedOutBuffer;
|
||||
|
||||
/* workspace */
|
||||
|
111
zstd/zstd_deps.h
Normal file
111
zstd/zstd_deps.h
Normal file
@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2020, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/* This file provides common libc dependencies that zstd requires.
|
||||
* The purpose is to allow replacing this file with a custom implementation
|
||||
* to compile zstd without libc support.
|
||||
*/
|
||||
|
||||
/* Need:
|
||||
* NULL
|
||||
* INT_MAX
|
||||
* UINT_MAX
|
||||
* ZSTD_memcpy()
|
||||
* ZSTD_memset()
|
||||
* ZSTD_memmove()
|
||||
*/
|
||||
#ifndef ZSTD_DEPS_COMMON
|
||||
#define ZSTD_DEPS_COMMON
|
||||
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ >= 4
|
||||
# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l))
|
||||
# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l))
|
||||
# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l))
|
||||
#else
|
||||
# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l))
|
||||
# define ZSTD_memmove(d,s,l) memmove((d),(s),(l))
|
||||
# define ZSTD_memset(p,v,l) memset((p),(v),(l))
|
||||
#endif
|
||||
|
||||
#endif /* ZSTD_DEPS_COMMON */
|
||||
|
||||
/* Need:
|
||||
* ZSTD_malloc()
|
||||
* ZSTD_free()
|
||||
* ZSTD_calloc()
|
||||
*/
|
||||
#ifdef ZSTD_DEPS_NEED_MALLOC
|
||||
#ifndef ZSTD_DEPS_MALLOC
|
||||
#define ZSTD_DEPS_MALLOC
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define ZSTD_malloc(s) malloc(s)
|
||||
#define ZSTD_calloc(n,s) calloc((n), (s))
|
||||
#define ZSTD_free(p) free((p))
|
||||
|
||||
#endif /* ZSTD_DEPS_MALLOC */
|
||||
#endif /* ZSTD_DEPS_NEED_MALLOC */
|
||||
|
||||
/*
|
||||
* Provides 64-bit math support.
|
||||
* Need:
|
||||
* U64 ZSTD_div64(U64 dividend, U32 divisor)
|
||||
*/
|
||||
#ifdef ZSTD_DEPS_NEED_MATH64
|
||||
#ifndef ZSTD_DEPS_MATH64
|
||||
#define ZSTD_DEPS_MATH64
|
||||
|
||||
#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
|
||||
|
||||
#endif /* ZSTD_DEPS_MATH64 */
|
||||
#endif /* ZSTD_DEPS_NEED_MATH64 */
|
||||
|
||||
/* Need:
|
||||
* assert()
|
||||
*/
|
||||
#ifdef ZSTD_DEPS_NEED_ASSERT
|
||||
#ifndef ZSTD_DEPS_ASSERT
|
||||
#define ZSTD_DEPS_ASSERT
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#endif /* ZSTD_DEPS_ASSERT */
|
||||
#endif /* ZSTD_DEPS_NEED_ASSERT */
|
||||
|
||||
/* Need:
|
||||
* ZSTD_DEBUG_PRINT()
|
||||
*/
|
||||
#ifdef ZSTD_DEPS_NEED_IO
|
||||
#ifndef ZSTD_DEPS_IO
|
||||
#define ZSTD_DEPS_IO
|
||||
|
||||
#include <stdio.h>
|
||||
#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
|
||||
|
||||
#endif /* ZSTD_DEPS_IO */
|
||||
#endif /* ZSTD_DEPS_NEED_IO */
|
||||
|
||||
/* Only requested when <stdint.h> is known to be present.
|
||||
* Need:
|
||||
* intptr_t
|
||||
*/
|
||||
#ifdef ZSTD_DEPS_NEED_STDINT
|
||||
#ifndef ZSTD_DEPS_STDINT
|
||||
#define ZSTD_DEPS_STDINT
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#endif /* ZSTD_DEPS_STDINT */
|
||||
#endif /* ZSTD_DEPS_NEED_STDINT */
|
@ -31,15 +31,15 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
||||
* is empty.
|
||||
*/
|
||||
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 const curr = (U32)(ip - base);
|
||||
U32 i;
|
||||
for (i = 0; i < fastHashFillStep; ++i) {
|
||||
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
||||
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
||||
if (i == 0)
|
||||
hashSmall[smHash] = current + i;
|
||||
hashSmall[smHash] = curr + i;
|
||||
if (i == 0 || hashLarge[lgHash] == 0)
|
||||
hashLarge[lgHash] = current + i;
|
||||
hashLarge[lgHash] = curr + i;
|
||||
/* Only load extra positions for ZSTD_dtlm_full */
|
||||
if (dtlm == ZSTD_dtlm_fast)
|
||||
break;
|
||||
@ -108,9 +108,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
/* init */
|
||||
ip += (dictAndPrefixLength == 0);
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
||||
U32 const maxRep = current - windowLow;
|
||||
U32 const curr = (U32)(ip - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
||||
U32 const maxRep = curr - windowLow;
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
@ -129,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
|
||||
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
||||
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
||||
U32 const current = (U32)(ip-base);
|
||||
U32 const curr = (U32)(ip-base);
|
||||
U32 const matchIndexL = hashLong[h2];
|
||||
U32 matchIndexS = hashSmall[h];
|
||||
const BYTE* matchLong = base + matchIndexL;
|
||||
const BYTE* match = base + matchIndexS;
|
||||
const U32 repIndex = current + 1 - offset_1;
|
||||
const U32 repIndex = curr + 1 - offset_1;
|
||||
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
||||
&& repIndex < prefixLowestIndex) ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
base + repIndex;
|
||||
hashLong[h2] = hashSmall[h] = current; /* update hash tables */
|
||||
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
|
||||
|
||||
/* check dictMatchState repcode */
|
||||
if (dictMode == ZSTD_dictMatchState
|
||||
@ -177,7 +177,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
|
||||
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
|
||||
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
|
||||
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
|
||||
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
|
||||
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
} }
|
||||
@ -209,7 +209,7 @@ _search_next_long:
|
||||
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
||||
U32 const matchIndexL3 = hashLong[hl3];
|
||||
const BYTE* matchL3 = base + matchIndexL3;
|
||||
hashLong[hl3] = current + 1;
|
||||
hashLong[hl3] = curr + 1;
|
||||
|
||||
/* check prefix long +1 match */
|
||||
if (matchIndexL3 > prefixLowestIndex) {
|
||||
@ -228,7 +228,7 @@ _search_next_long:
|
||||
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
|
||||
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
|
||||
ip++;
|
||||
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
|
||||
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
|
||||
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
} } }
|
||||
@ -236,7 +236,7 @@ _search_next_long:
|
||||
/* if no long +1 match, explore the short match we found */
|
||||
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
|
||||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
|
||||
offset = (U32)(current - matchIndexS);
|
||||
offset = (U32)(curr - matchIndexS);
|
||||
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
} else {
|
||||
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
||||
@ -260,7 +260,7 @@ _match_stored:
|
||||
if (ip <= ilimit) {
|
||||
/* Complementary insertion */
|
||||
/* done after iLimit test, as candidates could be > iend-8 */
|
||||
{ U32 const indexToInsert = current+2;
|
||||
{ U32 const indexToInsert = curr+2;
|
||||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
||||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
||||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
||||
@ -401,12 +401,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
|
||||
const BYTE* matchLong = matchLongBase + matchLongIndex;
|
||||
|
||||
const U32 current = (U32)(ip-base);
|
||||
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
|
||||
const U32 curr = (U32)(ip-base);
|
||||
const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
|
||||
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
size_t mLength;
|
||||
hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
|
||||
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
|
||||
|
||||
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
|
||||
& (repIndex > dictStartIndex))
|
||||
@ -421,7 +421,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
|
||||
U32 offset;
|
||||
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
|
||||
offset = current - matchLongIndex;
|
||||
offset = curr - matchLongIndex;
|
||||
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
@ -433,19 +433,19 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
|
||||
const BYTE* match3 = match3Base + matchIndex3;
|
||||
U32 offset;
|
||||
hashLong[h3] = current + 1;
|
||||
hashLong[h3] = curr + 1;
|
||||
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
|
||||
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
|
||||
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
|
||||
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
|
||||
ip++;
|
||||
offset = current+1 - matchIndex3;
|
||||
offset = curr+1 - matchIndex3;
|
||||
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
|
||||
} else {
|
||||
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
||||
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
||||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
||||
offset = current - matchIndex;
|
||||
offset = curr - matchIndex;
|
||||
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
}
|
||||
offset_2 = offset_1;
|
||||
@ -464,7 +464,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
if (ip <= ilimit) {
|
||||
/* Complementary insertion */
|
||||
/* done after iLimit test, as candidates could be > iend-8 */
|
||||
{ U32 const indexToInsert = current+2;
|
||||
{ U32 const indexToInsert = curr+2;
|
||||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
||||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
||||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
||||
|
@ -77,6 +77,7 @@ typedef enum {
|
||||
ZSTD_error_frameIndex_tooLarge = 100,
|
||||
ZSTD_error_seekableIO = 102,
|
||||
ZSTD_error_dstBuffer_wrong = 104,
|
||||
ZSTD_error_srcBuffer_wrong = 105,
|
||||
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
|
||||
} ZSTD_ErrorCode;
|
||||
|
||||
|
@ -29,16 +29,16 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
||||
* Insert the other positions if their hash entry is empty.
|
||||
*/
|
||||
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 const curr = (U32)(ip - base);
|
||||
size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
|
||||
hashTable[hash0] = current;
|
||||
hashTable[hash0] = curr;
|
||||
if (dtlm == ZSTD_dtlm_fast) continue;
|
||||
/* Only load extra positions for ZSTD_dtlm_full */
|
||||
{ U32 p;
|
||||
for (p = 1; p < fastHashFillStep; ++p) {
|
||||
size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
|
||||
if (hashTable[hash] == 0) { /* not yet filled */
|
||||
hashTable[hash] = current + p;
|
||||
hashTable[hash] = curr + p;
|
||||
} } } }
|
||||
}
|
||||
|
||||
@ -72,9 +72,9 @@ ZSTD_compressBlock_fast_generic(
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
|
||||
ip0 += (ip0 == prefixStart);
|
||||
ip1 = ip0 + 1;
|
||||
{ U32 const current = (U32)(ip0 - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
||||
U32 const maxRep = current - windowLow;
|
||||
{ U32 const curr = (U32)(ip0 - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
||||
U32 const maxRep = curr - windowLow;
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
@ -258,14 +258,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
||||
size_t mLength;
|
||||
size_t const h = ZSTD_hashPtr(ip, hlog, mls);
|
||||
U32 const current = (U32)(ip-base);
|
||||
U32 const curr = (U32)(ip-base);
|
||||
U32 const matchIndex = hashTable[h];
|
||||
const BYTE* match = base + matchIndex;
|
||||
const U32 repIndex = current + 1 - offset_1;
|
||||
const U32 repIndex = curr + 1 - offset_1;
|
||||
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
base + repIndex;
|
||||
hashTable[h] = current; /* update hash table */
|
||||
hashTable[h] = curr; /* update hash table */
|
||||
|
||||
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
@ -284,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
continue;
|
||||
} else {
|
||||
/* found a dict match */
|
||||
U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
|
||||
U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
|
||||
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
|
||||
while (((ip>anchor) & (dictMatch>dictStart))
|
||||
&& (ip[-1] == dictMatch[-1])) {
|
||||
@ -316,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
|
||||
if (ip <= ilimit) {
|
||||
/* Fill Table */
|
||||
assert(base+current+2 > istart); /* check base overflow */
|
||||
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
|
||||
assert(base+curr+2 > istart); /* check base overflow */
|
||||
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
|
||||
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
||||
|
||||
/* check immediate repcode */
|
||||
@ -410,13 +410,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
||||
const U32 matchIndex = hashTable[h];
|
||||
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
|
||||
const BYTE* match = matchBase + matchIndex;
|
||||
const U32 current = (U32)(ip-base);
|
||||
const U32 repIndex = current + 1 - offset_1;
|
||||
const U32 curr = (U32)(ip-base);
|
||||
const U32 repIndex = curr + 1 - offset_1;
|
||||
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
hashTable[h] = current; /* update hash table */
|
||||
DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
|
||||
assert(offset_1 <= current +1); /* check repIndex */
|
||||
hashTable[h] = curr; /* update hash table */
|
||||
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
|
||||
assert(offset_1 <= curr +1); /* check repIndex */
|
||||
|
||||
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
@ -435,7 +435,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
||||
}
|
||||
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
||||
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
||||
U32 const offset = current - matchIndex;
|
||||
U32 const offset = curr - matchIndex;
|
||||
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
||||
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
offset_2 = offset_1; offset_1 = offset; /* update offset history */
|
||||
@ -446,7 +446,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
||||
|
||||
if (ip <= ilimit) {
|
||||
/* Fill Table */
|
||||
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
|
||||
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
|
||||
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
||||
/* check immediate repcode */
|
||||
while (ip <= ilimit) {
|
||||
|
@ -19,7 +19,7 @@
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#ifdef __aarch64__
|
||||
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "compiler.h"
|
||||
@ -139,7 +139,7 @@ void _force_has_format_string(const char *format, ...) {
|
||||
|
||||
#define ZSTD_REP_NUM 3 /* number of repcodes */
|
||||
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
|
||||
static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
||||
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
||||
|
||||
#define KB *(1 <<10)
|
||||
#define MB *(1 <<20)
|
||||
@ -153,13 +153,13 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
||||
#define BIT0 1
|
||||
|
||||
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
|
||||
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
|
||||
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
|
||||
static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
|
||||
static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
|
||||
|
||||
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
|
||||
|
||||
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
|
||||
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
|
||||
static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
|
||||
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
|
||||
|
||||
#define ZSTD_FRAMECHECKSUMSIZE 4
|
||||
@ -186,61 +186,75 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
|
||||
#define OffFSELog 8
|
||||
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
|
||||
|
||||
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3,
|
||||
4, 6, 7, 8, 9,10,11,12,
|
||||
13,14,15,16 };
|
||||
static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 3, 2, 1, 1, 1, 1, 1,
|
||||
-1,-1,-1,-1 };
|
||||
#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
|
||||
/* Each table cannot take more than #symbols * FSELog bits */
|
||||
#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
|
||||
|
||||
static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3,
|
||||
4, 6, 7, 8, 9,10,11,12,
|
||||
13,14,15,16
|
||||
};
|
||||
static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
|
||||
4, 3, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 3, 2, 1, 1, 1, 1, 1,
|
||||
-1,-1,-1,-1
|
||||
};
|
||||
#define LL_DEFAULTNORMLOG 6 /* for static allocation */
|
||||
static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
|
||||
static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
|
||||
|
||||
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3,
|
||||
4, 4, 5, 7, 8, 9,10,11,
|
||||
12,13,14,15,16 };
|
||||
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
|
||||
2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1,-1,-1,
|
||||
-1,-1,-1,-1,-1 };
|
||||
static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3,
|
||||
4, 4, 5, 7, 8, 9,10,11,
|
||||
12,13,14,15,16
|
||||
};
|
||||
static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
|
||||
1, 4, 3, 2, 2, 2, 2, 2,
|
||||
2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1,-1,-1,
|
||||
-1,-1,-1,-1,-1
|
||||
};
|
||||
#define ML_DEFAULTNORMLOG 6 /* for static allocation */
|
||||
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
|
||||
static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
|
||||
|
||||
static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
|
||||
2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
-1,-1,-1,-1,-1 };
|
||||
static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
|
||||
1, 1, 1, 1, 1, 1, 2, 2,
|
||||
2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
-1,-1,-1,-1,-1
|
||||
};
|
||||
#define OF_DEFAULTNORMLOG 5 /* for static allocation */
|
||||
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
|
||||
static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
|
||||
|
||||
|
||||
/*-*******************************************
|
||||
* Shared functions to include for inlining
|
||||
*********************************************/
|
||||
static void ZSTD_copy8(void* dst, const void* src) {
|
||||
#ifdef __aarch64__
|
||||
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
|
||||
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
|
||||
#else
|
||||
memcpy(dst, src, 8);
|
||||
ZSTD_memcpy(dst, src, 8);
|
||||
#endif
|
||||
}
|
||||
|
||||
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
|
||||
static void ZSTD_copy16(void* dst, const void* src) {
|
||||
#ifdef __aarch64__
|
||||
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
|
||||
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
|
||||
#else
|
||||
memcpy(dst, src, 16);
|
||||
ZSTD_memcpy(dst, src, 16);
|
||||
#endif
|
||||
}
|
||||
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
|
||||
@ -255,13 +269,13 @@ typedef enum {
|
||||
} ZSTD_overlap_e;
|
||||
|
||||
/*! ZSTD_wildcopy() :
|
||||
* Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
|
||||
* Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
|
||||
* @param ovtype controls the overlap detection
|
||||
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
|
||||
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
|
||||
* The src buffer must be before the dst buffer.
|
||||
*/
|
||||
MEM_STATIC FORCE_INLINE_ATTR
|
||||
MEM_STATIC FORCE_INLINE_ATTR
|
||||
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
|
||||
{
|
||||
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
|
||||
@ -284,14 +298,16 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
||||
* one COPY16() in the first call. Then, do two calls per loop since
|
||||
* at that point it is more likely to have a high trip count.
|
||||
*/
|
||||
#ifndef __aarch64__
|
||||
#ifdef __aarch64__
|
||||
do {
|
||||
COPY16(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
#else
|
||||
COPY16(op, ip);
|
||||
if (op >= oend) return;
|
||||
ZSTD_copy16(op, ip);
|
||||
if (16 >= length) return;
|
||||
op += 16;
|
||||
ip += 16;
|
||||
do {
|
||||
COPY16(op, ip);
|
||||
COPY16(op, ip);
|
||||
@ -305,7 +321,7 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
|
||||
{
|
||||
size_t const length = MIN(dstCapacity, srcSize);
|
||||
if (length > 0) {
|
||||
memcpy(dst, src, length);
|
||||
ZSTD_memcpy(dst, src, length);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
@ -320,28 +336,39 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
|
||||
* In which case, resize it down to free some memory */
|
||||
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
|
||||
|
||||
/* Controls whether the input/output buffer is buffered or stable. */
|
||||
typedef enum {
|
||||
ZSTD_bm_buffered = 0, /* Buffer the input/output */
|
||||
ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
|
||||
} ZSTD_bufferMode_e;
|
||||
|
||||
|
||||
/*-*******************************************
|
||||
* Private declarations
|
||||
*********************************************/
|
||||
typedef struct seqDef_s {
|
||||
U32 offset;
|
||||
U32 offset; /* Offset code of the sequence */
|
||||
U16 litLength;
|
||||
U16 matchLength;
|
||||
} seqDef;
|
||||
|
||||
typedef struct {
|
||||
seqDef* sequencesStart;
|
||||
seqDef* sequences;
|
||||
seqDef* sequences; /* ptr to end of sequences */
|
||||
BYTE* litStart;
|
||||
BYTE* lit;
|
||||
BYTE* lit; /* ptr to end of literals */
|
||||
BYTE* llCode;
|
||||
BYTE* mlCode;
|
||||
BYTE* ofCode;
|
||||
size_t maxNbSeq;
|
||||
size_t maxNbLit;
|
||||
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
|
||||
U32 longLengthPos;
|
||||
|
||||
/* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
|
||||
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
|
||||
* the existing value of the litLength or matchLength by 0x10000.
|
||||
*/
|
||||
U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
|
||||
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
|
||||
} seqStore_t;
|
||||
|
||||
typedef struct {
|
||||
@ -384,9 +411,9 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBu
|
||||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
||||
|
||||
/* custom memory allocation functions */
|
||||
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
|
||||
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
|
||||
void ZSTD_free(void* ptr, ZSTD_customMem customMem);
|
||||
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
|
||||
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
|
||||
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
|
||||
|
||||
|
||||
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
|
||||
@ -394,8 +421,12 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
|
||||
assert(val != 0);
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
unsigned long r=0;
|
||||
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
||||
# if STATIC_BMI2 == 1
|
||||
return _lzcnt_u32(val)^31;
|
||||
# else
|
||||
unsigned long r=0;
|
||||
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
||||
return __builtin_clz (val) ^ 31;
|
||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||
|
430
zstd/zstd_lazy.c
430
zstd/zstd_lazy.c
@ -58,11 +58,11 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
|
||||
|
||||
/** ZSTD_insertDUBT1() :
|
||||
* sort one already inserted but unsorted position
|
||||
* assumption : current >= btlow == (current - btmask)
|
||||
* assumption : curr >= btlow == (curr - btmask)
|
||||
* doesn't fail */
|
||||
static void
|
||||
ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
||||
U32 current, const BYTE* inputEnd,
|
||||
U32 curr, const BYTE* inputEnd,
|
||||
U32 nbCompares, U32 btLow,
|
||||
const ZSTD_dictMode_e dictMode)
|
||||
{
|
||||
@ -74,41 +74,41 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const dictBase = ms->window.dictBase;
|
||||
const U32 dictLimit = ms->window.dictLimit;
|
||||
const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
|
||||
const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
|
||||
const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
|
||||
const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
|
||||
const BYTE* const dictEnd = dictBase + dictLimit;
|
||||
const BYTE* const prefixStart = base + dictLimit;
|
||||
const BYTE* match;
|
||||
U32* smallerPtr = bt + 2*(current&btMask);
|
||||
U32* smallerPtr = bt + 2*(curr&btMask);
|
||||
U32* largerPtr = smallerPtr + 1;
|
||||
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
|
||||
U32 dummy32; /* to be nullified at the end */
|
||||
U32 const windowValid = ms->window.lowLimit;
|
||||
U32 const maxDistance = 1U << cParams->windowLog;
|
||||
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
|
||||
U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
|
||||
|
||||
|
||||
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
|
||||
current, dictLimit, windowLow);
|
||||
assert(current >= btLow);
|
||||
curr, dictLimit, windowLow);
|
||||
assert(curr >= btLow);
|
||||
assert(ip < iend); /* condition for ZSTD_count */
|
||||
|
||||
while (nbCompares-- && (matchIndex > windowLow)) {
|
||||
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
||||
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
||||
assert(matchIndex < current);
|
||||
assert(matchIndex < curr);
|
||||
/* note : all candidates are now supposed sorted,
|
||||
* but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
|
||||
* when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
|
||||
|
||||
if ( (dictMode != ZSTD_extDict)
|
||||
|| (matchIndex+matchLength >= dictLimit) /* both in current segment*/
|
||||
|| (current < dictLimit) /* both in extDict */) {
|
||||
|| (curr < dictLimit) /* both in extDict */) {
|
||||
const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
|
||||
|| (matchIndex+matchLength >= dictLimit)) ?
|
||||
base : dictBase;
|
||||
assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
|
||||
|| (current < dictLimit) );
|
||||
|| (curr < dictLimit) );
|
||||
match = mBase + matchIndex;
|
||||
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
|
||||
} else {
|
||||
@ -119,7 +119,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
||||
}
|
||||
|
||||
DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
|
||||
current, matchIndex, (U32)matchLength);
|
||||
curr, matchIndex, (U32)matchLength);
|
||||
|
||||
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
|
||||
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
|
||||
@ -168,7 +168,7 @@ ZSTD_DUBT_findBetterDictMatch (
|
||||
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const prefixStart = base + ms->window.dictLimit;
|
||||
U32 const current = (U32)(ip-base);
|
||||
U32 const curr = (U32)(ip-base);
|
||||
const BYTE* const dictBase = dms->window.base;
|
||||
const BYTE* const dictEnd = dms->window.nextSrc;
|
||||
U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
|
||||
@ -195,10 +195,10 @@ ZSTD_DUBT_findBetterDictMatch (
|
||||
|
||||
if (matchLength > bestLength) {
|
||||
U32 matchIndex = dictMatchIndex + dictIndexDelta;
|
||||
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
|
||||
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
|
||||
DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
|
||||
current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
|
||||
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
|
||||
curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
|
||||
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
|
||||
}
|
||||
if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
|
||||
break; /* drop, to guarantee consistency (miss a little bit of compression) */
|
||||
@ -218,9 +218,9 @@ ZSTD_DUBT_findBetterDictMatch (
|
||||
}
|
||||
|
||||
if (bestLength >= MINMATCH) {
|
||||
U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
|
||||
U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
|
||||
DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
|
||||
current, (U32)bestLength, (U32)*offsetPtr, mIndex);
|
||||
curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
|
||||
}
|
||||
return bestLength;
|
||||
|
||||
@ -241,13 +241,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
||||
U32 matchIndex = hashTable[h];
|
||||
|
||||
const BYTE* const base = ms->window.base;
|
||||
U32 const current = (U32)(ip-base);
|
||||
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
|
||||
U32 const curr = (U32)(ip-base);
|
||||
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
|
||||
|
||||
U32* const bt = ms->chainTable;
|
||||
U32 const btLog = cParams->chainLog - 1;
|
||||
U32 const btMask = (1 << btLog) - 1;
|
||||
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
|
||||
U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
|
||||
U32 const unsortLimit = MAX(btLow, windowLow);
|
||||
|
||||
U32* nextCandidate = bt + 2*(matchIndex&btMask);
|
||||
@ -256,8 +256,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
||||
U32 nbCandidates = nbCompares;
|
||||
U32 previousCandidate = 0;
|
||||
|
||||
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
|
||||
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
|
||||
assert(ip <= iend-8); /* required for h calculation */
|
||||
assert(dictMode != ZSTD_dedicatedDictSearch);
|
||||
|
||||
/* reach end of unsorted candidates list */
|
||||
while ( (matchIndex > unsortLimit)
|
||||
@ -299,14 +300,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
||||
const U32 dictLimit = ms->window.dictLimit;
|
||||
const BYTE* const dictEnd = dictBase + dictLimit;
|
||||
const BYTE* const prefixStart = base + dictLimit;
|
||||
U32* smallerPtr = bt + 2*(current&btMask);
|
||||
U32* largerPtr = bt + 2*(current&btMask) + 1;
|
||||
U32 matchEndIdx = current + 8 + 1;
|
||||
U32* smallerPtr = bt + 2*(curr&btMask);
|
||||
U32* largerPtr = bt + 2*(curr&btMask) + 1;
|
||||
U32 matchEndIdx = curr + 8 + 1;
|
||||
U32 dummy32; /* to be nullified at the end */
|
||||
size_t bestLength = 0;
|
||||
|
||||
matchIndex = hashTable[h];
|
||||
hashTable[h] = current; /* Update Hash Table */
|
||||
hashTable[h] = curr; /* Update Hash Table */
|
||||
|
||||
while (nbCompares-- && (matchIndex > windowLow)) {
|
||||
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
||||
@ -326,8 +327,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
||||
if (matchLength > bestLength) {
|
||||
if (matchLength > matchEndIdx - matchIndex)
|
||||
matchEndIdx = matchIndex + (U32)matchLength;
|
||||
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
|
||||
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
|
||||
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
|
||||
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
|
||||
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
nbCompares = 0; /* in addition to avoiding checking any
|
||||
@ -363,12 +364,12 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
||||
mls, dictMode);
|
||||
}
|
||||
|
||||
assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
|
||||
assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
|
||||
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
||||
if (bestLength >= MINMATCH) {
|
||||
U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
|
||||
U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
|
||||
DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
|
||||
current, (U32)bestLength, (U32)*offsetPtr, mIndex);
|
||||
curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
|
||||
}
|
||||
return bestLength;
|
||||
}
|
||||
@ -446,7 +447,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
|
||||
|
||||
/* Update chains up to ip (excluded)
|
||||
Assumption : always within prefix (i.e. not within extDict) */
|
||||
static U32 ZSTD_insertAndFindFirstIndex_internal(
|
||||
FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
|
||||
ZSTD_matchState_t* ms,
|
||||
const ZSTD_compressionParameters* const cParams,
|
||||
const BYTE* ip, U32 const mls)
|
||||
@ -475,6 +476,121 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
|
||||
return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
|
||||
}
|
||||
|
||||
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
|
||||
{
|
||||
const BYTE* const base = ms->window.base;
|
||||
U32 const target = (U32)(ip - base);
|
||||
U32* const hashTable = ms->hashTable;
|
||||
U32* const chainTable = ms->chainTable;
|
||||
U32 const chainSize = 1 << ms->cParams.chainLog;
|
||||
U32 idx = ms->nextToUpdate;
|
||||
U32 const minChain = chainSize < target ? target - chainSize : idx;
|
||||
U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
|
||||
U32 const cacheSize = bucketSize - 1;
|
||||
U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
|
||||
U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
|
||||
|
||||
/* We know the hashtable is oversized by a factor of `bucketSize`.
|
||||
* We are going to temporarily pretend `bucketSize == 1`, keeping only a
|
||||
* single entry. We will use the rest of the space to construct a temporary
|
||||
* chaintable.
|
||||
*/
|
||||
U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
|
||||
U32* const tmpHashTable = hashTable;
|
||||
U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
|
||||
U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
|
||||
U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
|
||||
|
||||
U32 hashIdx;
|
||||
|
||||
assert(ms->cParams.chainLog <= 24);
|
||||
assert(ms->cParams.hashLog >= ms->cParams.chainLog);
|
||||
assert(idx != 0);
|
||||
assert(tmpMinChain <= minChain);
|
||||
|
||||
/* fill conventional hash table and conventional chain table */
|
||||
for ( ; idx < target; idx++) {
|
||||
U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
|
||||
if (idx >= tmpMinChain) {
|
||||
tmpChainTable[idx - tmpMinChain] = hashTable[h];
|
||||
}
|
||||
tmpHashTable[h] = idx;
|
||||
}
|
||||
|
||||
/* sort chains into ddss chain table */
|
||||
{
|
||||
U32 chainPos = 0;
|
||||
for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
|
||||
U32 count;
|
||||
U32 countBeyondMinChain = 0;
|
||||
U32 i = tmpHashTable[hashIdx];
|
||||
for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
|
||||
/* skip through the chain to the first position that won't be
|
||||
* in the hash cache bucket */
|
||||
if (i < minChain) {
|
||||
countBeyondMinChain++;
|
||||
}
|
||||
i = tmpChainTable[i - tmpMinChain];
|
||||
}
|
||||
if (count == cacheSize) {
|
||||
for (count = 0; count < chainLimit;) {
|
||||
if (i < minChain) {
|
||||
if (!i || countBeyondMinChain++ > cacheSize) {
|
||||
/* only allow pulling `cacheSize` number of entries
|
||||
* into the cache or chainTable beyond `minChain`,
|
||||
* to replace the entries pulled out of the
|
||||
* chainTable into the cache. This lets us reach
|
||||
* back further without increasing the total number
|
||||
* of entries in the chainTable, guaranteeing the
|
||||
* DDSS chain table will fit into the space
|
||||
* allocated for the regular one. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
chainTable[chainPos++] = i;
|
||||
count++;
|
||||
if (i < tmpMinChain) {
|
||||
break;
|
||||
}
|
||||
i = tmpChainTable[i - tmpMinChain];
|
||||
}
|
||||
} else {
|
||||
count = 0;
|
||||
}
|
||||
if (count) {
|
||||
tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
|
||||
} else {
|
||||
tmpHashTable[hashIdx] = 0;
|
||||
}
|
||||
}
|
||||
assert(chainPos <= chainSize); /* I believe this is guaranteed... */
|
||||
}
|
||||
|
||||
/* move chain pointers into the last entry of each hash bucket */
|
||||
for (hashIdx = (1 << hashLog); hashIdx; ) {
|
||||
U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
|
||||
U32 const chainPackedPointer = tmpHashTable[hashIdx];
|
||||
U32 i;
|
||||
for (i = 0; i < cacheSize; i++) {
|
||||
hashTable[bucketIdx + i] = 0;
|
||||
}
|
||||
hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
|
||||
}
|
||||
|
||||
/* fill the buckets of the hash table */
|
||||
for (idx = ms->nextToUpdate; idx < target; idx++) {
|
||||
U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
|
||||
<< ZSTD_LAZY_DDSS_BUCKET_LOG;
|
||||
U32 i;
|
||||
/* Shift hash cache down 1. */
|
||||
for (i = cacheSize - 1; i; i--)
|
||||
hashTable[h + i] = hashTable[h + i - 1];
|
||||
hashTable[h] = idx;
|
||||
}
|
||||
|
||||
ms->nextToUpdate = target;
|
||||
}
|
||||
|
||||
|
||||
/* inlining is important to hardwire a hot branch (template emulation) */
|
||||
FORCE_INLINE_TEMPLATE
|
||||
@ -493,20 +609,33 @@ size_t ZSTD_HcFindBestMatch_generic (
|
||||
const U32 dictLimit = ms->window.dictLimit;
|
||||
const BYTE* const prefixStart = base + dictLimit;
|
||||
const BYTE* const dictEnd = dictBase + dictLimit;
|
||||
const U32 current = (U32)(ip-base);
|
||||
const U32 curr = (U32)(ip-base);
|
||||
const U32 maxDistance = 1U << cParams->windowLog;
|
||||
const U32 lowestValid = ms->window.lowLimit;
|
||||
const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
|
||||
const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
||||
const U32 isDictionary = (ms->loadedDictEnd != 0);
|
||||
const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
|
||||
const U32 minChain = current > chainSize ? current - chainSize : 0;
|
||||
const U32 minChain = curr > chainSize ? curr - chainSize : 0;
|
||||
U32 nbAttempts = 1U << cParams->searchLog;
|
||||
size_t ml=4-1;
|
||||
|
||||
/* HC4 match finder */
|
||||
U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
|
||||
? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
|
||||
const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
|
||||
? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
|
||||
|
||||
for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
|
||||
U32 matchIndex;
|
||||
|
||||
if (dictMode == ZSTD_dedicatedDictSearch) {
|
||||
const U32* entry = &dms->hashTable[ddsIdx];
|
||||
PREFETCH_L1(entry);
|
||||
}
|
||||
|
||||
/* HC4 match finder */
|
||||
matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
|
||||
|
||||
for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
|
||||
size_t currentMl=0;
|
||||
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
|
||||
const BYTE* const match = base + matchIndex;
|
||||
@ -523,7 +652,7 @@ size_t ZSTD_HcFindBestMatch_generic (
|
||||
/* save best solution */
|
||||
if (currentMl > ml) {
|
||||
ml = currentMl;
|
||||
*offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
|
||||
*offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
|
||||
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
|
||||
}
|
||||
|
||||
@ -531,8 +660,92 @@ size_t ZSTD_HcFindBestMatch_generic (
|
||||
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
|
||||
}
|
||||
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
if (dictMode == ZSTD_dedicatedDictSearch) {
|
||||
const U32 ddsLowestIndex = dms->window.dictLimit;
|
||||
const BYTE* const ddsBase = dms->window.base;
|
||||
const BYTE* const ddsEnd = dms->window.nextSrc;
|
||||
const U32 ddsSize = (U32)(ddsEnd - ddsBase);
|
||||
const U32 ddsIndexDelta = dictLimit - ddsSize;
|
||||
const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
|
||||
const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
|
||||
U32 ddsAttempt;
|
||||
|
||||
for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
|
||||
PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
|
||||
}
|
||||
|
||||
{
|
||||
U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
|
||||
U32 const chainIndex = chainPackedPointer >> 8;
|
||||
|
||||
PREFETCH_L1(&dms->chainTable[chainIndex]);
|
||||
}
|
||||
|
||||
for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
|
||||
size_t currentMl=0;
|
||||
const BYTE* match;
|
||||
matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
|
||||
match = ddsBase + matchIndex;
|
||||
|
||||
if (!matchIndex) {
|
||||
return ml;
|
||||
}
|
||||
|
||||
/* guaranteed by table construction */
|
||||
(void)ddsLowestIndex;
|
||||
assert(matchIndex >= ddsLowestIndex);
|
||||
assert(match+4 <= ddsEnd);
|
||||
if (MEM_read32(match) == MEM_read32(ip)) {
|
||||
/* assumption : matchIndex <= dictLimit-4 (by table construction) */
|
||||
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
|
||||
}
|
||||
|
||||
/* save best solution */
|
||||
if (currentMl > ml) {
|
||||
ml = currentMl;
|
||||
*offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
|
||||
if (ip+currentMl == iLimit) {
|
||||
/* best possible, avoids read overflow on next attempt */
|
||||
return ml;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
|
||||
U32 chainIndex = chainPackedPointer >> 8;
|
||||
U32 const chainLength = chainPackedPointer & 0xFF;
|
||||
U32 const chainAttempts = nbAttempts - ddsAttempt;
|
||||
U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
|
||||
U32 chainAttempt;
|
||||
|
||||
for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
|
||||
PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
|
||||
}
|
||||
|
||||
for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
|
||||
size_t currentMl=0;
|
||||
const BYTE* match;
|
||||
matchIndex = dms->chainTable[chainIndex];
|
||||
match = ddsBase + matchIndex;
|
||||
|
||||
/* guaranteed by table construction */
|
||||
assert(matchIndex >= ddsLowestIndex);
|
||||
assert(match+4 <= ddsEnd);
|
||||
if (MEM_read32(match) == MEM_read32(ip)) {
|
||||
/* assumption : matchIndex <= dictLimit-4 (by table construction) */
|
||||
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
|
||||
}
|
||||
|
||||
/* save best solution */
|
||||
if (currentMl > ml) {
|
||||
ml = currentMl;
|
||||
*offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
|
||||
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
const U32* const dmsChainTable = dms->chainTable;
|
||||
const U32 dmsChainSize = (1 << dms->cParams.chainLog);
|
||||
const U32 dmsChainMask = dmsChainSize - 1;
|
||||
@ -545,7 +758,7 @@ size_t ZSTD_HcFindBestMatch_generic (
|
||||
|
||||
matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
|
||||
|
||||
for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
|
||||
for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
|
||||
size_t currentMl=0;
|
||||
const BYTE* const match = dmsBase + matchIndex;
|
||||
assert(match+4 <= dmsEnd);
|
||||
@ -555,11 +768,12 @@ size_t ZSTD_HcFindBestMatch_generic (
|
||||
/* save best solution */
|
||||
if (currentMl > ml) {
|
||||
ml = currentMl;
|
||||
*offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
|
||||
*offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
|
||||
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
|
||||
}
|
||||
|
||||
if (matchIndex <= dmsMinChain) break;
|
||||
|
||||
matchIndex = dmsChainTable[matchIndex & dmsChainMask];
|
||||
}
|
||||
}
|
||||
@ -600,6 +814,22 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
|
||||
}
|
||||
|
||||
|
||||
static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
|
||||
ZSTD_matchState_t* ms,
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
size_t* offsetPtr)
|
||||
{
|
||||
switch(ms->cParams.minMatch)
|
||||
{
|
||||
default : /* includes case 3 */
|
||||
case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
|
||||
case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
|
||||
case 7 :
|
||||
case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
|
||||
ZSTD_matchState_t* ms,
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
@ -641,39 +871,62 @@ ZSTD_compressBlock_lazy_generic(
|
||||
typedef size_t (*searchMax_f)(
|
||||
ZSTD_matchState_t* ms,
|
||||
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
|
||||
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
|
||||
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
|
||||
: ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
|
||||
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
|
||||
: ZSTD_HcFindBestMatch_selectMLS);
|
||||
|
||||
/**
|
||||
* This table is indexed first by the four ZSTD_dictMode_e values, and then
|
||||
* by the two searchMethod_e values. NULLs are placed for configurations
|
||||
* that should never occur (extDict modes go to the other implementation
|
||||
* below and there is no DDSS for binary tree search yet).
|
||||
*/
|
||||
const searchMax_f searchFuncs[4][2] = {
|
||||
{
|
||||
ZSTD_HcFindBestMatch_selectMLS,
|
||||
ZSTD_BtFindBestMatch_selectMLS
|
||||
},
|
||||
{
|
||||
NULL,
|
||||
NULL
|
||||
},
|
||||
{
|
||||
ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
|
||||
ZSTD_BtFindBestMatch_dictMatchState_selectMLS
|
||||
},
|
||||
{
|
||||
ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
|
||||
NULL
|
||||
}
|
||||
};
|
||||
|
||||
searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
|
||||
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
|
||||
|
||||
const int isDMS = dictMode == ZSTD_dictMatchState;
|
||||
const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
|
||||
const int isDxS = isDMS || isDDS;
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.dictLimit : 0;
|
||||
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.base : NULL;
|
||||
const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
|
||||
dictBase + dictLowestIndex : NULL;
|
||||
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.nextSrc : NULL;
|
||||
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
||||
const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
|
||||
const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
|
||||
const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
|
||||
const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
|
||||
const U32 dictIndexDelta = isDxS ?
|
||||
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
||||
0;
|
||||
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
|
||||
|
||||
assert(searchMax != NULL);
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
|
||||
|
||||
/* init */
|
||||
ip += (dictAndPrefixLength == 0);
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
|
||||
U32 const maxRep = current - windowLow;
|
||||
U32 const curr = (U32)(ip - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
|
||||
U32 const maxRep = curr - windowLow;
|
||||
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDxS) {
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
@ -693,9 +946,9 @@ ZSTD_compressBlock_lazy_generic(
|
||||
const BYTE* start=ip+1;
|
||||
|
||||
/* check repCode */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDxS) {
|
||||
const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
|
||||
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
||||
const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
|
||||
&& repIndex < prefixLowestIndex) ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
base + repIndex;
|
||||
@ -736,7 +989,7 @@ ZSTD_compressBlock_lazy_generic(
|
||||
if ((mlRep >= 4) && (gain2 > gain1))
|
||||
matchLength = mlRep, offset = 0, start = ip;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDxS) {
|
||||
const U32 repIndex = (U32)(ip - base) - offset_1;
|
||||
const BYTE* repMatch = repIndex < prefixLowestIndex ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
@ -771,7 +1024,7 @@ ZSTD_compressBlock_lazy_generic(
|
||||
if ((mlRep >= 4) && (gain2 > gain1))
|
||||
matchLength = mlRep, offset = 0, start = ip;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDxS) {
|
||||
const U32 repIndex = (U32)(ip - base) - offset_1;
|
||||
const BYTE* repMatch = repIndex < prefixLowestIndex ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
@ -809,7 +1062,7 @@ ZSTD_compressBlock_lazy_generic(
|
||||
&& (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
|
||||
{ start--; matchLength++; }
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDxS) {
|
||||
U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
|
||||
const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
|
||||
const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
|
||||
@ -825,12 +1078,11 @@ _storeSequence:
|
||||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDxS) {
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex = current2 - offset_2;
|
||||
const BYTE* repMatch = dictMode == ZSTD_dictMatchState
|
||||
&& repIndex < prefixLowestIndex ?
|
||||
const BYTE* repMatch = repIndex < prefixLowestIndex ?
|
||||
dictBase - dictIndexDelta + repIndex :
|
||||
base + repIndex;
|
||||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
|
||||
@ -925,6 +1177,28 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
|
||||
}
|
||||
|
||||
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
|
||||
}
|
||||
|
||||
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_lazy_extDict_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore,
|
||||
@ -968,11 +1242,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
||||
size_t matchLength=0;
|
||||
size_t offset=0;
|
||||
const BYTE* start=ip+1;
|
||||
U32 current = (U32)(ip-base);
|
||||
U32 curr = (U32)(ip-base);
|
||||
|
||||
/* check repCode */
|
||||
{ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
|
||||
const U32 repIndex = (U32)(current+1 - offset_1);
|
||||
{ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
|
||||
const U32 repIndex = (U32)(curr+1 - offset_1);
|
||||
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
||||
@ -999,11 +1273,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
||||
if (depth>=1)
|
||||
while (ip<ilimit) {
|
||||
ip ++;
|
||||
current++;
|
||||
curr++;
|
||||
/* check repCode */
|
||||
if (offset) {
|
||||
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
|
||||
const U32 repIndex = (U32)(current - offset_1);
|
||||
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
|
||||
const U32 repIndex = (U32)(curr - offset_1);
|
||||
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
||||
@ -1030,11 +1304,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
||||
/* let's find an even better one */
|
||||
if ((depth==2) && (ip<ilimit)) {
|
||||
ip ++;
|
||||
current++;
|
||||
curr++;
|
||||
/* check repCode */
|
||||
if (offset) {
|
||||
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
|
||||
const U32 repIndex = (U32)(current - offset_1);
|
||||
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
|
||||
const U32 repIndex = (U32)(curr - offset_1);
|
||||
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
||||
|
@ -17,8 +17,18 @@ extern "C" {
|
||||
|
||||
#include "zstd_compress_internal.h"
|
||||
|
||||
/**
|
||||
* Dedicated Dictionary Search Structure bucket log. In the
|
||||
* ZSTD_dedicatedDictSearch mode, the hashTable has
|
||||
* 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
|
||||
* one.
|
||||
*/
|
||||
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
|
||||
|
||||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
||||
|
||||
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
|
||||
|
||||
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
|
||||
|
||||
size_t ZSTD_compressBlock_btlazy2(
|
||||
@ -47,6 +57,16 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_greedy_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
@ -27,13 +27,6 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
|
||||
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
|
||||
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
|
||||
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
|
||||
if (cParams->strategy >= ZSTD_btopt) {
|
||||
/* Get out of the way of the optimal parser */
|
||||
U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
|
||||
assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
|
||||
assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
|
||||
params->minMatchLength = minMatch;
|
||||
}
|
||||
if (params->hashLog == 0) {
|
||||
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
|
||||
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
|
||||
@ -150,10 +143,10 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
|
||||
* We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
|
||||
static size_t ZSTD_ldm_countBackwardsMatch(
|
||||
const BYTE* pIn, const BYTE* pAnchor,
|
||||
const BYTE* pMatch, const BYTE* pBase)
|
||||
const BYTE* pMatch, const BYTE* pMatchBase)
|
||||
{
|
||||
size_t matchLength = 0;
|
||||
while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
|
||||
while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
|
||||
pIn--;
|
||||
pMatch--;
|
||||
matchLength++;
|
||||
@ -161,6 +154,27 @@ static size_t ZSTD_ldm_countBackwardsMatch(
|
||||
return matchLength;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_countBackwardsMatch_2segments() :
|
||||
* Returns the number of bytes that match backwards from pMatch,
|
||||
* even with the backwards match spanning 2 different segments.
|
||||
*
|
||||
* On reaching `pMatchBase`, start counting from mEnd */
|
||||
static size_t ZSTD_ldm_countBackwardsMatch_2segments(
|
||||
const BYTE* pIn, const BYTE* pAnchor,
|
||||
const BYTE* pMatch, const BYTE* pMatchBase,
|
||||
const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
|
||||
{
|
||||
size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
|
||||
if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
|
||||
/* If backwards match is entirely in the extDict or prefix, immediately return */
|
||||
return matchLength;
|
||||
}
|
||||
DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
|
||||
matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
|
||||
DEBUGLOG(7, "final backwards match length = %zu", matchLength);
|
||||
return matchLength;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_fillFastTables() :
|
||||
*
|
||||
* Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
|
||||
@ -246,10 +260,10 @@ void ZSTD_ldm_fillHashTable(
|
||||
* (after a long match, only update tables a limited amount). */
|
||||
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
|
||||
{
|
||||
U32 const current = (U32)(anchor - ms->window.base);
|
||||
if (current > ms->nextToUpdate + 1024) {
|
||||
U32 const curr = (U32)(anchor - ms->window.base);
|
||||
if (curr > ms->nextToUpdate + 1024) {
|
||||
ms->nextToUpdate =
|
||||
current - MIN(512, current - ms->nextToUpdate - 1024);
|
||||
curr - MIN(512, curr - ms->nextToUpdate - 1024);
|
||||
}
|
||||
}
|
||||
|
||||
@ -286,7 +300,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
|
||||
while (ip <= ilimit) {
|
||||
size_t mLength;
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 const curr = (U32)(ip - base);
|
||||
size_t forwardMatchLength = 0, backwardMatchLength = 0;
|
||||
ldmEntry_t* bestEntry = NULL;
|
||||
if (ip != istart) {
|
||||
@ -336,8 +350,9 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
continue;
|
||||
}
|
||||
curBackwardMatchLength =
|
||||
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
|
||||
lowMatchPtr);
|
||||
ZSTD_ldm_countBackwardsMatch_2segments(ip, anchor,
|
||||
pMatch, lowMatchPtr,
|
||||
dictStart, dictEnd);
|
||||
curTotalMatchLength = curForwardMatchLength +
|
||||
curBackwardMatchLength;
|
||||
} else { /* !extDict */
|
||||
@ -365,7 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
/* No match found -- continue searching */
|
||||
if (bestEntry == NULL) {
|
||||
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
|
||||
hBits, current,
|
||||
hBits, curr,
|
||||
*params);
|
||||
ip++;
|
||||
continue;
|
||||
@ -377,11 +392,11 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
|
||||
{
|
||||
/* Store the sequence:
|
||||
* ip = current - backwardMatchLength
|
||||
* ip = curr - backwardMatchLength
|
||||
* The match is at (bestEntry->offset - backwardMatchLength)
|
||||
*/
|
||||
U32 const matchIndex = bestEntry->offset;
|
||||
U32 const offset = current - matchIndex;
|
||||
U32 const offset = curr - matchIndex;
|
||||
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
|
||||
|
||||
/* Out of sequence storage */
|
||||
@ -562,6 +577,23 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
|
||||
return sequence;
|
||||
}
|
||||
|
||||
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
|
||||
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
|
||||
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
|
||||
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
|
||||
if (currPos >= currSeq.litLength + currSeq.matchLength) {
|
||||
currPos -= currSeq.litLength + currSeq.matchLength;
|
||||
rawSeqStore->pos++;
|
||||
} else {
|
||||
rawSeqStore->posInSequence = currPos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
|
||||
rawSeqStore->posInSequence = 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
@ -577,6 +609,15 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
BYTE const* ip = istart;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
|
||||
/* If using opt parser, use LDMs only as candidates rather than always accepting them */
|
||||
if (cParams->strategy >= ZSTD_btopt) {
|
||||
size_t lastLLSize;
|
||||
ms->ldmSeqStore = rawSeqStore;
|
||||
lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
|
||||
ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
|
||||
return lastLLSize;
|
||||
}
|
||||
|
||||
assert(rawSeqStore->pos <= rawSeqStore->size);
|
||||
assert(rawSeqStore->size <= rawSeqStore->capacity);
|
||||
/* Loop through each sequence and apply the block compressor to the lits */
|
||||
|
@ -78,6 +78,12 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
|
||||
U32 const minMatch);
|
||||
|
||||
/* ZSTD_ldm_skipRawSeqStoreBytes():
|
||||
* Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
|
||||
* Not to be used in conjunction with ZSTD_ldm_skipSequences().
|
||||
* Must be called for data with is not passed to ZSTD_ldm_blockCompress().
|
||||
*/
|
||||
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
|
||||
|
||||
/** ZSTD_ldm_getTableSize() :
|
||||
* Estimate the space needed for long distance matching tables or 0 if LDM is
|
||||
|
235
zstd/zstd_opt.c
235
zstd/zstd_opt.c
@ -386,32 +386,32 @@ static U32 ZSTD_insertBt1(
|
||||
const BYTE* const dictEnd = dictBase + dictLimit;
|
||||
const BYTE* const prefixStart = base + dictLimit;
|
||||
const BYTE* match;
|
||||
const U32 current = (U32)(ip-base);
|
||||
const U32 btLow = btMask >= current ? 0 : current - btMask;
|
||||
U32* smallerPtr = bt + 2*(current&btMask);
|
||||
const U32 curr = (U32)(ip-base);
|
||||
const U32 btLow = btMask >= curr ? 0 : curr - btMask;
|
||||
U32* smallerPtr = bt + 2*(curr&btMask);
|
||||
U32* largerPtr = smallerPtr + 1;
|
||||
U32 dummy32; /* to be nullified at the end */
|
||||
U32 const windowLow = ms->window.lowLimit;
|
||||
U32 matchEndIdx = current+8+1;
|
||||
U32 matchEndIdx = curr+8+1;
|
||||
size_t bestLength = 8;
|
||||
U32 nbCompares = 1U << cParams->searchLog;
|
||||
#ifdef ZSTD_C_PREDICT
|
||||
U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
|
||||
U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
|
||||
U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
|
||||
U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
|
||||
predictedSmall += (predictedSmall>0);
|
||||
predictedLarge += (predictedLarge>0);
|
||||
#endif /* ZSTD_C_PREDICT */
|
||||
|
||||
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
|
||||
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
|
||||
|
||||
assert(ip <= iend-8); /* required for h calculation */
|
||||
hashTable[h] = current; /* Update Hash Table */
|
||||
hashTable[h] = curr; /* Update Hash Table */
|
||||
|
||||
assert(windowLow > 0);
|
||||
while (nbCompares-- && (matchIndex >= windowLow)) {
|
||||
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
||||
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
||||
assert(matchIndex < current);
|
||||
assert(matchIndex < curr);
|
||||
|
||||
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
|
||||
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
|
||||
@ -474,8 +474,8 @@ static U32 ZSTD_insertBt1(
|
||||
*smallerPtr = *largerPtr = 0;
|
||||
{ U32 positions = 0;
|
||||
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
|
||||
assert(matchEndIdx > current + 8);
|
||||
return MAX(positions, matchEndIdx - (current + 8));
|
||||
assert(matchEndIdx > curr + 8);
|
||||
return MAX(positions, matchEndIdx - (curr + 8));
|
||||
}
|
||||
}
|
||||
|
||||
@ -519,7 +519,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
||||
const BYTE* const base = ms->window.base;
|
||||
U32 const current = (U32)(ip-base);
|
||||
U32 const curr = (U32)(ip-base);
|
||||
U32 const hashLog = cParams->hashLog;
|
||||
U32 const minMatch = (mls==3) ? 3 : 4;
|
||||
U32* const hashTable = ms->hashTable;
|
||||
@ -533,12 +533,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
||||
U32 const dictLimit = ms->window.dictLimit;
|
||||
const BYTE* const dictEnd = dictBase + dictLimit;
|
||||
const BYTE* const prefixStart = base + dictLimit;
|
||||
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
|
||||
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
|
||||
U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
|
||||
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
|
||||
U32 const matchLow = windowLow ? windowLow : 1;
|
||||
U32* smallerPtr = bt + 2*(current&btMask);
|
||||
U32* largerPtr = bt + 2*(current&btMask) + 1;
|
||||
U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
|
||||
U32* smallerPtr = bt + 2*(curr&btMask);
|
||||
U32* largerPtr = bt + 2*(curr&btMask) + 1;
|
||||
U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
|
||||
U32 dummy32; /* to be nullified at the end */
|
||||
U32 mnum = 0;
|
||||
U32 nbCompares = 1U << cParams->searchLog;
|
||||
@ -557,7 +557,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
||||
U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
|
||||
|
||||
size_t bestLength = lengthToBeat-1;
|
||||
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
|
||||
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
|
||||
|
||||
/* check repCode */
|
||||
assert(ll0 <= 1); /* necessarily 1 or 0 */
|
||||
@ -565,29 +565,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
||||
U32 repCode;
|
||||
for (repCode = ll0; repCode < lastR; repCode++) {
|
||||
U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
||||
U32 const repIndex = current - repOffset;
|
||||
U32 const repIndex = curr - repOffset;
|
||||
U32 repLen = 0;
|
||||
assert(current >= dictLimit);
|
||||
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
|
||||
assert(curr >= dictLimit);
|
||||
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
|
||||
/* We must validate the repcode offset because when we're using a dictionary the
|
||||
* valid offset range shrinks when the dictionary goes out of bounds.
|
||||
*/
|
||||
if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
|
||||
repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
|
||||
}
|
||||
} else { /* repIndex < dictLimit || repIndex >= current */
|
||||
} else { /* repIndex < dictLimit || repIndex >= curr */
|
||||
const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
|
||||
dmsBase + repIndex - dmsIndexDelta :
|
||||
dictBase + repIndex;
|
||||
assert(current >= windowLow);
|
||||
assert(curr >= windowLow);
|
||||
if ( dictMode == ZSTD_extDict
|
||||
&& ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
|
||||
&& ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
|
||||
& (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
|
||||
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
||||
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState
|
||||
&& ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
|
||||
&& ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
|
||||
& ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
|
||||
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
||||
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
|
||||
@ -609,7 +609,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
||||
if ((mls == 3) /*static*/ && (bestLength < mls)) {
|
||||
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
|
||||
if ((matchIndex3 >= matchLow)
|
||||
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
|
||||
& (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
|
||||
size_t mlen;
|
||||
if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
|
||||
const BYTE* const match = base + matchIndex3;
|
||||
@ -624,26 +624,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
||||
DEBUGLOG(8, "found small match with hlog3, of length %u",
|
||||
(U32)mlen);
|
||||
bestLength = mlen;
|
||||
assert(current > matchIndex3);
|
||||
assert(curr > matchIndex3);
|
||||
assert(mnum==0); /* no prior solution */
|
||||
matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
|
||||
matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
|
||||
matches[0].len = (U32)mlen;
|
||||
mnum = 1;
|
||||
if ( (mlen > sufficient_len) |
|
||||
(ip+mlen == iLimit) ) { /* best possible length */
|
||||
ms->nextToUpdate = current+1; /* skip insertion */
|
||||
ms->nextToUpdate = curr+1; /* skip insertion */
|
||||
return 1;
|
||||
} } }
|
||||
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
|
||||
}
|
||||
|
||||
hashTable[h] = current; /* Update Hash Table */
|
||||
hashTable[h] = curr; /* Update Hash Table */
|
||||
|
||||
while (nbCompares-- && (matchIndex >= matchLow)) {
|
||||
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
||||
const BYTE* match;
|
||||
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
||||
assert(current > matchIndex);
|
||||
assert(curr > matchIndex);
|
||||
|
||||
if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
|
||||
assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
|
||||
@ -660,12 +660,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
||||
|
||||
if (matchLength > bestLength) {
|
||||
DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
|
||||
(U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
|
||||
(U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
|
||||
assert(matchEndIdx > matchIndex);
|
||||
if (matchLength > matchEndIdx - matchIndex)
|
||||
matchEndIdx = matchIndex + (U32)matchLength;
|
||||
bestLength = matchLength;
|
||||
matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
|
||||
matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
|
||||
matches[mnum].len = (U32)matchLength;
|
||||
mnum++;
|
||||
if ( (matchLength > ZSTD_OPT_NUM)
|
||||
@ -708,11 +708,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
||||
if (matchLength > bestLength) {
|
||||
matchIndex = dictMatchIndex + dmsIndexDelta;
|
||||
DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
|
||||
(U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
|
||||
(U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
|
||||
if (matchLength > matchEndIdx - matchIndex)
|
||||
matchEndIdx = matchIndex + (U32)matchLength;
|
||||
bestLength = matchLength;
|
||||
matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
|
||||
matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
|
||||
matches[mnum].len = (U32)matchLength;
|
||||
mnum++;
|
||||
if ( (matchLength > ZSTD_OPT_NUM)
|
||||
@ -733,7 +733,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
||||
}
|
||||
}
|
||||
|
||||
assert(matchEndIdx > current+8);
|
||||
assert(matchEndIdx > curr+8);
|
||||
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
||||
return mnum;
|
||||
}
|
||||
@ -764,6 +764,140 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
|
||||
}
|
||||
}
|
||||
|
||||
/*************************
|
||||
* LDM helper functions *
|
||||
*************************/
|
||||
|
||||
/* Struct containing info needed to make decision about ldm inclusion */
|
||||
typedef struct {
|
||||
rawSeqStore_t seqStore; /* External match candidates store for this block */
|
||||
U32 startPosInBlock; /* Start position of the current match candidate */
|
||||
U32 endPosInBlock; /* End position of the current match candidate */
|
||||
U32 offset; /* Offset of the match candidate */
|
||||
} ZSTD_optLdm_t;
|
||||
|
||||
/* ZSTD_optLdm_skipRawSeqStoreBytes():
|
||||
* Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
|
||||
*/
|
||||
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
|
||||
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
|
||||
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
|
||||
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
|
||||
if (currPos >= currSeq.litLength + currSeq.matchLength) {
|
||||
currPos -= currSeq.litLength + currSeq.matchLength;
|
||||
rawSeqStore->pos++;
|
||||
} else {
|
||||
rawSeqStore->posInSequence = currPos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
|
||||
rawSeqStore->posInSequence = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
|
||||
* Calculates the beginning and end of the next match in the current block.
|
||||
* Updates 'pos' and 'posInSequence' of the ldmSeqStore.
|
||||
*/
|
||||
static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
|
||||
U32 blockBytesRemaining) {
|
||||
rawSeq currSeq;
|
||||
U32 currBlockEndPos;
|
||||
U32 literalsBytesRemaining;
|
||||
U32 matchBytesRemaining;
|
||||
|
||||
/* Setting match end position to MAX to ensure we never use an LDM during this block */
|
||||
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
|
||||
optLdm->startPosInBlock = UINT_MAX;
|
||||
optLdm->endPosInBlock = UINT_MAX;
|
||||
return;
|
||||
}
|
||||
/* Calculate appropriate bytes left in matchLength and litLength after adjusting
|
||||
based on ldmSeqStore->posInSequence */
|
||||
currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
|
||||
assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
|
||||
currBlockEndPos = currPosInBlock + blockBytesRemaining;
|
||||
literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
|
||||
currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
|
||||
0;
|
||||
matchBytesRemaining = (literalsBytesRemaining == 0) ?
|
||||
currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
|
||||
currSeq.matchLength;
|
||||
|
||||
/* If there are more literal bytes than bytes remaining in block, no ldm is possible */
|
||||
if (literalsBytesRemaining >= blockBytesRemaining) {
|
||||
optLdm->startPosInBlock = UINT_MAX;
|
||||
optLdm->endPosInBlock = UINT_MAX;
|
||||
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Matches may be < MINMATCH by this process. In that case, we will reject them
|
||||
when we are deciding whether or not to add the ldm */
|
||||
optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
|
||||
optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
|
||||
optLdm->offset = currSeq.offset;
|
||||
|
||||
if (optLdm->endPosInBlock > currBlockEndPos) {
|
||||
/* Match ends after the block ends, we can't use the whole match */
|
||||
optLdm->endPosInBlock = currBlockEndPos;
|
||||
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
|
||||
} else {
|
||||
/* Consume nb of bytes equal to size of sequence left */
|
||||
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
|
||||
}
|
||||
}
|
||||
|
||||
/* ZSTD_optLdm_maybeAddMatch():
|
||||
* Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
|
||||
* and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
|
||||
*/
|
||||
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
||||
ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
|
||||
U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
|
||||
/* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
|
||||
U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
|
||||
U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
|
||||
|
||||
/* Ensure that current block position is not outside of the match */
|
||||
if (currPosInBlock < optLdm->startPosInBlock
|
||||
|| currPosInBlock >= optLdm->endPosInBlock
|
||||
|| candidateMatchLength < MINMATCH) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
|
||||
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
|
||||
candidateOffCode, candidateMatchLength, currPosInBlock);
|
||||
matches[*nbMatches].len = candidateMatchLength;
|
||||
matches[*nbMatches].off = candidateOffCode;
|
||||
(*nbMatches)++;
|
||||
}
|
||||
}
|
||||
|
||||
/* ZSTD_optLdm_processMatchCandidate():
|
||||
* Wrapper function to update ldm seq store and call ldm functions as necessary.
|
||||
*/
|
||||
static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
|
||||
U32 currPosInBlock, U32 remainingBytes) {
|
||||
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (currPosInBlock >= optLdm->endPosInBlock) {
|
||||
if (currPosInBlock > optLdm->endPosInBlock) {
|
||||
/* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
|
||||
* at the end of a match from the ldm seq store, and will often be some bytes
|
||||
* over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
|
||||
*/
|
||||
U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
|
||||
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
|
||||
}
|
||||
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
|
||||
}
|
||||
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
|
||||
}
|
||||
|
||||
/*-*******************************
|
||||
* Optimal parser
|
||||
@ -817,6 +951,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
||||
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
|
||||
ZSTD_match_t* const matches = optStatePtr->matchTable;
|
||||
ZSTD_optimal_t lastSequence;
|
||||
ZSTD_optLdm_t optLdm;
|
||||
|
||||
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
|
||||
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
|
||||
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
|
||||
|
||||
/* init */
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
|
||||
@ -832,7 +971,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
||||
/* find first match */
|
||||
{ U32 const litlen = (U32)(ip - anchor);
|
||||
U32 const ll0 = !litlen;
|
||||
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
|
||||
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
|
||||
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
||||
(U32)(ip-istart), (U32)(iend - ip));
|
||||
if (!nbMatches) { ip++; continue; }
|
||||
|
||||
/* initialize opt[0] */
|
||||
@ -925,9 +1066,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
||||
if (opt[cur].mlen != 0) {
|
||||
U32 const prev = cur - opt[cur].mlen;
|
||||
repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
|
||||
memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
|
||||
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
|
||||
} else {
|
||||
memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
|
||||
ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
|
||||
}
|
||||
|
||||
/* last match must start at a minimum distance of 8 from oend */
|
||||
@ -945,8 +1086,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
||||
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
|
||||
U32 const previousPrice = opt[cur].price;
|
||||
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
||||
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
|
||||
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
|
||||
U32 matchNb;
|
||||
|
||||
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
||||
(U32)(inr-istart), (U32)(iend-inr));
|
||||
|
||||
if (!nbMatches) {
|
||||
DEBUGLOG(7, "rPos:%u : no match found", cur);
|
||||
continue;
|
||||
@ -1010,9 +1155,9 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
||||
*/
|
||||
if (lastSequence.mlen != 0) {
|
||||
repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
|
||||
memcpy(rep, &reps, sizeof(reps));
|
||||
ZSTD_memcpy(rep, &reps, sizeof(reps));
|
||||
} else {
|
||||
memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
|
||||
ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
|
||||
}
|
||||
|
||||
{ U32 const storeEnd = cur + 1;
|
||||
@ -1110,7 +1255,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
|
||||
memcpy(tmpRep, rep, sizeof(tmpRep));
|
||||
ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
|
||||
|
||||
DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
|
||||
assert(ms->opt.litLengthSum == 0); /* first block */
|
||||
@ -1143,7 +1288,7 @@ size_t ZSTD_compressBlock_btultra2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
U32 const current = (U32)((const BYTE*)src - ms->window.base);
|
||||
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
|
||||
|
||||
/* 2-pass strategy:
|
||||
@ -1158,7 +1303,7 @@ size_t ZSTD_compressBlock_btultra2(
|
||||
if ( (ms->opt.litLengthSum==0) /* first block */
|
||||
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
||||
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
|
||||
&& (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
|
||||
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
|
||||
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
|
||||
) {
|
||||
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
|
||||
|
@ -20,8 +20,7 @@
|
||||
|
||||
|
||||
/* ====== Dependencies ====== */
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include <limits.h> /* INT_MAX, UINT_MAX */
|
||||
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
|
||||
#include "mem.h" /* MEM_STATIC */
|
||||
#include "pool.h" /* threadpool */
|
||||
#include "threading.h" /* mutex */
|
||||
@ -106,11 +105,11 @@ typedef struct ZSTDMT_bufferPool_s {
|
||||
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
{
|
||||
unsigned const maxNbBuffers = 2*nbWorkers + 3;
|
||||
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
|
||||
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
|
||||
sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
|
||||
if (bufPool==NULL) return NULL;
|
||||
if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
|
||||
ZSTD_free(bufPool, cMem);
|
||||
ZSTD_customFree(bufPool, cMem);
|
||||
return NULL;
|
||||
}
|
||||
bufPool->bufferSize = 64 KB;
|
||||
@ -127,10 +126,10 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
|
||||
if (!bufPool) return; /* compatibility with free on NULL */
|
||||
for (u=0; u<bufPool->totalBuffers; u++) {
|
||||
DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
|
||||
ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
|
||||
ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
|
||||
}
|
||||
ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
|
||||
ZSTD_free(bufPool, bufPool->cMem);
|
||||
ZSTD_customFree(bufPool, bufPool->cMem);
|
||||
}
|
||||
|
||||
/* only works at initialization, not during compression */
|
||||
@ -201,13 +200,13 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
|
||||
}
|
||||
/* size conditions not respected : scratch this buffer, create new one */
|
||||
DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing");
|
||||
ZSTD_free(buf.start, bufPool->cMem);
|
||||
ZSTD_customFree(buf.start, bufPool->cMem);
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
|
||||
/* create new buffer */
|
||||
DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer");
|
||||
{ buffer_t buffer;
|
||||
void* const start = ZSTD_malloc(bSize, bufPool->cMem);
|
||||
void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
|
||||
buffer.start = start; /* note : start can be NULL if malloc fails ! */
|
||||
buffer.capacity = (start==NULL) ? 0 : bSize;
|
||||
if (start==NULL) {
|
||||
@ -229,13 +228,13 @@ static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
|
||||
{
|
||||
size_t const bSize = bufPool->bufferSize;
|
||||
if (buffer.capacity < bSize) {
|
||||
void* const start = ZSTD_malloc(bSize, bufPool->cMem);
|
||||
void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
|
||||
buffer_t newBuffer;
|
||||
newBuffer.start = start;
|
||||
newBuffer.capacity = start == NULL ? 0 : bSize;
|
||||
if (start != NULL) {
|
||||
assert(newBuffer.capacity >= buffer.capacity);
|
||||
memcpy(newBuffer.start, buffer.start, buffer.capacity);
|
||||
ZSTD_memcpy(newBuffer.start, buffer.start, buffer.capacity);
|
||||
DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
|
||||
return newBuffer;
|
||||
}
|
||||
@ -261,14 +260,12 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
|
||||
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
|
||||
/* Reached bufferPool capacity (should not happen) */
|
||||
DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
|
||||
ZSTD_free(buf.start, bufPool->cMem);
|
||||
ZSTD_customFree(buf.start, bufPool->cMem);
|
||||
}
|
||||
|
||||
|
||||
/* ===== Seq Pool Wrapper ====== */
|
||||
|
||||
static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0};
|
||||
|
||||
typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
|
||||
|
||||
static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
|
||||
@ -278,7 +275,7 @@ static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
|
||||
|
||||
static rawSeqStore_t bufferToSeq(buffer_t buffer)
|
||||
{
|
||||
rawSeqStore_t seq = {NULL, 0, 0, 0};
|
||||
rawSeqStore_t seq = kNullRawSeqStore;
|
||||
seq.seq = (rawSeq*)buffer.start;
|
||||
seq.capacity = buffer.capacity / sizeof(rawSeq);
|
||||
return seq;
|
||||
@ -354,7 +351,7 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
|
||||
for (cid=0; cid<pool->totalCCtx; cid++)
|
||||
ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
|
||||
ZSTD_pthread_mutex_destroy(&pool->poolMutex);
|
||||
ZSTD_free(pool, pool->cMem);
|
||||
ZSTD_customFree(pool, pool->cMem);
|
||||
}
|
||||
|
||||
/* ZSTDMT_createCCtxPool() :
|
||||
@ -362,12 +359,12 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
|
||||
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
|
||||
ZSTD_customMem cMem)
|
||||
{
|
||||
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
|
||||
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
|
||||
sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
|
||||
assert(nbWorkers > 0);
|
||||
if (!cctxPool) return NULL;
|
||||
if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
|
||||
ZSTD_free(cctxPool, cMem);
|
||||
ZSTD_customFree(cctxPool, cMem);
|
||||
return NULL;
|
||||
}
|
||||
cctxPool->cMem = cMem;
|
||||
@ -478,7 +475,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
|
||||
serialState->ldmState.hashPower =
|
||||
ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
|
||||
} else {
|
||||
memset(¶ms.ldmParams, 0, sizeof(params.ldmParams));
|
||||
ZSTD_memset(¶ms.ldmParams, 0, sizeof(params.ldmParams));
|
||||
}
|
||||
serialState->nextJobID = 0;
|
||||
if (params.fParams.checksumFlag)
|
||||
@ -499,18 +496,18 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
|
||||
ZSTD_window_init(&serialState->ldmState.window);
|
||||
/* Resize tables and output space if necessary. */
|
||||
if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
|
||||
ZSTD_free(serialState->ldmState.hashTable, cMem);
|
||||
serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem);
|
||||
ZSTD_customFree(serialState->ldmState.hashTable, cMem);
|
||||
serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_customMalloc(hashSize, cMem);
|
||||
}
|
||||
if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
|
||||
ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
|
||||
serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem);
|
||||
ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
|
||||
serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem);
|
||||
}
|
||||
if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
|
||||
return 1;
|
||||
/* Zero the tables */
|
||||
memset(serialState->ldmState.hashTable, 0, hashSize);
|
||||
memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
|
||||
ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
|
||||
ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
|
||||
|
||||
/* Update window state and fill hash table with dict */
|
||||
serialState->ldmState.loadedDictEnd = 0;
|
||||
@ -537,7 +534,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
|
||||
static int ZSTDMT_serialState_init(serialState_t* serialState)
|
||||
{
|
||||
int initError = 0;
|
||||
memset(serialState, 0, sizeof(*serialState));
|
||||
ZSTD_memset(serialState, 0, sizeof(*serialState));
|
||||
initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL);
|
||||
initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL);
|
||||
initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL);
|
||||
@ -552,8 +549,8 @@ static void ZSTDMT_serialState_free(serialState_t* serialState)
|
||||
ZSTD_pthread_cond_destroy(&serialState->cond);
|
||||
ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex);
|
||||
ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond);
|
||||
ZSTD_free(serialState->ldmState.hashTable, cMem);
|
||||
ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
|
||||
ZSTD_customFree(serialState->ldmState.hashTable, cMem);
|
||||
ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
|
||||
}
|
||||
|
||||
static void ZSTDMT_serialState_update(serialState_t* serialState,
|
||||
@ -820,7 +817,6 @@ struct ZSTDMT_CCtx_s {
|
||||
roundBuff_t roundBuff;
|
||||
serialState_t serial;
|
||||
rsyncState_t rsync;
|
||||
unsigned singleBlockingThread;
|
||||
unsigned jobIDMask;
|
||||
unsigned doneJobID;
|
||||
unsigned nextJobID;
|
||||
@ -832,6 +828,7 @@ struct ZSTDMT_CCtx_s {
|
||||
ZSTD_customMem cMem;
|
||||
ZSTD_CDict* cdictLocal;
|
||||
const ZSTD_CDict* cdict;
|
||||
unsigned providedFactory: 1;
|
||||
};
|
||||
|
||||
static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
|
||||
@ -842,7 +839,7 @@ static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZS
|
||||
ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
|
||||
ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
|
||||
}
|
||||
ZSTD_free(jobTable, cMem);
|
||||
ZSTD_customFree(jobTable, cMem);
|
||||
}
|
||||
|
||||
/* ZSTDMT_allocJobsTable()
|
||||
@ -854,7 +851,7 @@ static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_custom
|
||||
U32 const nbJobs = 1 << nbJobsLog2;
|
||||
U32 jobNb;
|
||||
ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
|
||||
ZSTD_calloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
|
||||
ZSTD_customCalloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
|
||||
int initError = 0;
|
||||
if (jobTable==NULL) return NULL;
|
||||
*nbJobsPtr = nbJobs;
|
||||
@ -885,12 +882,12 @@ static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
|
||||
|
||||
/* ZSTDMT_CCtxParam_setNbWorkers():
|
||||
* Internal use only */
|
||||
size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
|
||||
static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
|
||||
{
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
|
||||
}
|
||||
|
||||
MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
|
||||
{
|
||||
ZSTDMT_CCtx* mtctx;
|
||||
U32 nbJobs = nbWorkers + 2;
|
||||
@ -903,12 +900,19 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
|
||||
/* invalid custom allocator */
|
||||
return NULL;
|
||||
|
||||
mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
|
||||
mtctx = (ZSTDMT_CCtx*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtx), cMem);
|
||||
if (!mtctx) return NULL;
|
||||
ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
|
||||
mtctx->cMem = cMem;
|
||||
mtctx->allJobsCompleted = 1;
|
||||
mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
|
||||
if (pool != NULL) {
|
||||
mtctx->factory = pool;
|
||||
mtctx->providedFactory = 1;
|
||||
}
|
||||
else {
|
||||
mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
|
||||
mtctx->providedFactory = 0;
|
||||
}
|
||||
mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
|
||||
assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
|
||||
mtctx->jobIDMask = nbJobs - 1;
|
||||
@ -925,22 +929,18 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
|
||||
return mtctx;
|
||||
}
|
||||
|
||||
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
|
||||
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
|
||||
{
|
||||
#ifdef ZSTD_MULTITHREAD
|
||||
return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem);
|
||||
return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool);
|
||||
#else
|
||||
(void)nbWorkers;
|
||||
(void)cMem;
|
||||
(void)pool;
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
|
||||
{
|
||||
return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
|
||||
}
|
||||
|
||||
|
||||
/* ZSTDMT_releaseAllJobResources() :
|
||||
* note : ensure all workers are killed first ! */
|
||||
@ -957,7 +957,7 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
|
||||
ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
|
||||
|
||||
/* Clear the job description, but keep the mutex/cond */
|
||||
memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
|
||||
ZSTD_memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
|
||||
mtctx->jobs[jobID].job_mutex = mutex;
|
||||
mtctx->jobs[jobID].job_cond = cond;
|
||||
}
|
||||
@ -984,7 +984,8 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
|
||||
size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
|
||||
{
|
||||
if (mtctx==NULL) return 0; /* compatible with free on NULL */
|
||||
POOL_free(mtctx->factory); /* stop and free worker threads */
|
||||
if (!mtctx->providedFactory)
|
||||
POOL_free(mtctx->factory); /* stop and free worker threads */
|
||||
ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */
|
||||
ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
|
||||
ZSTDMT_freeBufferPool(mtctx->bufPool);
|
||||
@ -993,8 +994,8 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
|
||||
ZSTDMT_serialState_free(&mtctx->serial);
|
||||
ZSTD_freeCDict(mtctx->cdictLocal);
|
||||
if (mtctx->roundBuff.buffer)
|
||||
ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
|
||||
ZSTD_free(mtctx, mtctx->cMem);
|
||||
ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem);
|
||||
ZSTD_customFree(mtctx, mtctx->cMem);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1011,65 +1012,6 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
|
||||
+ mtctx->roundBuff.capacity;
|
||||
}
|
||||
|
||||
/* Internal only */
|
||||
size_t
|
||||
ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
|
||||
ZSTDMT_parameter parameter,
|
||||
int value)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
|
||||
switch(parameter)
|
||||
{
|
||||
case ZSTDMT_p_jobSize :
|
||||
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value);
|
||||
case ZSTDMT_p_overlapLog :
|
||||
DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value);
|
||||
case ZSTDMT_p_rsyncable :
|
||||
DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value);
|
||||
return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value);
|
||||
default :
|
||||
return ERROR(parameter_unsupported);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
|
||||
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
|
||||
}
|
||||
|
||||
size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
|
||||
{
|
||||
switch (parameter) {
|
||||
case ZSTDMT_p_jobSize:
|
||||
return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value);
|
||||
case ZSTDMT_p_overlapLog:
|
||||
return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value);
|
||||
case ZSTDMT_p_rsyncable:
|
||||
return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value);
|
||||
default:
|
||||
return ERROR(parameter_unsupported);
|
||||
}
|
||||
}
|
||||
|
||||
/* Sets parameters relevant to the compression job,
|
||||
* initializing others to default values. */
|
||||
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
|
||||
{
|
||||
ZSTD_CCtx_params jobParams = *params;
|
||||
/* Clear parameters related to multithreading */
|
||||
jobParams.forceWindow = 0;
|
||||
jobParams.nbWorkers = 0;
|
||||
jobParams.jobSize = 0;
|
||||
jobParams.overlapLog = 0;
|
||||
jobParams.rsyncable = 0;
|
||||
memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t));
|
||||
memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem));
|
||||
return jobParams;
|
||||
}
|
||||
|
||||
|
||||
/* ZSTDMT_resize() :
|
||||
* @return : error code if fails, 0 on success */
|
||||
@ -1098,7 +1040,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
|
||||
DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
|
||||
compressionLevel);
|
||||
mtctx->params.compressionLevel = compressionLevel;
|
||||
{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
||||
{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
|
||||
cParams.windowLog = saved_wlog;
|
||||
mtctx->params.cParams = cParams;
|
||||
}
|
||||
@ -1185,8 +1127,8 @@ static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
|
||||
if (params->ldmParams.enableLdm) {
|
||||
/* In Long Range Mode, the windowLog is typically oversized.
|
||||
* In which case, it's preferable to determine the jobSize
|
||||
* based on chainLog instead. */
|
||||
jobLog = MAX(21, params->cParams.chainLog + 4);
|
||||
* based on cycleLog instead. */
|
||||
jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3);
|
||||
} else {
|
||||
jobLog = MAX(20, params->cParams.windowLog + 2);
|
||||
}
|
||||
@ -1240,174 +1182,6 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
|
||||
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
|
||||
{
|
||||
assert(nbWorkers>0);
|
||||
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
|
||||
size_t const jobMaxSize = jobSizeTarget << 2;
|
||||
size_t const passSizeMax = jobMaxSize * nbWorkers;
|
||||
unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
|
||||
unsigned const nbJobsLarge = multiplier * nbWorkers;
|
||||
unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1;
|
||||
unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers);
|
||||
return (multiplier>1) ? nbJobsLarge : nbJobsSmall;
|
||||
} }
|
||||
|
||||
/* ZSTDMT_compress_advanced_internal() :
|
||||
* This is a blocking function : it will only give back control to caller after finishing its compression job.
|
||||
*/
|
||||
static size_t
|
||||
ZSTDMT_compress_advanced_internal(
|
||||
ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_CCtx_params params)
|
||||
{
|
||||
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(¶ms);
|
||||
size_t const overlapSize = ZSTDMT_computeOverlapSize(¶ms);
|
||||
unsigned const nbJobs = ZSTDMT_computeNbJobs(¶ms, srcSize, params.nbWorkers);
|
||||
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
|
||||
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
|
||||
const char* const srcStart = (const char*)src;
|
||||
size_t remainingSrcSize = srcSize;
|
||||
unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize)); /* presumes avgJobSize >= 256 KB, which should be the case */
|
||||
size_t frameStartPos = 0, dstBufferPos = 0;
|
||||
assert(jobParams.nbWorkers == 0);
|
||||
assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
|
||||
|
||||
params.jobSize = (U32)avgJobSize;
|
||||
DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ",
|
||||
nbJobs, (U32)proposedJobSize, (U32)avgJobSize);
|
||||
|
||||
if ((nbJobs==1) | (params.nbWorkers<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */
|
||||
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
|
||||
DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
|
||||
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
|
||||
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
|
||||
}
|
||||
|
||||
assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
|
||||
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
|
||||
/* LDM doesn't even try to load the dictionary in single-ingestion mode */
|
||||
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
|
||||
return ERROR(memory_allocation);
|
||||
|
||||
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , ""); /* only expands if necessary */
|
||||
|
||||
{ unsigned u;
|
||||
for (u=0; u<nbJobs; u++) {
|
||||
size_t const jobSize = MIN(remainingSrcSize, avgJobSize);
|
||||
size_t const dstBufferCapacity = ZSTD_compressBound(jobSize);
|
||||
buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
|
||||
buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
|
||||
size_t dictSize = u ? overlapSize : 0;
|
||||
|
||||
mtctx->jobs[u].prefix.start = srcStart + frameStartPos - dictSize;
|
||||
mtctx->jobs[u].prefix.size = dictSize;
|
||||
mtctx->jobs[u].src.start = srcStart + frameStartPos;
|
||||
mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0); /* avoid job.src.size == 0 */
|
||||
mtctx->jobs[u].consumed = 0;
|
||||
mtctx->jobs[u].cSize = 0;
|
||||
mtctx->jobs[u].cdict = (u==0) ? cdict : NULL;
|
||||
mtctx->jobs[u].fullFrameSize = srcSize;
|
||||
mtctx->jobs[u].params = jobParams;
|
||||
/* do not calculate checksum within sections, but write it in header for first section */
|
||||
mtctx->jobs[u].dstBuff = dstBuffer;
|
||||
mtctx->jobs[u].cctxPool = mtctx->cctxPool;
|
||||
mtctx->jobs[u].bufPool = mtctx->bufPool;
|
||||
mtctx->jobs[u].seqPool = mtctx->seqPool;
|
||||
mtctx->jobs[u].serial = &mtctx->serial;
|
||||
mtctx->jobs[u].jobID = u;
|
||||
mtctx->jobs[u].firstJob = (u==0);
|
||||
mtctx->jobs[u].lastJob = (u==nbJobs-1);
|
||||
|
||||
DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)jobSize);
|
||||
DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12);
|
||||
POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]);
|
||||
|
||||
frameStartPos += jobSize;
|
||||
dstBufferPos += dstBufferCapacity;
|
||||
remainingSrcSize -= jobSize;
|
||||
} }
|
||||
|
||||
/* collect result */
|
||||
{ size_t error = 0, dstPos = 0;
|
||||
unsigned jobID;
|
||||
for (jobID=0; jobID<nbJobs; jobID++) {
|
||||
DEBUGLOG(5, "waiting for job %u ", jobID);
|
||||
ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
|
||||
while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
|
||||
DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID);
|
||||
ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
|
||||
DEBUGLOG(5, "ready to write job %u ", jobID);
|
||||
|
||||
{ size_t const cSize = mtctx->jobs[jobID].cSize;
|
||||
if (ZSTD_isError(cSize)) error = cSize;
|
||||
if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
|
||||
if (jobID) { /* note : job 0 is written directly at dst, which is correct position */
|
||||
if (!error)
|
||||
memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize); /* may overlap when job compressed within dst */
|
||||
if (jobID >= compressWithinDst) { /* job compressed into its own buffer, which must be released */
|
||||
DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst);
|
||||
ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
|
||||
} }
|
||||
mtctx->jobs[jobID].dstBuff = g_nullBuffer;
|
||||
mtctx->jobs[jobID].cSize = 0;
|
||||
dstPos += cSize ;
|
||||
}
|
||||
} /* for (jobID=0; jobID<nbJobs; jobID++) */
|
||||
|
||||
DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
|
||||
if (params.fParams.checksumFlag) {
|
||||
U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
|
||||
if (dstPos + 4 > dstCapacity) {
|
||||
error = ERROR(dstSize_tooSmall);
|
||||
} else {
|
||||
DEBUGLOG(4, "writing checksum : %08X \n", checksum);
|
||||
MEM_writeLE32((char*)dst + dstPos, checksum);
|
||||
dstPos += 4;
|
||||
} }
|
||||
|
||||
if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos);
|
||||
return error ? error : dstPos;
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_parameters params,
|
||||
int overlapLog)
|
||||
{
|
||||
ZSTD_CCtx_params cctxParams = mtctx->params;
|
||||
cctxParams.cParams = params.cParams;
|
||||
cctxParams.fParams = params.fParams;
|
||||
assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
|
||||
cctxParams.overlapLog = overlapLog;
|
||||
return ZSTDMT_compress_advanced_internal(mtctx,
|
||||
dst, dstCapacity,
|
||||
src, srcSize,
|
||||
cdict, cctxParams);
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
int compressionLevel)
|
||||
{
|
||||
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
|
||||
int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
|
||||
params.fParams.contentSizeFlag = 1;
|
||||
return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
|
||||
}
|
||||
|
||||
|
||||
/* ====================================== */
|
||||
/* ======= Streaming API ======= */
|
||||
/* ====================================== */
|
||||
@ -1432,16 +1206,6 @@ size_t ZSTDMT_initCStream_internal(
|
||||
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
||||
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
|
||||
|
||||
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
|
||||
if (mtctx->singleBlockingThread) {
|
||||
ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(¶ms);
|
||||
DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
|
||||
assert(singleThreadParams.nbWorkers == 0);
|
||||
return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
|
||||
dict, dictSize, cdict,
|
||||
&singleThreadParams, pledgedSrcSize);
|
||||
}
|
||||
|
||||
DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
|
||||
|
||||
if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */
|
||||
@ -1504,8 +1268,8 @@ size_t ZSTDMT_initCStream_internal(
|
||||
size_t const capacity = MAX(windowSize, sectionsSize) + slackSize;
|
||||
if (mtctx->roundBuff.capacity < capacity) {
|
||||
if (mtctx->roundBuff.buffer)
|
||||
ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
|
||||
mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem);
|
||||
ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem);
|
||||
mtctx->roundBuff.buffer = (BYTE*)ZSTD_customMalloc(capacity, mtctx->cMem);
|
||||
if (mtctx->roundBuff.buffer == NULL) {
|
||||
mtctx->roundBuff.capacity = 0;
|
||||
return ERROR(memory_allocation);
|
||||
@ -1530,53 +1294,6 @@ size_t ZSTDMT_initCStream_internal(
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
||||
const void* dict, size_t dictSize,
|
||||
ZSTD_parameters params,
|
||||
unsigned long long pledgedSrcSize)
|
||||
{
|
||||
ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */
|
||||
DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
|
||||
cctxParams.cParams = params.cParams;
|
||||
cctxParams.fParams = params.fParams;
|
||||
return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL,
|
||||
cctxParams, pledgedSrcSize);
|
||||
}
|
||||
|
||||
size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_frameParameters fParams,
|
||||
unsigned long long pledgedSrcSize)
|
||||
{
|
||||
ZSTD_CCtx_params cctxParams = mtctx->params;
|
||||
if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
|
||||
cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict);
|
||||
cctxParams.fParams = fParams;
|
||||
return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict,
|
||||
cctxParams, pledgedSrcSize);
|
||||
}
|
||||
|
||||
|
||||
/* ZSTDMT_resetCStream() :
|
||||
* pledgedSrcSize can be zero == unknown (for the time being)
|
||||
* prefer using ZSTD_CONTENTSIZE_UNKNOWN,
|
||||
* as `0` might mean "empty" in the future */
|
||||
size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize)
|
||||
{
|
||||
if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
|
||||
return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params,
|
||||
pledgedSrcSize);
|
||||
}
|
||||
|
||||
size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
|
||||
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
||||
ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */
|
||||
DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
|
||||
cctxParams.cParams = params.cParams;
|
||||
cctxParams.fParams = params.fParams;
|
||||
return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
|
||||
}
|
||||
|
||||
|
||||
/* ZSTDMT_writeLastEmptyBlock()
|
||||
* Write a single empty block with an end-of-frame to finish a frame.
|
||||
@ -1740,7 +1457,7 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
|
||||
assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
|
||||
assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
|
||||
if (toFlush > 0) {
|
||||
memcpy((char*)output->dst + output->pos,
|
||||
ZSTD_memcpy((char*)output->dst + output->pos,
|
||||
(const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
|
||||
toFlush);
|
||||
}
|
||||
@ -1894,7 +1611,7 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
|
||||
return 0;
|
||||
}
|
||||
ZSTDMT_waitForLdmComplete(mtctx, buffer);
|
||||
memmove(start, mtctx->inBuff.prefix.start, prefixSize);
|
||||
ZSTD_memmove(start, mtctx->inBuff.prefix.start, prefixSize);
|
||||
mtctx->inBuff.prefix.start = start;
|
||||
mtctx->roundBuff.pos = prefixSize;
|
||||
}
|
||||
@ -1968,6 +1685,16 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
||||
pos = 0;
|
||||
prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
|
||||
hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
|
||||
if ((hash & hitMask) == hitMask) {
|
||||
/* We're already at a sync point so don't load any more until
|
||||
* we're able to flush this sync point.
|
||||
* This likely happened because the job table was full so we
|
||||
* couldn't add our job.
|
||||
*/
|
||||
syncPoint.toLoad = 0;
|
||||
syncPoint.flush = 1;
|
||||
return syncPoint;
|
||||
}
|
||||
} else {
|
||||
/* We don't have enough bytes buffered to initialize the hash, but
|
||||
* we know we have at least RSYNC_LENGTH bytes total.
|
||||
@ -2022,34 +1749,11 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
assert(output->pos <= output->size);
|
||||
assert(input->pos <= input->size);
|
||||
|
||||
if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */
|
||||
return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp);
|
||||
}
|
||||
|
||||
if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
|
||||
/* current frame being ended. Only flush/end are allowed */
|
||||
return ERROR(stage_wrong);
|
||||
}
|
||||
|
||||
/* single-pass shortcut (note : synchronous-mode) */
|
||||
if ( (!mtctx->params.rsyncable) /* rsyncable mode is disabled */
|
||||
&& (mtctx->nextJobID == 0) /* just started */
|
||||
&& (mtctx->inBuff.filled == 0) /* nothing buffered */
|
||||
&& (!mtctx->jobReady) /* no job already created */
|
||||
&& (endOp == ZSTD_e_end) /* end order */
|
||||
&& (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */
|
||||
size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
|
||||
(char*)output->dst + output->pos, output->size - output->pos,
|
||||
(const char*)input->src + input->pos, input->size - input->pos,
|
||||
mtctx->cdict, mtctx->params);
|
||||
if (ZSTD_isError(cSize)) return cSize;
|
||||
input->pos = input->size;
|
||||
output->pos += cSize;
|
||||
mtctx->allJobsCompleted = 1;
|
||||
mtctx->frameEnded = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* fill input buffer */
|
||||
if ( (!mtctx->jobReady)
|
||||
&& (input->size > input->pos) ) { /* support NULL input */
|
||||
@ -2072,13 +1776,21 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
|
||||
DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
|
||||
(U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
|
||||
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
|
||||
ZSTD_memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
|
||||
input->pos += syncPoint.toLoad;
|
||||
mtctx->inBuff.filled += syncPoint.toLoad;
|
||||
forwardInputProgress = syncPoint.toLoad>0;
|
||||
}
|
||||
if ((input->pos < input->size) && (endOp == ZSTD_e_end))
|
||||
endOp = ZSTD_e_flush; /* can't end now : not all input consumed */
|
||||
}
|
||||
if ((input->pos < input->size) && (endOp == ZSTD_e_end)) {
|
||||
/* Can't end yet because the input is not fully consumed.
|
||||
* We are in one of these cases:
|
||||
* - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job.
|
||||
* - We filled the input buffer: flush this job but don't end the frame.
|
||||
* - We hit a synchronization point: flush this job but don't end the frame.
|
||||
*/
|
||||
assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable);
|
||||
endOp = ZSTD_e_flush;
|
||||
}
|
||||
|
||||
if ( (mtctx->jobReady)
|
||||
@ -2097,47 +1809,3 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
return remainingToFlush;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
||||
{
|
||||
FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");
|
||||
|
||||
/* recommended next input size : fill current input buffer */
|
||||
return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
|
||||
}
|
||||
|
||||
|
||||
static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame)
|
||||
{
|
||||
size_t const srcSize = mtctx->inBuff.filled;
|
||||
DEBUGLOG(5, "ZSTDMT_flushStream_internal");
|
||||
|
||||
if ( mtctx->jobReady /* one job ready for a worker to pick up */
|
||||
|| (srcSize > 0) /* still some data within input buffer */
|
||||
|| ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
|
||||
DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
|
||||
(U32)srcSize, (U32)endFrame);
|
||||
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
|
||||
}
|
||||
|
||||
/* check if there is any data available to flush */
|
||||
return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame);
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
|
||||
{
|
||||
DEBUGLOG(5, "ZSTDMT_flushStream");
|
||||
if (mtctx->singleBlockingThread)
|
||||
return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
|
||||
return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush);
|
||||
}
|
||||
|
||||
size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTDMT_endStream");
|
||||
if (mtctx->singleBlockingThread)
|
||||
return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
|
||||
return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end);
|
||||
}
|
||||
|
@ -19,26 +19,14 @@
|
||||
/* Note : This is an internal API.
|
||||
* These APIs used to be exposed with ZSTDLIB_API,
|
||||
* because it used to be the only way to invoke MT compression.
|
||||
* Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
|
||||
* instead.
|
||||
*
|
||||
* If you depend on these APIs and can't switch, then define
|
||||
* ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
|
||||
* However, we may completely remove these functions in a future
|
||||
* release, so please switch soon.
|
||||
* Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead.
|
||||
*
|
||||
* This API requires ZSTD_MULTITHREAD to be defined during compilation,
|
||||
* otherwise ZSTDMT_createCCtx*() will fail.
|
||||
*/
|
||||
|
||||
#ifdef ZSTD_LEGACY_MULTITHREADED_API
|
||||
# define ZSTDMT_API ZSTDLIB_API
|
||||
#else
|
||||
# define ZSTDMT_API
|
||||
#endif
|
||||
|
||||
/* === Dependencies === */
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
|
||||
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
|
||||
|
||||
@ -54,78 +42,34 @@
|
||||
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
|
||||
|
||||
|
||||
/* ========================================================
|
||||
* === Private interface, for use by ZSTD_compress.c ===
|
||||
* === Not exposed in libzstd. Never invoke directly ===
|
||||
* ======================================================== */
|
||||
|
||||
/* === Memory management === */
|
||||
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
|
||||
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
|
||||
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
|
||||
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
|
||||
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
|
||||
ZSTD_customMem cMem);
|
||||
ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
|
||||
/* === Simple one-pass compression function === */
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
int compressionLevel);
|
||||
|
||||
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
|
||||
ZSTD_customMem cMem,
|
||||
ZSTD_threadPool *pool);
|
||||
size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
/* === Streaming functions === */
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
|
||||
ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
||||
ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
|
||||
|
||||
/* === Advanced functions and parameters === */
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_parameters params,
|
||||
int overlapLog);
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
||||
const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
|
||||
ZSTD_parameters params,
|
||||
unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_frameParameters fparams,
|
||||
unsigned long long pledgedSrcSize); /* note : zero means empty */
|
||||
|
||||
/* ZSTDMT_parameter :
|
||||
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
|
||||
typedef enum {
|
||||
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
|
||||
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
|
||||
ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
|
||||
} ZSTDMT_parameter;
|
||||
|
||||
/* ZSTDMT_setMTCtxParameter() :
|
||||
* allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
|
||||
* The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
|
||||
* Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
|
||||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
||||
ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
|
||||
|
||||
/* ZSTDMT_getMTCtxParameter() :
|
||||
* Query the ZSTDMT_CCtx for a parameter value.
|
||||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
||||
ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
|
||||
size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
||||
|
||||
/*! ZSTDMT_initCStream_internal() :
|
||||
* Private use only. Init streaming operation.
|
||||
* expects params to be valid.
|
||||
* must receive dict, or cdict, or none, but not both.
|
||||
* @return : 0, or an error code */
|
||||
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
|
||||
const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
|
||||
|
||||
/*! ZSTDMT_compressStream_generic() :
|
||||
* Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
|
||||
@ -134,16 +78,10 @@ ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
|
||||
* 0 if fully flushed
|
||||
* or an error code
|
||||
* note : needs to be init using any ZSTD_initCStream*() variant */
|
||||
ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
ZSTD_outBuffer* output,
|
||||
ZSTD_inBuffer* input,
|
||||
ZSTD_EndDirective endOp);
|
||||
|
||||
|
||||
/* ========================================================
|
||||
* === Private interface, for use by ZSTD_compress.c ===
|
||||
* === Not exposed in libzstd. Never invoke directly ===
|
||||
* ======================================================== */
|
||||
size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
ZSTD_outBuffer* output,
|
||||
ZSTD_inBuffer* input,
|
||||
ZSTD_EndDirective endOp);
|
||||
|
||||
/*! ZSTDMT_toFlushNow()
|
||||
* Tell how many bytes are ready to be flushed immediately.
|
||||
@ -153,15 +91,6 @@ ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
* therefore flushing is limited by speed of oldest job. */
|
||||
size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
/*! ZSTDMT_CCtxParam_setMTCtxParameter()
|
||||
* like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
|
||||
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
|
||||
|
||||
/*! ZSTDMT_CCtxParam_setNbWorkers()
|
||||
* Set nbWorkers, and clamp it.
|
||||
* Also reset jobSize and overlapLog */
|
||||
size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
|
||||
|
||||
/*! ZSTDMT_updateCParams_whileCompressing() :
|
||||
* Updates only a selected set of compression parameters, to remain compatible with current frame.
|
||||
* New parameters will be applied to next compression job. */
|
||||
@ -174,17 +103,6 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
|
||||
ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
|
||||
/*! ZSTDMT_initCStream_internal() :
|
||||
* Private use only. Init streaming operation.
|
||||
* expects params to be valid.
|
||||
* must receive dict, or cdict, or none, but not both.
|
||||
* @return : 0, or an error code */
|
||||
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
|
||||
const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user