Dmitry Vyukov 9f2c6207d5 tsan: optimize sync clock memory consumption
This change implements 2 optimizations of sync clocks that reduce memory consumption:

Use previously unused first level block space to store clock elements.
Currently a clock for 100 threads consumes 3 512-byte blocks:

2 64-bit second level blocks to store clock elements
+1 32-bit first level block to store indices to second level blocks
Only 8 bytes of the first level block are actually used.
With this change such clock consumes only 2 blocks.

Share similar clocks differing only by a single clock entry for the current thread.
When a thread does several release operations on fresh sync objects without intervening
acquire operations in between (e.g. initialization of several fields in ctor),
the resulting clocks differ only by a single entry for the current thread.
This change reuses a single clock for such release operations. The current thread time
(which is different for different clocks) is stored in dirty entries.

We are experiencing issues with a large program that eats all 64M clock blocks
(32GB of non-flushable memory) and crashes with dense allocator overflow.
Max number of threads in the program is ~170 which is currently quite unfortunate
(consume 4 blocks per clock). Currently it crashes after consuming 60+ GB of memory.
The first optimization brings clock block consumption down to ~40M and
allows the program to work. The second optimization further reduces block consumption
to "modest" 16M blocks (~8GB of RAM) and reduces overall RAM consumption to ~30GB.

Measurements on another real world C++ RPC benchmark show RSS reduction
from 3.491G to 3.186G and a modest speedup of ~5%.

Go parallel client/server HTTP benchmark:
https://github.com/golang/benchmarks/blob/master/http/http.go
shows RSS reduction from 320MB to 240MB and a few percent speedup.

Reviewed in https://reviews.llvm.org/D35323

llvm-svn: 308018
2017-07-14 11:30:06 +00:00

197 lines
4.6 KiB
C++

//===-- tsan_defs.h ---------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#ifndef TSAN_DEFS_H
#define TSAN_DEFS_H
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "tsan_stat.h"
#include "ubsan/ubsan_platform.h"
// Setup defaults for compile definitions.
#ifndef TSAN_NO_HISTORY
# define TSAN_NO_HISTORY 0
#endif
#ifndef TSAN_COLLECT_STATS
# define TSAN_COLLECT_STATS 0
#endif
#ifndef TSAN_CONTAINS_UBSAN
# if CAN_SANITIZE_UB && !SANITIZER_GO
# define TSAN_CONTAINS_UBSAN 1
# else
# define TSAN_CONTAINS_UBSAN 0
# endif
#endif
namespace __tsan {
const int kClkBits = 42;
const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
struct ClockElem {
u64 epoch : kClkBits;
u64 reused : 64 - kClkBits; // tid reuse count
};
struct ClockBlock {
static const uptr kSize = 512;
static const uptr kTableSize = kSize / sizeof(u32);
static const uptr kClockCount = kSize / sizeof(ClockElem);
static const uptr kRefIdx = kTableSize - 1;
static const uptr kBlockIdx = kTableSize - 2;
union {
u32 table[kTableSize];
ClockElem clock[kClockCount];
};
ClockBlock() {
}
};
const int kTidBits = 13;
// Reduce kMaxTid by kClockCount because one slot in ClockBlock table is
// occupied by reference counter, so total number of elements we can store
// in SyncClock is kClockCount * (kTableSize - 1).
const unsigned kMaxTid = (1 << kTidBits) - ClockBlock::kClockCount;
#if !SANITIZER_GO
const unsigned kMaxTidInClock = kMaxTid * 2; // This includes msb 'freed' bit.
#else
const unsigned kMaxTidInClock = kMaxTid; // Go does not track freed memory.
#endif
const uptr kShadowStackSize = 64 * 1024;
// Count of shadow values in a shadow cell.
const uptr kShadowCnt = 4;
// That many user bytes are mapped onto a single shadow cell.
const uptr kShadowCell = 8;
// Size of a single shadow value (u64).
const uptr kShadowSize = 8;
// Shadow memory is kShadowMultiplier times larger than user memory.
const uptr kShadowMultiplier = kShadowSize * kShadowCnt / kShadowCell;
// That many user bytes are mapped onto a single meta shadow cell.
// Must be less or equal to minimal memory allocator alignment.
const uptr kMetaShadowCell = 8;
// Size of a single meta shadow value (u32).
const uptr kMetaShadowSize = 4;
#if TSAN_NO_HISTORY
const bool kCollectHistory = false;
#else
const bool kCollectHistory = true;
#endif
const u16 kInvalidTid = kMaxTid + 1;
// The following "build consistency" machinery ensures that all source files
// are built in the same configuration. Inconsistent builds lead to
// hard to debug crashes.
#if SANITIZER_DEBUG
void build_consistency_debug();
#else
void build_consistency_release();
#endif
#if TSAN_COLLECT_STATS
void build_consistency_stats();
#else
void build_consistency_nostats();
#endif
static inline void USED build_consistency() {
#if SANITIZER_DEBUG
build_consistency_debug();
#else
build_consistency_release();
#endif
#if TSAN_COLLECT_STATS
build_consistency_stats();
#else
build_consistency_nostats();
#endif
}
template<typename T>
T min(T a, T b) {
return a < b ? a : b;
}
template<typename T>
T max(T a, T b) {
return a > b ? a : b;
}
template<typename T>
T RoundUp(T p, u64 align) {
DCHECK_EQ(align & (align - 1), 0);
return (T)(((u64)p + align - 1) & ~(align - 1));
}
template<typename T>
T RoundDown(T p, u64 align) {
DCHECK_EQ(align & (align - 1), 0);
return (T)((u64)p & ~(align - 1));
}
// Zeroizes high part, returns 'bits' lsb bits.
template<typename T>
T GetLsb(T v, int bits) {
return (T)((u64)v & ((1ull << bits) - 1));
}
struct MD5Hash {
u64 hash[2];
bool operator==(const MD5Hash &other) const;
};
MD5Hash md5_hash(const void *data, uptr size);
struct Processor;
struct ThreadState;
class ThreadContext;
struct Context;
struct ReportStack;
class ReportDesc;
class RegionAlloc;
// Descriptor of user's memory block.
struct MBlock {
u64 siz : 48;
u64 tag : 16;
u32 stk;
u16 tid;
};
COMPILER_CHECK(sizeof(MBlock) == 16);
enum ExternalTag : uptr {
kExternalTagNone = 0,
kExternalTagSwiftModifyingAccess = 1,
kExternalTagFirstUserAvailable = 2,
kExternalTagMax = 1024,
// Don't set kExternalTagMax over 65,536, since MBlock only stores tags
// as 16-bit values, see tsan_defs.h.
};
} // namespace __tsan
#endif // TSAN_DEFS_H