[CAS] Harden validate() against on-disk corruption (#190634)

Fixes found by fuzzer:

OnDiskTrieRawHashMap:
- Bounds-check data slot offsets in TrieVerifier::visitSlot() before
  calling getRecord(), preventing asData() assertion on out-of-bounds
  trie entries.
- Validate subtrie headers (NumBits, bounds) before constructing
  SubtrieHandle, preventing SEGV in getSlots() from corrupt NumBits.
- Validate arena bump pointer alignment, catching misaligned BumpPtr
  that would crash store() with an alignment assertion.
- Fix comma operator bug in getOrCreateRoot() where the
  compare_exchange_strong result was discarded, causing asSubtrie()
  assertion when RootTrieOffset was corrupted to zero.

OnDiskGraphDB:
- Reject invalid (zero) ref offsets in validate callback, preventing
  asData() assertion when corrupt data pool refs are resolved via
  recoverFromFileOffset().
- Validate DataRecordHandle layout flags before calling getTotalSize(),
  preventing llvm_unreachable on corrupt NumRefsFlags/DataSizeFlags.
- Validate data pool bump pointer alignment, catching misaligned
  BumpPtr that would crash store() in DataRecordHandle::constructImpl().
- Check data record refs offset alignment before calling getRefs(),
  preventing PointerUnion assertion from misaligned refs pointer.

MappedFileRegionArena:
- Convert assertions in initializeHeader() to errors so corrupted
  arena headers return an error on CAS open instead of crashing.

Assisted-By: Claude
This commit is contained in:
Steven Wu 2026-04-06 13:33:22 -07:00 committed by GitHub
parent 70d3dcaa64
commit 1a0ca1019d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 102 additions and 14 deletions

View File

@ -109,7 +109,7 @@ public:
private:
// initialize header from offset.
void initializeHeader(uint64_t HeaderOffset);
Error initializeHeader(uint64_t HeaderOffset);
LLVM_ABI_FOR_TEST void destroyImpl();
void moveImpl(MappedFileRegionArena &RHS) {

View File

@ -89,7 +89,7 @@ Error DatabaseFile::addTable(TableHandle Table) {
}
std::optional<TableHandle> DatabaseFile::findTable(StringRef Name) {
int64_t RootTableOffset = H->RootTableOffset.load();
uint64_t RootTableOffset = H->RootTableOffset.load();
if (!RootTableOffset)
return std::nullopt;
@ -114,6 +114,11 @@ Error DatabaseFile::validate(MappedFileRegion &Region) {
return createStringError(std::errc::invalid_argument,
"database: wrong version");
if (H->RootTableOffset < 0 ||
static_cast<uint64_t>(H->RootTableOffset) > Region.size())
return createStringError(std::errc::invalid_argument,
"database: root table offset out of bound");
auto *MFH = reinterpret_cast<MappedFileRegionArena::Header *>(Region.data() +
sizeof(Header));
// Check the bump-ptr, which should point past the header.

View File

@ -228,6 +228,11 @@ Expected<MappedFileRegionArena> MappedFileRegionArena::create(
") does not match existing config (" + utostr(H.HeaderOffset) +
")");
if (H.Capacity < MinCapacity)
return createStringError(
std::make_error_code(std::errc::bad_file_descriptor),
"capacity inside the MappedFileRegionArena is too small");
// If the capacity doesn't match, use the existing capacity instead.
if (H.Capacity != Capacity)
Capacity = H.Capacity;
@ -255,7 +260,8 @@ Expected<MappedFileRegionArena> MappedFileRegionArena::create(
}
// Initialize the header.
Result.initializeHeader(HeaderOffset);
if (Error E = Result.initializeHeader(HeaderOffset))
return std::move(E);
if (FileSize->Size < MinCapacity) {
assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
@ -326,22 +332,30 @@ void MappedFileRegionArena::destroyImpl() {
Logger->logMappedFileRegionArenaClose(Path);
}
void MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) {
assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t");
Error MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) {
if (capacity() >= static_cast<uint64_t>(INT64_MAX))
return createStringError(make_error_code(std::errc::protocol_error),
"arena capacity does not fit in int64_t");
uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H));
assert(HeaderEndOffset <= capacity() &&
"Expected end offset to be pre-allocated");
assert(isAligned(Align::Of<decltype(*H)>(), HeaderOffset) &&
"Expected end offset to be aligned");
if (HeaderEndOffset > capacity())
return createStringError(make_error_code(std::errc::protocol_error),
"arena header extends past capacity");
if (!isAligned(Align::Of<decltype(*H)>(), HeaderOffset))
return createStringError(make_error_code(std::errc::protocol_error),
"arena header offset is not aligned");
H = reinterpret_cast<decltype(H)>(data() + HeaderOffset);
uint64_t ExistingValue = 0;
if (!H->BumpPtr.compare_exchange_strong(ExistingValue, HeaderEndOffset))
assert(ExistingValue >= HeaderEndOffset &&
"Expected 0, or past the end of the header itself");
if (ExistingValue < HeaderEndOffset)
return createStringError(
make_error_code(std::errc::protocol_error),
"arena bump pointer is corrupt: 0x" +
utohexstr(ExistingValue, /*LowerCase=*/true));
if (Logger)
Logger->logMappedFileRegionArenaCreate(Path, *FD, data(), capacity(),
size());
return Error::success();
}
static Error createAllocatorOutOfSpaceError() {

View File

@ -916,6 +916,9 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
if (auto E = UpstreamDB->validate(Deep, Hasher))
return E;
}
if (!isAligned(Align(8), DataPool.size()))
return createStringError(llvm::errc::illegal_byte_sequence,
"data pool bump pointer is not aligned");
return Index.validate([&](FileOffset Offset,
OnDiskTrieRawHashMap::ConstValueProxy Record)
-> Error {
@ -953,13 +956,28 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
// the record. It can be reused by later insertion so just skip this entry
// for now.
return Error::success();
case TrieRecord::StorageKind::DataPool:
case TrieRecord::StorageKind::DataPool: {
// Check offset is a postive value, and large enough to hold the
// header for the data record.
if (D.Offset.get() <= 0 ||
D.Offset.get() + sizeof(DataRecordHandle::Header) >= DataPool.size())
return formatError("datapool record out of bound");
// DataRecord start needs to be aligned.
if (!isAligned(Align(8), D.Offset.get()))
return formatError("data record offset is not aligned");
// Validate the layout flags before getFromDataPool calls getTotalSize().
auto HeaderData =
DataPool.get(D.Offset, sizeof(DataRecordHandle::Header));
if (!HeaderData)
return formatError(toString(HeaderData.takeError()));
auto LF = DataRecordHandle::get(HeaderData->data()).getLayoutFlags();
if (LF.NumRefs > DataRecordHandle::NumRefsFlags::Max ||
LF.DataSize > DataRecordHandle::DataSizeFlags::Max)
return formatError("data record has invalid layout flags");
break;
}
case TrieRecord::StorageKind::Standalone:
case TrieRecord::StorageKind::StandaloneLeaf:
case TrieRecord::StorageKind::StandaloneLeaf0:
@ -999,6 +1017,8 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
return dataError(toString(DataRecord.takeError()));
for (auto InternRef : DataRecord->getRefs()) {
if (InternRef.getFileOffset().get() <= 0)
return dataError("invalid ref offset");
auto Index = getIndexProxyFromRef(InternRef);
if (!Index)
return Index.takeError();
@ -1015,6 +1035,8 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
return dataError(
"data record span passed the end of the standalone file");
for (auto InternRef : DataRecord.getRefs()) {
if (InternRef.getFileOffset().get() <= 0)
return dataError("invalid ref offset");
auto Index = getIndexProxyFromRef(InternRef);
if (!Index)
return Index.takeError();

View File

@ -282,6 +282,7 @@ public:
explicit operator bool() const { return H; }
const Header &getHeader() const { return *H; }
SubtrieHandle getRoot() const;
int64_t getRootTrieOffset() const { return H->RootTrieOffset; }
Expected<SubtrieHandle> getOrCreateRoot(MappedFileRegionArena &Alloc);
MappedFileRegion &getRegion() const { return *Region; }
@ -387,8 +388,7 @@ TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) {
return LazyRoot.takeError();
if (H->RootTrieOffset.compare_exchange_strong(
Race, LazyRoot->getOffset().asSubtrie()),
Logger.get())
Race, LazyRoot->getOffset().asSubtrie()))
return *LazyRoot;
// There was a race. Return the other root.
@ -646,6 +646,12 @@ void OnDiskTrieRawHashMap::print(
Error OnDiskTrieRawHashMap::validate(
function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const {
uint64_t BumpPtr = Impl->File.getAlloc().size();
if (!isAligned(MappedFileRegionArena::getAlign(), BumpPtr))
return createStringError(make_error_code(std::errc::protocol_error),
"arena bump pointer is not aligned: 0x" +
utohexstr(BumpPtr, /*LowerCase=*/true));
return Impl->Trie.validate(RecordVerifier);
}
@ -749,6 +755,10 @@ OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
SmallString<128> TrieNameStorage;
StringRef TrieName = TrieNameTwine.toStringRef(TrieNameStorage);
if (MaxFileSize == 0)
return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
"invalid size");
constexpr size_t DefaultNumRootBits = 10;
constexpr size_t DefaultNumSubtrieBits = 6;
@ -875,6 +885,8 @@ private:
Error validateSubTrie(SubtrieHandle Node, bool IsRoot);
Error validateSubtrieHeader(uint64_t Offset, bool IsRoot);
// Helper function to capture errors when visiting the trie nodes.
void addError(Error NewError) {
assert(NewError && "not an error");
@ -1006,6 +1018,12 @@ private:
if (!isAligned(MappedFileRegionArena::getAlign(), Slot.asData()))
return createInvalidTrieError(Slot.asData(), "mis-aligned data entry");
uint64_t DataOffset = Slot.asData();
uint64_t RecordEnd = DataOffset + Trie.getRecordSize();
if (RecordEnd > (uint64_t)Trie.getRegion().size())
return createInvalidTrieError(DataOffset,
"data entry extends past end of file");
TrieRawHashMapHandle::RecordData Record =
Trie.getRecord(SubtrieSlotValue::getDataOffset(Slot.asData()));
return RecordVerifier(Slot.asDataFileOffset(),
@ -1021,6 +1039,11 @@ private:
} // namespace
Error TrieVisitor::visit() {
if (int64_t RootOffset = Trie.getRootTrieOffset()) {
if (auto Err = validateSubtrieHeader(RootOffset, /*IsRoot=*/true))
return Err;
}
auto Root = Trie.getRoot();
if (!Root)
return Error::success();
@ -1044,6 +1067,8 @@ Error TrieVisitor::visit() {
std::string SubtriePrefix;
appendIndexBits(SubtriePrefix, I, NumSlots);
if (Slot.isSubtrie()) {
if (auto Err = validateSubtrieHeader(Slot.asSubtrie(), /*IsRoot=*/false))
return Err;
SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger());
Subs.push_back(S);
Prefixes.push_back(SubtriePrefix);
@ -1097,6 +1122,26 @@ Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) {
return Error::success();
}
Error TrieVisitor::validateSubtrieHeader(uint64_t Offset, bool IsRoot) {
uint64_t RegionSize = Trie.getRegion().size();
if (Offset + sizeof(SubtrieHandle::Header) > RegionSize)
return createInvalidTrieError(Offset, "subtrie header out of bound");
auto *H = reinterpret_cast<const SubtrieHandle::Header *>(
Trie.getRegion().data() + Offset);
if (H->NumBits == 0)
return createInvalidTrieError(Offset, "invalid subtrie NumBits");
if (!IsRoot && H->NumBits > Trie.getNumSubtrieBits())
return createInvalidTrieError(Offset, "subtrie has corrupt NumBits");
if (Offset + static_cast<uint64_t>(SubtrieHandle::getSize(H->NumBits)) >
RegionSize)
return createInvalidTrieError(Offset, "subtrie node spans out of bound");
return Error::success();
}
Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) {
if (auto Err = validateSubTrie(Node, /*IsRoot=*/false))
return Err;
@ -1117,6 +1162,8 @@ Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) {
std::string SubtriePrefix = Prefix.str();
appendIndexBits(SubtriePrefix, I, NumSlots);
if (Slot.isSubtrie()) {
if (auto Err = validateSubtrieHeader(Slot.asSubtrie(), /*IsRoot=*/false))
return Err;
SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger());
Subs.push_back(S);
Prefixes.push_back(SubtriePrefix);