From 1a0ca1019d214a24b55a45704dc71fa183672362 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Mon, 6 Apr 2026 13:33:22 -0700 Subject: [PATCH] [CAS] Harden validate() against on-disk corruption (#190634) Fixes found by fuzzer: OnDiskTrieRawHashMap: - Bounds-check data slot offsets in TrieVerifier::visitSlot() before calling getRecord(), preventing asData() assertion on out-of-bounds trie entries. - Validate subtrie headers (NumBits, bounds) before constructing SubtrieHandle, preventing SEGV in getSlots() from corrupt NumBits. - Validate arena bump pointer alignment, catching misaligned BumpPtr that would crash store() with an alignment assertion. - Fix comma operator bug in getOrCreateRoot() where the compare_exchange_strong result was discarded, causing asSubtrie() assertion when RootTrieOffset was corrupted to zero. OnDiskGraphDB: - Reject invalid (zero) ref offsets in validate callback, preventing asData() assertion when corrupt data pool refs are resolved via recoverFromFileOffset(). - Validate DataRecordHandle layout flags before calling getTotalSize(), preventing llvm_unreachable on corrupt NumRefsFlags/DataSizeFlags. - Validate data pool bump pointer alignment, catching misaligned BumpPtr that would crash store() in DataRecordHandle::constructImpl(). - Check data record refs offset alignment before calling getRefs(), preventing PointerUnion assertion from misaligned refs pointer. MappedFileRegionArena: - Convert assertions in initializeHeader() to errors so corrupted arena headers return an error on CAS open instead of crashing. Assisted-By: Claude --- llvm/include/llvm/CAS/MappedFileRegionArena.h | 2 +- llvm/lib/CAS/DatabaseFile.cpp | 7 ++- llvm/lib/CAS/MappedFileRegionArena.cpp | 32 ++++++++---- llvm/lib/CAS/OnDiskGraphDB.cpp | 24 ++++++++- llvm/lib/CAS/OnDiskTrieRawHashMap.cpp | 51 ++++++++++++++++++- 5 files changed, 102 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/CAS/MappedFileRegionArena.h b/llvm/include/llvm/CAS/MappedFileRegionArena.h index a0a6236c9ff1..a00bfa7306ef 100644 --- a/llvm/include/llvm/CAS/MappedFileRegionArena.h +++ b/llvm/include/llvm/CAS/MappedFileRegionArena.h @@ -109,7 +109,7 @@ public: private: // initialize header from offset. - void initializeHeader(uint64_t HeaderOffset); + Error initializeHeader(uint64_t HeaderOffset); LLVM_ABI_FOR_TEST void destroyImpl(); void moveImpl(MappedFileRegionArena &RHS) { diff --git a/llvm/lib/CAS/DatabaseFile.cpp b/llvm/lib/CAS/DatabaseFile.cpp index cb130a808475..f0f3c75d4550 100644 --- a/llvm/lib/CAS/DatabaseFile.cpp +++ b/llvm/lib/CAS/DatabaseFile.cpp @@ -89,7 +89,7 @@ Error DatabaseFile::addTable(TableHandle Table) { } std::optional DatabaseFile::findTable(StringRef Name) { - int64_t RootTableOffset = H->RootTableOffset.load(); + uint64_t RootTableOffset = H->RootTableOffset.load(); if (!RootTableOffset) return std::nullopt; @@ -114,6 +114,11 @@ Error DatabaseFile::validate(MappedFileRegion &Region) { return createStringError(std::errc::invalid_argument, "database: wrong version"); + if (H->RootTableOffset < 0 || + static_cast(H->RootTableOffset) > Region.size()) + return createStringError(std::errc::invalid_argument, + "database: root table offset out of bound"); + auto *MFH = reinterpret_cast(Region.data() + sizeof(Header)); // Check the bump-ptr, which should point past the header. diff --git a/llvm/lib/CAS/MappedFileRegionArena.cpp b/llvm/lib/CAS/MappedFileRegionArena.cpp index e1c55c7c2a66..2dae3f2d0511 100644 --- a/llvm/lib/CAS/MappedFileRegionArena.cpp +++ b/llvm/lib/CAS/MappedFileRegionArena.cpp @@ -228,6 +228,11 @@ Expected MappedFileRegionArena::create( ") does not match existing config (" + utostr(H.HeaderOffset) + ")"); + if (H.Capacity < MinCapacity) + return createStringError( + std::make_error_code(std::errc::bad_file_descriptor), + "capacity inside the MappedFileRegionArena is too small"); + // If the capacity doesn't match, use the existing capacity instead. if (H.Capacity != Capacity) Capacity = H.Capacity; @@ -255,7 +260,8 @@ Expected MappedFileRegionArena::create( } // Initialize the header. - Result.initializeHeader(HeaderOffset); + if (Error E = Result.initializeHeader(HeaderOffset)) + return std::move(E); if (FileSize->Size < MinCapacity) { assert(MainFile->Locked == sys::fs::LockKind::Exclusive); @@ -326,22 +332,30 @@ void MappedFileRegionArena::destroyImpl() { Logger->logMappedFileRegionArenaClose(Path); } -void MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) { - assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t"); +Error MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) { + if (capacity() >= static_cast(INT64_MAX)) + return createStringError(make_error_code(std::errc::protocol_error), + "arena capacity does not fit in int64_t"); uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H)); - assert(HeaderEndOffset <= capacity() && - "Expected end offset to be pre-allocated"); - assert(isAligned(Align::Of(), HeaderOffset) && - "Expected end offset to be aligned"); + if (HeaderEndOffset > capacity()) + return createStringError(make_error_code(std::errc::protocol_error), + "arena header extends past capacity"); + if (!isAligned(Align::Of(), HeaderOffset)) + return createStringError(make_error_code(std::errc::protocol_error), + "arena header offset is not aligned"); H = reinterpret_cast(data() + HeaderOffset); uint64_t ExistingValue = 0; if (!H->BumpPtr.compare_exchange_strong(ExistingValue, HeaderEndOffset)) - assert(ExistingValue >= HeaderEndOffset && - "Expected 0, or past the end of the header itself"); + if (ExistingValue < HeaderEndOffset) + return createStringError( + make_error_code(std::errc::protocol_error), + "arena bump pointer is corrupt: 0x" + + utohexstr(ExistingValue, /*LowerCase=*/true)); if (Logger) Logger->logMappedFileRegionArenaCreate(Path, *FD, data(), capacity(), size()); + return Error::success(); } static Error createAllocatorOutOfSpaceError() { diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 86e9824fe93d..f7405ff6e90d 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -916,6 +916,9 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { if (auto E = UpstreamDB->validate(Deep, Hasher)) return E; } + if (!isAligned(Align(8), DataPool.size())) + return createStringError(llvm::errc::illegal_byte_sequence, + "data pool bump pointer is not aligned"); return Index.validate([&](FileOffset Offset, OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { @@ -953,13 +956,28 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { // the record. It can be reused by later insertion so just skip this entry // for now. return Error::success(); - case TrieRecord::StorageKind::DataPool: + case TrieRecord::StorageKind::DataPool: { // Check offset is a postive value, and large enough to hold the // header for the data record. if (D.Offset.get() <= 0 || D.Offset.get() + sizeof(DataRecordHandle::Header) >= DataPool.size()) return formatError("datapool record out of bound"); + + // DataRecord start needs to be aligned. + if (!isAligned(Align(8), D.Offset.get())) + return formatError("data record offset is not aligned"); + + // Validate the layout flags before getFromDataPool calls getTotalSize(). + auto HeaderData = + DataPool.get(D.Offset, sizeof(DataRecordHandle::Header)); + if (!HeaderData) + return formatError(toString(HeaderData.takeError())); + auto LF = DataRecordHandle::get(HeaderData->data()).getLayoutFlags(); + if (LF.NumRefs > DataRecordHandle::NumRefsFlags::Max || + LF.DataSize > DataRecordHandle::DataSizeFlags::Max) + return formatError("data record has invalid layout flags"); break; + } case TrieRecord::StorageKind::Standalone: case TrieRecord::StorageKind::StandaloneLeaf: case TrieRecord::StorageKind::StandaloneLeaf0: @@ -999,6 +1017,8 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { return dataError(toString(DataRecord.takeError())); for (auto InternRef : DataRecord->getRefs()) { + if (InternRef.getFileOffset().get() <= 0) + return dataError("invalid ref offset"); auto Index = getIndexProxyFromRef(InternRef); if (!Index) return Index.takeError(); @@ -1015,6 +1035,8 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { return dataError( "data record span passed the end of the standalone file"); for (auto InternRef : DataRecord.getRefs()) { + if (InternRef.getFileOffset().get() <= 0) + return dataError("invalid ref offset"); auto Index = getIndexProxyFromRef(InternRef); if (!Index) return Index.takeError(); diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp index 8c2771de74c4..edb19d7e7c73 100644 --- a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp +++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp @@ -282,6 +282,7 @@ public: explicit operator bool() const { return H; } const Header &getHeader() const { return *H; } SubtrieHandle getRoot() const; + int64_t getRootTrieOffset() const { return H->RootTrieOffset; } Expected getOrCreateRoot(MappedFileRegionArena &Alloc); MappedFileRegion &getRegion() const { return *Region; } @@ -387,8 +388,7 @@ TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) { return LazyRoot.takeError(); if (H->RootTrieOffset.compare_exchange_strong( - Race, LazyRoot->getOffset().asSubtrie()), - Logger.get()) + Race, LazyRoot->getOffset().asSubtrie())) return *LazyRoot; // There was a race. Return the other root. @@ -646,6 +646,12 @@ void OnDiskTrieRawHashMap::print( Error OnDiskTrieRawHashMap::validate( function_ref RecordVerifier) const { + uint64_t BumpPtr = Impl->File.getAlloc().size(); + if (!isAligned(MappedFileRegionArena::getAlign(), BumpPtr)) + return createStringError(make_error_code(std::errc::protocol_error), + "arena bump pointer is not aligned: 0x" + + utohexstr(BumpPtr, /*LowerCase=*/true)); + return Impl->Trie.validate(RecordVerifier); } @@ -749,6 +755,10 @@ OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine, SmallString<128> TrieNameStorage; StringRef TrieName = TrieNameTwine.toStringRef(TrieNameStorage); + if (MaxFileSize == 0) + return createTableConfigError(std::errc::invalid_argument, Path, TrieName, + "invalid size"); + constexpr size_t DefaultNumRootBits = 10; constexpr size_t DefaultNumSubtrieBits = 6; @@ -875,6 +885,8 @@ private: Error validateSubTrie(SubtrieHandle Node, bool IsRoot); + Error validateSubtrieHeader(uint64_t Offset, bool IsRoot); + // Helper function to capture errors when visiting the trie nodes. void addError(Error NewError) { assert(NewError && "not an error"); @@ -1006,6 +1018,12 @@ private: if (!isAligned(MappedFileRegionArena::getAlign(), Slot.asData())) return createInvalidTrieError(Slot.asData(), "mis-aligned data entry"); + uint64_t DataOffset = Slot.asData(); + uint64_t RecordEnd = DataOffset + Trie.getRecordSize(); + if (RecordEnd > (uint64_t)Trie.getRegion().size()) + return createInvalidTrieError(DataOffset, + "data entry extends past end of file"); + TrieRawHashMapHandle::RecordData Record = Trie.getRecord(SubtrieSlotValue::getDataOffset(Slot.asData())); return RecordVerifier(Slot.asDataFileOffset(), @@ -1021,6 +1039,11 @@ private: } // namespace Error TrieVisitor::visit() { + if (int64_t RootOffset = Trie.getRootTrieOffset()) { + if (auto Err = validateSubtrieHeader(RootOffset, /*IsRoot=*/true)) + return Err; + } + auto Root = Trie.getRoot(); if (!Root) return Error::success(); @@ -1044,6 +1067,8 @@ Error TrieVisitor::visit() { std::string SubtriePrefix; appendIndexBits(SubtriePrefix, I, NumSlots); if (Slot.isSubtrie()) { + if (auto Err = validateSubtrieHeader(Slot.asSubtrie(), /*IsRoot=*/false)) + return Err; SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger()); Subs.push_back(S); Prefixes.push_back(SubtriePrefix); @@ -1097,6 +1122,26 @@ Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) { return Error::success(); } +Error TrieVisitor::validateSubtrieHeader(uint64_t Offset, bool IsRoot) { + uint64_t RegionSize = Trie.getRegion().size(); + if (Offset + sizeof(SubtrieHandle::Header) > RegionSize) + return createInvalidTrieError(Offset, "subtrie header out of bound"); + + auto *H = reinterpret_cast( + Trie.getRegion().data() + Offset); + if (H->NumBits == 0) + return createInvalidTrieError(Offset, "invalid subtrie NumBits"); + + if (!IsRoot && H->NumBits > Trie.getNumSubtrieBits()) + return createInvalidTrieError(Offset, "subtrie has corrupt NumBits"); + + if (Offset + static_cast(SubtrieHandle::getSize(H->NumBits)) > + RegionSize) + return createInvalidTrieError(Offset, "subtrie node spans out of bound"); + + return Error::success(); +} + Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) { if (auto Err = validateSubTrie(Node, /*IsRoot=*/false)) return Err; @@ -1117,6 +1162,8 @@ Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) { std::string SubtriePrefix = Prefix.str(); appendIndexBits(SubtriePrefix, I, NumSlots); if (Slot.isSubtrie()) { + if (auto Err = validateSubtrieHeader(Slot.asSubtrie(), /*IsRoot=*/false)) + return Err; SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger()); Subs.push_back(S); Prefixes.push_back(SubtriePrefix);