[CAS] Harden validate() against on-disk corruption (#190634)
Fixes found by fuzzer: OnDiskTrieRawHashMap: - Bounds-check data slot offsets in TrieVerifier::visitSlot() before calling getRecord(), preventing asData() assertion on out-of-bounds trie entries. - Validate subtrie headers (NumBits, bounds) before constructing SubtrieHandle, preventing SEGV in getSlots() from corrupt NumBits. - Validate arena bump pointer alignment, catching misaligned BumpPtr that would crash store() with an alignment assertion. - Fix comma operator bug in getOrCreateRoot() where the compare_exchange_strong result was discarded, causing asSubtrie() assertion when RootTrieOffset was corrupted to zero. OnDiskGraphDB: - Reject invalid (zero) ref offsets in validate callback, preventing asData() assertion when corrupt data pool refs are resolved via recoverFromFileOffset(). - Validate DataRecordHandle layout flags before calling getTotalSize(), preventing llvm_unreachable on corrupt NumRefsFlags/DataSizeFlags. - Validate data pool bump pointer alignment, catching misaligned BumpPtr that would crash store() in DataRecordHandle::constructImpl(). - Check data record refs offset alignment before calling getRefs(), preventing PointerUnion assertion from misaligned refs pointer. MappedFileRegionArena: - Convert assertions in initializeHeader() to errors so corrupted arena headers return an error on CAS open instead of crashing. Assisted-By: Claude
This commit is contained in:
parent
70d3dcaa64
commit
1a0ca1019d
@ -109,7 +109,7 @@ public:
|
||||
|
||||
private:
|
||||
// initialize header from offset.
|
||||
void initializeHeader(uint64_t HeaderOffset);
|
||||
Error initializeHeader(uint64_t HeaderOffset);
|
||||
|
||||
LLVM_ABI_FOR_TEST void destroyImpl();
|
||||
void moveImpl(MappedFileRegionArena &RHS) {
|
||||
|
||||
@ -89,7 +89,7 @@ Error DatabaseFile::addTable(TableHandle Table) {
|
||||
}
|
||||
|
||||
std::optional<TableHandle> DatabaseFile::findTable(StringRef Name) {
|
||||
int64_t RootTableOffset = H->RootTableOffset.load();
|
||||
uint64_t RootTableOffset = H->RootTableOffset.load();
|
||||
if (!RootTableOffset)
|
||||
return std::nullopt;
|
||||
|
||||
@ -114,6 +114,11 @@ Error DatabaseFile::validate(MappedFileRegion &Region) {
|
||||
return createStringError(std::errc::invalid_argument,
|
||||
"database: wrong version");
|
||||
|
||||
if (H->RootTableOffset < 0 ||
|
||||
static_cast<uint64_t>(H->RootTableOffset) > Region.size())
|
||||
return createStringError(std::errc::invalid_argument,
|
||||
"database: root table offset out of bound");
|
||||
|
||||
auto *MFH = reinterpret_cast<MappedFileRegionArena::Header *>(Region.data() +
|
||||
sizeof(Header));
|
||||
// Check the bump-ptr, which should point past the header.
|
||||
|
||||
@ -228,6 +228,11 @@ Expected<MappedFileRegionArena> MappedFileRegionArena::create(
|
||||
") does not match existing config (" + utostr(H.HeaderOffset) +
|
||||
")");
|
||||
|
||||
if (H.Capacity < MinCapacity)
|
||||
return createStringError(
|
||||
std::make_error_code(std::errc::bad_file_descriptor),
|
||||
"capacity inside the MappedFileRegionArena is too small");
|
||||
|
||||
// If the capacity doesn't match, use the existing capacity instead.
|
||||
if (H.Capacity != Capacity)
|
||||
Capacity = H.Capacity;
|
||||
@ -255,7 +260,8 @@ Expected<MappedFileRegionArena> MappedFileRegionArena::create(
|
||||
}
|
||||
|
||||
// Initialize the header.
|
||||
Result.initializeHeader(HeaderOffset);
|
||||
if (Error E = Result.initializeHeader(HeaderOffset))
|
||||
return std::move(E);
|
||||
|
||||
if (FileSize->Size < MinCapacity) {
|
||||
assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
|
||||
@ -326,22 +332,30 @@ void MappedFileRegionArena::destroyImpl() {
|
||||
Logger->logMappedFileRegionArenaClose(Path);
|
||||
}
|
||||
|
||||
void MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) {
|
||||
assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t");
|
||||
Error MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) {
|
||||
if (capacity() >= static_cast<uint64_t>(INT64_MAX))
|
||||
return createStringError(make_error_code(std::errc::protocol_error),
|
||||
"arena capacity does not fit in int64_t");
|
||||
uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H));
|
||||
assert(HeaderEndOffset <= capacity() &&
|
||||
"Expected end offset to be pre-allocated");
|
||||
assert(isAligned(Align::Of<decltype(*H)>(), HeaderOffset) &&
|
||||
"Expected end offset to be aligned");
|
||||
if (HeaderEndOffset > capacity())
|
||||
return createStringError(make_error_code(std::errc::protocol_error),
|
||||
"arena header extends past capacity");
|
||||
if (!isAligned(Align::Of<decltype(*H)>(), HeaderOffset))
|
||||
return createStringError(make_error_code(std::errc::protocol_error),
|
||||
"arena header offset is not aligned");
|
||||
H = reinterpret_cast<decltype(H)>(data() + HeaderOffset);
|
||||
|
||||
uint64_t ExistingValue = 0;
|
||||
if (!H->BumpPtr.compare_exchange_strong(ExistingValue, HeaderEndOffset))
|
||||
assert(ExistingValue >= HeaderEndOffset &&
|
||||
"Expected 0, or past the end of the header itself");
|
||||
if (ExistingValue < HeaderEndOffset)
|
||||
return createStringError(
|
||||
make_error_code(std::errc::protocol_error),
|
||||
"arena bump pointer is corrupt: 0x" +
|
||||
utohexstr(ExistingValue, /*LowerCase=*/true));
|
||||
if (Logger)
|
||||
Logger->logMappedFileRegionArenaCreate(Path, *FD, data(), capacity(),
|
||||
size());
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
static Error createAllocatorOutOfSpaceError() {
|
||||
|
||||
@ -916,6 +916,9 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
|
||||
if (auto E = UpstreamDB->validate(Deep, Hasher))
|
||||
return E;
|
||||
}
|
||||
if (!isAligned(Align(8), DataPool.size()))
|
||||
return createStringError(llvm::errc::illegal_byte_sequence,
|
||||
"data pool bump pointer is not aligned");
|
||||
return Index.validate([&](FileOffset Offset,
|
||||
OnDiskTrieRawHashMap::ConstValueProxy Record)
|
||||
-> Error {
|
||||
@ -953,13 +956,28 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
|
||||
// the record. It can be reused by later insertion so just skip this entry
|
||||
// for now.
|
||||
return Error::success();
|
||||
case TrieRecord::StorageKind::DataPool:
|
||||
case TrieRecord::StorageKind::DataPool: {
|
||||
// Check offset is a postive value, and large enough to hold the
|
||||
// header for the data record.
|
||||
if (D.Offset.get() <= 0 ||
|
||||
D.Offset.get() + sizeof(DataRecordHandle::Header) >= DataPool.size())
|
||||
return formatError("datapool record out of bound");
|
||||
|
||||
// DataRecord start needs to be aligned.
|
||||
if (!isAligned(Align(8), D.Offset.get()))
|
||||
return formatError("data record offset is not aligned");
|
||||
|
||||
// Validate the layout flags before getFromDataPool calls getTotalSize().
|
||||
auto HeaderData =
|
||||
DataPool.get(D.Offset, sizeof(DataRecordHandle::Header));
|
||||
if (!HeaderData)
|
||||
return formatError(toString(HeaderData.takeError()));
|
||||
auto LF = DataRecordHandle::get(HeaderData->data()).getLayoutFlags();
|
||||
if (LF.NumRefs > DataRecordHandle::NumRefsFlags::Max ||
|
||||
LF.DataSize > DataRecordHandle::DataSizeFlags::Max)
|
||||
return formatError("data record has invalid layout flags");
|
||||
break;
|
||||
}
|
||||
case TrieRecord::StorageKind::Standalone:
|
||||
case TrieRecord::StorageKind::StandaloneLeaf:
|
||||
case TrieRecord::StorageKind::StandaloneLeaf0:
|
||||
@ -999,6 +1017,8 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
|
||||
return dataError(toString(DataRecord.takeError()));
|
||||
|
||||
for (auto InternRef : DataRecord->getRefs()) {
|
||||
if (InternRef.getFileOffset().get() <= 0)
|
||||
return dataError("invalid ref offset");
|
||||
auto Index = getIndexProxyFromRef(InternRef);
|
||||
if (!Index)
|
||||
return Index.takeError();
|
||||
@ -1015,6 +1035,8 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
|
||||
return dataError(
|
||||
"data record span passed the end of the standalone file");
|
||||
for (auto InternRef : DataRecord.getRefs()) {
|
||||
if (InternRef.getFileOffset().get() <= 0)
|
||||
return dataError("invalid ref offset");
|
||||
auto Index = getIndexProxyFromRef(InternRef);
|
||||
if (!Index)
|
||||
return Index.takeError();
|
||||
|
||||
@ -282,6 +282,7 @@ public:
|
||||
explicit operator bool() const { return H; }
|
||||
const Header &getHeader() const { return *H; }
|
||||
SubtrieHandle getRoot() const;
|
||||
int64_t getRootTrieOffset() const { return H->RootTrieOffset; }
|
||||
Expected<SubtrieHandle> getOrCreateRoot(MappedFileRegionArena &Alloc);
|
||||
MappedFileRegion &getRegion() const { return *Region; }
|
||||
|
||||
@ -387,8 +388,7 @@ TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) {
|
||||
return LazyRoot.takeError();
|
||||
|
||||
if (H->RootTrieOffset.compare_exchange_strong(
|
||||
Race, LazyRoot->getOffset().asSubtrie()),
|
||||
Logger.get())
|
||||
Race, LazyRoot->getOffset().asSubtrie()))
|
||||
return *LazyRoot;
|
||||
|
||||
// There was a race. Return the other root.
|
||||
@ -646,6 +646,12 @@ void OnDiskTrieRawHashMap::print(
|
||||
|
||||
Error OnDiskTrieRawHashMap::validate(
|
||||
function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const {
|
||||
uint64_t BumpPtr = Impl->File.getAlloc().size();
|
||||
if (!isAligned(MappedFileRegionArena::getAlign(), BumpPtr))
|
||||
return createStringError(make_error_code(std::errc::protocol_error),
|
||||
"arena bump pointer is not aligned: 0x" +
|
||||
utohexstr(BumpPtr, /*LowerCase=*/true));
|
||||
|
||||
return Impl->Trie.validate(RecordVerifier);
|
||||
}
|
||||
|
||||
@ -749,6 +755,10 @@ OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
|
||||
SmallString<128> TrieNameStorage;
|
||||
StringRef TrieName = TrieNameTwine.toStringRef(TrieNameStorage);
|
||||
|
||||
if (MaxFileSize == 0)
|
||||
return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
|
||||
"invalid size");
|
||||
|
||||
constexpr size_t DefaultNumRootBits = 10;
|
||||
constexpr size_t DefaultNumSubtrieBits = 6;
|
||||
|
||||
@ -875,6 +885,8 @@ private:
|
||||
|
||||
Error validateSubTrie(SubtrieHandle Node, bool IsRoot);
|
||||
|
||||
Error validateSubtrieHeader(uint64_t Offset, bool IsRoot);
|
||||
|
||||
// Helper function to capture errors when visiting the trie nodes.
|
||||
void addError(Error NewError) {
|
||||
assert(NewError && "not an error");
|
||||
@ -1006,6 +1018,12 @@ private:
|
||||
if (!isAligned(MappedFileRegionArena::getAlign(), Slot.asData()))
|
||||
return createInvalidTrieError(Slot.asData(), "mis-aligned data entry");
|
||||
|
||||
uint64_t DataOffset = Slot.asData();
|
||||
uint64_t RecordEnd = DataOffset + Trie.getRecordSize();
|
||||
if (RecordEnd > (uint64_t)Trie.getRegion().size())
|
||||
return createInvalidTrieError(DataOffset,
|
||||
"data entry extends past end of file");
|
||||
|
||||
TrieRawHashMapHandle::RecordData Record =
|
||||
Trie.getRecord(SubtrieSlotValue::getDataOffset(Slot.asData()));
|
||||
return RecordVerifier(Slot.asDataFileOffset(),
|
||||
@ -1021,6 +1039,11 @@ private:
|
||||
} // namespace
|
||||
|
||||
Error TrieVisitor::visit() {
|
||||
if (int64_t RootOffset = Trie.getRootTrieOffset()) {
|
||||
if (auto Err = validateSubtrieHeader(RootOffset, /*IsRoot=*/true))
|
||||
return Err;
|
||||
}
|
||||
|
||||
auto Root = Trie.getRoot();
|
||||
if (!Root)
|
||||
return Error::success();
|
||||
@ -1044,6 +1067,8 @@ Error TrieVisitor::visit() {
|
||||
std::string SubtriePrefix;
|
||||
appendIndexBits(SubtriePrefix, I, NumSlots);
|
||||
if (Slot.isSubtrie()) {
|
||||
if (auto Err = validateSubtrieHeader(Slot.asSubtrie(), /*IsRoot=*/false))
|
||||
return Err;
|
||||
SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger());
|
||||
Subs.push_back(S);
|
||||
Prefixes.push_back(SubtriePrefix);
|
||||
@ -1097,6 +1122,26 @@ Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) {
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
Error TrieVisitor::validateSubtrieHeader(uint64_t Offset, bool IsRoot) {
|
||||
uint64_t RegionSize = Trie.getRegion().size();
|
||||
if (Offset + sizeof(SubtrieHandle::Header) > RegionSize)
|
||||
return createInvalidTrieError(Offset, "subtrie header out of bound");
|
||||
|
||||
auto *H = reinterpret_cast<const SubtrieHandle::Header *>(
|
||||
Trie.getRegion().data() + Offset);
|
||||
if (H->NumBits == 0)
|
||||
return createInvalidTrieError(Offset, "invalid subtrie NumBits");
|
||||
|
||||
if (!IsRoot && H->NumBits > Trie.getNumSubtrieBits())
|
||||
return createInvalidTrieError(Offset, "subtrie has corrupt NumBits");
|
||||
|
||||
if (Offset + static_cast<uint64_t>(SubtrieHandle::getSize(H->NumBits)) >
|
||||
RegionSize)
|
||||
return createInvalidTrieError(Offset, "subtrie node spans out of bound");
|
||||
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) {
|
||||
if (auto Err = validateSubTrie(Node, /*IsRoot=*/false))
|
||||
return Err;
|
||||
@ -1117,6 +1162,8 @@ Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) {
|
||||
std::string SubtriePrefix = Prefix.str();
|
||||
appendIndexBits(SubtriePrefix, I, NumSlots);
|
||||
if (Slot.isSubtrie()) {
|
||||
if (auto Err = validateSubtrieHeader(Slot.asSubtrie(), /*IsRoot=*/false))
|
||||
return Err;
|
||||
SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger());
|
||||
Subs.push_back(S);
|
||||
Prefixes.push_back(SubtriePrefix);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user