From 284e5d79cdf292f4693ee835cbcbb6760ac63889 Mon Sep 17 00:00:00 2001 From: Jacob Lambert Date: Tue, 31 Mar 2026 13:12:27 -0700 Subject: [PATCH] [MsgPackDocument]: Fix DocNode comparison and add copyNode (#189436) Fix two bugs in DocNode's comparison operators and add a new Document::copyNode() method: 1. operator== was implemented via operator<, which hits llvm_unreachable for Array/Map nodes. Implement operator== directly with recursive value comparison for all node types. 2. operator< compared KindAndDoc pointers, causing cross-document nodes of the same type and value to silently produce wrong results. Compare by kind then by value instead. 3. Add Document::copyNode() for deep copying nodes between Documents with independent memory ownership. --- .../llvm/BinaryFormat/MsgPackDocument.h | 47 +++- llvm/lib/BinaryFormat/MsgPackDocument.cpp | 91 +++++++ .../BinaryFormat/MsgPackDocumentTest.cpp | 256 ++++++++++++++++++ 3 files changed, 380 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h index f09feabb1028..f84e409e6167 100644 --- a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h +++ b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h @@ -77,8 +77,14 @@ public: // that has no associated Document, and the result of getEmptyNode(), which // does have an associated document. bool isEmpty() const { return !KindAndDoc || getKind() == Type::Empty; } - Type getKind() const { return KindAndDoc->Kind; } - Document *getDocument() const { return KindAndDoc->Doc; } + Type getKind() const { + assert(KindAndDoc); + return KindAndDoc->Kind; + } + Document *getDocument() const { + assert(KindAndDoc); + return KindAndDoc->Doc; + } int64_t &getInt() { assert(getKind() == Type::Int); @@ -152,17 +158,20 @@ public: return *reinterpret_cast(this); } - /// Comparison operator, used for map keys. + /// Comparison operator, used for map keys. Compares by value, so nodes + /// from different Documents with the same kind and value are ordered + /// consistently. Only supports scalar types; Array and Map nodes should + /// not be used as map keys. friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) { - // This has to cope with one or both of the nodes being default-constructed, - // such that KindAndDoc is not set. + // Cope with default-constructed nodes where KindAndDoc is not set: + // isEmpty() returns true both for default-constructed nodes and for + // nodes returned by getEmptyNode(). if (Rhs.isEmpty()) return false; - if (Lhs.KindAndDoc != Rhs.KindAndDoc) { - if (Lhs.isEmpty()) - return true; + if (Lhs.isEmpty()) + return true; + if (Lhs.getKind() != Rhs.getKind()) return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind(); - } switch (Lhs.getKind()) { case Type::Int: return Lhs.Int < Rhs.Int; @@ -178,14 +187,15 @@ public: case Type::Binary: return Lhs.Raw < Rhs.Raw; default: - llvm_unreachable("bad map key type"); + assert(false && "bad map key type"); + return false; } } - /// Equality operator - friend bool operator==(const DocNode &Lhs, const DocNode &Rhs) { - return !(Lhs < Rhs) && !(Rhs < Lhs); - } + /// Equality operator. Supports all node types including Array and Map, + /// comparing recursively by value. Works correctly for nodes from + /// different Documents. + LLVM_ABI friend bool operator==(const DocNode &Lhs, const DocNode &Rhs); /// Inequality operator friend bool operator!=(const DocNode &Lhs, const DocNode &Rhs) { @@ -223,6 +233,9 @@ private: LLVM_ABI void convertToMap(); }; +/// Namespace-scope declaration for the out-of-line friend operator==. +LLVM_ABI bool operator==(const DocNode &Lhs, const DocNode &Rhs); + /// A DocNode that is a map. class MapDocNode : public DocNode { public: @@ -403,6 +416,12 @@ public: return N.getArray(); } + /// Deep copy a DocNode from any Document into this Document. The returned + /// node is owned by this Document and is independent of the source node's + /// Document. Strings are copied so the source Document's lifetime does not + /// need to extend beyond this call. + LLVM_ABI DocNode copyNode(DocNode Src); + /// Read a document from a binary msgpack blob, merging into anything already /// in the Document. The blob data must remain valid for the lifetime of this /// Document (because a string object in the document contains a StringRef diff --git a/llvm/lib/BinaryFormat/MsgPackDocument.cpp b/llvm/lib/BinaryFormat/MsgPackDocument.cpp index b52f02912244..cfce04bc571d 100644 --- a/llvm/lib/BinaryFormat/MsgPackDocument.cpp +++ b/llvm/lib/BinaryFormat/MsgPackDocument.cpp @@ -109,6 +109,97 @@ DocNode &DocNode::operator=(double Val) { return *this; } +// Equality operator. Compares recursively by value, supporting all node types +// including Array and Map. Works correctly for nodes from different Documents. +// This relies on operator< comparing scalar keys by value (not by document +// identity), so that Map::find works across document boundaries. +bool llvm::msgpack::operator==(const DocNode &Lhs, const DocNode &Rhs) { + if (Lhs.isEmpty() && Rhs.isEmpty()) + return true; + if (Lhs.isEmpty() || Rhs.isEmpty()) + return false; + if (Lhs.getKind() != Rhs.getKind()) + return false; + switch (Lhs.getKind()) { + case Type::Nil: + return true; + case Type::Int: + return Lhs.Int == Rhs.Int; + case Type::UInt: + return Lhs.UInt == Rhs.UInt; + case Type::Boolean: + return Lhs.Bool == Rhs.Bool; + case Type::Float: + return Lhs.Float == Rhs.Float; + case Type::String: + case Type::Binary: + return Lhs.Raw == Rhs.Raw; + case Type::Array: { + if (Lhs.Array->size() != Rhs.Array->size()) + return false; + for (size_t I = 0, E = Lhs.Array->size(); I != E; ++I) + if ((*Lhs.Array)[I] != (*Rhs.Array)[I]) + return false; + return true; + } + case Type::Map: { + if (Lhs.Map->size() != Rhs.Map->size()) + return false; + for (auto &Entry : *Lhs.Map) { + auto It = Rhs.Map->find(Entry.first); + if (It == Rhs.Map->end()) + return false; + if (Entry.second != It->second) + return false; + } + return true; + } + default: + assert(false && "unhandled DocNode type in operator=="); + return false; + } +} + +/// Deep copy a DocNode from any Document into this Document. +DocNode Document::copyNode(DocNode Src) { + if (Src.isEmpty()) + return getEmptyNode(); + switch (Src.getKind()) { + case Type::Nil: + return getNode(); + case Type::Int: + return getNode(Src.getInt()); + case Type::UInt: + return getNode(Src.getUInt()); + case Type::Boolean: + return getNode(Src.getBool()); + case Type::Float: + return getNode(Src.getFloat()); + case Type::String: + // TODO: Restructure string interning so that no-copy strings from the + // source Document become no-copy strings in the destination Document, + // avoiding duplicate copies when the caller retains the source. + return getNode(Src.getString(), /*Copy=*/true); + case Type::Binary: + return getNode(Src.getBinary(), /*Copy=*/true); + case Type::Map: { + auto NewMap = getMapNode(); + for (auto &Entry : Src.getMap()) + NewMap[copyNode(Entry.first)] = copyNode(Entry.second); + return NewMap; + } + case Type::Array: { + auto NewArray = getArrayNode(); + for (auto &Elem : Src.getArray()) + NewArray.push_back(copyNode(Elem)); + return NewArray; + } + default: + assert(false && "unhandled DocNode type in copyNode"); + return getEmptyNode(); + } +} + // A level in the document reading stack. struct StackLevel { StackLevel(DocNode Node, size_t StartIndex, size_t Length, diff --git a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp index 60b2b28c7fac..8442d8d50bc0 100644 --- a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp +++ b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp @@ -22,6 +22,262 @@ TEST(MsgPackDocument, DocNodeTest) { ASSERT_TRUE(Str1 == Str2); } +TEST(MsgPackDocument, DocNodeEmptyAndNilEquality) { + Document Doc1, Doc2; + + // Two empty nodes are equal. + EXPECT_FALSE(Doc1.getEmptyNode() != Doc1.getEmptyNode()); + // Cross-document empty nodes are equal. + EXPECT_FALSE(Doc1.getEmptyNode() != Doc2.getEmptyNode()); + // Default-constructed (no document) empty nodes are equal. + DocNode Default1, Default2; + EXPECT_FALSE(Default1 != Default2); + // Empty vs non-empty are not equal. + EXPECT_FALSE(Doc1.getEmptyNode() == Doc1.getNode(0)); + + // Two nil nodes are equal. + EXPECT_FALSE(Doc1.getNode() != Doc1.getNode()); + // Cross-document nil nodes are equal. + EXPECT_FALSE(Doc1.getNode() != Doc2.getNode()); +} + +TEST(MsgPackDocument, DocNodeDifferentTypesNotEqual) { + Document Doc; + // Int vs String. + EXPECT_FALSE(Doc.getNode(1) == Doc.getNode("1")); + // Int vs UInt. + EXPECT_FALSE(Doc.getNode(int64_t(1)) == Doc.getNode(uint64_t(1))); + // Boolean vs Int. + EXPECT_FALSE(Doc.getNode(true) == Doc.getNode(1)); + // Scalar vs Array. + DocNode Arr = Doc.getArrayNode(); + EXPECT_FALSE(Doc.getNode(1) == Arr); + // Scalar vs Map. + DocNode Map = Doc.getMapNode(); + EXPECT_FALSE(Doc.getNode(1) == Map); + // Array vs Map. + EXPECT_FALSE(Arr == Map); +} + +TEST(MsgPackDocument, DocNodeCrossDocumentScalarEquality) { + Document Doc1, Doc2; + // Same values across documents should be equal. + EXPECT_FALSE(Doc1.getNode(42) != Doc2.getNode(42)); + EXPECT_FALSE(Doc1.getNode(42U) != Doc2.getNode(42U)); + EXPECT_FALSE(Doc1.getNode(int64_t(-1)) != Doc2.getNode(int64_t(-1))); + EXPECT_FALSE(Doc1.getNode(true) != Doc2.getNode(true)); + EXPECT_FALSE(Doc1.getNode(3.14) != Doc2.getNode(3.14)); + EXPECT_FALSE(Doc1.getNode("hello") != Doc2.getNode("hello")); + + // Different values across documents should not be equal. + EXPECT_FALSE(Doc1.getNode(1) == Doc2.getNode(2)); + EXPECT_FALSE(Doc1.getNode("foo") == Doc2.getNode("bar")); + EXPECT_FALSE(Doc1.getNode(true) == Doc2.getNode(false)); + EXPECT_FALSE(Doc1.getNode(1U) == Doc2.getNode(2U)); +} + +TEST(MsgPackDocument, DocNodeCrossDocumentScalarOrdering) { + Document Doc1, Doc2; + // operator< should compare by value across documents. + EXPECT_TRUE(Doc1.getNode(1) < Doc2.getNode(2)); + EXPECT_FALSE(Doc1.getNode(2) < Doc2.getNode(1)); + EXPECT_FALSE(Doc1.getNode(1) < Doc2.getNode(1)); + + EXPECT_TRUE(Doc1.getNode("abc") < Doc2.getNode("def")); + EXPECT_FALSE(Doc1.getNode("def") < Doc2.getNode("abc")); + + EXPECT_TRUE(Doc1.getNode(1U) < Doc2.getNode(2U)); + EXPECT_FALSE(Doc1.getNode(2U) < Doc2.getNode(1U)); +} + +TEST(MsgPackDocument, DocNodeArrayEquality) { + Document Doc; + auto A1 = Doc.getArrayNode(); + A1.push_back(Doc.getNode(int64_t(1))); + A1.push_back(Doc.getNode(int64_t(2))); + + auto A2 = Doc.getArrayNode(); + A2.push_back(Doc.getNode(int64_t(1))); + A2.push_back(Doc.getNode(int64_t(2))); + + // Same contents should be equal. + DocNode N1 = A1, N2 = A2; + EXPECT_FALSE(N1 != N2); + + // Different contents should not be equal. + auto A3 = Doc.getArrayNode(); + A3.push_back(Doc.getNode(int64_t(1))); + A3.push_back(Doc.getNode(int64_t(99))); + DocNode N3 = A3; + EXPECT_FALSE(N1 == N3); + + // Different sizes should not be equal. + auto A4 = Doc.getArrayNode(); + A4.push_back(Doc.getNode(int64_t(1))); + DocNode N4 = A4; + EXPECT_FALSE(N1 == N4); +} + +TEST(MsgPackDocument, DocNodeMapEquality) { + Document Doc; + auto M1 = Doc.getMapNode(); + M1["x"] = 10; + M1["y"] = 20; + + auto M2 = Doc.getMapNode(); + M2["x"] = 10; + M2["y"] = 20; + + DocNode N1 = M1, N2 = M2; + EXPECT_FALSE(N1 != N2); + + // Different value. + auto M3 = Doc.getMapNode(); + M3["x"] = 10; + M3["y"] = 99; + DocNode N3 = M3; + EXPECT_FALSE(N1 == N3); + + // Different key. + auto M4 = Doc.getMapNode(); + M4["x"] = 10; + M4["z"] = 20; + DocNode N4 = M4; + EXPECT_FALSE(N1 == N4); +} + +TEST(MsgPackDocument, DocNodeCrossDocumentArrayEquality) { + Document Doc1, Doc2; + auto A1 = Doc1.getArrayNode(); + A1.push_back(Doc1.getNode("hello")); + A1.push_back(Doc1.getNode(42)); + + auto A2 = Doc2.getArrayNode(); + A2.push_back(Doc2.getNode("hello")); + A2.push_back(Doc2.getNode(42)); + + DocNode N1 = A1, N2 = A2; + EXPECT_FALSE(N1 != N2); + + auto A3 = Doc2.getArrayNode(); + A3.push_back(Doc2.getNode("hello")); + A3.push_back(Doc2.getNode(99)); + DocNode N3 = A3; + EXPECT_FALSE(N1 == N3); +} + +TEST(MsgPackDocument, DocNodeCrossDocumentMapEquality) { + Document Doc1, Doc2; + auto M1 = Doc1.getMapNode(); + M1["key"] = Doc1.getNode("value"); + + auto M2 = Doc2.getMapNode(); + M2["key"] = Doc2.getNode("value"); + + DocNode N1 = M1, N2 = M2; + EXPECT_FALSE(N1 != N2); + + auto M3 = Doc2.getMapNode(); + M3["key"] = Doc2.getNode("other"); + DocNode N3 = M3; + EXPECT_FALSE(N1 == N3); +} + +TEST(MsgPackDocument, CopyNodeEmpty) { + Document Src, Dst; + auto Copied = Dst.copyNode(Src.getEmptyNode()); + EXPECT_TRUE(Copied.isEmpty()); +} + +TEST(MsgPackDocument, CopyNodeScalar) { + Document Src, Dst; + auto Node = Src.getNode("hello", /*Copy=*/true); + auto Copied = Dst.copyNode(Node); + EXPECT_FALSE(Node != Copied); + EXPECT_EQ(Copied.getKind(), Type::String); + EXPECT_EQ(Copied.getString(), "hello"); +} + +TEST(MsgPackDocument, CopyNodeArray) { + Document Src, Dst; + auto A = Src.getArrayNode(); + A.push_back(Src.getNode(int64_t(1))); + A.push_back(Src.getNode("two", /*Copy=*/true)); + A.push_back(Src.getNode(3.0)); + + DocNode SrcNode = A; + auto Copied = Dst.copyNode(SrcNode); + EXPECT_FALSE(SrcNode != Copied); + EXPECT_EQ(Copied.getArray().size(), 3u); + EXPECT_EQ(Copied.getArray()[0].getInt(), int64_t(1)); + EXPECT_EQ(Copied.getArray()[1].getString(), "two"); + EXPECT_EQ(Copied.getArray()[2].getFloat(), 3.0); +} + +TEST(MsgPackDocument, CopyNodeMap) { + Document Src, Dst; + auto M = Src.getMapNode(); + M["name"] = Src.getNode("test", /*Copy=*/true); + M["count"] = 42; + + DocNode SrcNode = M; + auto Copied = Dst.copyNode(SrcNode); + EXPECT_FALSE(SrcNode != Copied); + + // Verify by iterating the map directly. + auto &CopiedMap = Copied.getMap(); + EXPECT_EQ(CopiedMap.size(), 2u); + for (auto &Entry : CopiedMap) { + EXPECT_TRUE(Entry.first.isString()); + if (Entry.first.getString() == "name") + EXPECT_EQ(Entry.second.getString(), "test"); + else if (Entry.first.getString() == "count") + EXPECT_EQ(Entry.second.getInt(), int64_t(42)); + else + FAIL() << "unexpected key: " << Entry.first.toString(); + } +} + +TEST(MsgPackDocument, CopyNodeNested) { + Document Src, Dst; + auto M = Src.getMapNode(); + auto Inner = Src.getArrayNode(); + Inner.push_back(Src.getNode(int64_t(1))); + Inner.push_back(Src.getNode(int64_t(2))); + M["arr"] = Inner; + M["val"] = Src.getNode("x", /*Copy=*/true); + + DocNode SrcNode = M; + auto Copied = Dst.copyNode(SrcNode); + EXPECT_FALSE(SrcNode != Copied); + auto &CopiedMap = Copied.getMap(); + EXPECT_TRUE(CopiedMap["arr"].isArray()); + EXPECT_EQ(CopiedMap["arr"].getArray().size(), 2u); + EXPECT_EQ(CopiedMap["arr"].getArray()[0].getInt(), int64_t(1)); +} + +TEST(MsgPackDocument, CopyNodeIndependence) { + Document Src, Dst; + auto A = Src.getArrayNode(); + A.push_back(Src.getNode(int64_t(1))); + A.push_back(Src.getNode(int64_t(2))); + + DocNode SrcNode = A; + auto Copied = Dst.copyNode(SrcNode); + EXPECT_FALSE(SrcNode != Copied); + + // Mutate the source — the copy should be unaffected. + A.push_back(Src.getNode(int64_t(3))); + EXPECT_EQ(A.size(), 3u); + EXPECT_EQ(Copied.getArray().size(), 2u); + + // Mutate the copy — the source should be unaffected. + Copied.getArray().push_back(Dst.getNode(int64_t(10))); + Copied.getArray().push_back(Dst.getNode(int64_t(20))); + EXPECT_EQ(Copied.getArray().size(), 4u); + EXPECT_EQ(A.size(), 3u); +} + TEST(MsgPackDocument, TestReadBoolean) { Document Doc1; bool Ok = Doc1.readFromBlob(StringRef("\xC2", 1), /*Multi=*/false);