From db713325d530e88d0229643c6002950ddfc16d6e Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Wed, 21 Jan 2026 12:19:34 -0500 Subject: [PATCH] [libc][tsearch] add weak AVL tree for tsearch implementation (#172411) Related to #114695. This PR adds a Weak AVL Tree for tsearch APIs. The symbol implementations will come in a follow-up PR to avoid creating a huge patch. The work is based on @MaskRay's recent post (see below). A general self-balancing binary search tree where node pointers can be used as stable handles to the stored values. The self-balancing strategy is the Weak AVL (WAVL) tree, based on the following foundational references: 1. https://maskray.me/blog/2025-12-14-weak-avl-tree 2. https://reviews.freebsd.org/D25480 3. https://ics.uci.edu/~goodrich/teach/cs165/notes/WeakAVLTrees.pdf 4. https://dl.acm.org/doi/10.1145/2689412 (Rank-Balanced Trees) WAVL trees belong to the rank-balanced binary search tree framework (reference 4), alongside AVL and Red-Black trees. Key Properties of WAVL Trees: 1. Relationship to Red-Black Trees: A WAVL tree can always be colored as a Red-Black tree. 2. Relationship to AVL Trees: An AVL tree meets all the requirements of a WAVL tree. Insertion-only WAVL trees maintain the same structure as AVL trees. Rank-Based Balancing: In rank-balanced trees, each node is assigned a rank (conceptually similar to height). In AVL/WAVL, the rank difference between a parent and its child is strictly enforced to be either **1** or **2**. - **AVL Trees:** Rank is equivalent to height. The strict condition is that there are no 2-2 nodes (a parent with rank difference 2 to both children). - **WAVL Trees:** The no 2-2 node rule is relaxed for internal nodes during the deletion fixup process, making WAVL trees less strictly balanced than AVL trees but easier to maintain than Red-Black trees. Balancing Mechanics (Promotion/Demotion): - **Null nodes** are considered to have rank -1. - **External/leaf nodes** have rank 0. 
- **Insertion:** Inserting a node may create a situation where a parent and child have the same rank (difference 0). This is fixed by **promoting** the rank of the parent and propagating the fix upwards using at most two rotations (trinode fixup). - **Deletion:** Deleting a node may result in a parent being 3 ranks higher than a child (difference 3). This is fixed by **demoting** the parent's rank and propagating the fix upwards. Implementation Detail: The rank is **implicitly** maintained. We never store the full rank. Instead, a 2-bit tag is used on each node to record the rank difference to each child: - Bit cleared (0) -> Rank difference is **1**. - Bit set (1) -> Rank difference is **2**. --------- Co-authored-by: Michael Jones --- libc/fuzzing/__support/CMakeLists.txt | 9 + libc/fuzzing/__support/weak_avl_fuzz.cpp | 98 ++++ libc/src/__support/CMakeLists.txt | 15 + libc/src/__support/weak_avl.h | 595 ++++++++++++++++++++++ libc/test/src/__support/CMakeLists.txt | 10 + libc/test/src/__support/weak_avl_test.cpp | 274 ++++++++++ 6 files changed, 1001 insertions(+) create mode 100644 libc/fuzzing/__support/weak_avl_fuzz.cpp create mode 100644 libc/src/__support/weak_avl.h create mode 100644 libc/test/src/__support/weak_avl_test.cpp diff --git a/libc/fuzzing/__support/CMakeLists.txt b/libc/fuzzing/__support/CMakeLists.txt index 9c674d2fb0d6..be7225903645 100644 --- a/libc/fuzzing/__support/CMakeLists.txt +++ b/libc/fuzzing/__support/CMakeLists.txt @@ -25,6 +25,15 @@ add_libc_fuzzer( -D__LIBC_EXPLICIT_SIMD_OPT ) +add_libc_fuzzer( + weak_avl_fuzz + SRCS + weak_avl_fuzz.cpp + DEPENDS + libc.src.__support.weak_avl + libc.src.__support.CPP.optional +) + # TODO: FreeListHeap uses the _end symbol which conflicts with the _end symbol # defined by GPU start.cpp files so for now we exclude this fuzzer on GPU. 
if(LLVM_LIBC_FULL_BUILD AND NOT LIBC_TARGET_OS_IS_GPU) diff --git a/libc/fuzzing/__support/weak_avl_fuzz.cpp b/libc/fuzzing/__support/weak_avl_fuzz.cpp new file mode 100644 index 000000000000..a5d3efe6e5da --- /dev/null +++ b/libc/fuzzing/__support/weak_avl_fuzz.cpp @@ -0,0 +1,98 @@ +//===-- weak_avl_fuzz.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Fuzzing test for llvm-libc weak AVL implementations. +/// +//===----------------------------------------------------------------------===// +#include "hdr/types/ENTRY.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/CPP/optional.h" +#include "src/__support/macros/config.h" +#include "src/__support/weak_avl.h" + +namespace LIBC_NAMESPACE_DECL { + +// A sequence of fixed-size records, each one action byte followed by an int: +// - Erase: action byte is 5 or 6 (mod 7) +// - Find: action byte is 4 (mod 7) +// - FindOrInsert: action byte is 0, 1, 2 or 3 (mod 7) +extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, size_t max_size); +extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *data, size_t size, + size_t max_size, unsigned int seed) { + size = LLVMFuzzerMutate(data, size, max_size); + return size / (1 + sizeof(int)) * (1 + sizeof(int)); +} + +class AVLTree { + using Node = WeakAVLNode<int>; + Node *root = nullptr; + bool reversed = false; + static int compare(int a, int b) { return (a > b) - (a < b); } + static int reverse_compare(int a, int b) { return (b > a) - (b < a); } + +public: + AVLTree(bool reversed = false) : reversed(reversed) {} + bool find(int key) { + return Node::find(root, key, reversed ? 
reverse_compare : compare) + .has_value(); + } + bool find_or_insert(int key) { + return Node::find_or_insert(root, key, reversed ? reverse_compare : compare) + .has_value(); + } + bool erase(int key) { + if (cpp::optional<Node *> node = + Node::find(root, key, reversed ? reverse_compare : compare)) { + Node::erase(root, node.value()); + return true; + } + return false; + } + ~AVLTree() { Node::destroy(root); } +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + AVLTree tree1; + AVLTree tree2(true); + for (size_t i = 0; i + (1 + sizeof(int)) <= size; i += 1 + sizeof(int)) { + uint8_t action = data[i]; + int key; + __builtin_memcpy(&key, data + i + 1, sizeof(int)); + if (action % 7 == 4) { + // Find + bool res1 = tree1.find(key); + bool res2 = tree2.find(key); + if (res1 != res2) + __builtin_trap(); + + } else if (action % 7 == 5 || action % 7 == 6) { + // Erase + bool res1 = tree1.erase(key); + bool res2 = tree2.erase(key); + if (res1 != res2) + __builtin_trap(); + if (tree1.find(key)) + __builtin_trap(); + if (tree2.find(key)) + __builtin_trap(); + } else { + // FindOrInsert + bool res1 = tree1.find_or_insert(key); + bool res2 = tree2.find_or_insert(key); + if (res1 != res2) + __builtin_trap(); + if (!tree1.find(key)) + __builtin_trap(); + if (!tree2.find(key)) + __builtin_trap(); + } + } + return 0; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index df524c25cbd8..64cf36871322 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -391,6 +391,21 @@ add_header_library( libc.src.__support.macros.attributes ) +add_header_library( + weak_avl + HDRS + weak_avl.h + DEPENDS + libc.hdr.stdint_proxy + libc.src.__support.CPP.bit + libc.src.__support.CPP.new + libc.src.__support.CPP.utility + libc.src.__support.CPP.optional + libc.src.__support.libc_assert + libc.src.__support.macros.attributes + libc.src.__support.macros.config +) + 
add_subdirectory(FPUtil) add_subdirectory(OSUtil) add_subdirectory(StringUtil) diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h new file mode 100644 index 000000000000..31c7e31a19c6 --- /dev/null +++ b/libc/src/__support/weak_avl.h @@ -0,0 +1,595 @@ +//===-- Implementation header for weak AVL tree -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H +#define LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H + +#include "hdr/stdint_proxy.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/CPP/new.h" +#include "src/__support/CPP/optional.h" +#include "src/__support/CPP/utility/move.h" +#include "src/__support/alloc-checker.h" +#include "src/__support/libc_assert.h" +#include "src/__support/macros/attributes.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +// A general self-balancing binary search tree where the node pointer can +// be used as stable handles to the stored values. +// +// The self-balancing strategy is the Weak AVL (WAVL) tree, based on the +// following foundational references: +// 1. https://maskray.me/blog/2025-12-14-weak-avl-tree +// 2. https://reviews.freebsd.org/D25480 +// 3. https://ics.uci.edu/~goodrich/teach/cs165/notes/WeakAVLTrees.pdf +// 4. https://dl.acm.org/doi/10.1145/2689412 (Rank-Balanced Trees) +// +// WAVL trees belong to the rank-balanced binary search tree framework +// (reference 4), alongside AVL and Red-Black trees. +// +// Key Properties of WAVL Trees: +// 1. Relationship to Red-Black Trees: A WAVL tree can always be colored as a +// Red-Black tree. +// 2. Relationship to AVL Trees: An AVL tree meets all the requirements of a +// WAVL tree. 
Insertion-only WAVL trees maintain the same structure as AVL +// trees. +// +// Rank-Based Balancing: +// In rank-balanced trees, each node is assigned a rank (conceptually similar +// to height). The rank difference between a parent and its child is +// strictly enforced to be either **1** or **2**. +// +// - **AVL Trees:** Rank is equivalent to height. The strict condition is that +// there are no 2-2 nodes (a parent with rank difference 2 to both children). +// - **WAVL Trees:** The no 2-2 node rule is relaxed for internal nodes during +// the deletion fixup process, making WAVL trees less strictly balanced than +// AVL trees but easier to maintain than Red-Black trees. +// +// Balancing Mechanics (Promotion/Demotion): +// - **Null nodes** are considered to have rank -1. +// - **External/leaf nodes** have rank 0. +// - **Insertion:** Inserting a node may create a situation where a parent and +// child have the same rank (difference 0). This is fixed by **promoting** +// the rank of the parent and propagating the fix upwards using at most two +// rotations (trinode fixup). +// - **Deletion:** Deleting a node may result in a parent being 3 ranks higher +// than a child (difference 3). This is fixed by **demoting** the parent's +// rank and propagating the fix upwards. +// +// Implementation Detail: +// The rank is **implicitly** maintained. We never store the full rank. Instead, +// a 2-bit tag is used on each node to record the rank difference to each child: +// - Bit cleared (0) -> Rank difference is **1**. +// - Bit set (1) -> Rank difference is **2**. 
+template <typename T> class WeakAVLNode { + // Data + T data; + + // Parent pointer + WeakAVLNode *parent; + + // Children pointers + WeakAVLNode *children[2]; + + // Flags + unsigned char left_rank_diff_2 : 1; + unsigned char right_rank_diff_2 : 1; + + LIBC_INLINE bool is_leaf() const { + return (children[0] == nullptr) && (children[1] == nullptr); + } + + LIBC_INLINE void toggle_rank_diff_2(bool is_right) { + if (is_right) + right_rank_diff_2 ^= 1; + else + left_rank_diff_2 ^= 1; + } + + LIBC_INLINE bool both_flags_set() const { + return left_rank_diff_2 && right_rank_diff_2; + } + + LIBC_INLINE bool any_flag_set() const { + return left_rank_diff_2 || right_rank_diff_2; + } + + LIBC_INLINE void clear_flags() { + left_rank_diff_2 = 0; + right_rank_diff_2 = 0; + } + + LIBC_INLINE void set_both_flags() { + left_rank_diff_2 = 1; + right_rank_diff_2 = 1; + } + + LIBC_INLINE WeakAVLNode(T data) + : data(cpp::move(data)), parent(nullptr), children{nullptr, nullptr}, + left_rank_diff_2(0), right_rank_diff_2(0) {} + + LIBC_INLINE static WeakAVLNode *create(T value) { + AllocChecker ac; + WeakAVLNode *res = new (ac) WeakAVLNode(cpp::move(value)); + if (ac) + return res; + return nullptr; + } + + // Unlink a node from tree. The corresponding flag is not updated. The node is + // not deleted and its pointers are not cleared. + // FixupSite is the lowest surviving node from which rank/flag invariants may + // be violated. + // Our tree requires values to stay in their nodes to keep addresses stable. + // This complicates the unlink operation as the successor transplanting needs + // to update all the pointers and flags. 
+ struct FixupSite { + WeakAVLNode *parent; + bool is_right; + }; + LIBC_INLINE static FixupSite unlink(WeakAVLNode *&root, WeakAVLNode *node) { + bool has_left = node->children[0] != nullptr; + bool has_right = node->children[1] != nullptr; + + // Case 0: no children + if (!has_left && !has_right) { + if (!node->parent) { + root = nullptr; + return {nullptr, false}; + } + FixupSite site = {node->parent, node->parent->children[1] == node}; + site.parent->children[site.is_right] = nullptr; + return site; + } + + // Case 1: one child + if (has_left != has_right) { + WeakAVLNode *child = node->children[has_right]; + if (!node->parent) { + root = child; + child->parent = nullptr; + return {nullptr, false}; + } + FixupSite site = {node->parent, node->parent->children[1] == node}; + site.parent->children[site.is_right] = child; + child->parent = site.parent; + return site; + } + + // Case 2: two children: replace by successor (leftmost in right subtree) + WeakAVLNode *succ = node->children[1]; + while (succ->children[0]) + succ = succ->children[0]; + + WeakAVLNode *succ_parent = succ->parent; + // succ and node may be adjacent to each other, so we + // still need to check the exact direction of the successor. 
+ bool succ_was_right = succ_parent->children[1] == succ; + WeakAVLNode *succ_rchild = succ->children[1]; + + // 1) Splice successor out of its old position (flags intentionally + // unchanged) + FixupSite site = {succ_parent, succ_was_right}; + succ_parent->children[succ_was_right] = succ_rchild; + if (succ_rchild) + succ_rchild->parent = succ_parent; + + // 2) Transplant successor into node's position + succ->parent = node->parent; + succ->left_rank_diff_2 = node->left_rank_diff_2; + succ->right_rank_diff_2 = node->right_rank_diff_2; + + succ->children[0] = node->children[0]; + succ->children[1] = node->children[1]; + if (succ->children[0]) + succ->children[0]->parent = succ; + if (succ->children[1]) + succ->children[1]->parent = succ; + + if (succ->parent) { + bool node_was_right = succ->parent->children[1] == node; + succ->parent->children[node_was_right] = succ; + } else { + root = succ; + } + + // 3) If the physical removal was under `node`, fixup parent must be the + // successor (since `node` is deleted and successor now occupies that + // spot). + if (site.parent == node) + site.parent = succ; + + return site; + } + +public: + using OptionalNodePtr = cpp::optional<WeakAVLNode *>; + + LIBC_INLINE const WeakAVLNode *get_left() const { return children[0]; } + LIBC_INLINE const WeakAVLNode *get_right() const { return children[1]; } + LIBC_INLINE const T &get_data() const { return data; } + LIBC_INLINE bool has_rank_diff_2(bool is_right) const { + return is_right ? right_rank_diff_2 : left_rank_diff_2; + } + + // Destroy the subtree rooted at node + LIBC_INLINE static void destroy(WeakAVLNode *node) { + if (!node) + return; + destroy(node->children[0]); + destroy(node->children[1]); + delete node; + } + // Rotate the subtree rooted at node in the given direction. 
+ // + // Illustration for is_right = true (Left Rotation): + // + // (Node) (Pivot) + // / \ / \ + // A (Pivot) => (Node) C + // / \ / \ + // B C A B + // + LIBC_INLINE static WeakAVLNode *rotate(WeakAVLNode *&root, WeakAVLNode *node, + bool is_right) { + WeakAVLNode *pivot = node->children[is_right]; + // Handover pivot's child + WeakAVLNode *grandchild = pivot->children[!is_right]; + node->children[is_right] = grandchild; + if (grandchild) + grandchild->parent = node; + pivot->parent = node->parent; + // Pivot becomes the new root of the subtree + if (!node->parent) { + root = pivot; + } else { + bool node_is_right = node->parent->children[1] == node; + node->parent->children[node_is_right] = pivot; + } + pivot->children[!is_right] = node; + node->parent = pivot; + return pivot; + } + + // Find data in the subtree rooted at root. If not found, returns + // cpp::nullopt. `Compare` returns integer values for ternary comparison. + // Unlike other interfaces, `find` does not modify the tree; hence we pass + // the `root` by value. + // It is assumed that the order returned by the comparator is consistent + // on each call. + template <typename Compare> + LIBC_INLINE static OptionalNodePtr find(WeakAVLNode *root, T data, + Compare comp) { + WeakAVLNode *cursor = root; + while (cursor != nullptr) { + int comp_result = comp(cursor->data, data); + if (comp_result == 0) + return cursor; // Node found + bool is_right = comp_result < 0; + cursor = cursor->children[is_right]; + } + return cpp::nullopt; + } + // Insert data into the subtree rooted at root. + // Returns the node if insertion is successful or the node exists in + // the tree. + // Returns cpp::nullopt if memory allocation fails. + // `Compare` returns integer values for ternary comparison. + // It is assumed that the order returned by the comparator is consistent + // on each call. 
+ template <typename Compare> + LIBC_INLINE static OptionalNodePtr find_or_insert(WeakAVLNode *&root, T data, + Compare comp) { + WeakAVLNode *parent = nullptr, *cursor = root; + bool is_right = false; + while (cursor != nullptr) { + parent = cursor; + int comp_result = comp(parent->data, data); + if (comp_result == 0) + return parent; // Node already exists + is_right = comp_result < 0; + cursor = cursor->children[is_right]; + } + WeakAVLNode *allocated = create(cpp::move(data)); + if (!allocated) + return cpp::nullopt; + WeakAVLNode *node = allocated; + node->parent = parent; + + // Case 0: inserting into an empty tree + if (!parent) { + root = node; // Tree was empty + return node; + } + + parent->children[is_right] = node; + // Rebalance process + // Case 1: both node and its sibling have rank-difference 1. So after the + // insertion, the node is at the same level as the parent. Promoting parent + // will fix the conflict of the trinodes but we may need to continue on + // parent. + // + // (GP) (GP) + // | Promote | x - 1 + // | x -----> (P) + // 0 | / 1 / \ + // (N) --- (P) ---- (N) \ 2 + // \ 1 \ + // (S) (S) + while (parent && !parent->any_flag_set()) { + parent->toggle_rank_diff_2(!is_right); + node = parent; + parent = node->parent; + if (parent) + is_right = (parent->children[1] == node); + } + // We are done if node has reached the root -- otherwise, we end up with + // two more cases. + if (!parent) + return allocated; + + // Case 2: parent does not need to be promoted as node is lower + // than the parent by 2 ranks. + // (P) (P) + // / \ / \ + // 2 1 => 1 1 + // / \ / \ + // (N) (*) (N) (*) + if (parent->has_rank_diff_2(is_right)) { + parent->toggle_rank_diff_2(is_right); + return allocated; + } + + // At this point, we know there is a violation but one-step fix is possible. 
+ LIBC_ASSERT(!node->both_flags_set() && + "there should be no 2-2 node along the insertion fixup path"); + + LIBC_ASSERT((node == allocated || node->any_flag_set()) && + "Internal node must have a child with rank-difference 2, " + "otherwise it should have already been handled."); + + // Case 3: node's sibling has rank-difference 2. And node has a 1-node + // along the same direction. We can do a single rotation to fix the + // trinode. + // (GP) (GP) + // 0 | X Rotate | + // (N) ----- (P) => (N) + // 1 / \ 2 \ 2 1 / \ 1 + // (C1) \ \ (C1) (P) + // (C2) (S) 1 / \ 1 + // (C2) (S) + if (node->has_rank_diff_2(!is_right)) { + WeakAVLNode *new_subroot = rotate(root, parent, is_right); + new_subroot->clear_flags(); + parent->clear_flags(); + return allocated; + } + // Case 4: node's sibling has rank-difference 2. And node has a 1-node + // along the opposite direction. We need a double rotation to fix the + // trinode. + // (GP) (GP) + // 0 | X Zig-Zag | X + // (N) ----- (P) => (C1) + // 2 / \ 1 \ 2 1 / \ 1 + // / (C1) \ (N) (P) + // (C2) L / \ R (S) 1 / \ L R / \ 1 + // (A) (B) (C2) (A)(B) (S) + // (mirrored) + // (GP) (GP) + // X | 0 Zig-Zag | X + // (P) ----- (N) => (C1) + // 2 / 1 / \ 2 1 / \ 1 + // / (C1) \ (P) (N) + // (S) L / \ R (C2) 1 / \ L R / \ 1 + // (A) (B) (S)(A) (B)(C2) + + WeakAVLNode *subroot1 = rotate(root, node, !is_right); // First rotation + [[maybe_unused]] WeakAVLNode *subroot2 = + rotate(root, parent, is_right); // Second rotation + LIBC_ASSERT(subroot1 == subroot2 && + "Subroots after double rotation should be the same"); + bool subroot_left_diff_2 = subroot1->left_rank_diff_2; + bool subroot_right_diff_2 = subroot1->right_rank_diff_2; + node->clear_flags(); + parent->clear_flags(); + subroot1->clear_flags(); + // Select destinations + WeakAVLNode *dst_left = is_right ? parent : node; + WeakAVLNode *dst_right = is_right ? 
node : parent; + // Masked toggles + if (subroot_left_diff_2) + dst_left->toggle_rank_diff_2(true); + + if (subroot_right_diff_2) + dst_right->toggle_rank_diff_2(false); + return allocated; + } + + // Erase the node from the tree rooted at root. + LIBC_INLINE static void erase(WeakAVLNode *&root, WeakAVLNode *node) { + // Unlink the node from the tree + auto [cursor, is_right] = unlink(root, node); + delete node; + WeakAVLNode *sibling = nullptr; + while (cursor) { + // Case 0. cursor previously had rank-difference 1 on the side of the + // deleted node. We can simply update the rank-difference and stop. + // Notice that this step may create 2-2 nodes, thus deviate from "strong" + // AVL tree. + // + // (C) (C) + // X / \ 1 => X / \ + // (*) (D) (*) \ 2 + // (D) + if (!cursor->has_rank_diff_2(is_right)) { + cursor->toggle_rank_diff_2(is_right); + // If we created a 2-2 leaf, we must demote it and continue. + // Otherwise, we are done as the internal node becomes a 2-2 node and + // there is no further violation upwards. + if (!cursor->both_flags_set() || !cursor->is_leaf()) + return; + // Clear flags for demotion. + cursor->clear_flags(); + } + + // Case 1. cursor previously had rank-difference 2 on the side of the + // deleted node. Now it has rank-difference 3, which violates the + // weak-AVL property. We found that we have a sibling with rank-difference + // 2, so we can demote cursor and continue upwards. + // + // (P) (P) + // | X | (X + 1) + // (C) | + // / \ => (C) + // 2 / \ 1 / \ + // (*) \ 3 (*) \ 2 + // (D) (D) + else if (cursor->has_rank_diff_2(!is_right)) + cursor->toggle_rank_diff_2(!is_right); + + // Case 2. continue from Case 1; but the sibling has rank-difference 1. + // However, we found that the sibling is a 2-2 node. We demote both + // sibling and cursor, and continue upwards. We break for other cases if + // sibling cannot be demoted. 
+ // + // (P) (P) + // | X | (X + 1) + // (C) | + // 1 / \ => (C) + // (S) \ 1 / \ + // / \ \ 3 (S) \ 2 + // 2 / \ 2 (D) 1 / \ 1 (D) + // (*) (*) (*) (*) + else { + sibling = cursor->children[!is_right]; + LIBC_ASSERT(sibling && "rank-difference 1 sibling cannot be empty"); + if (sibling->both_flags_set()) + sibling->clear_flags(); + else + break; + } + + // Update cursor to move upwards + if (cursor->parent) + is_right = (cursor->parent->children[1] == cursor); + cursor = cursor->parent; + } + + // Either cursor is nullptr (we reached the root), or sibling has + // rank-difference 1. + if (!cursor) + return; + LIBC_ASSERT(sibling && "rank-difference 1 sibling must exist"); + bool sibling_is_right = !is_right; // Rename for clarity + + // Case 3. continue from Case 2; but the sibling cannot be demoted. + // Sibling has a node T along the same direction with rank-difference 1. + // + // (P) (P) + // | X | X + // (C) (S) + // 1 / \ Rotate 2 / \ 1 + // (S) \ => / (C) + // 1 / \ Y \ 3 (T) Y / \ 2 + // (T) \ (D) (*) \ + // (*) (D) + if (!sibling->has_rank_diff_2(sibling_is_right)) { + WeakAVLNode *new_subroot = rotate(root, cursor, sibling_is_right); + LIBC_ASSERT(new_subroot == sibling && + "sibling should become the subtree root"); + // Update flags + bool sibling_alter_child_has_rank_diff_2 = + new_subroot->has_rank_diff_2(!sibling_is_right); + new_subroot->clear_flags(); + new_subroot->toggle_rank_diff_2(sibling_is_right); + + // Cursor only needs to be updated if it becomes a 2-2 node + if (sibling_alter_child_has_rank_diff_2) { + // Demote a 2-2 cursor if it is a leaf + bool cursor_is_leaf = cursor->is_leaf(); + if (cursor_is_leaf) + cursor->clear_flags(); + + // If cursor is now a leaf, then its parent (which should be the pivot) + // becomes a 2-2 node after cursor's demotion. Otherwise, cursor itself + // should become a 2-2 node. + WeakAVLNode *candidate = cursor_is_leaf ? 
new_subroot : cursor; + candidate->toggle_rank_diff_2(sibling_is_right ^ cursor_is_leaf); + LIBC_ASSERT(candidate->both_flags_set() && + "target node should become a 2-2 node."); + } + } + // Case 4. continue from Case 3; but rank-difference 1 child T of sibling + // is on the opposite direction. + // + // (P) (P) + // | X | X + // (C) Zig-Zag (T) + // 1 / \ => / \ + // (S) \ 2 / \ 2 + // / \ 1 \ 3 (S) (C) + // 2 / (T) (D) 1 / Y \ / Z \ 1 + // (*) Y / \ Z (*) (A)(B) (D) + // (A) (B) + else { + WeakAVLNode *target_child = rotate(root, sibling, !sibling_is_right); + bool subtree_left_diff_2 = target_child->left_rank_diff_2; + bool subtree_right_diff_2 = target_child->right_rank_diff_2; + [[maybe_unused]] WeakAVLNode *new_subroot = + rotate(root, cursor, sibling_is_right); + LIBC_ASSERT(new_subroot == target_child && + "target_child should become the subtree root"); + // Set flags + target_child->set_both_flags(); + cursor->clear_flags(); + sibling->clear_flags(); + // Select destinations + WeakAVLNode *dst_left = sibling_is_right ? cursor : sibling; + WeakAVLNode *dst_right = sibling_is_right ? 
sibling : cursor; + // Masked toggles + if (subtree_left_diff_2) + dst_left->toggle_rank_diff_2(true); + if (subtree_right_diff_2) + dst_right->toggle_rank_diff_2(false); + } + } + + enum struct WalkType { + PreOrder, + InOrder, + PostOrder, + Leaf, + }; + template <typename Func> + LIBC_INLINE static void walk(WeakAVLNode *node, Func func) { + if (!node) + return; + + if (node->is_leaf()) { + func(node, WalkType::Leaf); + return; + } + + func(node, WalkType::PreOrder); + + if (node->children[0]) + walk(node->children[0], func); + + func(node, WalkType::InOrder); + + if (node->children[1]) + walk(node->children[1], func); + func(node, WalkType::PostOrder); + } +}; + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 98980ce66d9b..b6729ba5eb26 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -280,6 +280,16 @@ add_libc_test( libc.src.__support.CPP.bit ) +add_libc_test( + weak_avl_test + SUITE + libc-support-tests + SRCS + weak_avl_test.cpp + DEPENDS + libc.src.__support.weak_avl +) + add_subdirectory(CPP) add_subdirectory(File) add_subdirectory(RPC) diff --git a/libc/test/src/__support/weak_avl_test.cpp b/libc/test/src/__support/weak_avl_test.cpp new file mode 100644 index 000000000000..49ff2e8d9774 --- /dev/null +++ b/libc/test/src/__support/weak_avl_test.cpp @@ -0,0 +1,274 @@ +//===-- Unittests for WeakAVL ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/optional.h" +#include "src/__support/weak_avl.h" +#include "test/UnitTest/Test.h" + +using Node = LIBC_NAMESPACE::WeakAVLNode<int>; + +namespace { +int ternary_compare(int a, int b) { return (a > b) - (a < b); } +constexpr int TEST_SIZE = 128; +// Validate weak-AVL rank-difference invariant assuming **pure insertion only** +// (i.e. no erasure has occurred). +// +// NOTE: This validator is intentionally *not* correct after erase(), because +// weak-AVL allows transient or permanent 2-2 configurations during deletion +// fixup. +bool validate_pure_insertion(const Node *node) { + if (!node) + return true; + bool left_2 = node->has_rank_diff_2(false); + bool right_2 = node->has_rank_diff_2(true); + return (!left_2 || !right_2) && validate_pure_insertion(node->get_left()) && + validate_pure_insertion(node->get_right()); +} + +// Insert according to pattern `next(i)` +using NextFn = int (*)(int); +using OptionalNodePtr = LIBC_NAMESPACE::cpp::optional<Node *>; +struct Tree { + Node *root = nullptr; + + bool validate_pure_insertion() { return ::validate_pure_insertion(root); } + + bool contains(int value) { + return Node::find(root, value, ternary_compare).has_value(); + } + + OptionalNodePtr insert(int value) { + return Node::find_or_insert(root, value, ternary_compare); + } + + OptionalNodePtr find(int value) { + return Node::find(root, value, ternary_compare); + } + + void erase(int value) { + if (OptionalNodePtr node = Node::find(root, value, ternary_compare)) + Node::erase(root, node.value()); + } + + template <typename NextFn> static Tree build(NextFn next, int N) { + Tree tree; + for (int i = 0; i < N; ++i) + tree.insert(next(i)); + return tree; + } + + bool empty() const { return root == nullptr; } + + ~Tree() { Node::destroy(root); } +}; + +// Insertion patterns +static int seq(int i) { return i; } + +static int rev(int 
i) { + constexpr int N = TEST_SIZE; + return N - 1 - i; +} + +// Coprime stride permutation: i -> (i * X) % N +static int stride(int i, int prime = 7919) { + constexpr int N = TEST_SIZE; + return (i * prime) % N; +} + +} // namespace + +TEST(LlvmLibcWeakAVLTest, SimpleInsertion) { + Tree tree; + + OptionalNodePtr node10 = tree.insert(10); + ASSERT_TRUE(node10.has_value()); + ASSERT_TRUE(tree.insert(5).has_value()); + ASSERT_TRUE(tree.validate_pure_insertion()); + + OptionalNodePtr node15 = tree.insert(15); + ASSERT_TRUE(node15.has_value()); + ASSERT_TRUE(tree.validate_pure_insertion()); + + OptionalNodePtr node10_again = tree.insert(10); + ASSERT_EQ(*node10, *node10_again); + ASSERT_TRUE(tree.validate_pure_insertion()); +} + +TEST(LlvmLibcWeakAVLTest, SequentialInsertion) { + constexpr int N = TEST_SIZE; + + Tree tree = Tree::build(seq, N); + ASSERT_TRUE(tree.validate_pure_insertion()); + + for (int i = 0; i < N; ++i) { + OptionalNodePtr node = tree.insert(i); + ASSERT_TRUE(node.has_value()); + ASSERT_EQ(node.value()->get_data(), i); + } + + ASSERT_TRUE(tree.validate_pure_insertion()); +} + +TEST(LlvmLibcWeakAVLTest, ReversedInsertion) { + constexpr int N = TEST_SIZE; + + Tree tree = Tree::build(rev, N); + ASSERT_TRUE(tree.validate_pure_insertion()); + + for (int i = 0; i < N; ++i) { + OptionalNodePtr node = tree.insert(i); + ASSERT_TRUE(node.has_value()); + ASSERT_EQ(node.value()->get_data(), i); + } + + ASSERT_TRUE(tree.validate_pure_insertion()); +} + +TEST(LlvmLibcWeakAVLTest, StridedInsertion) { + constexpr int N = TEST_SIZE; + + Tree tree = Tree::build([](int i) { return stride(i); }, N); + ASSERT_TRUE(tree.validate_pure_insertion()); + + for (int i = 0; i < N; ++i) { + OptionalNodePtr node = tree.insert(i); + ASSERT_TRUE(node.has_value()); + ASSERT_EQ(node.value()->get_data(), i); + } + + ASSERT_TRUE(tree.validate_pure_insertion()); +} + +TEST(LlvmLibcWeakAVLTest, FindExistingAndMissing) { + constexpr int N = TEST_SIZE; + + Tree tree = Tree::build(seq, N); + 
ASSERT_TRUE(tree.validate_pure_insertion()); + + for (int i = 0; i < N; ++i) { + OptionalNodePtr node = tree.find(i); + ASSERT_TRUE(node.has_value()); + ASSERT_EQ(node.value()->get_data(), i); + } + + ASSERT_FALSE(tree.find(-1).has_value()); + ASSERT_FALSE(tree.find(N).has_value()); + ASSERT_FALSE(tree.find(2 * N).has_value()); +} + +TEST(LlvmLibcWeakAVLTest, SequentialErase) { + constexpr int N = TEST_SIZE; + + Tree tree = Tree::build(seq, N); + + for (int i = 0; i < N; ++i) { + ASSERT_TRUE(tree.contains(i)); + tree.erase(i); + ASSERT_FALSE(tree.contains(i)); + } + + ASSERT_TRUE(tree.empty()); +} + +TEST(LlvmLibcWeakAVLTest, ReverseErase) { + constexpr int N = TEST_SIZE; + + Tree tree = Tree::build(seq, N); + + for (int i = N - 1; i >= 0; --i) { + ASSERT_TRUE(tree.contains(i)); + tree.erase(i); + ASSERT_FALSE(tree.contains(i)); + } + + ASSERT_TRUE(tree.empty()); +} + +TEST(LlvmLibcWeakAVLTest, StridedErase) { + constexpr int N = TEST_SIZE; + + Tree tree = Tree::build(seq, N); + + for (int i = 0; i < N; ++i) { + int key = stride(i, 5261); + ASSERT_TRUE(tree.contains(key)); + tree.erase(key); + ASSERT_FALSE(tree.contains(key)); + } + + ASSERT_TRUE(tree.empty()); +} + +TEST(LlvmLibcWeakAVLTest, EraseStructuralCases) { + Tree tree; + int keys[] = {10, 5, 15, 3, 7, 12, 18}; + + // rank1: 10 10 + // / / \ + // rank0: 10 --> 5 --> 5 15 + + // rank2: 10 10 + // / \ / \ + // rank1: 10 5 \ 5 \ + // / \ --> / \ --> /\ \ + // rank0: 5 15 3 15 3 7 15 + + // rank2: 10 10 10 + // / \ / \ / \ + // rank1: 5 \ --> 5 15 --> 5 15 + // /\ \ /\ / /\ / \ + // rank0: 3 7 15 3 7 12 3 7 12 18 + + for (int k : keys) + tree.insert(k); + + // Erase leaf. + // rank2: 10 10 + // / \ / \ + // rank1: 5 15 5 15 + // /\ / \ --> \ / \ + // rank0: 3 7 12 18 7 12 18 + tree.erase(3); + ASSERT_FALSE(tree.contains(3)); + + // Erase internal nodes. + // Erase leaf. 
+ // rank2: 10 10 10 + // / \ / \ / \ + // rank1: 5 15 7 15 / 15 + // \ / \ --> \ / \ --> / /\ + // rank0: 7 12 18 5 12 18 7 12 18 + tree.erase(5); + ASSERT_FALSE(tree.contains(5)); + + // Erase root. + // rank2: 10 12 12 + // / \ / \ / \ + // rank1: / 15 --> / 15 --> / 15 + // / /\ / /\ / \ + // rank0: 7 12 18 7 10 18 7 18 + tree.erase(10); + ASSERT_FALSE(tree.contains(10)); + + int attempts[] = {7, 12, 15, 18}; + for (int k : attempts) + ASSERT_TRUE(tree.contains(k)); +} + +TEST(LlvmLibcTreeWalk, InOrderTraversal) { + Tree tree = Tree::build([](int x) { return stride(x, 1007); }, TEST_SIZE); + int data[TEST_SIZE]; + int counter = 0; + Node::walk(tree.root, [&](Node *node, Node::WalkType type) { + if (type == Node::WalkType::InOrder || type == Node::WalkType::Leaf) + data[counter++] = node->get_data(); + }); + for (int i = 0; i < TEST_SIZE; ++i) + ASSERT_EQ(data[i], i); +}