From 6e916d0598e02d46e0b807777bac5022430debdb Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Wed, 25 Mar 2026 12:09:24 -0400 Subject: [PATCH] [llvm][ADT] Add variable-width tag encoding to PointerUnion (#188167) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PointerUnion stores a fixed-width `ceil(log2(N))`-bit tag in the low bits of the pointer. This works only when every member type provides at least that many low bits — if the least-aligned type doesn't, compilation fails, even though the higher-aligned types may have plenty of spare bits going to waste. Introduce a variable-length escape-encoded tag that exploits the extra low bits of higher-aligned types, analogous to UTF-8: types are grouped into tiers by NumLowBitsAvailable; each non-final tier reserves one code as an escape prefix, and the next tier extends the tag into the newly available bits. This allows PointerUnion to hold more type variants than a fixed-width tag permits. The fixed-width path is used when the minimum alignment already provides enough bits (the common case); the variable-width path activates only when it doesn't, and requires types to be listed in non-decreasing NumLowBitsAvailable order. I need this for https://github.com/llvm/llvm-project/pull/186923 which requires a 6-member PointerUnion in MLIR TypeRange/ValueRange. On 32-bit systems, some members only provide 2 low bits, insufficient for a 3-bit fixed-width tag. 
--- .../lldb/pointer-union.cpp | 18 + .../lldb/pointer-union.test | 25 + llvm/benchmarks/PointerUnionBM.cpp | 125 ++++- llvm/include/llvm/ADT/PointerUnion.h | 174 ++++++- llvm/unittests/ADT/PointerUnionTest.cpp | 489 +++++++++++++++++- 5 files changed, 803 insertions(+), 28 deletions(-) diff --git a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/lldb/pointer-union.cpp b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/lldb/pointer-union.cpp index be8695473b8b..9a93d0aca327 100644 --- a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/lldb/pointer-union.cpp +++ b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/lldb/pointer-union.cpp @@ -16,6 +16,12 @@ void DerivedWithVirtual::func() {} struct alignas(8) Z {}; struct Derived : public Z {}; +// Types for variable-width tag encoding test. +// 3 x alignof(4) + 2 x alignof(8) requires escape-coded tags because +// ceil(log2(5)) = 3 > min(NumLowBitsAvailable) = 2. +template struct alignas(4) Align4 {}; +template struct alignas(8) Align8 {}; + int main() { int a = 5; float f = 4.0; @@ -50,4 +56,16 @@ int main() { llvm::PointerUnion local_float(&local); puts("Break here"); + + // Variable-width tag encoding: formatter should fall back to void*. 
+ Align4<0> a4_0; + Align8<0> a8_0; + llvm::PointerUnion *, Align4<1> *, Align4<2> *, Align8<0> *, + Align8<1> *> + varwidth(&a4_0); + llvm::PointerUnion *, Align4<1> *, Align4<2> *, Align8<0> *, + Align8<1> *> + varwidth_tier1(&a8_0); + + puts("Break here"); } diff --git a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/lldb/pointer-union.test b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/lldb/pointer-union.test index 1be963f5b74f..d252db3457ed 100644 --- a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/lldb/pointer-union.test +++ b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/lldb/pointer-union.test @@ -40,6 +40,13 @@ continue p &local v -T local_float +continue + +p &a4_0 +p &a8_0 +v -T varwidth +v -T varwidth_tier1 + #--- checks # CHECK: (lldb) p &f # CHECK-NEXT: (float *) [[PTR_F:0x[0-9a-zA-Z]+]] @@ -122,3 +129,21 @@ v -T local_float # CHECK-NEXT: (llvm::PointerUnion) local_float = { # CHECK-NEXT: (Local *) Pointer = [[PTR_LOCAL]] # CHECK-NEXT: } + +# CHECK: (lldb) p &a4_0 +# CHECK-NEXT: (Align4<0> *) [[PTR_A4:0x[0-9a-zA-Z]+]] + +# CHECK: (lldb) p &a8_0 +# CHECK-NEXT: (Align8<0> *) [[PTR_A8:0x[0-9a-zA-Z]+]] + +# Variable-width tag encoding: formatter falls back to void* with +# tag bits stripped, since fixed-width decoding doesn't apply. 
+# CHECK: (lldb) v -T varwidth +# CHECK-NEXT: (llvm::PointerUnion *, Align4<1> *, Align4<2> *, Align8<0> *, Align8<1> *>) varwidth = { +# CHECK-NEXT: (void *) Pointer = [[PTR_A4]] +# CHECK-NEXT: } + +# CHECK: (lldb) v -T varwidth_tier1 +# CHECK-NEXT: (llvm::PointerUnion *, Align4<1> *, Align4<2> *, Align8<0> *, Align8<1> *>) varwidth_tier1 = { +# CHECK-NEXT: (void *) Pointer = [[PTR_A8]] +# CHECK-NEXT: } diff --git a/llvm/benchmarks/PointerUnionBM.cpp b/llvm/benchmarks/PointerUnionBM.cpp index 35ca7cd1e1c4..ead007625c2e 100644 --- a/llvm/benchmarks/PointerUnionBM.cpp +++ b/llvm/benchmarks/PointerUnionBM.cpp @@ -22,10 +22,16 @@ using namespace llvm; namespace llvm { namespace { -// Aligned slot types with controlled NumLowBitsAvailable (3 low bits). +// Aligned slot types with controlled NumLowBitsAvailable. +template struct alignas(4) A4 { + int data; +}; // 2 bits. template struct alignas(8) A8 { int data; -}; +}; // 3 bits. +template struct alignas(32) A32 { + int data; +}; // 5 bits. template T &getSlot() { static T S{}; @@ -63,12 +69,69 @@ TypeList...> makeA8TypesImpl(std::index_sequence); template using A8Types = decltype(makeA8TypesImpl(std::make_index_sequence{})); +// Splits N types into two tiers: N0 types in tier 0 (A4, 2-bit) and +// N1 types in tier 1 (A32, 5-bit). Tier 0 gets up to 3 types (the max +// for 2 low bits minus one escape code). +template struct TwoTierSplit { + static constexpr size_t N0 = N < 4 ? N - 1 : 3; + static constexpr size_t N1 = N - N0; +}; + +// Splits N types into three tiers: N0 types in tier 0 (A4, 2-bit), +// N1 = 1 type in tier 1 (A8, 3-bit), and N2 types in tier 2 (A32, 5-bit). +// Tier 0 gets up to 3 types; tier 1 always gets exactly 1. +template struct ThreeTierSplit { + static constexpr size_t N0 = N < 5 ? N - 2 : 3; + static constexpr size_t N1 = 1; + static constexpr size_t N2 = N - N0 - N1; +}; + +// A4A32Types = TypeList, ..., A4, A32<0>, ..., A32>. 
+// Used to generate randomized arrays with a mix of A4 and A32 types. +template +TypeList..., A32...> makeA4A32TypesImpl(std::index_sequence, + std::index_sequence); +template +using A4A32Types = decltype(makeA4A32TypesImpl( + std::make_index_sequence::N0>{}, + std::make_index_sequence::N1>{})); + +// A4A8A32Types = TypeList, ..., A8<0>, ..., A32<0>, ...>. +// Used to generate randomized arrays with a mix of A4, A8, and A32 types. +template +TypeList..., A8..., A32...> + makeA4A8A32TypesImpl(std::index_sequence, std::index_sequence, + std::index_sequence); +template +using A4A8A32Types = decltype(makeA4A8A32TypesImpl( + std::make_index_sequence::N0>{}, + std::make_index_sequence::N1>{}, + std::make_index_sequence::N2>{})); + +// Union type aliases. template auto makePtrUnion(std::index_sequence) -> PointerUnion *...>; - template using PtrUnion = decltype(makePtrUnion(std::make_index_sequence{})); +template +auto makePtrUnion2T(std::index_sequence, std::index_sequence) + -> PointerUnion *..., A32 *...>; +template +using PtrUnion2T = + decltype(makePtrUnion2T(std::make_index_sequence::N0>{}, + std::make_index_sequence::N1>{})); + +template +auto makePtrUnion3T(std::index_sequence, std::index_sequence, + std::index_sequence) + -> PointerUnion *..., A8 *..., A32 *...>; +template +using PtrUnion3T = + decltype(makePtrUnion3T(std::make_index_sequence::N0>{}, + std::make_index_sequence::N1>{}, + std::make_index_sequence::N2>{})); + // Isa: random type mix, uniform distribution (~1/N hit rate for each type). template static void BM_Isa(benchmark::State &State) { @@ -104,7 +167,9 @@ static void BM_IsNull(benchmark::State &State) { } // namespace } // namespace llvm -// Registration -- N = 2, 4, 8. +// Registration -- N = 2, 4, 8. PtrUnion3T uses N = 3, 4, 8. + +// Isa: PtrUnion (fixed-width tag). 
BENCHMARK((BM_Isa, A8<0>, A8Types<2>>))->Name("Isa/PU/2/First"); BENCHMARK((BM_Isa, A8<1>, A8Types<2>>))->Name("Isa/PU/2/Last"); @@ -114,8 +179,60 @@ BENCHMARK((BM_Isa, A8<3>, A8Types<4>>))->Name("Isa/PU/4/Last"); BENCHMARK((BM_Isa, A8<0>, A8Types<8>>))->Name("Isa/PU/8/First"); BENCHMARK((BM_Isa, A8<7>, A8Types<8>>))->Name("Isa/PU/8/Last"); +// Isa: PtrUnion2T (variable-width, 2 alignment tiers). +// Note: PtrUnion2T uses variable-width encoding only when N > 4 (when +// fixed-width tags don't fit in 2 bits). PtrUnion2T<2> and <4> are fixed-width. +BENCHMARK((BM_Isa, A4<0>, A4A32Types<2>>)) + ->Name("Isa/PU2T/2/Tier0"); +BENCHMARK((BM_Isa, A32<0>, A4A32Types<2>>)) + ->Name("Isa/PU2T/2/Tier1"); + +BENCHMARK((BM_Isa, A4<0>, A4A32Types<4>>)) + ->Name("Isa/PU2T/4/Tier0"); +BENCHMARK((BM_Isa, A32<0>, A4A32Types<4>>)) + ->Name("Isa/PU2T/4/Tier1"); + +BENCHMARK((BM_Isa, A4<0>, A4A32Types<8>>)) + ->Name("Isa/PU2T/8/Tier0"); +BENCHMARK((BM_Isa, A32<4>, A4A32Types<8>>)) + ->Name("Isa/PU2T/8/Tier1"); + +// Isa: PtrUnion3T (variable-width, 3 alignment tiers). +// Note: PtrUnion3T uses variable-width encoding only when N > 4 (when +// fixed-width tags don't fit in 2 bits). PtrUnion3T<3> and <4> are +// still fixed-width. +BENCHMARK((BM_Isa, A4<0>, A4A8A32Types<3>>)) + ->Name("Isa/PU3T/3/Tier0"); +BENCHMARK((BM_Isa, A8<0>, A4A8A32Types<3>>)) + ->Name("Isa/PU3T/3/Tier1"); +BENCHMARK((BM_Isa, A32<0>, A4A8A32Types<3>>)) + ->Name("Isa/PU3T/3/Tier2"); + +BENCHMARK((BM_Isa, A4<0>, A4A8A32Types<4>>)) + ->Name("Isa/PU3T/4/Tier0"); +BENCHMARK((BM_Isa, A8<0>, A4A8A32Types<4>>)) + ->Name("Isa/PU3T/4/Tier1"); +BENCHMARK((BM_Isa, A32<0>, A4A8A32Types<4>>)) + ->Name("Isa/PU3T/4/Tier2"); + +BENCHMARK((BM_Isa, A4<0>, A4A8A32Types<8>>)) + ->Name("Isa/PU3T/8/Tier0"); +BENCHMARK((BM_Isa, A8<0>, A4A8A32Types<8>>)) + ->Name("Isa/PU3T/8/Tier1"); +BENCHMARK((BM_Isa, A32<3>, A4A8A32Types<8>>)) + ->Name("Isa/PU3T/8/Tier2"); + +// IsNull: all suites. 
BENCHMARK((BM_IsNull, A8Types<2>>))->Name("IsNull/PU/2"); BENCHMARK((BM_IsNull, A8Types<4>>))->Name("IsNull/PU/4"); BENCHMARK((BM_IsNull, A8Types<8>>))->Name("IsNull/PU/8"); +BENCHMARK((BM_IsNull, A4A32Types<2>>))->Name("IsNull/PU2T/2"); +BENCHMARK((BM_IsNull, A4A32Types<4>>))->Name("IsNull/PU2T/4"); +BENCHMARK((BM_IsNull, A4A32Types<8>>))->Name("IsNull/PU2T/8"); + +BENCHMARK((BM_IsNull, A4A8A32Types<3>>))->Name("IsNull/PU3T/3"); +BENCHMARK((BM_IsNull, A4A8A32Types<4>>))->Name("IsNull/PU3T/4"); +BENCHMARK((BM_IsNull, A4A8A32Types<8>>))->Name("IsNull/PU3T/8"); + BENCHMARK_MAIN(); diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h index 550127237249..52cb12168f9b 100644 --- a/llvm/include/llvm/ADT/PointerUnion.h +++ b/llvm/include/llvm/ADT/PointerUnion.h @@ -1,4 +1,4 @@ -//===- llvm/ADT/PointerUnion.h - Discriminated Union of 2 Ptrs --*- C++ -*-===// +//===- llvm/ADT/PointerUnion.h - Pointer Type Union -------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -21,9 +21,11 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include +#include #include #include #include +#include namespace llvm { @@ -40,6 +42,90 @@ template constexpr int lowBitsAvailable() { {static_cast(PointerLikeTypeTraits::NumLowBitsAvailable)...}); } +/// True if all types have enough low bits for a fixed-width tag. +template constexpr bool useFixedWidthTags() { + return lowBitsAvailable() >= bitsRequired(sizeof...(PTs)); +} + +/// True if types are in non-decreasing NumLowBitsAvailable order. +// TODO: Switch to llvm::is_sorted when it becomes constexpr. 
+template constexpr bool typesInNonDecreasingBitOrder() { + int Bits[] = {PointerLikeTypeTraits::NumLowBitsAvailable...}; + for (size_t I = 1; I < sizeof...(PTs); ++I) + if (Bits[I] < Bits[I - 1]) + return false; + return true; +} + +/// Tag descriptor for one type in the union. +struct TagEntry { + uintptr_t Value; // Bit pattern stored in the low bits. + uintptr_t Mask; // Mask covering all tag bits for this entry. +}; + +/// Compute fixed-width tag table (all types have enough bits for the tag). +/// For example, with 4 types and 3 available bits, the tag is 2 bits wide +/// (values 0-3) and each entry has the same mask of 0x3. +template +constexpr std::array computeFixedTags() { + constexpr size_t N = sizeof...(PTs); + constexpr uintptr_t TagMask = (uintptr_t(1) << bitsRequired(N)) - 1; + std::array Result = {}; + for (size_t I = 0; I < N; ++I) { + Result[I].Value = uintptr_t(I); + Result[I].Mask = TagMask; + } + return Result; +} + +/// Compute variable-width tag table, or return std::nullopt if the types +/// don't fit. Types must be in non-decreasing NumLowBitsAvailable order. +/// Groups types by available bits into tiers; each non-final tier reserves +/// its highest code as an escape prefix. +/// +/// Example with 3 tiers (2-bit, 3-bit, 5-bit types): +/// Tier 0 (2 bits): codes 0b00, 0b01, 0b10; escape = 0b11 +/// Tier 1 (3 bits): codes 0b011, escape = 0b111 +/// Tier 2 (5 bits): codes 0b00111, 0b01111, 0b10111, 0b11111 +template +constexpr std::optional> +computeExtendedTags() { + constexpr size_t N = sizeof...(PTs); + std::array Result = {}; + int Bits[] = {PointerLikeTypeTraits::NumLowBitsAvailable...}; + uintptr_t EscapePrefix = 0; + int PrevBits = 0; + size_t I = 0; + // Walk tiers (groups of types with the same NumLowBitsAvailable). For each + // tier, assign tag values using the new bits introduced by this tier, + // prefixed by the accumulated escape codes from previous tiers. 
Non-final + // tiers reserve their highest code as an escape to the next tier. + while (I < N) { + int TierBits = Bits[I]; + if (TierBits < PrevBits) + return std::nullopt; + int NewBits = TierBits - PrevBits; + size_t TierEnd = I; + while (TierEnd < N && Bits[TierEnd] == TierBits) + ++TierEnd; + bool IsLastTier = (TierEnd == N); + size_t TypesInTier = TierEnd - I; + size_t Capacity = + IsLastTier ? (size_t(1) << NewBits) : ((size_t(1) << NewBits) - 1); + if (TypesInTier > Capacity) + return std::nullopt; + for (size_t J = 0; J < TypesInTier; ++J) { + Result[I + J].Value = EscapePrefix | (uintptr_t(J) << PrevBits); + Result[I + J].Mask = (uintptr_t(1) << TierBits) - 1; + } + uintptr_t EscapeCode = (uintptr_t(1) << NewBits) - 1; + EscapePrefix |= EscapeCode << PrevBits; + PrevBits = TierBits; + I = TierEnd; + } + return Result; +} + /// CRTP base that generates non-template constructors and assignment operators /// for each type in the union. Non-template constructors allow implicit /// conversions (derived-to-base, non-const-to-const). @@ -82,6 +168,13 @@ public: /// This implementation is extremely efficient in space due to leveraging the /// low bits of the pointer, while exposing a natural and type-safe API. /// +/// When all types have enough alignment for a fixed-width tag, +/// the tag is placed in the high end of the available low bits, leaving spare +/// low bits for nesting in PointerIntPair or SmallPtrSet. Otherwise (the +/// least-aligned type has too few low bits), a variable-length escape-encoded +/// tag is used; in that case, types must be listed in non-decreasing +/// NumLowBitsAvailable order. 
+/// /// Common use patterns would be something like this: /// PointerUnion P; /// P = (int*)0; @@ -97,6 +190,7 @@ template class PointerUnion : public pointer_union_detail::PointerUnionMembers, 0, PTs...> { + static_assert(sizeof...(PTs) > 0, "PointerUnion must have at least one type"); static_assert(TypesAreDistinct::value, "PointerUnion alternative types cannot be repeated"); @@ -111,6 +205,11 @@ class PointerUnion // These are constexpr functions rather than static constexpr data members // so that alignof() on potentially incomplete types is not evaluated at // class-definition time. + + static constexpr bool useFixedWidthTags() { + return pointer_union_detail::useFixedWidthTags(); + } + static constexpr int minLowBitsAvailable() { return pointer_union_detail::lowBitsAvailable(); } @@ -119,22 +218,55 @@ class PointerUnion return pointer_union_detail::bitsRequired(sizeof...(PTs)); } - /// The tag is shifted to the high end of the available low bits so that - /// the lowest bits remain free for nesting in PointerIntPair or SmallPtrSet. - static constexpr int tagShift() { return minLowBitsAvailable() - tagBits(); } + /// When using fixed-width tags, the tag is shifted to the high end of the + /// available low bits so that the lowest bits remain free for nesting. With + /// variable-width encoding mode, the tag starts at bit 0. + static constexpr int tagShift() { + return useFixedWidthTags() ? (minLowBitsAvailable() - tagBits()) : 0; + } - static constexpr uintptr_t tagMask() { - return (uintptr_t(1) << tagBits()) - 1; + using TagTable = std::array; + + /// Returns the tag lookup table for this union's encoding scheme. 
+ static constexpr TagTable getTagTable() { + if constexpr (useFixedWidthTags()) { + return pointer_union_detail::computeFixedTags(); + } else { + static_assert( + pointer_union_detail::typesInNonDecreasingBitOrder(), + "Variable-width PointerUnion types must be in non-decreasing " + "NumLowBitsAvailable order"); + constexpr auto Table = + pointer_union_detail::computeExtendedTags(); + static_assert(Table.has_value(), + "Too many types for the available low bits"); + return *Table; + } + } + + // Variable-width isNull: check membership in the sparse set of tag values. + // A single threshold comparison does not work here because lower-tier + // non-null pointers can encode to values below higher-tier thresholds. + template + static constexpr bool isNullVariableImpl(uintptr_t V, + std::index_sequence) { + constexpr TagTable Table = getTagTable(); + static_assert(tagShift() == 0, + "isNullVariableImpl assumes tag starts at bit 0"); + return ((V == Table[Is].Value) || ...); } template static uintptr_t encode(T V) { + constexpr TagTable Table = getTagTable(); constexpr int Shift = tagShift(); - constexpr auto Tag = uintptr_t(FirstIndexOfType::value); + constexpr size_t Idx = FirstIndexOfType::value; + static_assert(Table[0].Value == 0, + "First type must have tag value 0 for getAddrOfPtr1"); uintptr_t PtrInt = reinterpret_cast( PointerLikeTypeTraits::getAsVoidPointer(V)); - assert((PtrInt & (tagMask() << Shift)) == 0 && + assert((PtrInt & (Table[Idx].Mask << Shift)) == 0 && "Pointer low bits collide with tag"); - return PtrInt | (Tag << Shift); + return PtrInt | (Table[Idx].Value << Shift); } public: @@ -152,8 +284,13 @@ public: /// Test if the pointer held in the union is null, regardless of /// which type it is. 
bool isNull() const { - return (static_cast(this->Val.asInt()) >> - minLowBitsAvailable()) == 0; + if constexpr (useFixedWidthTags()) { + return (static_cast(this->Val.asInt()) >> + minLowBitsAvailable()) == 0; + } else { + return isNullVariableImpl(static_cast(this->Val.asInt()), + std::index_sequence_for{}); + } } explicit operator bool() const { return !isNull(); } @@ -232,16 +369,23 @@ struct CastInfo> using From = PointerUnion; static inline bool isPossible(From &F) { + constexpr std::array Table = + From::getTagTable(); constexpr int Shift = From::tagShift(); - constexpr auto Tag = uintptr_t(FirstIndexOfType::value); + constexpr size_t Idx = FirstIndexOfType::value; auto V = reinterpret_cast(F.getOpaqueValue()); - return ((V >> Shift) & From::tagMask()) == Tag; + constexpr uintptr_t TagMask = Table[Idx].Mask << Shift; + constexpr uintptr_t TagValue = Table[Idx].Value << Shift; + return (V & TagMask) == TagValue; } static To doCast(From &F) { assert(isPossible(F) && "cast to an incompatible type!"); - constexpr uintptr_t PtrMask = - ~((uintptr_t(1) << From::minLowBitsAvailable()) - 1); + constexpr std::array Table = + From::getTagTable(); + constexpr int Shift = From::tagShift(); + constexpr size_t Idx = FirstIndexOfType::value; + constexpr uintptr_t PtrMask = ~(uintptr_t(Table[Idx].Mask) << Shift); void *Ptr = reinterpret_cast( reinterpret_cast(F.getOpaqueValue()) & PtrMask); return PointerLikeTypeTraits::getFromVoidPointer(Ptr); diff --git a/llvm/unittests/ADT/PointerUnionTest.cpp b/llvm/unittests/ADT/PointerUnionTest.cpp index 74817468612c..258e4050984e 100644 --- a/llvm/unittests/ADT/PointerUnionTest.cpp +++ b/llvm/unittests/ADT/PointerUnionTest.cpp @@ -1,4 +1,4 @@ -//===- llvm/unittest/ADT/PointerUnionTest.cpp - Optional unit tests -------===// +//===- llvm/unittest/ADT/PointerUnionTest.cpp - PointerUnion unit tests ---===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/DenseMap.h" #include "gtest/gtest.h" using namespace llvm; @@ -29,9 +30,11 @@ struct PointerUnionTest : public testing::Test { PointerUnionTest() : f(3.14f), i(42), d(3.14), l(42), a(&f), b(&i), c(&i), n(), i3(&i), - f3(&f), l3(&l), i4(&i), f4(&f), l4(&l), d4(&d), i4null((int *)nullptr), - f4null((float *)nullptr), l4null((long long *)nullptr), - d4null((double *)nullptr) {} + f3(&f), l3(&l), i4(&i), f4(&f), l4(&l), d4(&d), + i4null(static_cast(nullptr)), + f4null(static_cast(nullptr)), + l4null(static_cast(nullptr)), + d4null(static_cast(nullptr)) {} }; TEST_F(PointerUnionTest, Comparison) { @@ -66,8 +69,8 @@ TEST_F(PointerUnionTest, Null) { EXPECT_FALSE(!b); EXPECT_TRUE(!n); // workaround an issue with EXPECT macros and explicit bool - EXPECT_TRUE((bool)a); - EXPECT_TRUE((bool)b); + EXPECT_TRUE(static_cast(a)); + EXPECT_TRUE(static_cast(b)); EXPECT_FALSE(n); EXPECT_NE(n, b); @@ -111,7 +114,7 @@ TEST_F(PointerUnionTest, Is) { TEST_F(PointerUnionTest, Get) { EXPECT_EQ(cast(a), &f); EXPECT_EQ(cast(b), &i); - EXPECT_EQ(cast(n), (int *)nullptr); + EXPECT_EQ(cast(n), static_cast(nullptr)); } template struct alignas(8) Aligned {}; @@ -152,8 +155,10 @@ TEST_F(PointerUnionTest, ManyElements) { } TEST_F(PointerUnionTest, GetAddrOfPtr1) { - EXPECT_TRUE((void *)b.getAddrOfPtr1() == (void *)&b); - EXPECT_TRUE((void *)n.getAddrOfPtr1() == (void *)&n); + EXPECT_TRUE(static_cast(b.getAddrOfPtr1()) == + static_cast(&b)); + EXPECT_TRUE(static_cast(n.getAddrOfPtr1()) == + static_cast(&n)); } TEST_F(PointerUnionTest, NewCastInfra) { @@ -383,4 +388,470 @@ TEST(PointerUnionNestedTest, NestedTagPreservation) { EXPECT_EQ(cast(extracted2.Value), &high); } +//===----------------------------------------------------------------------===// +// Variable-width encoding PointerUnion 
tests +//===----------------------------------------------------------------------===// + +template struct alignas(4) Align4 {}; +template struct alignas(8) Align8 {}; +template struct alignas(16) Align16 {}; + +TEST(PointerUnionEncodingTest, ExtendedTagsFit) { + // Positive: 3 x 2-bit + 2 x 3-bit types. + EXPECT_TRUE( + (pointer_union_detail::computeExtendedTags< + Align4<0> *, Align4<1> *, Align4<2> *, Align8<0> *, Align8<1> *>() + .has_value())); + // Negative: 4 x 2-bit types need 4 codes but only 3 are available + // (2^2 - 1 escape = 3). + EXPECT_FALSE( + (pointer_union_detail::computeExtendedTags< + Align4<0> *, Align4<1> *, Align4<2> *, Align4<3> *, Align8<0> *>() + .has_value())); +} + +TEST(PointerUnionEncodingTest, ComputeExtendedTags) { + // 2-tier union: 3 x 2-bit + 2 x 3-bit. + auto Tags = *pointer_union_detail::computeExtendedTags< + Align4<0> *, Align4<1> *, Align4<2> *, Align8<0> *, Align8<1> *>(); + // Tier 0 (2-bit): codes 0b00, 0b01, 0b10; escape = 0b11. + EXPECT_EQ(Tags[0].Value, 0b00u); + EXPECT_EQ(Tags[0].Mask, 0b11u); + EXPECT_EQ(Tags[1].Value, 0b01u); + EXPECT_EQ(Tags[2].Value, 0b10u); + // Tier 1 (3-bit): codes 0b011, 0b111; mask = 0b111. + EXPECT_EQ(Tags[3].Value, 0b011u); + EXPECT_EQ(Tags[3].Mask, 0b111u); + EXPECT_EQ(Tags[4].Value, 0b111u); +} + +TEST(PointerUnionEncodingTest, ComputeExtendedTags3Tier) { + // 3-tier union: 3 x 2-bit + 1 x 3-bit + 2 x 4-bit. + auto Tags = + *pointer_union_detail::computeExtendedTags *, Align4<1> *, + Align4<2> *, Align8<0> *, + Align16<0> *, Align16<1> *>(); + // Tier 0 (2-bit): codes 0b00, 0b01, 0b10; escape = 0b11. + EXPECT_EQ(Tags[0].Value, 0b00u); + EXPECT_EQ(Tags[0].Mask, 0b11u); + EXPECT_EQ(Tags[1].Value, 0b01u); + EXPECT_EQ(Tags[2].Value, 0b10u); + // Tier 1 (3-bit): code 0b011; escape = 0b111. Mask = 0b111. + EXPECT_EQ(Tags[3].Value, 0b011u); + EXPECT_EQ(Tags[3].Mask, 0b111u); + // Tier 2 (4-bit): codes 0b0111, 0b1111. Mask = 0b1111. 
+ EXPECT_EQ(Tags[4].Value, 0b0111u); + EXPECT_EQ(Tags[4].Mask, 0b1111u); + EXPECT_EQ(Tags[5].Value, 0b1111u); + EXPECT_EQ(Tags[5].Mask, 0b1111u); +} + +// 2-tier: 3 x 2-bit + 2 x 3-bit types. +using PU2Tier = PointerUnion *, Align4<1> *, Align4<2> *, Align8<0> *, + Align8<1> *>; + +// 3-tier: 3 x 2-bit + 1 x 3-bit + 2 x 4-bit types. +using PU3Tier = PointerUnion *, Align4<1> *, Align4<2> *, Align8<0> *, + Align16<0> *, Align16<1> *>; + +// Variable-width unions still fit in a single pointer. +static_assert(sizeof(PU2Tier) == sizeof(void *)); +static_assert(sizeof(PU3Tier) == sizeof(void *)); + +// These unions actually use variable-width encoding (fixed-width tags don't +// fit because 5 types need 3 tag bits but Align4 only provides 2). +static_assert( + !pointer_union_detail::useFixedWidthTags< + Align4<0> *, Align4<1> *, Align4<2> *, Align8<0> *, Align8<1> *>()); +static_assert(!pointer_union_detail::useFixedWidthTags< + Align4<0> *, Align4<1> *, Align4<2> *, Align8<0> *, Align16<0> *, + Align16<1> *>()); + +// NumLowBitsAvailable is 0 for variable-width PointerUnion. 
+static_assert(PointerLikeTypeTraits::NumLowBitsAvailable == 0); +static_assert(PointerLikeTypeTraits::NumLowBitsAvailable == 0); + +struct PointerUnion2TierTest : public testing::Test { + Align4<0> a0; + Align4<1> a1; + Align4<2> a2; + Align8<0> b0; + Align8<1> b1; + + PU2Tier pa0, pa1, pa2, pb0, pb1, null; + PU2Tier na0, na1, na2, nb0, nb1; + + PointerUnion2TierTest() + : pa0(&a0), pa1(&a1), pa2(&a2), pb0(&b0), pb1(&b1), null(), + na0(static_cast *>(nullptr)), + na1(static_cast *>(nullptr)), + na2(static_cast *>(nullptr)), + nb0(static_cast *>(nullptr)), + nb1(static_cast *>(nullptr)) {} +}; + +TEST_F(PointerUnion2TierTest, Isa) { + // Tier 0 types + EXPECT_TRUE(isa *>(pa0)); + EXPECT_FALSE(isa *>(pa0)); + EXPECT_FALSE(isa *>(pa0)); + EXPECT_FALSE(isa *>(pa0)); + EXPECT_FALSE(isa *>(pa0)); + + EXPECT_TRUE(isa *>(pa1)); + EXPECT_TRUE(isa *>(pa2)); + + // Tier 1 types + EXPECT_TRUE(isa *>(pb0)); + EXPECT_FALSE(isa *>(pb0)); + EXPECT_FALSE(isa *>(pb0)); + + EXPECT_TRUE(isa *>(pb1)); + EXPECT_FALSE(isa *>(pb1)); + + // Null pointers preserve type identity + EXPECT_TRUE(isa *>(na0)); + EXPECT_TRUE(isa *>(nb1)); + EXPECT_FALSE(isa *>(na0)); +} + +TEST_F(PointerUnion2TierTest, Cast) { + EXPECT_EQ(cast *>(pa0), &a0); + EXPECT_EQ(cast *>(pa1), &a1); + EXPECT_EQ(cast *>(pa2), &a2); + EXPECT_EQ(cast *>(pb0), &b0); + EXPECT_EQ(cast *>(pb1), &b1); +} + +TEST_F(PointerUnion2TierTest, DynCast) { + EXPECT_EQ(dyn_cast *>(pa0), &a0); + EXPECT_EQ(dyn_cast *>(pa0), nullptr); + EXPECT_EQ(dyn_cast *>(pa0), nullptr); + + EXPECT_EQ(dyn_cast *>(pb0), &b0); + EXPECT_EQ(dyn_cast *>(pb0), nullptr); + + // pb1 has the all-ones tag -- most likely to expose masking bugs. 
+ EXPECT_EQ(dyn_cast *>(pb1), &b1); + EXPECT_EQ(dyn_cast *>(pb1), nullptr); + EXPECT_EQ(dyn_cast *>(pb1), nullptr); + EXPECT_EQ(dyn_cast *>(pb1), nullptr); + EXPECT_EQ(dyn_cast *>(pb1), nullptr); + + EXPECT_EQ(dyn_cast_if_present *>(na0), nullptr); + EXPECT_EQ(dyn_cast_if_present *>(na0), nullptr); + EXPECT_EQ(dyn_cast_if_present *>(nb0), nullptr); +} + +TEST_F(PointerUnion2TierTest, Null) { + EXPECT_FALSE(pa0.isNull()); + EXPECT_FALSE(pb0.isNull()); + EXPECT_TRUE(null.isNull()); + EXPECT_TRUE(!null); + EXPECT_TRUE(static_cast(pa0)); + + EXPECT_TRUE(na0.isNull()); + EXPECT_TRUE(na1.isNull()); + EXPECT_TRUE(na2.isNull()); + EXPECT_TRUE(nb0.isNull()); + EXPECT_TRUE(nb1.isNull()); +} + +TEST_F(PointerUnion2TierTest, NullDiscrimination) { + // Null pointers of different types have different opaque values. + EXPECT_NE(na0, na1); + EXPECT_NE(na0, na2); + EXPECT_NE(na0, nb0); + EXPECT_NE(na1, nb0); + EXPECT_NE(nb0, nb1); + + // Default-constructed is null of first type. + EXPECT_EQ(null, na0); +} + +TEST_F(PointerUnion2TierTest, Comparison) { + EXPECT_EQ(pa0, pa0); + EXPECT_NE(pa0, pa1); + EXPECT_NE(pa0, pb0); + + PU2Tier other(&a0); + EXPECT_EQ(pa0, other); +} + +TEST_F(PointerUnion2TierTest, Assignment) { + PU2Tier u; + EXPECT_TRUE(u.isNull()); + + u = &a0; + EXPECT_TRUE(isa *>(u)); + EXPECT_EQ(cast *>(u), &a0); + + u = &b0; + EXPECT_TRUE(isa *>(u)); + EXPECT_EQ(cast *>(u), &b0); + + u = &a2; + EXPECT_TRUE(isa *>(u)); + + u = nullptr; + EXPECT_TRUE(u.isNull()); +} + +TEST_F(PointerUnion2TierTest, GetAddrOfPtr1) { + EXPECT_TRUE(static_cast(pa0.getAddrOfPtr1()) == + static_cast(&pa0)); + EXPECT_TRUE(static_cast(null.getAddrOfPtr1()) == + static_cast(&null)); +} + +TEST_F(PointerUnion2TierTest, OpaqueValueRoundTrip) { + void *opaque = pa0.getOpaqueValue(); + PU2Tier restored = PU2Tier::getFromOpaqueValue(opaque); + EXPECT_EQ(pa0, restored); + EXPECT_EQ(cast *>(restored), &a0); + + opaque = pb0.getOpaqueValue(); + restored = PU2Tier::getFromOpaqueValue(opaque); + 
EXPECT_EQ(pb0, restored); + EXPECT_EQ(cast *>(restored), &b0); + + opaque = pb1.getOpaqueValue(); + restored = PU2Tier::getFromOpaqueValue(opaque); + EXPECT_EQ(pb1, restored); + EXPECT_EQ(cast *>(restored), &b1); +} + +// 3-tier tests + +struct PointerUnion3TierTest : public testing::Test { + Align4<0> a0; + Align4<1> a1; + Align4<2> a2; + Align8<0> b0; + Align16<0> c0; + Align16<1> c1; + + PU3Tier pa0, pa1, pa2, pb0, pc0, pc1, null; + + PointerUnion3TierTest() + : pa0(&a0), pa1(&a1), pa2(&a2), pb0(&b0), pc0(&c0), pc1(&c1), null() {} +}; + +TEST_F(PointerUnion3TierTest, Isa) { + EXPECT_TRUE(isa *>(pa0)); + EXPECT_FALSE(isa *>(pa0)); + EXPECT_FALSE(isa *>(pa0)); + + EXPECT_TRUE(isa *>(pb0)); + EXPECT_FALSE(isa *>(pb0)); + EXPECT_FALSE(isa *>(pb0)); + + EXPECT_TRUE(isa *>(pc0)); + EXPECT_FALSE(isa *>(pc0)); + EXPECT_FALSE(isa *>(pc0)); + EXPECT_FALSE(isa *>(pc0)); + + EXPECT_TRUE(isa *>(pc1)); + EXPECT_FALSE(isa *>(pc1)); +} + +TEST_F(PointerUnion3TierTest, Cast) { + EXPECT_EQ(cast *>(pa0), &a0); + EXPECT_EQ(cast *>(pa1), &a1); + EXPECT_EQ(cast *>(pa2), &a2); + EXPECT_EQ(cast *>(pb0), &b0); + EXPECT_EQ(cast *>(pc0), &c0); + EXPECT_EQ(cast *>(pc1), &c1); +} + +TEST_F(PointerUnion3TierTest, DynCast) { + EXPECT_EQ(dyn_cast *>(pa0), &a0); + EXPECT_EQ(dyn_cast *>(pa0), nullptr); + EXPECT_EQ(dyn_cast *>(pa0), nullptr); + + EXPECT_EQ(dyn_cast *>(pb0), &b0); + EXPECT_EQ(dyn_cast *>(pb0), nullptr); + EXPECT_EQ(dyn_cast *>(pb0), nullptr); + + EXPECT_EQ(dyn_cast *>(pc0), &c0); + EXPECT_EQ(dyn_cast *>(pc0), nullptr); + EXPECT_EQ(dyn_cast *>(pc0), nullptr); + + EXPECT_EQ(dyn_cast *>(pc1), &c1); + EXPECT_EQ(dyn_cast *>(pc1), nullptr); +} + +TEST_F(PointerUnion3TierTest, Null) { + EXPECT_TRUE(null.isNull()); + EXPECT_FALSE(pa0.isNull()); + EXPECT_FALSE(pb0.isNull()); + EXPECT_FALSE(pc0.isNull()); + EXPECT_FALSE(pc1.isNull()); + + PU3Tier na0(static_cast *>(nullptr)); + PU3Tier nb0(static_cast *>(nullptr)); + PU3Tier nc0(static_cast *>(nullptr)); + PU3Tier nc1(static_cast 
*>(nullptr)); + EXPECT_TRUE(na0.isNull()); + EXPECT_TRUE(nb0.isNull()); + EXPECT_TRUE(nc0.isNull()); + EXPECT_TRUE(nc1.isNull()); + + // Null discrimination across all three tiers. + EXPECT_NE(na0, nb0); + EXPECT_NE(nb0, nc0); + EXPECT_NE(nc0, nc1); + EXPECT_NE(na0, nc0); +} + +TEST_F(PointerUnion3TierTest, Assignment) { + PU3Tier u; + EXPECT_TRUE(u.isNull()); + + u = &a0; + EXPECT_TRUE(isa *>(u)); + EXPECT_EQ(cast *>(u), &a0); + + u = &b0; + EXPECT_TRUE(isa *>(u)); + EXPECT_EQ(cast *>(u), &b0); + + u = &c1; + EXPECT_TRUE(isa *>(u)); + EXPECT_EQ(cast *>(u), &c1); + + u = nullptr; + EXPECT_TRUE(u.isNull()); +} + +TEST_F(PointerUnion3TierTest, OpaqueValueRoundTrip) { + // pb0's tag (0b011) contains the tier-0 escape prefix (0b11) in its low 2 + // bits. + void *opaque = pb0.getOpaqueValue(); + PU3Tier restored = PU3Tier::getFromOpaqueValue(opaque); + EXPECT_EQ(pb0, restored); + EXPECT_EQ(cast *>(restored), &b0); + + opaque = pc0.getOpaqueValue(); + restored = PU3Tier::getFromOpaqueValue(opaque); + EXPECT_EQ(pc0, restored); + EXPECT_EQ(cast *>(restored), &c0); + + opaque = pc1.getOpaqueValue(); + restored = PU3Tier::getFromOpaqueValue(opaque); + EXPECT_EQ(pc1, restored); + EXPECT_EQ(cast *>(restored), &c1); +} + +TEST_F(PointerUnion3TierTest, ConstCast) { + const PU3Tier cpc0(&c0); + EXPECT_TRUE(isa *>(cpc0)); + EXPECT_FALSE(isa *>(cpc0)); + EXPECT_EQ(cast *>(cpc0), &c0); + EXPECT_EQ(dyn_cast *>(cpc0), nullptr); +} + +TEST(PointerUnionMultiTierDenseMapTest, BasicOperations) { + Align4<0> a0; + Align8<0> b0; + Align8<1> b1; + + DenseMap map; + PU2Tier ka(&a0), kb(&b0), kb1(&b1); + + map[ka] = 1; + map[kb] = 2; + map[kb1] = 3; + + EXPECT_EQ(map[ka], 1); + EXPECT_EQ(map[kb], 2); + EXPECT_EQ(map[kb1], 3); + + EXPECT_EQ(map.count(ka), 1u); + map.erase(ka); + EXPECT_EQ(map.count(ka), 0u); + EXPECT_EQ(map.count(kb), 1u); +} + +TEST(PointerUnionMixedAlignFixedWidth, BasicOperations) { + // Align4 provides 2 low bits, Align8 provides 3. 
Two types need 1 tag bit, + // so all types have enough bits for fixed-width encoding with spare bits. + using MixedPU = PointerUnion *, Align8<0> *>; + static_assert(PointerLikeTypeTraits::NumLowBitsAvailable > 0, + "Mixed-alignment 2-type union should have spare low bits"); + + Align4<0> a; + Align8<0> b; + + MixedPU u; + EXPECT_TRUE(u.isNull()); + + u = &a; + EXPECT_TRUE(isa *>(u)); + EXPECT_FALSE(isa *>(u)); + EXPECT_EQ(cast *>(u), &a); + + u = &b; + EXPECT_TRUE(isa *>(u)); + EXPECT_FALSE(isa *>(u)); + EXPECT_EQ(cast *>(u), &b); + + u = nullptr; + EXPECT_TRUE(u.isNull()); +} + +TEST(PointerUnionLargeTierJump, BasicOperations) { + // 3 x 2-bit + 2 x 4-bit: skips the 3-bit tier entirely (tier jump 2->4). + using JumpPU = PointerUnion *, Align4<1> *, Align4<2> *, + Align16<0> *, Align16<1> *>; + static_assert( + !pointer_union_detail::useFixedWidthTags< + Align4<0> *, Align4<1> *, Align4<2> *, Align16<0> *, Align16<1> *>(), + "Should use variable-width encoding"); + + Align4<0> a0; + Align4<1> a1; + Align4<2> a2; + Align16<0> c0; + Align16<1> c1; + + JumpPU u; + EXPECT_TRUE(u.isNull()); + + u = &a0; + EXPECT_TRUE(isa *>(u)); + EXPECT_EQ(cast *>(u), &a0); + + u = &a1; + EXPECT_TRUE(isa *>(u)); + EXPECT_EQ(cast *>(u), &a1); + + u = &a2; + EXPECT_TRUE(isa *>(u)); + EXPECT_EQ(cast *>(u), &a2); + + u = &c0; + EXPECT_TRUE(isa *>(u)); + EXPECT_FALSE(isa *>(u)); + EXPECT_EQ(cast *>(u), &c0); + + u = &c1; + EXPECT_TRUE(isa *>(u)); + EXPECT_FALSE(isa *>(u)); + EXPECT_EQ(cast *>(u), &c1); + + // Typed nulls preserve type identity and are null. + JumpPU na0(static_cast *>(nullptr)); + JumpPU nc0(static_cast *>(nullptr)); + JumpPU nc1(static_cast *>(nullptr)); + EXPECT_TRUE(na0.isNull()); + EXPECT_TRUE(nc0.isNull()); + EXPECT_TRUE(nc1.isNull()); + EXPECT_TRUE(isa *>(na0)); + EXPECT_TRUE(isa *>(nc0)); + EXPECT_TRUE(isa *>(nc1)); + EXPECT_NE(na0, nc0); + EXPECT_NE(nc0, nc1); +} + } // end anonymous namespace