Jakub Kuderski 6e916d0598
[llvm][ADT] Add variable-width tag encoding to PointerUnion (#188167)
PointerUnion stores a fixed-width `ceil(log2(N))`-bit tag in the low
bits of the pointer. This works only when every member type provides at
least that many low bits — if the least-aligned type doesn't,
compilation fails, even though the higher-aligned types may have plenty
of spare bits going to waste.

Introduce a variable-length escape-encoded tag that exploits the extra
low bits of higher-aligned types, analogous to UTF-8: types are grouped
into tiers by NumLowBitsAvailable; each non-final tier reserves one code
as an escape prefix, and the next tier extends the tag into the newly
available bits. This allows PointerUnion to hold more type variants than
a fixed-width tag permits.

The fixed-width path is used when the minimum alignment already provides
enough bits (the common case); the variable-width path activates only
when it doesn't, and requires types to be listed in non-decreasing
NumLowBitsAvailable order.

I need this for https://github.com/llvm/llvm-project/pull/186923 which
requires a 6-member PointerUnion in MLIR TypeRange/ValueRange. On 32-bit
systems, some members only provide 2 low bits, insufficient for a 3-bit
fixed-width tag.
2026-03-25 12:09:24 -04:00

72 lines
1.8 KiB
C++

#include <cstdio>
#include "llvm/ADT/PointerUnion.h"
/// Abstract polymorphic base: one pure-virtual hook plus a virtual destructor.
/// Used as a PointerUnion member type whose pointee is a derived object.
struct HasVirtual {
  virtual ~HasVirtual() = default;

  /// Hook that concrete subclasses must implement.
  virtual void func() = 0;
};
struct DerivedWithVirtual : public HasVirtual {
virtual void func() override;
virtual ~DerivedWithVirtual() = default;
};
void DerivedWithVirtual::func() {}
/// Empty pointee type over-aligned to 8 bytes.
struct alignas(8) Z {};

/// Empty type publicly derived from Z (struct inheritance defaults to
/// public), so a Derived * converts implicitly to a Z *.
struct Derived : Z {};
// Pointee types for the variable-width tag encoding test.
// A union of three 4-byte-aligned and two 8-byte-aligned pointee types has
// five members, so a fixed-width tag would need ceil(log2(5)) = 3 bits, while
// min(NumLowBitsAvailable) across the members is only 2. Such a union
// therefore requires escape-coded tags.
// The integer template argument only serves to create distinct types.
template <int Id>
struct alignas(4) Align4 {};

template <int Id>
struct alignas(8) Align8 {};
// Debugger-formatter fixture: builds llvm::PointerUnion (and one
// llvm::PointerIntPair) values in several states, with a puts() marker line
// after each group of declarations.
// NOTE(review): the marker string, the local variable names and the exact
// type spellings are presumably matched by an external debugger script --
// confirm before renaming or reordering anything in this function.
int main() {
// Pointees referenced by the unions below.
int a = 5;
float f = 4.0;
Z z;
Derived derived;
DerivedWithVirtual dv;
// Two-member union initialised with a float *, its second member type.
llvm::PointerUnion<Z *, float *> z_float(&f);
// Same union type, constructed from a bare nullptr (no member type named).
llvm::PointerUnion<Z *, float *> raw_z_float(nullptr);
// Same union type, constructed from a null pointer explicitly typed float *.
llvm::PointerUnion<Z *, float *> null_float(static_cast<float *>(nullptr));
// Three-member union initialised with an int *, its middle member type.
llvm::PointerUnion<long long *, int *, float *> long_int_float(&a);
// Single-member union.
llvm::PointerUnion<Z *> z_only(&z);
// A PointerUnion nested inside a PointerIntPair, paired with the value 1.
// NOTE(review): the template argument 1 is presumably the integer bit width
// -- confirm against PointerIntPair.h.
llvm::PointerIntPair<llvm::PointerUnion<Z *, float *>, 1> union_int_pair(
z_float, 1);
puts("Break here");
// Re-point z_float at a Derived object; Derived publicly inherits from Z.
z_float = &derived;
puts("Break here");
// Union over an abstract base pointer, initialised with the address of a
// DerivedWithVirtual object.
llvm::PointerUnion<HasVirtual *, float *> virtual_float(&dv);
puts("Break here");
// Function-local types stress template_argument lookup in debuggers.
struct alignas(8) Local {};
Local local;
llvm::PointerUnion<Local *, float *> local_float(&local);
puts("Break here");
// Variable-width tag encoding: formatter should fall back to void*.
// varwidth is initialised with a pointer to a 4-byte-aligned type and
// varwidth_tier1 with a pointer to an 8-byte-aligned type.
Align4<0> a4_0;
Align8<0> a8_0;
llvm::PointerUnion<Align4<0> *, Align4<1> *, Align4<2> *, Align8<0> *,
Align8<1> *>
varwidth(&a4_0);
llvm::PointerUnion<Align4<0> *, Align4<1> *, Align4<2> *, Align8<0> *,
Align8<1> *>
varwidth_tier1(&a8_0);
puts("Break here");
}