[LangRef] Specify icmp on pointers to only compare address (#163936)

This changes LangRef to specify that pointer icmp only compares the
address bits of the pointers. That is, `icmp pred %a, %b` is equivalent
to `icmp pred ptrtoaddr(%a), ptrtoaddr(%b)`.

Similarly, it specifies that the `nonnull` attribute requires that the
address bits are non-zero.

There are a couple of motivations for this:

* For inequality comparisons, this is really the only sensible
semantics. Relational comparison of address and metadata bits as a
single integer is generally meaningless (unless the metadata bits are
equal).
* This matches (as far as I understand) the behavior of existing CHERI
implementations.
* LLVM can only reason about the address bits. These semantics allow
pointers with non-address bits to receive essentially the same
comparison optimization support as ordinary pointers.

In terms of implementation, this PR adjusts:
 * The AMDGPULowerBufferFatPointers pass.
* An InstCombine fold that may replace pointers with different
non-address bits.
 * The fold that replaces pointers based on dominating pointer equality.

It does not adjust:
* ISel, because we don't have in-tree targets where we can show a
difference.
* Various icmp+ptrtoint transforms, because we'll have to change this
code for ptrtoaddr optimization support anyway, and these changes are
tightly related.

Related discussion starting from:
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/60?u=nikic
This commit is contained in:
Nikita Popov 2025-12-04 16:24:23 +01:00 committed by GitHub
parent 15d7c01f30
commit 09b0885569
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 34 additions and 41 deletions

View File

@ -1556,6 +1556,9 @@ Currently, only the following parameter attributes are defined:
attribute may only be applied to pointer-typed parameters. This is not
checked or enforced by LLVM; if the parameter or return pointer is null,
:ref:`poison value <poisonvalues>` is returned or passed instead.
The ``nonnull`` attribute only refers to the address bits of the pointers.
If all the address bits are zero, the result will be a poison value, even
if the pointer has non-zero non-address bits or non-zero external state.
The ``nonnull`` attribute should be combined with the ``noundef`` attribute
to ensure a pointer is not null or otherwise the behavior is undefined.
@ -13064,8 +13067,10 @@ code given as ``cond``. The comparison performed always yields either an
#. ``sle``: interprets the operands as signed values and yields ``true``
if ``op1`` is less than or equal to ``op2``.
If the operands are :ref:`pointer <t_pointer>` typed, the pointer values
are compared as if they were integers.
If the operands are :ref:`pointer <t_pointer>` typed, the address bits of the
pointers are compared as if they were integers. Non-address bits or external
state are not compared. That is, ``icmp`` on pointers is equivalent to ``icmp``
on the ``ptrtoaddr`` of the pointers.
If the operands are integer vectors, then they are compared element by
element. The result is an ``i1`` vector with the same number of elements

View File

@ -803,7 +803,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
// Returns true if a use is either in an ICmp/PtrToInt or a Phi/Select that only
// feeds into them.
static bool isPointerUseReplacable(const Use &U) {
static bool isPointerUseReplacable(const Use &U, bool HasNonAddressBits) {
unsigned Limit = 40;
SmallVector<const User *> Worklist({U.getUser()});
SmallPtrSet<const User *, 8> Visited;
@ -812,9 +812,11 @@ static bool isPointerUseReplacable(const Use &U) {
auto *User = Worklist.pop_back_val();
if (!Visited.insert(User).second)
continue;
if (isa<ICmpInst, PtrToAddrInst>(User))
continue;
// FIXME: The PtrToIntInst case here is not strictly correct, as it
// changes which provenance is exposed.
if (isa<ICmpInst, PtrToIntInst, PtrToAddrInst>(User))
if (!HasNonAddressBits && isa<PtrToIntInst>(User))
continue;
if (isa<PHINode, SelectInst>(User))
Worklist.append(User->user_begin(), User->user_end());
@ -842,9 +844,10 @@ static bool isPointerAlwaysReplaceable(const Value *From, const Value *To,
bool llvm::canReplacePointersInUseIfEqual(const Use &U, const Value *To,
const DataLayout &DL) {
assert(U->getType() == To->getType() && "values must have matching types");
Type *Ty = To->getType();
assert(U->getType() == Ty && "values must have matching types");
// Not a pointer, just return true.
if (!To->getType()->isPointerTy())
if (!Ty->isPointerTy())
return true;
// Do not perform replacements in lifetime intrinsic arguments.
@ -853,7 +856,10 @@ bool llvm::canReplacePointersInUseIfEqual(const Use &U, const Value *To,
if (isPointerAlwaysReplaceable(&*U, To, DL))
return true;
return isPointerUseReplacable(U);
bool HasNonAddressBits =
DL.getAddressSizeInBits(Ty) != DL.getPointerTypeSizeInBits(Ty);
return isPointerUseReplacable(U, HasNonAddressBits);
}
bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,

View File

@ -2062,17 +2062,7 @@ PtrParts SplitPtrStructs::visitICmpInst(ICmpInst &Cmp) {
"Pointer comparison is only equal or unequal");
auto [LhsRsrc, LhsOff] = getPtrParts(Lhs);
auto [RhsRsrc, RhsOff] = getPtrParts(Rhs);
Value *RsrcCmp =
IRB.CreateICmp(Pred, LhsRsrc, RhsRsrc, Cmp.getName() + ".rsrc");
copyMetadata(RsrcCmp, &Cmp);
Value *OffCmp = IRB.CreateICmp(Pred, LhsOff, RhsOff, Cmp.getName() + ".off");
copyMetadata(OffCmp, &Cmp);
Value *Res = nullptr;
if (Pred == ICmpInst::ICMP_EQ)
Res = IRB.CreateAnd(RsrcCmp, OffCmp);
else if (Pred == ICmpInst::ICMP_NE)
Res = IRB.CreateOr(RsrcCmp, OffCmp);
Value *Res = IRB.CreateICmp(Pred, LhsOff, RhsOff);
copyMetadata(Res, &Cmp);
Res->takeName(&Cmp);
SplitUsers.insert(&Cmp);

View File

@ -3373,9 +3373,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
bool Changed = false;
GEP.replaceUsesWithIf(Y, [&](Use &U) {
bool ShouldReplace = isa<PtrToAddrInst>(U.getUser()) ||
(!HasNonAddressBits &&
isa<ICmpInst, PtrToIntInst>(U.getUser()));
bool ShouldReplace =
isa<PtrToAddrInst, ICmpInst>(U.getUser()) ||
(!HasNonAddressBits && isa<PtrToIntInst>(U.getUser()));
Changed |= ShouldReplace;
return ShouldReplace;
});

View File

@ -405,9 +405,7 @@ define i1 @test_null(ptr addrspace(7) %p) {
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[P_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 0
; CHECK-NEXT: [[P_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 1
; CHECK-NEXT: [[IS_NULL_RSRC:%.*]] = icmp eq ptr addrspace(8) [[P_RSRC]], null
; CHECK-NEXT: [[IS_NULL_OFF:%.*]] = icmp eq i32 [[P_OFF]], 0
; CHECK-NEXT: [[IS_NULL:%.*]] = and i1 [[IS_NULL_RSRC]], [[IS_NULL_OFF]]
; CHECK-NEXT: [[IS_NULL:%.*]] = icmp eq i32 [[P_OFF]], 0
; CHECK-NEXT: ret i1 [[IS_NULL]]
;
%is.null = icmp eq ptr addrspace(7) %p, addrspacecast (ptr null to ptr addrspace(7))
@ -471,9 +469,7 @@ define i1 @icmp_eq(ptr addrspace(7) %a, ptr addrspace(7) %b) {
; CHECK-NEXT: [[B_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[B]], 1
; CHECK-NEXT: [[A_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[A]], 0
; CHECK-NEXT: [[A_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[A]], 1
; CHECK-NEXT: [[RET_RSRC:%.*]] = icmp eq ptr addrspace(8) [[A_RSRC]], [[B_RSRC]]
; CHECK-NEXT: [[RET_OFF:%.*]] = icmp eq i32 [[A_OFF]], [[B_OFF]]
; CHECK-NEXT: [[RET:%.*]] = and i1 [[RET_RSRC]], [[RET_OFF]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq i32 [[A_OFF]], [[B_OFF]]
; CHECK-NEXT: ret i1 [[RET]]
;
%ret = icmp eq ptr addrspace(7) %a, %b
@ -487,9 +483,7 @@ define i1 @icmp_ne(ptr addrspace(7) %a, ptr addrspace(7) %b) {
; CHECK-NEXT: [[B_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[B]], 1
; CHECK-NEXT: [[A_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[A]], 0
; CHECK-NEXT: [[A_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[A]], 1
; CHECK-NEXT: [[RET_RSRC:%.*]] = icmp ne ptr addrspace(8) [[A_RSRC]], [[B_RSRC]]
; CHECK-NEXT: [[RET_OFF:%.*]] = icmp ne i32 [[A_OFF]], [[B_OFF]]
; CHECK-NEXT: [[RET:%.*]] = or i1 [[RET_RSRC]], [[RET_OFF]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne i32 [[A_OFF]], [[B_OFF]]
; CHECK-NEXT: ret i1 [[RET]]
;
%ret = icmp ne ptr addrspace(7) %a, %b
@ -503,9 +497,7 @@ define <2 x i1> @icmp_eq_vec(<2 x ptr addrspace(7)> %a, <2 x ptr addrspace(7)> %
; CHECK-NEXT: [[B_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[B]], 1
; CHECK-NEXT: [[A_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[A]], 0
; CHECK-NEXT: [[A_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[A]], 1
; CHECK-NEXT: [[RET_RSRC:%.*]] = icmp eq <2 x ptr addrspace(8)> [[A_RSRC]], [[B_RSRC]]
; CHECK-NEXT: [[RET_OFF:%.*]] = icmp eq <2 x i32> [[A_OFF]], [[B_OFF]]
; CHECK-NEXT: [[RET:%.*]] = and <2 x i1> [[RET_RSRC]], [[RET_OFF]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq <2 x i32> [[A_OFF]], [[B_OFF]]
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%ret = icmp eq <2 x ptr addrspace(7)> %a, %b
@ -519,9 +511,7 @@ define <2 x i1> @icmp_ne_vec(<2 x ptr addrspace(7)> %a, <2 x ptr addrspace(7)> %
; CHECK-NEXT: [[B_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[B]], 1
; CHECK-NEXT: [[A_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[A]], 0
; CHECK-NEXT: [[A_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[A]], 1
; CHECK-NEXT: [[RET_RSRC:%.*]] = icmp ne <2 x ptr addrspace(8)> [[A_RSRC]], [[B_RSRC]]
; CHECK-NEXT: [[RET_OFF:%.*]] = icmp ne <2 x i32> [[A_OFF]], [[B_OFF]]
; CHECK-NEXT: [[RET:%.*]] = or <2 x i1> [[RET_RSRC]], [[RET_OFF]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[A_OFF]], [[B_OFF]]
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%ret = icmp ne <2 x ptr addrspace(7)> %a, %b

View File

@ -404,12 +404,14 @@ define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(ptr %p, ptr %p
ret i64 %int
}
define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2) {
; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint_addrsize(
; If the pointer and address size does not match, we can't replace a pointer
; with different provenance in ptrtoint, as the non-address bits may not match.
define i64 @assume_ptr_eq_different_prov_matters_ptrtoint_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2) {
; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_matters_ptrtoint_addrsize(
; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]]) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P]], [[P2]]
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64
; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr addrspace(1) [[P2]] to i64
; CHECK-NEXT: ret i64 [[INT]]
;
%cmp = icmp eq ptr addrspace(1) %p, %p2

View File

@ -206,7 +206,7 @@ define ptr @gep_sub_ptrtoaddr_different_obj(ptr %p, ptr %p2, ptr %p3) {
ret ptr %gep
}
; The use in ptrtoaddr should be replaced. The uses in ptrtoint and icmp should
; The use in ptrtoaddr and icmp should be replaced. The use in ptrtoint should
; not be replaced, as the non-address bits differ. The use in the return value
; should not be replaced as the provenace differs.
define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2, ptr addrspace(1) %p3) {
@ -216,7 +216,7 @@ define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace(
; CHECK-NEXT: [[P2_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[P2_ADDR]], [[P_ADDR]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[P]], i32 [[SUB]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[GEP]], [[P3]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P2]], [[P3]]
; CHECK-NEXT: call void @use.i1(i1 [[CMP]])
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
; CHECK-NEXT: [[INT:%.*]] = trunc i64 [[TMP1]] to i32