From 09b08855698fbfedb5ad0726cb97a5dfc33ad46a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 4 Dec 2025 16:24:23 +0100 Subject: [PATCH] [LangRef] Specify icmp on pointers to only compare address (#163936) This changes LangRef to specify that pointer icmp only compares the address bits of the pointers. That is, `icmp pred %a, %b` is equivalent to `icmp pred ptrtoaddr(%a), ptrtoaddr(%b)`. Similarly, it specifies that the `nonnull` attribute requires that the address bits are non-zero. There are a couple of motivations for this: * For inequality comparisons, this is really the only sensible semantics. Relational comparison of address and metadata bits as a single integer is generally meaningless (unless the metadata bits are equal). * This matches (as far as I understand) the behavior of existing CHERI implementations. * LLVM can only reason about the address bits. These semantics allow pointers with non-address bits to receive essentially the same comparison optimization support as ordinary pointers. In terms of implementation, this PR adjusts: * The AMDGPULowerBufferFatPointers pass. * An InstCombine fold that may replace pointers with different non-address bits. * The fold that replaces pointers based on dominating pointer equality. It does not adjust: * ISel, because we don't have in-tree targets where we can show a difference. * Various icmp+ptrtoint transforms, because we'll have to change this code for ptrtoaddr optimization support anyway, and these changes are tightly related. Related discussion starting from: https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/60?u=nikic --- llvm/docs/LangRef.rst | 9 +++++++-- llvm/lib/Analysis/Loads.cpp | 16 ++++++++++----- .../AMDGPU/AMDGPULowerBufferFatPointers.cpp | 12 +---------- .../InstCombine/InstructionCombining.cpp | 6 +++--- .../lower-buffer-fat-pointers-pointer-ops.ll | 20 +++++-------------- llvm/test/Transforms/GVN/assume-equal.ll | 8 +++++--- llvm/test/Transforms/InstCombine/ptrtoaddr.ll | 4 ++-- 7 files changed, 34 insertions(+), 41 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 656b930e1f5c..b8ed1dba6303 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1556,6 +1556,9 @@ Currently, only the following parameter attributes are defined: attribute may only be applied to pointer-typed parameters. This is not checked or enforced by LLVM; if the parameter or return pointer is null, :ref:`poison value ` is returned or passed instead. + The ``nonnull`` attribute only refers to the address bits of the pointers. + If all the address bits are zero, the result will be a poison value, even + if the pointer has non-zero non-address bits or non-zero external state. The ``nonnull`` attribute should be combined with the ``noundef`` attribute to ensure a pointer is not null or otherwise the behavior is undefined. @@ -13064,8 +13067,10 @@ code given as ``cond``. The comparison performed always yields either an #. ``sle``: interprets the operands as signed values and yields ``true`` if ``op1`` is less than or equal to ``op2``. -If the operands are :ref:`pointer ` typed, the pointer values -are compared as if they were integers. +If the operands are :ref:`pointer ` typed, the address bits of the +pointers are compared as if they were integers. Non-address bits or external +state are not compared. That is, ``icmp`` on pointers is equivalent to ``icmp`` +on the ``ptrtoaddr`` of the pointers. If the operands are integer vectors, then they are compared element by element. The result is an ``i1`` vector with the same number of elements diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 54f55b20a93a..73f40f13a262 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -803,7 +803,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA, // Returns true if a use is either in an ICmp/PtrToInt or a Phi/Select that only // feeds into them. -static bool isPointerUseReplacable(const Use &U) { +static bool isPointerUseReplacable(const Use &U, bool HasNonAddressBits) { unsigned Limit = 40; SmallVector Worklist({U.getUser()}); SmallPtrSet Visited; @@ -812,9 +812,11 @@ static bool isPointerUseReplacable(const Use &U) { auto *User = Worklist.pop_back_val(); if (!Visited.insert(User).second) continue; + if (isa(User)) + continue; // FIXME: The PtrToIntInst case here is not strictly correct, as it // changes which provenance is exposed. - if (isa(User)) + if (!HasNonAddressBits && isa(User)) continue; if (isa(User)) Worklist.append(User->user_begin(), User->user_end()); @@ -842,9 +844,10 @@ static bool isPointerAlwaysReplaceable(const Value *From, const Value *To, bool llvm::canReplacePointersInUseIfEqual(const Use &U, const Value *To, const DataLayout &DL) { - assert(U->getType() == To->getType() && "values must have matching types"); + Type *Ty = To->getType(); + assert(U->getType() == Ty && "values must have matching types"); // Not a pointer, just return true. - if (!To->getType()->isPointerTy()) + if (!Ty->isPointerTy()) return true; // Do not perform replacements in lifetime intrinsic arguments. @@ -853,7 +856,10 @@ bool llvm::canReplacePointersInUseIfEqual(const Use &U, const Value *To, if (isPointerAlwaysReplaceable(&*U, To, DL)) return true; - return isPointerUseReplacable(U); + + bool HasNonAddressBits = + DL.getAddressSizeInBits(Ty) != DL.getPointerTypeSizeInBits(Ty); + return isPointerUseReplacable(U, HasNonAddressBits); } bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index fdff21b6ef8d..e9bcf16025d5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -2062,17 +2062,7 @@ PtrParts SplitPtrStructs::visitICmpInst(ICmpInst &Cmp) { "Pointer comparison is only equal or unequal"); auto [LhsRsrc, LhsOff] = getPtrParts(Lhs); auto [RhsRsrc, RhsOff] = getPtrParts(Rhs); - Value *RsrcCmp = - IRB.CreateICmp(Pred, LhsRsrc, RhsRsrc, Cmp.getName() + ".rsrc"); - copyMetadata(RsrcCmp, &Cmp); - Value *OffCmp = IRB.CreateICmp(Pred, LhsOff, RhsOff, Cmp.getName() + ".off"); - copyMetadata(OffCmp, &Cmp); - - Value *Res = nullptr; - if (Pred == ICmpInst::ICMP_EQ) - Res = IRB.CreateAnd(RsrcCmp, OffCmp); - else if (Pred == ICmpInst::ICMP_NE) - Res = IRB.CreateOr(RsrcCmp, OffCmp); + Value *Res = IRB.CreateICmp(Pred, LhsOff, RhsOff); copyMetadata(Res, &Cmp); Res->takeName(&Cmp); SplitUsers.insert(&Cmp); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 5bc9c28bed14..c6de57cb34c6 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3373,9 +3373,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS); bool Changed = false; GEP.replaceUsesWithIf(Y, [&](Use &U) { - bool ShouldReplace = isa(U.getUser()) || - (!HasNonAddressBits && - isa(U.getUser())); + bool ShouldReplace = + isa(U.getUser()) || + (!HasNonAddressBits && isa(U.getUser())); Changed |= ShouldReplace; return ShouldReplace; }); diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll index 610c3e2c0286..01d2c1c356cb 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll @@ -405,9 +405,7 @@ define i1 @test_null(ptr addrspace(7) %p) { ; CHECK-SAME: ({ ptr addrspace(8), i32 } [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[P_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 0 ; CHECK-NEXT: [[P_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 1 -; CHECK-NEXT: [[IS_NULL_RSRC:%.*]] = icmp eq ptr addrspace(8) [[P_RSRC]], null -; CHECK-NEXT: [[IS_NULL_OFF:%.*]] = icmp eq i32 [[P_OFF]], 0 -; CHECK-NEXT: [[IS_NULL:%.*]] = and i1 [[IS_NULL_RSRC]], [[IS_NULL_OFF]] +; CHECK-NEXT: [[IS_NULL:%.*]] = icmp eq i32 [[P_OFF]], 0 ; CHECK-NEXT: ret i1 [[IS_NULL]] ; %is.null = icmp eq ptr addrspace(7) %p, addrspacecast (ptr null to ptr addrspace(7)) @@ -471,9 +469,7 @@ define i1 @icmp_eq(ptr addrspace(7) %a, ptr addrspace(7) %b) { ; CHECK-NEXT: [[B_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[B]], 1 ; CHECK-NEXT: [[A_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[A]], 0 ; CHECK-NEXT: [[A_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[A]], 1 -; CHECK-NEXT: [[RET_RSRC:%.*]] = icmp eq ptr addrspace(8) [[A_RSRC]], [[B_RSRC]] -; CHECK-NEXT: [[RET_OFF:%.*]] = icmp eq i32 [[A_OFF]], [[B_OFF]] -; CHECK-NEXT: [[RET:%.*]] = and i1 [[RET_RSRC]], [[RET_OFF]] +; CHECK-NEXT: [[RET:%.*]] = icmp eq i32 [[A_OFF]], [[B_OFF]] ; CHECK-NEXT: ret i1 [[RET]] ; %ret = icmp eq ptr addrspace(7) %a, %b @@ -487,9 +483,7 @@ define i1 @icmp_ne(ptr addrspace(7) %a, ptr addrspace(7) %b) { ; CHECK-NEXT: [[B_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[B]], 1 ; CHECK-NEXT: [[A_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[A]], 0 ; CHECK-NEXT: [[A_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[A]], 1 -; CHECK-NEXT: [[RET_RSRC:%.*]] = icmp ne ptr addrspace(8) [[A_RSRC]], [[B_RSRC]] -; CHECK-NEXT: [[RET_OFF:%.*]] = icmp ne i32 [[A_OFF]], [[B_OFF]] -; CHECK-NEXT: [[RET:%.*]] = or i1 [[RET_RSRC]], [[RET_OFF]] +; CHECK-NEXT: [[RET:%.*]] = icmp ne i32 [[A_OFF]], [[B_OFF]] ; CHECK-NEXT: ret i1 [[RET]] ; %ret = icmp ne ptr addrspace(7) %a, %b @@ -503,9 +497,7 @@ define <2 x i1> @icmp_eq_vec(<2 x ptr addrspace(7)> %a, <2 x ptr addrspace(7)> % ; CHECK-NEXT: [[B_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[B]], 1 ; CHECK-NEXT: [[A_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[A]], 0 ; CHECK-NEXT: [[A_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[A]], 1 -; CHECK-NEXT: [[RET_RSRC:%.*]] = icmp eq <2 x ptr addrspace(8)> [[A_RSRC]], [[B_RSRC]] -; CHECK-NEXT: [[RET_OFF:%.*]] = icmp eq <2 x i32> [[A_OFF]], [[B_OFF]] -; CHECK-NEXT: [[RET:%.*]] = and <2 x i1> [[RET_RSRC]], [[RET_OFF]] +; CHECK-NEXT: [[RET:%.*]] = icmp eq <2 x i32> [[A_OFF]], [[B_OFF]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %ret = icmp eq <2 x ptr addrspace(7)> %a, %b @@ -519,9 +511,7 @@ define <2 x i1> @icmp_ne_vec(<2 x ptr addrspace(7)> %a, <2 x ptr addrspace(7)> % ; CHECK-NEXT: [[B_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[B]], 1 ; CHECK-NEXT: [[A_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[A]], 0 ; CHECK-NEXT: [[A_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[A]], 1 -; CHECK-NEXT: [[RET_RSRC:%.*]] = icmp ne <2 x ptr addrspace(8)> [[A_RSRC]], [[B_RSRC]] -; CHECK-NEXT: [[RET_OFF:%.*]] = icmp ne <2 x i32> [[A_OFF]], [[B_OFF]] -; CHECK-NEXT: [[RET:%.*]] = or <2 x i1> [[RET_RSRC]], [[RET_OFF]] +; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[A_OFF]], [[B_OFF]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %ret = icmp ne <2 x ptr addrspace(7)> %a, %b diff --git a/llvm/test/Transforms/GVN/assume-equal.ll b/llvm/test/Transforms/GVN/assume-equal.ll index a38980169fc5..d2a584e89170 100644 --- a/llvm/test/Transforms/GVN/assume-equal.ll +++ b/llvm/test/Transforms/GVN/assume-equal.ll @@ -404,12 +404,14 @@ define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(ptr %p, ptr %p ret i64 %int } -define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2) { -; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint_addrsize( +; If the pointer and address size does not match, we can't replace a pointer +; with different provenance in ptrtoint, as the non-address bits may not match. +define i64 @assume_ptr_eq_different_prov_matters_ptrtoint_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2) { +; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_matters_ptrtoint_addrsize( ; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P]], [[P2]] ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 +; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr addrspace(1) [[P2]] to i64 ; CHECK-NEXT: ret i64 [[INT]] ; %cmp = icmp eq ptr addrspace(1) %p, %p2 diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll index adf3aa12623b..f33fa5089d8d 100644 --- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll +++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll @@ -206,7 +206,7 @@ define ptr @gep_sub_ptrtoaddr_different_obj(ptr %p, ptr %p2, ptr %p3) { ret ptr %gep } -; The use in ptrtoaddr should be replaced. The uses in ptrtoint and icmp should +; The use in ptrtoaddr and icmp should be replaced. The use in ptrtoint should ; not be replaced, as the non-address bits differ. The use in the return value ; should not be replaced as the provenace differs. define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2, ptr addrspace(1) %p3) { @@ -216,7 +216,7 @@ define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace( ; CHECK-NEXT: [[P2_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32 ; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[P2_ADDR]], [[P_ADDR]] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[P]], i32 [[SUB]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[GEP]], [[P3]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(1) [[P2]], [[P3]] ; CHECK-NEXT: call void @use.i1(i1 [[CMP]]) ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64 ; CHECK-NEXT: [[INT:%.*]] = trunc i64 [[TMP1]] to i32