From eb1e808fdb44fee0cb8beea0ea8f162b82e8d92f Mon Sep 17 00:00:00 2001 From: Gergo Stomfai Date: Tue, 3 Mar 2026 08:51:12 +0000 Subject: [PATCH] [IR] Mark reduction intrinsics as nocreateundeforpoison (#184173) While investigating #156233, it came up that select folds like the one in https://alive2.llvm.org/ce/z/Y6jzj6 cannot currently be carried out, or easily fixed, because integer reductions do not propagate noundef even when their arguments are noundef. This patch adds that propagation by marking the reduction intrinsics as nocreateundeforpoison. --- llvm/include/llvm/IR/Intrinsics.td | 2 +- llvm/lib/Analysis/ValueTracking.cpp | 2 +- .../AMX/amx-low-intrinsics-no-amx-bitcast.ll | 2 +- .../CodeGen/X86/AMX/amx-low-intrinsics.ll | 2 +- .../X86/extracts-non-extendable.ll | 3 +-- .../X86/non-load-reduced-as-part-of-bv.ll | 2 +- .../SLPVectorizer/X86/reduction-logical.ll | 2 +- ...reduction-gather-non-scheduled-extracts.ll | 3 +-- llvm/unittests/Analysis/ValueTrackingTest.cpp | 23 ++++++++++++++++++- 9 files changed, 30 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index a1c91486f7c3..5b5fffaa4895 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -2709,7 +2709,7 @@ def int_memset_element_unordered_atomic //===------------------------ Reduction Intrinsics ------------------------===// // -let IntrProperties = [IntrNoMem, IntrSpeculatable] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in { def int_vector_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [LLVMVectorElementType<0>, diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 8bc40d46ca53..3ddbc3bce804 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -7572,7 +7572,7 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind, case Instruction::Call: if (auto *II = dyn_cast(Op)) { switch (II->getIntrinsicID()) { - // 
TODO: Add more intrinsics. + // NOTE: Use IntrNoCreateUndefOrPoison when possible. case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::abs: diff --git a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll index ec516d265098..a3725ebeb939 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll @@ -203,7 +203,7 @@ attributes #0 = { noinline nounwind optnone } ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) } ;. ; CHECK: [[PROF0]] = !{!"branch_weights", i32 4, i32 1} ;. diff --git a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll index 39c66c78bacb..0e140f2b453d 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll @@ -545,7 +545,7 @@ attributes #0 = { noinline nounwind optnone } ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) } ;. 
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} ; CHECK: [[PROF1]] = !{!"unknown", !"x86-lower-amx-intrinsics"} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extracts-non-extendable.ll b/llvm/test/Transforms/SLPVectorizer/X86/extracts-non-extendable.ll index 3a7184f8e051..0875b8dd2f9e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extracts-non-extendable.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extracts-non-extendable.ll @@ -13,8 +13,7 @@ define void @test(i64 %v) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp ult i64 0, 0 ; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]]) -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i1 [[TMP18]], i1 false -; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP20]] +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP18]], i1 [[TMP19]], i1 false ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP8]], i1 false, i1 false ; CHECK-NEXT: br i1 [[OP_RDX1]], label %[[BB_I107_PREHEADER:.*]], label %[[BB_I27_I_PREHEADER:.*]] ; CHECK: [[BB_I107_PREHEADER]]: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll index 7df97492b874..b965d27fe3ea 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll @@ -14,7 +14,7 @@ define i1 @foo() { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> , <8 x i1> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP5]], i1 false +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP5]], i1 false, i1 false ; CHECK-NEXT: ret i1 [[OP_RDX]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 1904540c2314..5e0dea82bdda 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -361,7 +361,7 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) { ; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP6]], i1 false +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]] ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll b/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll index ae5018a63e21..5834ad3a106e 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll @@ -10,8 +10,7 @@ define void @tes() { ; X86: 1: ; X86-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> zeroinitializer, <2 x i1> [[TMP0]], <4 x i32> ; X86-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]]) -; X86-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP3]], i1 false -; X86-NEXT: [[OP_RDX1:%.*]] = select i1 false, i1 [[OP_RDX]], i1 false +; X86-NEXT: [[OP_RDX1:%.*]] = select i1 false, i1 [[TMP3]], i1 false ; X86-NEXT: br i1 [[OP_RDX1]], label [[TMP4:%.*]], label [[TMP5:%.*]] ; X86: 4: ; X86-NEXT: ret void diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index fa06b0caa6a6..de481e39307c 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ 
b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -1354,7 +1354,28 @@ TEST(ValueTracking, canCreatePoisonOrUndef) { {{false, false}, "call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y)"}, {{false, false}, - "call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 %y)"}}; + "call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 %y)"}, + {{false, false}, "call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %vx)"}, + {{false, false}, "call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %vx)"}, + {{false, false}, "call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %vx)"}, + {{false, false}, "call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vx)"}, + {{false, false}, "call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %vx)"}, + {{false, false}, + "call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vx)"}, + {{false, false}, + "call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vx)"}, + {{false, false}, + "call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vx)"}, + {{false, false}, + "call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vx)"}, + {{false, false}, + "call i32 @llvm.vector.reduce.fmax.v4i32(<4 x i32> %vx)"}, + {{false, false}, + "call i32 @llvm.vector.reduce.fmin.v4i32(<4 x i32> %vx)"}, + {{false, false}, + "call i32 @llvm.vector.reduce.fmaximum.v4i32(<4 x i32> %vx)"}, + {{false, false}, + "call i32 @llvm.vector.reduce.fmaximum.v4i32(<4 x i32> %vx)"}}; std::string AssemblyStr = AsmHead; for (auto &Itm : Data)