[IR] Mark reduction intrinsics as nocreateundeforpoison (#184173)

While investigating #156233, it came up that select folds such as the one at
https://alive2.llvm.org/ce/z/Y6jzj6 cannot be carried out (or easily fixed
for now) because integer reductions do not propagate noundef, even if their
arguments are noundef. This patch adds this propagation by marking the
reduction intrinsics as nocreateundeforpoison.
This commit is contained in:
Gergo Stomfai 2026-03-03 08:51:12 +00:00 committed by GitHub
parent 9cda40735a
commit eb1e808fdb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 30 additions and 11 deletions

View File

@ -2709,7 +2709,7 @@ def int_memset_element_unordered_atomic
//===------------------------ Reduction Intrinsics ------------------------===//
//
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in {
def int_vector_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[LLVMVectorElementType<0>,

View File

@ -7572,7 +7572,7 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
case Instruction::Call:
if (auto *II = dyn_cast<IntrinsicInst>(Op)) {
switch (II->getIntrinsicID()) {
// TODO: Add more intrinsics.
// NOTE: Use IntrNoCreateUndefOrPoison when possible.
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::abs:

View File

@ -203,7 +203,7 @@ attributes #0 = { noinline nounwind optnone }
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 4, i32 1}
;.

View File

@ -545,7 +545,7 @@ attributes #0 = { noinline nounwind optnone }
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
;.
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
; CHECK: [[PROF1]] = !{!"unknown", !"x86-lower-amx-intrinsics"}

View File

@ -13,8 +13,7 @@ define void @test(i64 %v) {
; CHECK-NEXT: [[TMP19:%.*]] = icmp ult i64 0, 0
; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i1 [[TMP18]], i1 false
; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP20]]
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP18]], i1 [[TMP19]], i1 false
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP8]], i1 false, i1 false
; CHECK-NEXT: br i1 [[OP_RDX1]], label %[[BB_I107_PREHEADER:.*]], label %[[BB_I27_I_PREHEADER:.*]]
; CHECK: [[BB_I107_PREHEADER]]:

View File

@ -14,7 +14,7 @@ define i1 @foo() {
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i1> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]])
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP5]], i1 false
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP5]], i1 false, i1 false
; CHECK-NEXT: ret i1 [[OP_RDX]]
;
entry:

View File

@ -361,7 +361,7 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP6]], i1 false
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false

View File

@ -10,8 +10,7 @@ define void @tes() {
; X86: 1:
; X86-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> zeroinitializer, <2 x i1> [[TMP0]], <4 x i32> <i32 0, i32 0, i32 0, i32 2>
; X86-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; X86-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP3]], i1 false
; X86-NEXT: [[OP_RDX1:%.*]] = select i1 false, i1 [[OP_RDX]], i1 false
; X86-NEXT: [[OP_RDX1:%.*]] = select i1 false, i1 [[TMP3]], i1 false
; X86-NEXT: br i1 [[OP_RDX1]], label [[TMP4:%.*]], label [[TMP5:%.*]]
; X86: 4:
; X86-NEXT: ret void

View File

@ -1354,7 +1354,28 @@ TEST(ValueTracking, canCreatePoisonOrUndef) {
{{false, false},
"call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y)"},
{{false, false},
"call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 %y)"}};
"call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 %y)"},
{{false, false}, "call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %vx)"},
{{false, false}, "call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %vx)"},
{{false, false}, "call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %vx)"},
{{false, false}, "call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vx)"},
{{false, false}, "call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %vx)"},
{{false, false},
"call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vx)"},
{{false, false},
"call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vx)"},
{{false, false},
"call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vx)"},
{{false, false},
"call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vx)"},
{{false, false},
"call i32 @llvm.vector.reduce.fmax.v4i32(<4 x i32> %vx)"},
{{false, false},
"call i32 @llvm.vector.reduce.fmin.v4i32(<4 x i32> %vx)"},
{{false, false},
"call i32 @llvm.vector.reduce.fmaximum.v4i32(<4 x i32> %vx)"},
{{false, false},
"call i32 @llvm.vector.reduce.fmaximum.v4i32(<4 x i32> %vx)"}};
std::string AssemblyStr = AsmHead;
for (auto &Itm : Data)