[IR] Mark reduction intrinsics as nocreateundeforpoison (#184173)
While investigating #156233, it came up that select folds such as the one shown at https://alive2.llvm.org/ce/z/Y6jzj6 cannot currently be carried out (nor easily fixed), because integer reductions do not propagate noundef even when their arguments are noundef. This patch adds that propagation by marking the reduction intrinsics as nocreateundeforpoison.
This commit is contained in:
parent
9cda40735a
commit
eb1e808fdb
@ -2709,7 +2709,7 @@ def int_memset_element_unordered_atomic
|
||||
|
||||
//===------------------------ Reduction Intrinsics ------------------------===//
|
||||
//
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in {
|
||||
|
||||
def int_vector_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
|
||||
[LLVMVectorElementType<0>,
|
||||
|
||||
@ -7572,7 +7572,7 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
|
||||
case Instruction::Call:
|
||||
if (auto *II = dyn_cast<IntrinsicInst>(Op)) {
|
||||
switch (II->getIntrinsicID()) {
|
||||
// TODO: Add more intrinsics.
|
||||
// NOTE: Use IntrNoCreateUndefOrPoison when possible.
|
||||
case Intrinsic::ctlz:
|
||||
case Intrinsic::cttz:
|
||||
case Intrinsic::abs:
|
||||
|
||||
@ -203,7 +203,7 @@ attributes #0 = { noinline nounwind optnone }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone }
|
||||
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
|
||||
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
;.
|
||||
; CHECK: [[PROF0]] = !{!"branch_weights", i32 4, i32 1}
|
||||
;.
|
||||
|
||||
@ -545,7 +545,7 @@ attributes #0 = { noinline nounwind optnone }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone }
|
||||
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
|
||||
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
;.
|
||||
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
|
||||
; CHECK: [[PROF1]] = !{!"unknown", !"x86-lower-amx-intrinsics"}
|
||||
|
||||
@ -13,8 +13,7 @@ define void @test(i64 %v) {
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = icmp ult i64 0, 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]])
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i1 [[TMP18]], i1 false
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP20]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP18]], i1 [[TMP19]], i1 false
|
||||
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP8]], i1 false, i1 false
|
||||
; CHECK-NEXT: br i1 [[OP_RDX1]], label %[[BB_I107_PREHEADER:.*]], label %[[BB_I27_I_PREHEADER:.*]]
|
||||
; CHECK: [[BB_I107_PREHEADER]]:
|
||||
|
||||
@ -14,7 +14,7 @@ define i1 @foo() {
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i1> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]])
|
||||
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP5]], i1 false
|
||||
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP5]], i1 false, i1 false
|
||||
; CHECK-NEXT: ret i1 [[OP_RDX]]
|
||||
;
|
||||
entry:
|
||||
|
||||
@ -361,7 +361,7 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP6]], i1 false
|
||||
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]]
|
||||
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false
|
||||
|
||||
@ -10,8 +10,7 @@ define void @tes() {
|
||||
; X86: 1:
|
||||
; X86-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> zeroinitializer, <2 x i1> [[TMP0]], <4 x i32> <i32 0, i32 0, i32 0, i32 2>
|
||||
; X86-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
|
||||
; X86-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP3]], i1 false
|
||||
; X86-NEXT: [[OP_RDX1:%.*]] = select i1 false, i1 [[OP_RDX]], i1 false
|
||||
; X86-NEXT: [[OP_RDX1:%.*]] = select i1 false, i1 [[TMP3]], i1 false
|
||||
; X86-NEXT: br i1 [[OP_RDX1]], label [[TMP4:%.*]], label [[TMP5:%.*]]
|
||||
; X86: 4:
|
||||
; X86-NEXT: ret void
|
||||
|
||||
@ -1354,7 +1354,28 @@ TEST(ValueTracking, canCreatePoisonOrUndef) {
|
||||
{{false, false},
|
||||
"call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y)"},
|
||||
{{false, false},
|
||||
"call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 %y)"}};
|
||||
"call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 %y)"},
|
||||
{{false, false}, "call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false}, "call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false}, "call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false}, "call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false}, "call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false},
|
||||
"call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false},
|
||||
"call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false},
|
||||
"call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false},
|
||||
"call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false},
|
||||
"call i32 @llvm.vector.reduce.fmax.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false},
|
||||
"call i32 @llvm.vector.reduce.fmin.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false},
|
||||
"call i32 @llvm.vector.reduce.fmaximum.v4i32(<4 x i32> %vx)"},
|
||||
{{false, false},
|
||||
"call i32 @llvm.vector.reduce.fmaximum.v4i32(<4 x i32> %vx)"}};
|
||||
|
||||
std::string AssemblyStr = AsmHead;
|
||||
for (auto &Itm : Data)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user