llvm-project/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
Florian Hahn 1de3dc7d23 [LV] Bail out early if BTC+1 wraps.
Currently we fail to detect the case where BTC + 1 wraps, i.e. the
vector trip count is 0. In those cases, the minimum iteration count
check will always fail, and the vector code will never be executed.

Explicitly check for this condition in computeMaxVF and avoid trying to
vectorize altogether.

Note that a number of tests needed to be updated, because the vector
loop would never be executed given the input IR.

Fixes https://github.com/llvm/llvm-project/issues/122558.
2025-01-14 22:07:38 +00:00
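
For illustration, a minimal sketch of the wrapping case (a hypothetical
function, not part of the test below): the backedge-taken count of this loop
is 2^64 - 1, so the trip count BTC + 1, computed in the same 64-bit width,
wraps to 0 and the minimum iteration count check can never pass.

define void @btc_plus_one_wraps(ptr %p) { ; hypothetical example, not in the test
entry:
  br label %loop
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep = getelementptr i8, ptr %p, i64 %iv
  store i8 0, ptr %gep
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 0   ; exits only when %iv.next wraps back to 0
  br i1 %ec, label %exit, label %loop
exit:
  ret void
}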


; RUN: opt %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s
; Make sure that integer poison-generating flags (i.e., nuw/nsw, exact and inbounds)
; are dropped from instructions in blocks that need predication and are linearized
; and masked after vectorization. We only drop flags from scalar instructions that
; contribute to the address computation of a masked vector load/store. After
; linearizing the control flow and removing their guarding condition, these
; instructions could generate a poison value which would be used as base address of
; the masked vector load/store (see PR52111). For gather/scatter cases,
; poison-generating flags can be preserved since poison addresses in the vector GEP
; reaching the gather/scatter instruction will be masked-out by the gather/scatter
; instruction itself and won't be used.
; We need AVX512 target features for the loop to be vectorized with masks instead of
; predicates.
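; For example, in @drop_scalar_nuw_nsw below, after linearization the guarded
; 'sub nuw nsw i64 %iv, 1' also executes for iv == 0, where 0 - 1 violates nuw
; and yields poison that would become the base address of the whole masked
; load. With the flags dropped, the sub produces -1 instead; lane 0 is masked
; off, so &input[-1] is never actually dereferenced.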
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
; Test for PR52111.
define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input,
                                 ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_nuw_nsw(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i23 = icmp eq i64 %iv, 0
br i1 %i23, label %if.end, label %if.then
if.then:
%i27 = sub nuw nsw i64 %iv, 1
%i29 = getelementptr inbounds float, ptr %input, i64 %i27
%i30 = load float, ptr %i29, align 4, !invariant.load !0
br label %if.end
if.end:
%i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
%i35 = getelementptr inbounds float, ptr %output, i64 %iv
store float %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
; Variant with getelementptr nusw.
define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input,
                                  ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_gep_nusw(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i23 = icmp eq i64 %iv, 0
br i1 %i23, label %if.end, label %if.then
if.then:
%i27 = sub nuw nsw i64 %iv, 1
%i29 = getelementptr nusw float, ptr %input, i64 %i27
%i30 = load float, ptr %i29, align 4, !invariant.load !0
br label %if.end
if.end:
%i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
%i35 = getelementptr nusw float, ptr %output, i64 %iv
store float %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
; Variant with getelementptr nuw.
define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input,
                                 ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_gep_nuw(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i23 = icmp eq i64 %iv, 0
br i1 %i23, label %if.end, label %if.then
if.then:
%i27 = sub nuw nsw i64 %iv, 1
%i29 = getelementptr nuw float, ptr %input, i64 %i27
%i30 = load float, ptr %i29, align 4, !invariant.load !0
br label %if.end
if.end:
%i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
%i35 = getelementptr nuw float, ptr %output, i64 %iv
store float %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
; In this case, 'sub' and 'getelementptr' are not guarded by the predicate.
define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input,
                                         ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_nonpred_scalar_nuw_nsw(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i27 = sub i64 %iv, 1
%i29 = getelementptr float, ptr %input, i64 %i27
%i23 = icmp eq i64 %iv, 0
br i1 %i23, label %if.end, label %if.then
if.then:
%i30 = load float, ptr %i29, align 4, !invariant.load !0
br label %if.end
if.end:
%i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
%i35 = getelementptr inbounds float, ptr %output, i64 %iv
store float %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
; Preserve poison-generating flags from vector 'sub', 'mul' and 'getelementptr' feeding a masked gather.
define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input,
                                     ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_vector_nuw_nsw(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw <4 x i64> [[TMP5]], splat (i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP7]], i32 4, <4 x i1> [[TMP8]], <4 x float> poison), !invariant.load !0
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i23 = icmp eq i64 %iv, 0
br i1 %i23, label %if.end, label %if.then
if.then:
%i27 = sub nuw nsw i64 %iv, 1
%i28 = mul nuw nsw i64 %i27, 2
%i29 = getelementptr inbounds float, ptr %input, i64 %i28
%i30 = load float, ptr %i29, align 4, !invariant.load !0
br label %if.end
if.end:
%i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
%i35 = getelementptr inbounds float, ptr %output, i64 %iv
store float %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
; Drop poison-generating flags from vector 'sub' and 'gep' feeding a masked load.
define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input,
                                 ptr %output, ptr noalias %ptrs) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_vector_nuw_nsw(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]]
; CHECK: [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP12]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i23 = icmp eq i64 %iv, 0
%gep = getelementptr inbounds ptr, ptr %ptrs, i64 %iv
%i27 = sub nuw nsw i64 %iv, 1
%i29 = getelementptr inbounds float, ptr %input, i64 %i27
store ptr %i29, ptr %gep
br i1 %i23, label %if.end, label %if.then
if.then:
%i30 = load float, ptr %i29, align 4, !invariant.load !0
br label %if.end
if.end:
%i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
%i35 = getelementptr inbounds float, ptr %output, i64 %iv
store float %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
; Preserve poison-generating flags from 'sub', which is not contributing to any address computation
; of any masked load/store/gather/scatter.
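; After vectorization, the 'sub nuw nsw' executes unconditionally and its
; iv == 0 lane wraps to poison, but the predicated select (PREDPHI) picks the
; zero value for that lane, so the poison never escapes.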
define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_nuw_nsw_no_addr(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i23 = icmp eq i64 %iv, 0
br i1 %i23, label %if.end, label %if.then
if.then:
%i27 = sub nuw nsw i64 %iv, 1
br label %if.end
if.end:
%i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ]
%i35 = getelementptr inbounds i64, ptr %output, i64 %iv
store i64 %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
; Drop poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked load.
define void @drop_scalar_exact(ptr noalias nocapture readonly %input,
                               ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_exact(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = sdiv i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP11]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i7 = icmp ne i64 %iv, 0
%i8 = and i64 %iv, 1
%i9 = icmp eq i64 %i8, 0
%i10 = and i1 %i7, %i9
br i1 %i10, label %if.end, label %if.then
if.then:
%i26 = sdiv exact i64 %iv, 1
%i29 = getelementptr inbounds float, ptr %input, i64 %i26
%i30 = load float, ptr %i29, align 4, !invariant.load !0
br label %if.end
if.end:
%i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
%i35 = getelementptr inbounds float, ptr %output, i64 %iv
store float %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
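; Drop the 'nneg' flag from a 'zext' feeding the address of a masked load.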
define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
; CHECK-LABEL: define void @drop_zext_nneg(
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison)
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x double> zeroinitializer, <4 x double> [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3
; CHECK-NEXT: store double [[TMP6]], ptr [[P1]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[BODY:%.*]]
; CHECK: body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[NEXT:%.*]], [[ELSE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE]]
; CHECK: then:
; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[TMP8]] to i64
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]]
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]]
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[IDX2]], align 8
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[PHI:%.*]] = phi double [ [[TMP9]], [[THEN]] ], [ 0.000000e+00, [[BODY]] ]
; CHECK-NEXT: store double [[PHI]], ptr [[P1]], align 8
; CHECK-NEXT: [[NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[NEXT]], 1024
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %body
body:
%iv = phi i64 [ %next, %else ], [ 0, %entry ]
%0 = trunc i64 %iv to i32
%c = icmp eq i32 %0, 0
br i1 %c, label %then, label %else
then:
%zext = zext nneg i32 %0 to i64
%idx1 = getelementptr double, ptr %p, i64 %zext
%idx2 = getelementptr double, ptr %p, i64 %zext
%1 = load double, ptr %idx2, align 8
br label %else
else:
%phi = phi double [ %1, %then ], [ 0.000000e+00, %body ]
store double %phi, ptr %p1, align 8
%next = add i64 %iv, 1
%cmp = icmp eq i64 %next, 1024
br i1 %cmp, label %exit, label %body
exit:
ret void
}
; Preserve poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked gather.
define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input,
                                           ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_vector_exact_no_addr(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP8]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP9]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i7 = icmp ne i64 %iv, 0
%i8 = and i64 %iv, 1
%i9 = icmp eq i64 %i8, 0
%i10 = and i1 %i7, %i9
br i1 %i10, label %if.end, label %if.then
if.then:
%i26 = sdiv exact i64 %iv, 2
%i29 = getelementptr inbounds float, ptr %input, i64 %i26
%i30 = load float, ptr %i29, align 4, !invariant.load !0
br label %if.end
if.end:
%i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
%i35 = getelementptr inbounds float, ptr %output, i64 %iv
store float %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
; Preserve poison-generating flags from 'sdiv', which is not contributing to any address computation
; of any masked load/store/gather/scatter.
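; Here the 'sdiv' result only feeds the stored value, never the address of a
; masked access, so any poison lanes it produces are no worse than in the
; scalar loop.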
define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_exact_no_addr(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i23 = icmp eq i64 %iv, 0
br i1 %i23, label %if.end, label %if.then
if.then:
%i27 = sdiv exact i64 %iv, 2
br label %if.end
if.end:
%i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ]
%i35 = getelementptr inbounds i64, ptr %output, i64 %iv
store i64 %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
; Make sure we don't vectorize a loop with a phi feeding a poison value to
; a masked load/gather.
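; Dropping poison-generating flags cannot help here: the poison comes from the
; phi itself rather than from a flag, so the vectorizer must bail out.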
define void @dont_vectorize_poison_phi(ptr noalias nocapture readonly %input,
; CHECK-LABEL: @dont_vectorize_poison_phi(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[POISON:%.*]] = phi i64 [ poison, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_INC]], [[IF_END]] ]
; CHECK-NEXT: [[I23:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: br i1 [[I23]], label [[IF_END]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[I29:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], i64 [[POISON]]
; CHECK-NEXT: [[I30:%.*]] = load float, ptr [[I29]], align 4, !invariant.load !0
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[I34:%.*]] = phi float [ 0.000000e+00, [[LOOP_HEADER]] ], [ [[I30]], [[IF_THEN]] ]
; CHECK-NEXT: [[I35:%.*]] = getelementptr inbounds float, ptr [[OUTPUT:%.*]], i64 [[IV]]
; CHECK-NEXT: store float [[I34]], ptr [[I35]], align 4
; CHECK-NEXT: [[IV_INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_INC]], 4
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_EXIT:%.*]], label [[LOOP_HEADER]]
; CHECK: loop.exit:
; CHECK-NEXT: ret void
;
                                       ptr %output) local_unnamed_addr #0 {
entry:
br label %loop.header
loop.header:
%poison = phi i64 [ poison, %entry ], [ %iv.inc, %if.end ]
%iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
%i23 = icmp eq i64 %iv, 0
br i1 %i23, label %if.end, label %if.then
if.then:
%i29 = getelementptr inbounds float, ptr %input, i64 %poison
%i30 = load float, ptr %i29, align 4, !invariant.load !0
br label %if.end
if.end:
%i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
%i35 = getelementptr inbounds float, ptr %output, i64 %iv
store float %i34, ptr %i35, align 4
%iv.inc = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.inc, 4
br i1 %exitcond, label %loop.exit, label %loop.header
loop.exit:
ret void
}
@c = external global [5 x i8]
; Test case for https://github.com/llvm/llvm-project/issues/70590.
; Note that the then block has UB, but I could not find any other way to
; construct a suitable test case.
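; (The UB is the 'srem i64 3, 0' in %then: srem by zero is immediate UB when
; executed.)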
define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
; CHECK-LABEL: @pr70590_recipe_without_underlying_instr(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.+]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_SREM_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i64> [[VEC_IND]],
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]]
; CHECK: pred.srem.if:
; CHECK-NEXT: [[TMP4:%.*]] = srem i64 3, 0
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE]]
; CHECK: pred.srem.continue:
; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ poison, %vector.body ], [ [[TMP4]], [[PRED_SREM_IF]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_SREM_IF1:%.*]], label [[PRED_SREM_CONTINUE2:%.*]]
; CHECK: pred.srem.if1:
; CHECK-NEXT: [[TMP7:%.*]] = srem i64 3, 0
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE2]]
; CHECK: pred.srem.continue2:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4:%.*]]
; CHECK: pred.srem.if3:
; CHECK-NEXT: [[TMP10:%.*]] = srem i64 3, 0
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE4]]
; CHECK: pred.srem.continue4:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_SREM_IF5:%.*]], label [[PRED_SREM_CONTINUE6]]
; CHECK: pred.srem.if5:
; CHECK-NEXT: [[TMP13:%.*]] = srem i64 3, 0
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE6]]
; CHECK: pred.srem.continue6:
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP5]], -3
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP0]], [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> [[WIDE_LOAD]], <4 x i8> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[TMP0]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: br i1 true, label %middle.block, label %vector.body
; CHECK: middle.block:
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ]
%cmp = icmp eq i64 %iv, %n
br i1 %cmp, label %loop.latch, label %then
then:
%rem = srem i64 3, 0
%add3 = add i64 %rem, -3
%add5 = add i64 %iv, %add3
%gep = getelementptr [5 x i8], ptr @c, i64 0, i64 %add5
%l = load i8, ptr %gep, align 1
br label %loop.latch
loop.latch:
%sr = phi i8 [ 0, %loop.header ], [ %l , %then ]
%gep.dst = getelementptr i8, ptr %dst, i64 %iv
store i8 %sr, ptr %gep.dst, align 4
%inc = add i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, 4
br i1 %exitcond.not, label %exit, label %loop.header
exit:
ret void
}
; %B.gep.0 and pointers based on it can preserve inbounds, as the inbounds
; version is used unconditionally by the store in the latch; if %B.gep.0 were
; poison, that unconditional store would be UB anyway, so keeping the flag
; cannot introduce new UB.
; FIXME: at the moment, inbounds is dropped from both the GEP feeding the
; vector load and the GEP feeding the store.
define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr readonly %C) #0 {
; CHECK-LABEL: define void @Bgep_inbounds_unconditionally_due_to_store(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr %C, i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr %B, i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]], splat (float 2.000000e+00)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> splat (float 3.300000e+01), <4 x float> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 0
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT: br i1 [[TMP9]], label %middle.block, label %vector.body
entry:
br label %loop.body
loop.body:
%iv1 = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%C.gep = getelementptr inbounds i32, ptr %C, i64 %iv1
%C.lv = load i32, ptr %C.gep, align 4
%cmp = icmp eq i32 %C.lv, 20
%B.gep.0 = getelementptr inbounds float, ptr %B, i64 %iv1
br i1 %cmp, label %loop.latch, label %else
else:
%B.lv = load float, ptr %B.gep.0, align 4
%add = fadd float %B.lv, 2.0
br label %loop.latch
loop.latch:
%add.sink = phi float [ %add, %else ], [ 33.0, %loop.body ]
store float %add.sink, ptr %B.gep.0, align 4
%iv.next = add nuw nsw i64 %iv1, 1
%exitcond.not = icmp eq i64 %iv.next, 10000
br i1 %exitcond.not, label %exit, label %loop.body
exit:
ret void
}
attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
!0 = !{}