
Materialize the vector trip count computation using VPInstruction instead of directly creating IR. This is one of the last few steps needed to model the full vector skeleton in VPlan. It also simplifies vector-trip count computations for scalable vectors, as we can re-use the UF x VF computation. PR: https://github.com/llvm/llvm-project/pull/151925
87 lines
5.1 KiB
LLVM
87 lines
5.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -scalable-vectorization=on -force-target-supports-scalable-vectors=true -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S %s | FileCheck %s
|
|
|
|
define i32 @iv_live_out_wide(ptr %dst) {
|
|
; CHECK-LABEL: define i32 @iv_live_out_wide(
|
|
; CHECK-SAME: ptr [[DST:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32
|
|
; CHECK-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 4
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 2000, [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2
|
|
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP6]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[STEP_2]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
|
|
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
|
|
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP5]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 2
|
|
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]]
|
|
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP10]], align 2
|
|
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP14]], align 2
|
|
; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i32> [[BROADCAST_SPLAT]], [[STEP_ADD]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT2]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 2
|
|
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
|
|
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 2 x i32> [[TMP15]], i32 [[TMP19]]
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 2000, [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[E_EXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]]
|
|
; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2
|
|
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[IV]]
|
|
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 2000
|
|
; CHECK-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: [[E_EXIT]]:
|
|
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP20]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
entry:
|
|
%step.1 = sext i8 0 to i32
|
|
%step.2 = add nsw i32 %step.1, 1
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%gep.dst = getelementptr inbounds i16, ptr %dst, i32 %iv
|
|
store i16 0, ptr %gep.dst, align 2
|
|
%iv.next = add i32 %step.2, %iv
|
|
%cmp.i = icmp slt i32 %iv.next, 2000
|
|
br i1 %cmp.i, label %loop, label %e.exit
|
|
|
|
e.exit:
|
|
%res = phi i32 [ %iv.next, %loop ]
|
|
ret i32 %res
|
|
}
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
|
|
;.
|