When a recipe can be safely sunk and all of its users are outside the vector loop region, in the same dedicated exit block, the recipe does not need to be executed on every iteration. This patch extends the VPlan-based LICM (Loop Invariant Code Motion) to also sink such recipes from the vector loop region into the exit block. This reduces redundant computation and improves cost model accuracy.

TODO: Support nested loop sinking.
TODO: Support sinking `VPReplicateRecipe` (requires `replicateByVF` fixes).
TODO: Support recipes with multiple defined values (e.g., interleaved loads).
TODO: Clone recipes without users to all exit blocks.
TODO: Support PHI node users by checking incoming value blocks.
TODO: Support sinking when users are in multiple blocks.
TODO: Clone recipes when users are on multiple exit paths.

Co-authored-by: Luke Lau <luke@igalia.com>

---------

Co-authored-by: Luke Lau <luke@igalia.com>
Co-authored-by: Luke Lau <luke_lau@icloud.com>
202 lines
8.5 KiB
LLVM
202 lines
8.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S %s | FileCheck %s

; The command above runs only the loop vectorizer, with the vector width
; forced to 4 and the interleave count forced to 1, and verifies the printed
; IR of every function in this file.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; The loop below does no work besides counting: %indvars.iv is incremented
; until its truncated value equals %tc. The exit block for.end.i.i.i holds a
; single-entry phi (%lcssa) of the loop-invariant pointer %p and then ends in
; unreachable. The expected output still contains a vector loop (vector.body
; only advances the index by 4) and gives %lcssa a second incoming %p from
; middle.block.
define void @test(i32 %tc, ptr %p) {
; CHECK-LABEL: @test(
; CHECK-NEXT: br label [[FOR_BODY_LR_PH_I_I_I:%.*]]
; CHECK: for.body.lr.ph.i.i.i:
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TC:%.*]], -1
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_I_I_I:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH_I_I_I]] ]
; CHECK-NEXT: br label [[FOR_BODY_I_I_I:%.*]]
; CHECK: for.body.i.i.i:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC_I_I_I:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: br label [[FOR_INC_I_I_I]]
; CHECK: for.inc.i.i.i:
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TC]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY_I_I_I]], label [[FOR_END_I_I_I]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.end.i.i.i:
; CHECK-NEXT: [[LCSSA:%.*]] = phi ptr [ [[P:%.*]], [[FOR_INC_I_I_I]] ], [ [[P]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: unreachable
;
  br label %for.body.lr.ph.i.i.i

for.body.lr.ph.i.i.i:
  br label %for.body.i.i.i

for.body.i.i.i:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.i.i.i ], [ 0, %for.body.lr.ph.i.i.i ]
  br label %for.inc.i.i.i

for.inc.i.i.i:
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  ; Keep looping while the truncated counter differs from %tc.
  %exitcond = icmp ne i32 %lftr.wideiv, %tc
  br i1 %exitcond, label %for.body.i.i.i, label %for.end.i.i.i

for.end.i.i.i:
  ; Single-entry phi of a value defined outside the loop.
  %lcssa = phi ptr [ %p, %for.inc.i.i.i ]
  unreachable
}

; PR16139
; The entry block dispatches through an indirectbr to either L0, which
; unconditionally branches back to itself, or L1, which returns. The expected
; output is the same control flow as the input, with no vector blocks.
define void @test2(ptr %x) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: indirectbr ptr [[X:%.*]], [label [[L0:%.*]], label [[L1:%.*]]]
; CHECK: L0:
; CHECK-NEXT: br label [[L0]]
; CHECK: L1:
; CHECK-NEXT: ret void
;
entry:
  indirectbr ptr %x, [ label %L0, label %L1 ]

L0:
  ; Self-loop reached only through the indirectbr above.
  br label %L0

L1:
  ret void
}

; This loop has different uniform instructions before and after LCSSA.
; %inc46 is defined inside while.body and is used directly by %add58 in
; while.end, i.e. the input has no phi for it in the exit block. The position
; starts at 3 + 3 = 6 and the loop exits once (%inc46 & 3) == 0. In the
; expected output the store becomes four predicated scalar stores whose branch
; conditions are the constants true, true, false, false, and the live-out value
; is extracted from the last active lane in middle.block.
define void @test3(ptr %p) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD41:%.*]] = add i32 3, 3
; CHECK-NEXT: [[IDXPROM4736:%.*]] = zext i32 [[ADD41]] to i64
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 6, 1
; CHECK-NEXT: [[TMP1:%.*]] = add i32 7, 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 8, 1
; CHECK-NEXT: [[TMP3:%.*]] = add i32 9, 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 2
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP3]], i32 3
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P:%.*]], i64 0, i64 6
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX48]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
; CHECK: pred.store.if1:
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 7
; CHECK-NEXT: store i8 0, ptr [[TMP15]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 8
; CHECK-NEXT: store i8 0, ptr [[TMP17]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 9
; CHECK-NEXT: store i8 0, ptr [[TMP19]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[FIRST_INACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 false, i1 false, i1 true, i1 true>, i1 false)
; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[FIRST_INACTIVE_LANE]], 1
; CHECK-NEXT: [[INC46_LCSSA:%.*]] = extractelement <4 x i32> [[TMP11]], i64 [[LAST_ACTIVE_LANE]]
; CHECK-NEXT: br label [[WHILE_END:%.*]]
; CHECK: while.end:
; CHECK-NEXT: [[ADD58:%.*]] = add i32 [[INC46_LCSSA]], 4
; CHECK-NEXT: ret void
;
entry:
  ; Initial position (6) and its zero-extended form used as the first index.
  %add41 = add i32 3, 3
  %idxprom4736 = zext i32 %add41 to i64
  br label %while.body

while.body:
  %idxprom4738 = phi i64 [ %idxprom47, %while.body ], [ %idxprom4736, %entry ]
  %pos.337 = phi i32 [ %inc46, %while.body ], [ %add41, %entry ]
  %inc46 = add i32 %pos.337, 1
  %arrayidx48 = getelementptr inbounds [1024 x i8], ptr %p, i64 0, i64 %idxprom4738
  store i8 0, ptr %arrayidx48, align 1
  ; Leave the loop once the low two bits of the incremented position are zero.
  %and43 = and i32 %inc46, 3
  %cmp44 = icmp eq i32 %and43, 0
  %idxprom47 = zext i32 %inc46 to i64
  br i1 %cmp44, label %while.end, label %while.body

while.end:
  ; Direct use of a value defined inside the loop.
  %add58 = add i32 %inc46, 4
  ret void
}

; Make sure LV doesn't crash on IR where some LCSSA uses are unreachable.
; %local is defined by a phi inside the loop and is used by %local.use in
; block bb, which no branch in this function targets; loop.exit itself ends in
; unreachable. In the expected output the operand of the add in bb is poison.
; The %src argument is not referenced by the body.
define i32 @pr57508(ptr %src) {
; CHECK-LABEL: @pr57508(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2000
; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 2000, [[SCALAR_PH]] ]
; CHECK-NEXT: [[LOCAL:%.*]] = phi i32 [ [[LOCAL_NEXT:%.*]], [[LOOP]] ], [ 2000, [[SCALAR_PH]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[LOCAL_NEXT]] = add i32 [[LOCAL]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 2000
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: loop.exit:
; CHECK-NEXT: unreachable
; CHECK: bb:
; CHECK-NEXT: [[LOCAL_USE:%.*]] = add i32 poison, 1
; CHECK-NEXT: ret i32 [[LOCAL_USE]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %local = phi i32 [ %local.next, %loop ], [ 0, %entry ]
  %iv.next = add nuw nsw i64 %iv, 1
  %local.next = add i32 %local, 1
  ; The comparison uses %iv (not %iv.next), so the loop exits when %iv is 2000.
  %ec = icmp eq i64 %iv, 2000
  br i1 %ec, label %loop.exit, label %loop

loop.exit:
  unreachable

bb:
  ; No predecessors: this use of the loop-defined %local is unreachable.
  %local.use = add i32 %local, 1
  ret i32 %local.use
}