After https://github.com/llvm/llvm-project/pull/153643, there may be a BranchOnCond with a constant condition in the entry block. Simplify those in removeBranchOnConst. This removes a number of redundant conditional branches from entry blocks. In some cases, it may also make the original scalar loop unreachable, because we know it will never execute. In that case, we need to remove the loop from LoopInfo, because all unreachable blocks may dominate each other, making LoopInfo invalid. In those cases, we can also completely remove the loop, for which I'll share a follow-up patch. Depends on https://github.com/llvm/llvm-project/pull/153643. PR: https://github.com/llvm/llvm-project/pull/154510
871 lines
46 KiB
LLVM
871 lines
46 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s

; Little-endian layout; "ni:1" marks address space 1 as non-integral.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"

; External helper that the tests below call on stack allocations before
; entering the loop. Its body is not visible here; the parameter attributes
; state that the pointer is neither captured nor freed by the callee.
declare void @init(ptr nocapture nofree)
|
|
|
|
; Test case where the predicated load in the loop has an access size of 2 but
; has an alignment of 4.
; The loop runs a constant 4096 iterations (%iv = 0 .. 4095). The expected
; output branches unconditionally from entry to vector.ph and keeps the i16
; load from %alloca inside the pred.load.if blocks instead of widening it.
; The scalar loop is still printed, but nothing branches to scalar.ph.
define i16 @test_access_size_not_multiple_of_align(i64 %len, ptr %test_base) {
; CHECK-LABEL: @test_access_size_not_multiple_of_align(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [163840 x i16], align 4
; CHECK-NEXT: call void @init(ptr [[ALLOCA]])
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_LOAD_CONTINUE2]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i16> poison, i16 [[TMP7]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i16> [[TMP9]], i16 [[TMP13]], i32 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i16> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> [[TMP18]], <2 x i16> zeroinitializer
; CHECK-NEXT: [[TMP15]] = add <2 x i16> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP17:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP15]])
; CHECK-NEXT: br label [[LOOP_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[IV]]
; CHECK-NEXT: [[L_T:%.*]] = load i8, ptr [[TEST_ADDR]], align 1
; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[L_T]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[PRED:%.*]], label [[LATCH]]
; CHECK: pred:
; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[IV]]
; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[ADDR]], align 4
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i16 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
; CHECK-NEXT: [[ACCUM_NEXT]] = add i16 [[ACCUM]], [[VAL_PHI]]
; CHECK-NEXT: [[EXIT:%.*]] = icmp eq i64 [[IV]], 4095
; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]]
; CHECK: loop_exit:
; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i16 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i16 [[ACCUM_NEXT_LCSSA]]
;
entry:
  %alloca = alloca [163840 x i16], align 4
  call void @init(ptr %alloca)
  br label %loop
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %accum = phi i16 [ 0, %entry ], [ %accum.next, %latch ]
  %iv.next = add i64 %iv, 1
  %test_addr = getelementptr inbounds i8, ptr %test_base, i64 %iv
  %l.t = load i8, ptr %test_addr
  %cmp = icmp sge i8 %l.t, 0
  br i1 %cmp, label %pred, label %latch
pred:
  ; 2-byte element load that nevertheless claims 4-byte alignment.
  %addr = getelementptr inbounds i16, ptr %alloca, i64 %iv
  %val = load i16, ptr %addr, align 4
  br label %latch
latch:
  %val.phi = phi i16 [0, %loop], [%val, %pred]
  %accum.next = add i16 %accum, %val.phi
  %exit = icmp eq i64 %iv, 4095
  br i1 %exit, label %loop_exit, label %loop

loop_exit:
  ret i16 %accum.next
}
|
|
|
|
; Test case where the predicated load in the loop has an access size of 4 and
; an alignment of 4, but the start pointer is offset by 1.
; %start is %alloca (align 4) advanced by one byte, so the i32 accesses based
; on it are not actually 4-byte aligned. The expected output keeps the load
; inside the pred.load.if blocks. The loop runs a constant 4096 iterations,
; so entry branches straight to vector.ph.
define i32 @test_access_size_multiple_of_align_but_offset_by_1(i64 %len, ptr %test_base) {
; CHECK-LABEL: @test_access_size_multiple_of_align_but_offset_by_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [163840 x i32], align 4
; CHECK-NEXT: call void @init(ptr [[ALLOCA]])
; CHECK-NEXT: [[START:%.*]] = getelementptr i8, ptr [[ALLOCA]], i64 1
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_LOAD_CONTINUE2]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[START]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[START]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP18]], <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP15]])
; CHECK-NEXT: br label [[LOOP_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[IV]]
; CHECK-NEXT: [[L_T:%.*]] = load i8, ptr [[TEST_ADDR]], align 1
; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[L_T]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[PRED:%.*]], label [[LATCH]]
; CHECK: pred:
; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[START]], i64 [[IV]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
; CHECK-NEXT: [[EXIT:%.*]] = icmp eq i64 [[IV]], 4095
; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]]
; CHECK: loop_exit:
; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]]
;
entry:
  %alloca = alloca [163840 x i32], align 4
  call void @init(ptr %alloca)
  ; Byte-wise GEP: move the base one byte past the 4-byte-aligned alloca, as
  ; the test name and description require.
  %start = getelementptr i8, ptr %alloca, i64 1
  br label %loop
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
  %iv.next = add i64 %iv, 1
  %test_addr = getelementptr inbounds i8, ptr %test_base, i64 %iv
  %l.t = load i8, ptr %test_addr
  %cmp = icmp sge i8 %l.t, 0
  br i1 %cmp, label %pred, label %latch
pred:
  %addr = getelementptr inbounds i32, ptr %start, i64 %iv
  %val = load i32, ptr %addr, align 4
  br label %latch
latch:
  %val.phi = phi i32 [0, %loop], [%val, %pred]
  %accum.next = add i32 %accum, %val.phi
  %exit = icmp eq i64 %iv, 4095
  br i1 %exit, label %loop_exit, label %loop

loop_exit:
  ret i32 %accum.next
}
|
|
|
|
|
|
; Test a loop whose i8 induction variable %ind is zero-extended and compared
; against %end.clamped (%end & 1023), a bound that can exceed the i8 range.
; The expected output therefore contains a vector.scevcheck block in addition
; to the minimum-iteration check. In the vector body the conditional load
; from %p2 is expected as an unconditional wide load, while the store to
; %dest stays inside the pred.store.if blocks.
define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK-LABEL: @loop_requires_scev_predicate(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT: call void @init(ptr [[P1]])
; CHECK-NEXT: call void @init(ptr [[P2]])
; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END:%.*]], 1023
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[END]] to i10
; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64
; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1)
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP9]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x i32>, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 1
; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[DOWORK:%.*]] = icmp ne i32 [[TMP26]], 0
; CHECK-NEXT: br i1 [[DOWORK]], label [[FOR_DOWORK:%.*]], label [[FOR_INC]]
; CHECK: for.dowork:
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]]
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP26]], [[TMP27]]
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[GEP_IND]]
; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret i32 0
;
entry:
  %p1 = alloca [1024 x i32]
  %p2 = alloca [1024 x i32]
  call void @init(ptr %p1)
  call void @init(ptr %p2)
  %end.clamped = and i32 %end, 1023
  br label %for.body

for.body:
  ; %ind is the narrow i8 counter used for the exit test; %gep.ind is the
  ; i64 counter used for addressing.
  %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
  %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
  %0 = load i32, ptr %arrayidx, align 4
  %dowork = icmp ne i32 %0, 0
  br i1 %dowork, label %for.dowork, label %for.inc

for.dowork:
  %arrayidx3 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
  %1 = load i32, ptr %arrayidx3, align 4
  %add = add i32 %0, %1
  %arrayidx5 = getelementptr inbounds i32, ptr %dest, i64 %gep.ind
  store i32 %add, ptr %arrayidx5, align 4
  br label %for.inc

for.inc:
  %ind.next = add i8 %ind, 1
  %conv = zext i8 %ind.next to i32
  %gep.ind.next = add i64 %gep.ind, 1
  %cmp = icmp ult i32 %conv, %end.clamped
  br i1 %cmp, label %for.body, label %exit

exit:
  ret i32 0
}
|
|
|
|
|
|
; Test reverse loops where we should be able to prove loads in predicated blocks
; are safe to load unconditionally.
; %iv counts down from 1023 to 0 and indexes the 1024-element local arrays
; directly. The expected output performs the conditional load from
; %local_src as a reversed wide load in vector.body; only the store to
; %local_dest stays inside the pred.store.if blocks.
define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
; CHECK-LABEL: @test_rev_loops_deref_loads(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOCAL_DEST:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT: [[LOCAL_SRC:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT: [[LOCAL_CMP:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT: call void @init(ptr [[LOCAL_SRC]])
; CHECK-NEXT: call void @init(ptr [[LOCAL_CMP]])
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 -1
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP11]], 2
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[TMP16]], 2
; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1023, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[IV]]
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP19]], 3
; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[IV]]
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP20]], 2
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[IV]]
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false)
; CHECK-NEXT: ret void
;
entry:
  %local_dest = alloca [1024 x i32], align 4
  %local_src = alloca [1024 x i32], align 4
  %local_cmp = alloca [1024 x i32], align 4
  call void @init(ptr %local_src)
  call void @init(ptr %local_cmp)
  br label %for.body

for.body:
  %iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds [1024 x i32], ptr %local_cmp, i64 0, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %cmp3.not = icmp eq i32 %0, 3
  br i1 %cmp3.not, label %for.inc, label %if.then

if.then:
  ; Index %iv stays within [0, 1023], i.e. inside the 1024-element array.
  %arrayidx5 = getelementptr inbounds [1024 x i32], ptr %local_src, i64 0, i64 %iv
  %1 = load i32, ptr %arrayidx5, align 4
  %mul = shl nsw i32 %1, 2
  %arrayidx7 = getelementptr inbounds [1024 x i32], ptr %local_dest, i64 0, i64 %iv
  store i32 %mul, ptr %arrayidx7, align 4
  br label %for.inc

for.inc:
  %iv.next = add nsw i64 %iv, -1
  %cmp2.not = icmp eq i64 %iv, 0
  br i1 %cmp2.not, label %exit, label %for.body

exit:
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %local_dest, i64 1024, i1 false)
  ret void
}
|
|
|
|
|
|
; Test reverse loops where we *cannot* prove loads in predicated blocks are safe
; to load unconditionally.
; Same shape as the dereferenceable variant, but the arrays are indexed with
; %off = %iv - 1, which is -1 on the final iteration (%iv == 0). The expected
; output keeps the load from %local_src inside the pred.store.if blocks.
define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %dest) {
; CHECK-LABEL: @test_rev_loops_non_deref_loads(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOCAL_DEST:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT: [[LOCAL_SRC:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT: [[LOCAL_CMP:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT: call void @init(ptr [[LOCAL_SRC]])
; CHECK-NEXT: call void @init(ptr [[LOCAL_CMP]])
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1023, i64 1022>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 -1)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 -1
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3)
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = shl nsw i32 [[TMP10]], 2
; CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.if1:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = shl nsw i32 [[TMP17]], 2
; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2)
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1023, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[OFF:%.*]] = add i64 [[IV]], -1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFF]]
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP22]], 3
; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[OFF]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP23]], 2
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFF]]
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false)
; CHECK-NEXT: ret void
;
entry:
  %local_dest = alloca [1024 x i32], align 4
  %local_src = alloca [1024 x i32], align 4
  %local_cmp = alloca [1024 x i32], align 4
  call void @init(ptr %local_src)
  call void @init(ptr %local_cmp)
  br label %for.body

for.body:
  %iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.inc ]
  ; Shifted index: ranges over 1022 .. -1 as %iv goes 1023 .. 0.
  %off = add i64 %iv, -1
  %arrayidx = getelementptr inbounds [1024 x i32], ptr %local_cmp, i64 0, i64 %off
  %0 = load i32, ptr %arrayidx, align 4
  %cmp3.not = icmp eq i32 %0, 3
  br i1 %cmp3.not, label %for.inc, label %if.then

if.then:
  %arrayidx5 = getelementptr inbounds [1024 x i32], ptr %local_src, i64 0, i64 %off
  %1 = load i32, ptr %arrayidx5, align 4
  %mul = shl nsw i32 %1, 2
  %arrayidx7 = getelementptr inbounds [1024 x i32], ptr %local_dest, i64 0, i64 %off
  store i32 %mul, ptr %arrayidx7, align 4
  br label %for.inc

for.inc:
  %iv.next = add nsw i64 %iv, -1
  %cmp2.not = icmp eq i64 %iv, 0
  br i1 %cmp2.not, label %exit, label %for.body

exit:
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %local_dest, i64 1024, i1 false)
  ret void
}
|
|
|
|
|
|
; Test a loop with a positive step recurrence that has a strided access.
;
; %iv runs from 0 up to and including 4095. When the byte loaded from
; %test_base is non-negative, the loop reads %alloca at index 2 * %iv, so the
; largest index touched is 8190, well inside the 163840-element alloca.
;
; In the expected output below, vector.body performs both lane loads from
; %alloca unconditionally and picks between the loaded values and zero with a
; select. The entry block branches straight to vector.ph and middle.block
; branches straight to loop_exit; the scalar loop blocks are still printed,
; but no branch in the expected output targets scalar.ph.
define i16 @test_strided_access(i64 %len, ptr %test_base) {
; CHECK-LABEL: @test_strided_access(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [163840 x i16], align 4
; CHECK-NEXT:    call void @init(ptr [[ALLOCA]])
; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT:    [[TMP3:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[TMP6]], align 2
; CHECK-NEXT:    [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 2
; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i16> poison, i16 [[TMP9]], i32 0
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x i16> [[TMP11]], i16 [[TMP10]], i32 1
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> [[TMP12]], <2 x i16> zeroinitializer
; CHECK-NEXT:    [[TMP13]] = add <2 x i16> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP15:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP13]])
; CHECK-NEXT:    br label [[LOOP_EXIT:%.*]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[IV]]
; CHECK-NEXT:    [[L_T:%.*]] = load i8, ptr [[TEST_ADDR]], align 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i8 [[L_T]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[PRED:%.*]], label [[LATCH]]
; CHECK:       pred:
; CHECK-NEXT:    [[IV_STRIDE:%.*]] = mul i64 [[IV]], 2
; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[IV_STRIDE]]
; CHECK-NEXT:    [[VAL:%.*]] = load i16, ptr [[ADDR]], align 2
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       latch:
; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i16 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
; CHECK-NEXT:    [[ACCUM_NEXT]] = add i16 [[ACCUM]], [[VAL_PHI]]
; CHECK-NEXT:    [[EXIT:%.*]] = icmp eq i64 [[IV]], 4095
; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]]
; CHECK:       loop_exit:
; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i16 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i16 [[ACCUM_NEXT_LCSSA]]
;
entry:
  %alloca = alloca [163840 x i16], align 4
  call void @init(ptr %alloca)
  br label %loop
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %accum = phi i16 [ 0, %entry ], [ %accum.next, %latch ]
  %iv.next = add i64 %iv, 1
  %test_addr = getelementptr inbounds i8, ptr %test_base, i64 %iv
  %l.t = load i8, ptr %test_addr
  %cmp = icmp sge i8 %l.t, 0
  br i1 %cmp, label %pred, label %latch
pred:
  ; Strided index: every other i16 element of %alloca is read.
  %iv.stride = mul i64 %iv, 2
  %addr = getelementptr inbounds i16, ptr %alloca, i64 %iv.stride
  %val = load i16, ptr %addr, align 2
  br label %latch
latch:
  %val.phi = phi i16 [0, %loop], [%val, %pred]
  %accum.next = add i16 %accum, %val.phi
  ; The comparison uses %iv rather than %iv.next, so the body runs 4096 times.
  %exit = icmp eq i64 %iv, 4095
  br i1 %exit, label %loop_exit, label %loop

loop_exit:
  ret i16 %accum.next
}
|
|
|
|
|
|
; Test a loop with a negative step recurrence that has a strided access.
;
; %iv counts down from 511 to 0. When the element of %local_cmp at index %iv
; differs from 3, the loop loads %local_src at index 2 * %iv (at most 1022,
; inside the 1024-element alloca), shifts the value left by 2 and stores it to
; %local_dest at index %iv.
;
; In the expected output below, the two lane loads from %local_src are issued
; unconditionally in vector.body, while the stores stay inside the
; pred.store.if blocks. The entry block branches straight to vector.ph and
; middle.block branches straight to exit; the scalar loop blocks are still
; printed, but no branch in the expected output targets scalar.ph.
define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly %dest) {
; CHECK-LABEL: @test_rev_loops_strided_deref_loads(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LOCAL_DEST:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT:    [[LOCAL_SRC:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT:    [[LOCAL_CMP:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT:    call void @init(ptr [[LOCAL_SRC]])
; CHECK-NEXT:    call void @init(ptr [[LOCAL_CMP]])
; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 511, i64 510>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 511, [[INDEX]]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3)
; CHECK-NEXT:    [[TMP6:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP12]], i32 1
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
; CHECK-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK:       pred.store.if:
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
; CHECK-NEXT:    [[TMP15:%.*]] = shl nsw i32 [[TMP11]], 2
; CHECK-NEXT:    store i32 [[TMP15]], ptr [[TMP14]], align 4
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
; CHECK:       pred.store.continue:
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK:       pred.store.if1:
; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -1
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = shl nsw i32 [[TMP12]], 2
; CHECK-NEXT:    store i32 [[TMP19]], ptr [[TMP18]], align 4
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
; CHECK:       pred.store.continue2:
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2)
; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[EXIT:%.*]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 511, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[IV]]
; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP21]], 3
; CHECK-NEXT:    br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[IV_STRIDED:%.*]] = mul i64 [[IV]], 2
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[IV_STRIDED]]
; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP22]], 2
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[IV]]
; CHECK-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4
; CHECK-NEXT:    br label [[FOR_INC]]
; CHECK:       for.inc:
; CHECK-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
; CHECK-NEXT:    [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT:    br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]]
; CHECK:       exit:
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false)
; CHECK-NEXT:    ret void
;
entry:
  %local_dest = alloca [1024 x i32], align 4
  %local_src = alloca [1024 x i32], align 4
  %local_cmp = alloca [1024 x i32], align 4
  call void @init(ptr %local_src)
  call void @init(ptr %local_cmp)
  br label %for.body

for.body:
  %iv = phi i64 [ 511, %entry ], [ %iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds [1024 x i32], ptr %local_cmp, i64 0, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %cmp3.not = icmp eq i32 %0, 3
  br i1 %cmp3.not, label %for.inc, label %if.then

if.then:
  ; Strided index into %local_src; the store below uses the unscaled %iv.
  %iv.strided = mul i64 %iv, 2
  %arrayidx5 = getelementptr inbounds [1024 x i32], ptr %local_src, i64 0, i64 %iv.strided
  %1 = load i32, ptr %arrayidx5, align 4
  %mul = shl nsw i32 %1, 2
  %arrayidx7 = getelementptr inbounds [1024 x i32], ptr %local_dest, i64 0, i64 %iv
  store i32 %mul, ptr %arrayidx7, align 4
  br label %for.inc

for.inc:
  %iv.next = add nsw i64 %iv, -1
  ; The comparison uses %iv rather than %iv.next, so index 0 is still processed.
  %cmp2.not = icmp eq i64 %iv, 0
  br i1 %cmp2.not, label %exit, label %for.body

exit:
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %local_dest, i64 1024, i1 false)
  ret void
}
|
|
|
|
; Loop whose induction variable starts at 1 and runs until %iv.next equals
; zext(%n); the loop is only entered when %n >= 1. Each iteration loads the
; element of %A at index %iv; when it is non-zero, an index is loaded from %B
; at index %iv, sign-extended, and 0 is stored to %C at that index.
;
; %B is a 62-element alloca while the trip count depends on %n. In the
; expected output below, the load from %B stays inside the pred.load.if
; blocks and the store to %C stays inside the pred.store.if blocks. The
; scalar loop remains reachable here through the minimum-iteration test in
; ph and the remainder test in middle.block.
;
; NOTE(review): the function name suggests the case being guarded is the
; start offset being added during the dereferenceability reasoning; confirm
; against the change that introduced this test.
define void @adding_offset_overflows(i32 %n, ptr %A) {
; CHECK-LABEL: @adding_offset_overflows(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B:%.*]] = alloca [62 x i32], align 4
; CHECK-NEXT:    [[C:%.*]] = alloca [144 x i32], align 4
; CHECK-NEXT:    call void @init(ptr [[B]])
; CHECK-NEXT:    call void @init(ptr [[C]])
; CHECK-NEXT:    [[PRE:%.*]] = icmp slt i32 [[N:%.*]], 1
; CHECK-NEXT:    br i1 [[PRE]], label [[EXIT:%.*]], label [[PH:%.*]]
; CHECK:       ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 1, [[N_VEC]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP19:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP18]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK:       pred.load.if1:
; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP13]], i32 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
; CHECK:       pred.load.continue2:
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = sext <2 x i32> [[WIDE_LOAD1]] to <2 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK:       pred.store.if:
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[C]], i64 [[TMP7]]
; CHECK-NEXT:    store i32 0, ptr [[TMP8]], align 4
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
; CHECK:       pred.store.continue:
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; CHECK:       pred.store.if3:
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[C]], i64 [[TMP10]]
; CHECK-NEXT:    store i32 0, ptr [[TMP11]], align 4
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE3]]
; CHECK:       pred.store.continue4:
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 1, [[PH]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i32 [[L_A]], 0
; CHECK-NEXT:    br i1 [[C_1]], label [[LOOP_LATCH]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT:    [[L_IDX:%.*]] = load i32, ptr [[GEP_B]], align 4
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[L_IDX]] to i64
; CHECK-NEXT:    [[GEP_C:%.*]] = getelementptr i32, ptr [[C]], i64 [[IDX_EXT]]
; CHECK-NEXT:    store i32 0, ptr [[GEP_C]], align 4
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK:       exit.loopexit:
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %B = alloca [62 x i32], align 4
  %C = alloca [144 x i32], align 4
  call void @init(ptr %B)
  call void @init(ptr %C)
  ; Skip the loop entirely when %n is less than 1.
  %pre = icmp slt i32 %n, 1
  br i1 %pre, label %exit, label %ph

ph:
  %wide.trip.count = zext i32 %n to i64
  br label %loop.header

loop.header:
  ; The induction variable starts at 1, not 0.
  %iv = phi i64 [ 1, %ph ], [ %iv.next, %loop.latch ]
  %gep.A = getelementptr i32, ptr %A, i64 %iv
  %l.A = load i32, ptr %gep.A, align 4
  %c.1 = icmp eq i32 %l.A, 0
  br i1 %c.1, label %loop.latch, label %if.then

if.then:
  ; Conditional load from %B; its result is used as the index into %C.
  %gep.B = getelementptr i32, ptr %B, i64 %iv
  %l.idx = load i32, ptr %gep.B, align 4
  %idx.ext = sext i32 %l.idx to i64
  %gep.C = getelementptr i32, ptr %C, i64 %idx.ext
  store i32 0, ptr %gep.C, align 4
  br label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %wide.trip.count
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}
|