Florian Hahn 50b9ca4dda
[VPlan] Simplify Plan's entry in removeBranchOnConst. (#154510)
After https://github.com/llvm/llvm-project/pull/153643, there may be a
BranchOnCond with constant condition in the entry block.

Simplify those in removeBranchOnConst. This removes a number of
redundant conditional branches from entry blocks.

In some cases, it may also make the original scalar loop unreachable,
because we know it will never execute. In that case, we need to remove
the loop from LoopInfo, because all unreachable blocks may dominate each
other, making LoopInfo invalid. In those cases, we can also completely
remove the loop, for which I'll share a follow-up patch.

Depends on https://github.com/llvm/llvm-project/pull/153643.

PR: https://github.com/llvm/llvm-project/pull/154510
2025-09-18 19:25:05 +01:00

678 lines
31 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
; The scalar loop multiplies every i32 element of %A by 3 using `mul nsw`,
; iterating until %iv.next equals %n. The expected vector body contains a
; `mul nsw <4 x i32>`, i.e. the nsw flag of the scalar multiply is kept on the
; widened instruction.
define void @flags1(i64 %n, ptr nocapture %A) {
; CHECK-LABEL: define void @flags1(
; CHECK-SAME: i64 [[N:%.*]], ptr captures(none) [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i32 [[TMP4]], 3
; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%2 = getelementptr inbounds i32, ptr %A, i64 %iv
%3 = load i32, ptr %2, align 4
; The nsw flag on this multiply is the flag under test.
%4 = mul nsw i32 %3, 3
store i32 %4, ptr %2, align 4
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv.next, %n
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Same loop shape as @flags1, but the scalar multiply carries no wrap flags.
; The expected vector body contains a plain `mul <4 x i32>`: no flag is added
; to the widened instruction.
define void @flags2(i64 %n, ptr %A) {
; CHECK-LABEL: define void @flags2(
; CHECK-SAME: i64 [[N:%.*]], ptr [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 3
; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%2 = getelementptr inbounds i32, ptr %A, i64 %iv
%3 = load i32, ptr %2, align 4
; Deliberately flag-free, in contrast to the `mul nsw` in @flags1.
%4 = mul i32 %3, 3
store i32 %4, ptr %2, align 4
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv.next, %n
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Make sure we copy fast math flags and use them for the final reduction.
; The scalar loop sums 256 floats from %s with `fadd fast`. The expected output
; has `fadd fast <4 x float>` in the vector body and a
; `call fast float @llvm.vector.reduce.fadd.v4f32` in the middle block.
; Because the trip count is the constant 256, the expected entry block
; branches unconditionally to the vector preheader and the middle block
; branches unconditionally to the exit block.
define float @fast_math(ptr noalias %s) {
; CHECK-LABEL: define float @fast_math(
; CHECK-SAME: ptr noalias [[S:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[S]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP1]])
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[S]], i64 [[IV]]
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD]] = fadd fast float [[RED]], [[TMP4]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 256
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[LOOP]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[ADD_LCSSA]]
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%red = phi float [ 0.000000e+00, %entry ], [ %add, %loop ]
%arrayidx = getelementptr inbounds float, ptr %s, i64 %iv
%0 = load float, ptr %arrayidx, align 4
; The `fast` flags on this reduction update are the flags under test.
%add = fadd fast float %red, %0
%iv.next = add nuw nsw i64 %iv, 1
%ec = icmp eq i64 %iv.next, 256
br i1 %ec, label %exit, label %loop
exit:
%add.lcssa = phi float [ %add, %loop ]
ret float %add.lcssa
}
; The load address is computed with `getelementptr nusw` and the store address
; with `getelementptr nusw nuw`, both from the same base %A and index %iv.
; The expected vector body contains a single `getelementptr nusw` feeding both
; the wide load and the wide store: only the flag common to both scalar GEPs
; (nusw) is kept, and nuw is dropped.
define void @gep_with_shared_nusw_and_others(i64 %n, ptr %A) {
; CHECK-LABEL: define void @gep_with_shared_nusw_and_others(
; CHECK-SAME: i64 [[N:%.*]], ptr [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: store <4 x float> [[WIDE_LOAD]], ptr [[TMP1]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_NUSW:%.*]] = getelementptr nusw float, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_NUSW]], align 4
; CHECK-NEXT: [[GEP_NUSW_NUW:%.*]] = getelementptr nusw nuw float, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: store float [[L]], ptr [[GEP_NUSW_NUW]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.nusw = getelementptr nusw float, ptr %A, i64 %iv
%l = load float, ptr %gep.nusw, align 4
%gep.nusw.nuw = getelementptr nusw nuw float, ptr %A, i64 %iv
store float %l, ptr %gep.nusw.nuw, align 4
%iv.next = add i64 %iv, 1
; The exit compare uses %iv (not %iv.next), so the loop runs %n + 1 times;
; this is why the expected entry block computes `add i64 %n, 1`.
%ec = icmp eq i64 %iv, %n
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Two shifts of the same loaded value by the same amount, where only the first
; one is `exact`. The expected vector body contains a single
; `lshr <4 x i32>` without `exact`, whose result is stored to both %A and %B:
; a flag present on only one of the two merged instructions is dropped.
define void @exact_on_first_shift(ptr noalias %A, ptr noalias %B) {
; CHECK-LABEL: define void @exact_on_first_shift(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[WIDE_LOAD]], splat (i32 10)
; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 128, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
; CHECK-NEXT: [[LSHR_1:%.*]] = lshr exact i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[LSHR_1]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[LSHR_2:%.*]] = lshr i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[LSHR_2]], ptr [[GEP_B]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i64 %iv
%l.A = load i32, ptr %gep.A, align 4
%lshr.1 = lshr exact i32 %l.A, 10
store i32 %lshr.1, ptr %gep.A, align 4
%gep.B = getelementptr i32, ptr %B, i64 %iv
%lshr.2 = lshr i32 %l.A, 10
store i32 %lshr.2, ptr %gep.B, align 4
%iv.next = add i64 %iv, 1
; Exits when %iv (not %iv.next) reaches 128, i.e. after 129 iterations; the
; expected scalar loop therefore resumes at 128 for the one leftover iteration.
%ec = icmp eq i64 %iv, 128
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Mirror image of @exact_on_first_shift: only the second of the two otherwise
; identical shifts is `exact`. The expected vector body again contains a single
; `lshr <4 x i32>` without `exact`, stored to both %A and %B.
define void @exact_on_second_shift(ptr noalias %A, ptr noalias %B) {
; CHECK-LABEL: define void @exact_on_second_shift(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[WIDE_LOAD]], splat (i32 10)
; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 128, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
; CHECK-NEXT: [[LSHR_1:%.*]] = lshr i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[LSHR_1]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[LSHR_2:%.*]] = lshr exact i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[LSHR_2]], ptr [[GEP_B]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i64 %iv
%l.A = load i32, ptr %gep.A, align 4
%lshr.1 = lshr i32 %l.A, 10
store i32 %lshr.1, ptr %gep.A, align 4
%gep.B = getelementptr i32, ptr %B, i64 %iv
%lshr.2 = lshr exact i32 %l.A, 10
store i32 %lshr.2, ptr %gep.B, align 4
%iv.next = add i64 %iv, 1
; Exits when %iv (not %iv.next) reaches 128, i.e. after 129 iterations.
%ec = icmp eq i64 %iv, 128
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Both identical shifts carry `exact`. The expected vector body contains a
; single `lshr exact <4 x i32>`, stored to both %A and %B: a flag present on
; both merged instructions is kept.
define void @exact_on_both_shifts(ptr noalias %A, ptr noalias %B) {
; CHECK-LABEL: define void @exact_on_both_shifts(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact <4 x i32> [[WIDE_LOAD]], splat (i32 10)
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 128, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
; CHECK-NEXT: [[LSHR_1:%.*]] = lshr exact i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[LSHR_1]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[LSHR_2:%.*]] = lshr exact i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[LSHR_2]], ptr [[GEP_B]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i64 %iv
%l.A = load i32, ptr %gep.A, align 4
%lshr.1 = lshr exact i32 %l.A, 10
store i32 %lshr.1, ptr %gep.A, align 4
%gep.B = getelementptr i32, ptr %B, i64 %iv
%lshr.2 = lshr exact i32 %l.A, 10
store i32 %lshr.2, ptr %gep.B, align 4
%iv.next = add i64 %iv, 1
; Exits when %iv (not %iv.next) reaches 128, i.e. after 129 iterations.
%ec = icmp eq i64 %iv, 128
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Two `or` instructions with identical operands, where only the first one is
; `disjoint`. The expected vector body contains a single `or <4 x i32>` without
; `disjoint`, stored to both %A and %B: the flag that is not shared is dropped.
define void @disjoint_on_first_or(ptr noalias %A, ptr noalias %B) {
; CHECK-LABEL: define void @disjoint_on_first_or(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[WIDE_LOAD]], splat (i32 10)
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 128, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[OR_1]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[OR_2:%.*]] = or i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[OR_2]], ptr [[GEP_B]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i64 %iv
%l.A = load i32, ptr %gep.A, align 4
%or.1 = or disjoint i32 %l.A, 10
store i32 %or.1, ptr %gep.A, align 4
%gep.B = getelementptr i32, ptr %B, i64 %iv
%or.2 = or i32 %l.A, 10
store i32 %or.2, ptr %gep.B, align 4
%iv.next = add i64 %iv, 1
; Exits when %iv (not %iv.next) reaches 128, i.e. after 129 iterations.
%ec = icmp eq i64 %iv, 128
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Mirror image of @disjoint_on_first_or: only the second of the two otherwise
; identical `or` instructions is `disjoint`. The expected vector body again
; contains a single `or <4 x i32>` without `disjoint`.
define void @disjoint_on_second_or(ptr noalias %A, ptr noalias %B) {
; CHECK-LABEL: define void @disjoint_on_second_or(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[WIDE_LOAD]], splat (i32 10)
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 128, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
; CHECK-NEXT: [[OR_1:%.*]] = or i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[OR_1]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[OR_2:%.*]] = or disjoint i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[OR_2]], ptr [[GEP_B]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i64 %iv
%l.A = load i32, ptr %gep.A, align 4
%or.1 = or i32 %l.A, 10
store i32 %or.1, ptr %gep.A, align 4
%gep.B = getelementptr i32, ptr %B, i64 %iv
%or.2 = or disjoint i32 %l.A, 10
store i32 %or.2, ptr %gep.B, align 4
%iv.next = add i64 %iv, 1
; Exits when %iv (not %iv.next) reaches 128, i.e. after 129 iterations.
%ec = icmp eq i64 %iv, 128
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Both identical `or` instructions carry `disjoint`. The expected vector body
; contains a single `or disjoint <4 x i32>`, stored to both %A and %B: a flag
; present on both merged instructions is kept.
define void @disjoint_on_both_or(ptr noalias %A, ptr noalias %B) {
; CHECK-LABEL: define void @disjoint_on_both_or(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], splat (i32 10)
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 128, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[OR_1]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[OR_2:%.*]] = or disjoint i32 [[L_A]], 10
; CHECK-NEXT: store i32 [[OR_2]], ptr [[GEP_B]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i64 %iv
%l.A = load i32, ptr %gep.A, align 4
%or.1 = or disjoint i32 %l.A, 10
store i32 %or.1, ptr %gep.A, align 4
%gep.B = getelementptr i32, ptr %B, i64 %iv
%or.2 = or disjoint i32 %l.A, 10
store i32 %or.2, ptr %gep.B, align 4
%iv.next = add i64 %iv, 1
; Exits when %iv (not %iv.next) reaches 128, i.e. after 129 iterations.
%ec = icmp eq i64 %iv, 128
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Two truncs of the same loaded value, one `nsw` and one `nuw`, so they share
; no flag. The expected vector body contains a single flag-free
; `trunc <4 x i32> ... to <4 x i16>` whose result is stored to both %B and %C.
;
; The second store previously wrote to %gep.B again, which left %gep.C (and
; the %C argument) unused and made the first store redundant. It now writes to
; %gep.C. The expected output below was adjusted by hand to match; re-run
; utils/update_test_checks.py to confirm it.
define void @trunc_flags_no_common(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
; CHECK-LABEL: define void @trunc_flags_no_common(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[WIDE_LOAD]] to <4 x i16>
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 128, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
; CHECK-NEXT: [[TRUNC_1:%.*]] = trunc nsw i32 [[L_A]] to i16
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i16, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: store i16 [[TRUNC_1]], ptr [[GEP_B]], align 4
; CHECK-NEXT: [[TRUNC_2:%.*]] = trunc nuw i32 [[L_A]] to i16
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr i16, ptr [[C]], i64 [[IV]]
; CHECK-NEXT: store i16 [[TRUNC_2]], ptr [[GEP_C]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i64 %iv
%l.A = load i32, ptr %gep.A, align 4
%trunc.1 = trunc nsw i32 %l.A to i16
%gep.B = getelementptr i16, ptr %B, i64 %iv
store i16 %trunc.1, ptr %gep.B, align 4
%trunc.2 = trunc nuw i32 %l.A to i16
%gep.C = getelementptr i16, ptr %C, i64 %iv
; Store the second trunc through %gep.C so each trunc has its own destination.
store i16 %trunc.2, ptr %gep.C, align 4
%iv.next = add i64 %iv, 1
; Exits when %iv (not %iv.next) reaches 128, i.e. after 129 iterations.
%ec = icmp eq i64 %iv, 128
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Two truncs of the same loaded value, one `nuw` and one `nuw nsw`, so `nuw`
; is the only flag they share. The expected vector body contains a single
; `trunc nuw <4 x i32> ... to <4 x i16>` whose result is stored to both %B
; and %C.
;
; The second store previously wrote to %gep.B again, which left %gep.C (and
; the %C argument) unused and made the first store redundant. It now writes to
; %gep.C. The expected output below was adjusted by hand to match; re-run
; utils/update_test_checks.py to confirm it.
define void @trunc_flags_common(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
; CHECK-LABEL: define void @trunc_flags_common(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw <4 x i32> [[WIDE_LOAD]] to <4 x i16>
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 128, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
; CHECK-NEXT: [[TRUNC_1:%.*]] = trunc nuw i32 [[L_A]] to i16
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i16, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: store i16 [[TRUNC_1]], ptr [[GEP_B]], align 4
; CHECK-NEXT: [[TRUNC_2:%.*]] = trunc nuw nsw i32 [[L_A]] to i16
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr i16, ptr [[C]], i64 [[IV]]
; CHECK-NEXT: store i16 [[TRUNC_2]], ptr [[GEP_C]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i64 %iv
%l.A = load i32, ptr %gep.A, align 4
%trunc.1 = trunc nuw i32 %l.A to i16
%gep.B = getelementptr i16, ptr %B, i64 %iv
store i16 %trunc.1, ptr %gep.B, align 4
%trunc.2 = trunc nuw nsw i32 %l.A to i16
%gep.C = getelementptr i16, ptr %C, i64 %iv
; Store the second trunc through %gep.C so each trunc has its own destination.
store i16 %trunc.2, ptr %gep.C, align 4
%iv.next = add i64 %iv, 1
; Exits when %iv (not %iv.next) reaches 128, i.e. after 129 iterations.
%ec = icmp eq i64 %iv, 128
br i1 %ec, label %exit, label %loop
exit:
ret void
}