Luke Lau 3482a9c6cb
[VPlan] Explicitly reassociate header mask in logical and (#180898)
We reassociate ((x && y) && z) -> (x && (y && z)) if x has more than
use, in order to allow simplifying the header mask further. However this
is somewhat unreliable as there are times when it doesn't have more than
one use, e.g. see the case we run into in
https://github.com/llvm/llvm-project/pull/173265/changes#r2769759907.

This moves it into a separate transformation that always reassociates
the header mask regardless of the number of uses, which prevents some
fragile test changes in #173265.

We need to run it before both calls to simplifyRecipes in optimize. I
considered putting it in simplifyRecipes itself but simplifyRecipes is
also called after unrolling and when the loop region is dissolved which
causes vputils::findHeaderMask to assert.

There isn't really any benefit to reassociating masks that aren't the
header mask so the existing simplification was removed.
2026-02-12 14:56:15 +00:00

1458 lines
78 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -mcpu=skylake-avx512 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=COST %s
; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -mcpu=skylake-avx512 -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=FORCED %s
define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_default_to_latch_common_dest(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COST: [[VECTOR_PH]]:
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; COST-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
; COST: [[VECTOR_BODY]]:
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; COST-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP7]], [[TMP8]]
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP10]])
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COST-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COST-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; COST: [[MIDDLE_BLOCK]]:
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COST: [[SCALAR_PH]]:
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: switch i64 [[L]], label %[[LOOP_LATCH]] [
; COST-NEXT: i64 -12, label %[[IF_THEN:.*]]
; COST-NEXT: i64 13, label %[[IF_THEN]]
; COST-NEXT: ]
; COST: [[IF_THEN]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @switch_default_to_latch_common_dest(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP15]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[TMP8]], <4 x i1> [[TMP16]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: switch i64 [[L]], label %[[LOOP_LATCH]] [
; FORCED-NEXT: i64 -12, label %[[IF_THEN:.*]]
; FORCED-NEXT: i64 13, label %[[IF_THEN]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
%l = load i64, ptr %ptr.iv, align 1
switch i64 %l, label %loop.latch [
i64 -12, label %if.then
i64 13, label %if.then
]
if.then:
store i64 42, ptr %ptr.iv, align 1
br label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
%ec = icmp eq ptr %ptr.iv.next, %end
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_default_to_latch_common_dest_using_branches(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COST: [[VECTOR_PH]]:
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; COST-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
; COST: [[VECTOR_BODY]]:
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; COST-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; COST-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; COST-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP10]], [[TMP7]]
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP11]])
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COST-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COST-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; COST: [[MIDDLE_BLOCK]]:
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COST: [[SCALAR_PH]]:
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: [[EQ_1:%.*]] = icmp eq i64 [[L]], -12
; COST-NEXT: [[EQ_2:%.*]] = icmp eq i64 [[L]], 13
; COST-NEXT: br i1 [[EQ_1]], label %[[IF_THEN:.*]], label %[[ELSE:.*]]
; COST: [[IF_THEN]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[ELSE]]:
; COST-NEXT: br i1 [[EQ_2]], label %[[IF_THEN]], label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @switch_default_to_latch_common_dest_using_branches(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true)
; FORCED-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
; FORCED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP9]]
; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP10]]
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP17]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[TMP8]], <4 x i1> [[TMP18]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: [[EQ_1:%.*]] = icmp eq i64 [[L]], -12
; FORCED-NEXT: [[EQ_2:%.*]] = icmp eq i64 [[L]], 13
; FORCED-NEXT: br i1 [[EQ_1]], label %[[IF_THEN:.*]], label %[[ELSE:.*]]
; FORCED: [[IF_THEN]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[ELSE]]:
; FORCED-NEXT: br i1 [[EQ_2]], label %[[IF_THEN]], label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
%l = load i64, ptr %ptr.iv, align 1
%eq.1 = icmp eq i64 %l, -12
%eq.2 = icmp eq i64 %l, 13
br i1 %eq.1, label %if.then, label %else
if.then:
store i64 42, ptr %ptr.iv, align 1
br label %loop.latch
else:
br i1 %eq.2, label %if.then, label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
%ec = icmp eq ptr %ptr.iv.next, %end
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
; TODO: Instead of using masked stores, the store can be sunk, executed
; unconditionally and fed by selects.
define void @switch_all_dests_distinct(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_all_dests_distinct(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT: i64 0, label %[[IF_THEN_3:.*]]
; COST-NEXT: ]
; COST: [[IF_THEN_1]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[IF_THEN_2]]:
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[IF_THEN_3]]:
; COST-NEXT: store i64 1, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[DEFAULT]]:
; COST-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @switch_all_dests_distinct(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]]
; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP14]]
; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true)
; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true)
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP13]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 1 [[TMP8]], <4 x i1> [[TMP14]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP11]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[TMP8]], <4 x i1> [[TMP12]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP9]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[TMP8]], <4 x i1> [[TMP10]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP19]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[TMP8]], <4 x i1> [[TMP20]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT: i64 0, label %[[IF_THEN_3:.*]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN_1]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[IF_THEN_2]]:
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[IF_THEN_3]]:
; FORCED-NEXT: store i64 1, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[DEFAULT]]:
; FORCED-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
%l = load i64, ptr %ptr.iv, align 1
switch i64 %l, label %default [
i64 -12, label %if.then.1
i64 13, label %if.then.2
i64 0, label %if.then.3
]
if.then.1:
store i64 42, ptr %ptr.iv, align 1
br label %loop.latch
if.then.2:
store i64 0, ptr %ptr.iv, align 1
br label %loop.latch
if.then.3:
store i64 1, ptr %ptr.iv, align 1
br label %loop.latch
default:
store i64 2, ptr %ptr.iv, align 1
br label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
%ec = icmp eq ptr %ptr.iv.next, %end
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_all_dests_distinct_variant_using_branches(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COST: [[VECTOR_PH]]:
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; COST-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
; COST: [[VECTOR_BODY]]:
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
; COST-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; COST-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP13]])
; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP14]])
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP7]])
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COST-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COST-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; COST: [[MIDDLE_BLOCK]]:
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COST: [[SCALAR_PH]]:
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: [[EQ_0:%.*]] = icmp eq i64 [[L]], -12
; COST-NEXT: [[EQ_1:%.*]] = icmp eq i64 [[L]], 13
; COST-NEXT: [[EQ_2:%.*]] = icmp eq i64 [[L]], 0
; COST-NEXT: br i1 [[EQ_0]], label %[[IF_THEN_1:.*]], label %[[ELSE_1:.*]]
; COST: [[ELSE_1]]:
; COST-NEXT: br i1 [[EQ_1]], label %[[IF_THEN_2:.*]], label %[[ELSE_2:.*]]
; COST: [[ELSE_2]]:
; COST-NEXT: br i1 [[EQ_2]], label %[[IF_THEN_3:.*]], label %[[LOOP_LATCH]]
; COST: [[IF_THEN_1]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[IF_THEN_2]]:
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[IF_THEN_3]]:
; COST-NEXT: store i64 1, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[DEFAULT:.*:]]
; COST-NEXT: store i64 2, ptr poison, align 1
; COST-NEXT: unreachable
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @switch_all_dests_distinct_variant_using_branches(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true)
; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
; FORCED-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true)
; FORCED-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true)
; FORCED-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP22:%.*]] = select <4 x i1> [[TMP20]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP21]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 1 [[TMP8]], <4 x i1> [[TMP22]])
; FORCED-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP23]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[TMP8]], <4 x i1> [[TMP24]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP9]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[TMP8]], <4 x i1> [[TMP10]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: [[EQ_0:%.*]] = icmp eq i64 [[L]], -12
; FORCED-NEXT: [[EQ_1:%.*]] = icmp eq i64 [[L]], 13
; FORCED-NEXT: [[EQ_2:%.*]] = icmp eq i64 [[L]], 0
; FORCED-NEXT: br i1 [[EQ_0]], label %[[IF_THEN_1:.*]], label %[[ELSE_1:.*]]
; FORCED: [[ELSE_1]]:
; FORCED-NEXT: br i1 [[EQ_1]], label %[[IF_THEN_2:.*]], label %[[ELSE_2:.*]]
; FORCED: [[ELSE_2]]:
; FORCED-NEXT: br i1 [[EQ_2]], label %[[IF_THEN_3:.*]], label %[[LOOP_LATCH]]
; FORCED: [[IF_THEN_1]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[IF_THEN_2]]:
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[IF_THEN_3]]:
; FORCED-NEXT: store i64 1, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[DEFAULT:.*:]]
; FORCED-NEXT: store i64 2, ptr poison, align 1
; FORCED-NEXT: unreachable
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
%l = load i64, ptr %ptr.iv, align 1
%eq.0 = icmp eq i64 %l, -12
%eq.1 = icmp eq i64 %l, 13
%eq.2 = icmp eq i64 %l, 0
br i1 %eq.0, label %if.then.1, label %else.1
else.1:
br i1 %eq.1, label %if.then.2, label %else.2
else.2:
br i1 %eq.2, label %if.then.3, label %loop.latch
if.then.1:
store i64 42, ptr %ptr.iv, align 1
br label %loop.latch
if.then.2:
store i64 0, ptr %ptr.iv, align 1
br label %loop.latch
if.then.3:
store i64 1, ptr %ptr.iv, align 1
br label %loop.latch
default:
store i64 2, ptr %ptr.iv, align 1
br label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
%ec = icmp eq ptr %ptr.iv.next, %end
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
define void @switch_multiple_common_dests(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_multiple_common_dests(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT: i64 0, label %[[IF_THEN_1]]
; COST-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT: i64 14, label %[[IF_THEN_2]]
; COST-NEXT: i64 15, label %[[IF_THEN_2]]
; COST-NEXT: ]
; COST: [[IF_THEN_1]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[IF_THEN_2]]:
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[DEFAULT]]:
; COST-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @switch_multiple_common_dests(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 14)
; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 14)
; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 15)
; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 15)
; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP23]], [[TMP25]]
; FORCED-NEXT: [[TMP28:%.*]] = or <4 x i1> [[TMP24]], [[TMP26]]
; FORCED-NEXT: [[TMP21:%.*]] = or <4 x i1> [[TMP13]], [[TMP17]]
; FORCED-NEXT: [[TMP22:%.*]] = or <4 x i1> [[TMP14]], [[TMP18]]
; FORCED-NEXT: [[TMP35:%.*]] = or <4 x i1> [[TMP21]], [[TMP15]]
; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP22]], [[TMP16]]
; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP27]], [[TMP35]]
; FORCED-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP28]], [[TMP36]]
; FORCED-NEXT: [[TMP39:%.*]] = xor <4 x i1> [[TMP37]], splat (i1 true)
; FORCED-NEXT: [[TMP40:%.*]] = xor <4 x i1> [[TMP38]], splat (i1 true)
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP35]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[TMP8]], <4 x i1> [[TMP36]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP27]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[TMP8]], <4 x i1> [[TMP28]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP39]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[TMP8]], <4 x i1> [[TMP40]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP41]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT: i64 0, label %[[IF_THEN_1]]
; FORCED-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT: i64 14, label %[[IF_THEN_2]]
; FORCED-NEXT: i64 15, label %[[IF_THEN_2]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN_1]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[IF_THEN_2]]:
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[DEFAULT]]:
; FORCED-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
%l = load i64, ptr %ptr.iv, align 1
switch i64 %l, label %default [
i64 -12, label %if.then.1
i64 0, label %if.then.1
i64 13, label %if.then.2
i64 14, label %if.then.2
i64 15, label %if.then.2
]
if.then.1:
store i64 42, ptr %ptr.iv, align 1
br label %loop.latch
if.then.2:
store i64 0, ptr %ptr.iv, align 1
br label %loop.latch
default:
store i64 2, ptr %ptr.iv, align 1
br label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
%ec = icmp eq ptr %ptr.iv.next, %end
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; COST-LABEL: define void @switch4_default_common_dest_with_case(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT: i64 0, label %[[DEFAULT]]
; COST-NEXT: ]
; COST: [[IF_THEN_1]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[IF_THEN_2]]:
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[DEFAULT]]:
; COST-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @switch4_default_common_dest_with_case(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP11]]
; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP12]]
; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true)
; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true)
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP11]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[TMP8]], <4 x i1> [[TMP12]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP15]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[TMP8]], <4 x i1> [[TMP16]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP20]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[TMP8]], <4 x i1> [[TMP21]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT: i64 0, label %[[DEFAULT]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN_1]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[IF_THEN_2]]:
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[DEFAULT]]:
; FORCED-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
%l = load i64, ptr %ptr.iv, align 1
switch i64 %l, label %default [
i64 -12, label %if.then.1
i64 13, label %if.then.2
i64 0, label %default
]
if.then.1:
store i64 42, ptr %ptr.iv, align 1
br label %loop.latch
if.then.2:
store i64 0, ptr %ptr.iv, align 1
br label %loop.latch
default:
store i64 2, ptr %ptr.iv, align 1
br label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
%ec = icmp eq ptr %ptr.iv.next, %end
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end, i64 %x) {
; COST-LABEL: define void @switch_under_br_default_common_dest_with_case(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COST: [[VECTOR_PH]]:
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; COST-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; COST-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
; COST-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
; COST: [[VECTOR_BODY]]:
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; COST-NEXT: [[TMP7:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; COST-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; COST-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
; COST-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]]
; COST-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true)
; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP11]])
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP10]])
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP14]])
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; COST: [[MIDDLE_BLOCK]]:
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COST: [[SCALAR_PH]]:
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: [[C:%.*]] = icmp ule i64 [[L]], [[X]]
; COST-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; COST: [[THEN]]:
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT: i64 0, label %[[DEFAULT]]
; COST-NEXT: ]
; COST: [[IF_THEN_1]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[IF_THEN_2]]:
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[DEFAULT]]:
; COST-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @switch_under_br_default_common_dest_with_case(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; FORCED-NEXT: [[TMP10:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]]
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP15]], [[TMP25]]
; FORCED-NEXT: [[TMP20:%.*]] = or <4 x i1> [[TMP16]], [[TMP26]]
; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP27]], splat (i1 true)
; FORCED-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP20]], splat (i1 true)
; FORCED-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP21]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP22]], <4 x i1> zeroinitializer
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP25]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[TMP8]], <4 x i1> [[TMP26]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP15]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[TMP8]], <4 x i1> [[TMP16]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP23]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[TMP8]], <4 x i1> [[TMP24]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: [[C:%.*]] = icmp ule i64 [[L]], [[X]]
; FORCED-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; FORCED: [[THEN]]:
; FORCED-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT: i64 0, label %[[DEFAULT]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN_1]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[IF_THEN_2]]:
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[DEFAULT]]:
; FORCED-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
%l = load i64, ptr %ptr.iv, align 1
%c = icmp ule i64 %l, %x
br i1 %c, label %then, label %loop.latch
then:
switch i64 %l, label %default [
i64 -12, label %if.then.1
i64 13, label %if.then.2
i64 0, label %default
]
if.then.1:
store i64 42, ptr %ptr.iv, align 1
br label %loop.latch
if.then.2:
store i64 0, ptr %ptr.iv, align 1
br label %loop.latch
default:
store i64 2, ptr %ptr.iv, align 1
br label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
%ec = icmp eq ptr %ptr.iv.next, %end
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end, i64 %x) {
; COST-LABEL: define void @br_under_switch_default_common_dest_with_case(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT: i64 0, label %[[DEFAULT]]
; COST-NEXT: ]
; COST: [[IF_THEN_1]]:
; COST-NEXT: [[C:%.*]] = icmp ule i64 [[L]], [[X]]
; COST-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[IF_THEN_2]]
; COST: [[THEN]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[DEFAULT]]
; COST: [[IF_THEN_2]]:
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[DEFAULT]]:
; COST-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @br_under_switch_default_common_dest_with_case(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP25]]
; FORCED-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP10]], [[TMP26]]
; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true)
; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true)
; FORCED-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; FORCED-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]]
; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true)
; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true)
; FORCED-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP28:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP20]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP25]]
; FORCED-NEXT: [[TMP30:%.*]] = or <4 x i1> [[TMP28]], [[TMP26]]
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP29]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr align 1 [[TMP8]], <4 x i1> [[TMP30]])
; FORCED-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP32]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[TMP8]], <4 x i1> [[TMP33]])
; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP32]], [[TMP15]]
; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP33]], [[TMP16]]
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP36]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr align 1 [[TMP8]], <4 x i1> [[TMP37]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT: i64 0, label %[[DEFAULT]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN_1]]:
; FORCED-NEXT: [[C:%.*]] = icmp ule i64 [[L]], [[X]]
; FORCED-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[IF_THEN_2]]
; FORCED: [[THEN]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[DEFAULT]]
; FORCED: [[IF_THEN_2]]:
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[DEFAULT]]:
; FORCED-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
%l = load i64, ptr %ptr.iv, align 1
switch i64 %l, label %default [
i64 -12, label %if.then.1
i64 13, label %if.then.2
i64 0, label %default
]
if.then.1:
%c = icmp ule i64 %l, %x
br i1 %c, label %then, label %if.then.2
then:
store i64 42, ptr %ptr.iv, align 1
br label %default
if.then.2:
store i64 0, ptr %ptr.iv, align 1
br label %loop.latch
default:
store i64 2, ptr %ptr.iv, align 1
br label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
%ec = icmp eq ptr %ptr.iv.next, %end
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
define void @large_number_of_cases(ptr %start, ptr %end) {
; COST-LABEL: define void @large_number_of_cases(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: switch i64 [[L]], label %[[LOOP_LATCH]] [
; COST-NEXT: i64 1, label %[[IF_THEN:.*]]
; COST-NEXT: i64 3, label %[[IF_THEN]]
; COST-NEXT: i64 11, label %[[IF_THEN]]
; COST-NEXT: i64 99, label %[[IF_THEN]]
; COST-NEXT: i64 213, label %[[IF_THEN]]
; COST-NEXT: i64 238, label %[[IF_THEN]]
; COST-NEXT: i64 513, label %[[IF_THEN]]
; COST-NEXT: i64 791, label %[[IF_THEN]]
; COST-NEXT: i64 899, label %[[IF_THEN]]
; COST-NEXT: ]
; COST: [[IF_THEN]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @large_number_of_cases(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 1)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 1)
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 11)
; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 11)
; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 99)
; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 99)
; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 213)
; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 213)
; FORCED-NEXT: [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 238)
; FORCED-NEXT: [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 238)
; FORCED-NEXT: [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 513)
; FORCED-NEXT: [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 513)
; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 791)
; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 791)
; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 899)
; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 899)
; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
; FORCED-NEXT: [[TMP28:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
; FORCED-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP13]]
; FORCED-NEXT: [[TMP30:%.*]] = or <4 x i1> [[TMP28]], [[TMP14]]
; FORCED-NEXT: [[TMP31:%.*]] = or <4 x i1> [[TMP29]], [[TMP15]]
; FORCED-NEXT: [[TMP32:%.*]] = or <4 x i1> [[TMP30]], [[TMP16]]
; FORCED-NEXT: [[TMP33:%.*]] = or <4 x i1> [[TMP31]], [[TMP17]]
; FORCED-NEXT: [[TMP34:%.*]] = or <4 x i1> [[TMP32]], [[TMP18]]
; FORCED-NEXT: [[TMP35:%.*]] = or <4 x i1> [[TMP33]], [[TMP19]]
; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP34]], [[TMP20]]
; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP35]], [[TMP21]]
; FORCED-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP36]], [[TMP22]]
; FORCED-NEXT: [[TMP39:%.*]] = or <4 x i1> [[TMP37]], [[TMP23]]
; FORCED-NEXT: [[TMP40:%.*]] = or <4 x i1> [[TMP38]], [[TMP24]]
; FORCED-NEXT: [[TMP57:%.*]] = or <4 x i1> [[TMP39]], [[TMP25]]
; FORCED-NEXT: [[TMP58:%.*]] = or <4 x i1> [[TMP40]], [[TMP26]]
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[NEXT_GEP]], <4 x i1> [[TMP57]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr align 1 [[TMP8]], <4 x i1> [[TMP58]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP59]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: switch i64 [[L]], label %[[LOOP_LATCH]] [
; FORCED-NEXT: i64 1, label %[[IF_THEN:.*]]
; FORCED-NEXT: i64 3, label %[[IF_THEN]]
; FORCED-NEXT: i64 11, label %[[IF_THEN]]
; FORCED-NEXT: i64 99, label %[[IF_THEN]]
; FORCED-NEXT: i64 213, label %[[IF_THEN]]
; FORCED-NEXT: i64 238, label %[[IF_THEN]]
; FORCED-NEXT: i64 513, label %[[IF_THEN]]
; FORCED-NEXT: i64 791, label %[[IF_THEN]]
; FORCED-NEXT: i64 899, label %[[IF_THEN]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
%l = load i64, ptr %ptr.iv, align 1
switch i64 %l, label %loop.latch [
i64 1, label %if.then
i64 3, label %if.then
i64 11, label %if.then
i64 99, label %if.then
i64 213, label %if.then
i64 238, label %if.then
i64 513, label %if.then
i64 791, label %if.then
i64 899, label %if.then
]
if.then:
store i64 42, ptr %ptr.iv, align 1
br label %loop.latch
loop.latch:
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
%ec = icmp eq ptr %ptr.iv.next, %end
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
;.
; COST: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; COST: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; COST: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; COST: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; COST: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; COST: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; COST: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; COST: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; COST: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; COST: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
;.
; FORCED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; FORCED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; FORCED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; FORCED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; FORCED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; FORCED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; FORCED: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; FORCED: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; FORCED: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; FORCED: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
; FORCED: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
; FORCED: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
; FORCED: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; FORCED: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
; FORCED: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
; FORCED: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
; FORCED: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
; FORCED: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
; FORCED: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
; FORCED: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]}
;.