This PR reassociates logical ANDs in order to enable more simplifications. The driving motivation is that, with tail folding, all blocks inside the loop body end up using the header mask. However, the header mask can end up nested deep within a chain of logical ANDs from other edges. Typically it is a leaf nested in the LHS, e.g. (headermask & y) & z. Pulling it out, i.e. rewriting this to headermask & (y & z), allows it to be simplified further; for example, it can then be optimised away into VP intrinsics with EVL tail folding.
1458 lines
78 KiB
LLVM
1458 lines
78 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -mcpu=skylake-avx512 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=COST %s
; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -mcpu=skylake-avx512 -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=FORCED %s

; Walks i64 elements starting at %start until the incremented pointer equals
; %end. The switch sends the values -12 and 13 to the same block (%if.then,
; which stores 42 back to the current element); every other value takes the
; default edge straight to the latch. The autogenerated checks below expect
; both configurations to vectorize, with the two case compares OR'ed into a
; single mask for the masked store.
define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_default_to_latch_common_dest(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COST: [[VECTOR_PH]]:
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; COST-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
; COST: [[VECTOR_BODY]]:
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; COST-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP7]], [[TMP8]]
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP10]])
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COST-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COST-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; COST: [[MIDDLE_BLOCK]]:
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COST: [[SCALAR_PH]]:
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: switch i64 [[L]], label %[[LOOP_LATCH]] [
; COST-NEXT: i64 -12, label %[[IF_THEN:.*]]
; COST-NEXT: i64 13, label %[[IF_THEN]]
; COST-NEXT: ]
; COST: [[IF_THEN]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @switch_default_to_latch_common_dest(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP15]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: switch i64 [[L]], label %[[LOOP_LATCH]] [
; FORCED-NEXT: i64 -12, label %[[IF_THEN:.*]]
; FORCED-NEXT: i64 13, label %[[IF_THEN]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  ; Both cases share %if.then; the default edge bypasses the store.
  switch i64 %l, label %loop.latch [
  i64 -12, label %if.then
  i64 13, label %if.then
  ]

if.then:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  ; Advance by one i64 element and exit once the next pointer reaches %end.
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}
|
|
|
|
; Walks i64 elements starting at %start until the incremented pointer equals
; %end, using a two-level branch chain rather than a switch: %eq.1 (value ==
; -12) branches to %if.then, otherwise %else branches on %eq.2 (value == 13)
; to %if.then or to the latch. %if.then stores 42 back to the current element.
; The autogenerated checks below expect both configurations to vectorize, with
; the masked-store mask built as (!eq.1 && eq.2) || eq.1 via xor, select and or.
define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_default_to_latch_common_dest_using_branches(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COST: [[VECTOR_PH]]:
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; COST-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
; COST: [[VECTOR_BODY]]:
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; COST-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; COST-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; COST-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP10]], [[TMP7]]
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP11]])
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COST-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COST-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; COST: [[MIDDLE_BLOCK]]:
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COST: [[SCALAR_PH]]:
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: [[EQ_1:%.*]] = icmp eq i64 [[L]], -12
; COST-NEXT: [[EQ_2:%.*]] = icmp eq i64 [[L]], 13
; COST-NEXT: br i1 [[EQ_1]], label %[[IF_THEN:.*]], label %[[ELSE:.*]]
; COST: [[IF_THEN]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[ELSE]]:
; COST-NEXT: br i1 [[EQ_2]], label %[[IF_THEN]], label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @switch_default_to_latch_common_dest_using_branches(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true)
; FORCED-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
; FORCED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP9]]
; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP10]]
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP17]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP18]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: [[EQ_1:%.*]] = icmp eq i64 [[L]], -12
; FORCED-NEXT: [[EQ_2:%.*]] = icmp eq i64 [[L]], 13
; FORCED-NEXT: br i1 [[EQ_1]], label %[[IF_THEN:.*]], label %[[ELSE:.*]]
; FORCED: [[IF_THEN]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[ELSE]]:
; FORCED-NEXT: br i1 [[EQ_2]], label %[[IF_THEN]], label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  ; Both compares are computed in the header; %eq.2 is only consumed in %else.
  %eq.1 = icmp eq i64 %l, -12
  %eq.2 = icmp eq i64 %l, 13
  br i1 %eq.1, label %if.then, label %else

if.then:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

else:
  br i1 %eq.2, label %if.then, label %loop.latch

loop.latch:
  ; Advance by one i64 element and exit once the next pointer reaches %end.
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}
|
|
|
|
; TODO: Instead of using masked stores, the store can be sunk, executed
; unconditionally and fed by selects.
;
; Walks i64 elements starting at %start until the incremented pointer equals
; %end. The switch has three cases (-12, 13, 0) plus a default, each going to
; its own block that stores a different constant (42, 0, 1 and 2 respectively)
; to the current element. The autogenerated checks below expect the loop to
; stay scalar in the cost-model-driven configuration, and expect the forced
; configuration to emit one masked store per destination and interleaved part,
; with the default mask being the negation of the OR of all three case compares.
define void @switch_all_dests_distinct(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_all_dests_distinct(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT: i64 0, label %[[IF_THEN_3:.*]]
; COST-NEXT: ]
; COST: [[IF_THEN_1]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[IF_THEN_2]]:
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[IF_THEN_3]]:
; COST-NEXT: store i64 1, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[DEFAULT]]:
; COST-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @switch_all_dests_distinct(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]]
; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP14]]
; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true)
; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true)
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP13]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP14]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP11]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP9]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP19]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP20]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT: i64 0, label %[[IF_THEN_3:.*]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN_1]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[IF_THEN_2]]:
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[IF_THEN_3]]:
; FORCED-NEXT: store i64 1, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[DEFAULT]]:
; FORCED-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  ; Every case and the default edge lead to a different block.
  switch i64 %l, label %default [
  i64 -12, label %if.then.1
  i64 13, label %if.then.2
  i64 0, label %if.then.3
  ]

if.then.1:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

if.then.2:
  store i64 0, ptr %ptr.iv, align 1
  br label %loop.latch

if.then.3:
  store i64 1, ptr %ptr.iv, align 1
  br label %loop.latch

default:
  store i64 2, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  ; Advance by one i64 element and exit once the next pointer reaches %end.
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}
|
|
|
|
define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %end) {
|
|
; COST-LABEL: define void @switch_all_dests_distinct_variant_using_branches(
|
|
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
|
|
; COST-NEXT: [[ENTRY:.*]]:
|
|
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
|
|
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
|
|
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
|
|
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
|
|
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
|
|
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
|
|
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
|
|
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; COST: [[VECTOR_PH]]:
|
|
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
|
|
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
|
|
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
|
|
; COST-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
|
|
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; COST: [[VECTOR_BODY]]:
|
|
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
|
|
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
|
|
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
|
|
; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
|
|
; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
|
|
; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
|
|
; COST-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
|
|
; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
|
|
; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
|
|
; COST-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
|
|
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP13]])
|
|
; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
|
|
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP14]])
|
|
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP7]])
|
|
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; COST-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; COST-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; COST: [[MIDDLE_BLOCK]]:
|
|
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
|
|
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; COST: [[SCALAR_PH]]:
|
|
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
|
|
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; COST: [[LOOP_HEADER]]:
|
|
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: [[EQ_0:%.*]] = icmp eq i64 [[L]], -12
|
|
; COST-NEXT: [[EQ_1:%.*]] = icmp eq i64 [[L]], 13
|
|
; COST-NEXT: [[EQ_2:%.*]] = icmp eq i64 [[L]], 0
|
|
; COST-NEXT: br i1 [[EQ_0]], label %[[IF_THEN_1:.*]], label %[[ELSE_1:.*]]
|
|
; COST: [[ELSE_1]]:
|
|
; COST-NEXT: br i1 [[EQ_1]], label %[[IF_THEN_2:.*]], label %[[ELSE_2:.*]]
|
|
; COST: [[ELSE_2]]:
|
|
; COST-NEXT: br i1 [[EQ_2]], label %[[IF_THEN_3:.*]], label %[[LOOP_LATCH]]
|
|
; COST: [[IF_THEN_1]]:
|
|
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[IF_THEN_2]]:
|
|
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[IF_THEN_3]]:
|
|
; COST-NEXT: store i64 1, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[DEFAULT:.*:]]
|
|
; COST-NEXT: store i64 2, ptr poison, align 1
|
|
; COST-NEXT: unreachable
|
|
; COST: [[LOOP_LATCH]]:
|
|
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
|
|
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
|
|
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; COST: [[EXIT]]:
|
|
; COST-NEXT: ret void
|
|
;
|
|
; FORCED-LABEL: define void @switch_all_dests_distinct_variant_using_branches(
|
|
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
|
|
; FORCED-NEXT: [[ENTRY:.*]]:
|
|
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
|
|
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
|
|
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
|
|
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
|
|
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
|
|
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
|
|
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
|
|
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; FORCED: [[VECTOR_PH]]:
|
|
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
|
|
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
|
|
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
|
|
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
|
|
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; FORCED: [[VECTOR_BODY]]:
|
|
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
|
|
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
|
|
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
|
|
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
|
|
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
|
|
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
|
|
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
|
|
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
|
|
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
|
|
; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
|
|
; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
|
|
; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true)
|
|
; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
|
|
; FORCED-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true)
|
|
; FORCED-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true)
|
|
; FORCED-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: [[TMP22:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP20]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP21]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]])
|
|
; FORCED-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP23]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP9]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
|
|
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
|
; FORCED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; FORCED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; FORCED: [[MIDDLE_BLOCK]]:
|
|
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
|
|
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; FORCED: [[SCALAR_PH]]:
|
|
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
|
|
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; FORCED: [[LOOP_HEADER]]:
|
|
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: [[EQ_0:%.*]] = icmp eq i64 [[L]], -12
|
|
; FORCED-NEXT: [[EQ_1:%.*]] = icmp eq i64 [[L]], 13
|
|
; FORCED-NEXT: [[EQ_2:%.*]] = icmp eq i64 [[L]], 0
|
|
; FORCED-NEXT: br i1 [[EQ_0]], label %[[IF_THEN_1:.*]], label %[[ELSE_1:.*]]
|
|
; FORCED: [[ELSE_1]]:
|
|
; FORCED-NEXT: br i1 [[EQ_1]], label %[[IF_THEN_2:.*]], label %[[ELSE_2:.*]]
|
|
; FORCED: [[ELSE_2]]:
|
|
; FORCED-NEXT: br i1 [[EQ_2]], label %[[IF_THEN_3:.*]], label %[[LOOP_LATCH]]
|
|
; FORCED: [[IF_THEN_1]]:
|
|
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[IF_THEN_2]]:
|
|
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[IF_THEN_3]]:
|
|
; FORCED-NEXT: store i64 1, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[DEFAULT:.*:]]
|
|
; FORCED-NEXT: store i64 2, ptr poison, align 1
|
|
; FORCED-NEXT: unreachable
|
|
; FORCED: [[LOOP_LATCH]]:
|
|
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
|
|
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
|
|
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; FORCED: [[EXIT]]:
|
|
; FORCED-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %loop.header
|
|
|
|
loop.header:
|
|
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
|
|
%l = load i64, ptr %ptr.iv, align 1
|
|
%eq.0 = icmp eq i64 %l, -12
|
|
%eq.1 = icmp eq i64 %l, 13
|
|
%eq.2 = icmp eq i64 %l, 0
|
|
br i1 %eq.0, label %if.then.1, label %else.1
|
|
|
|
else.1:
|
|
br i1 %eq.1, label %if.then.2, label %else.2
|
|
|
|
else.2:
|
|
br i1 %eq.2, label %if.then.3, label %loop.latch
|
|
|
|
if.then.1:
|
|
store i64 42, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
if.then.2:
|
|
store i64 0, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
if.then.3:
|
|
store i64 1, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
default:
|
|
store i64 2, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
loop.latch:
|
|
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
|
|
%ec = icmp eq ptr %ptr.iv.next, %end
|
|
br i1 %ec, label %exit, label %loop.header
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
|
|
|
|
; Test: a switch in the loop header where several case values share a
; destination block.
;   -12 and 0      -> %if.then.1 (stores 42)
;   13, 14 and 15  -> %if.then.2 (stores 0)
;   anything else  -> %default   (stores 2)
; The COST checks expect the loop to be left in its original scalar form.
; The FORCED checks (RUN line with -force-vector-width=4
; -force-vector-interleave=2) expect one icmp eq per case value, the compares
; of a shared destination or'ed into one mask, the default mask formed as the
; negation of the or of all case masks, and one masked store per destination
; and per unrolled part.
define void @switch_multiple_common_dests(ptr %start, ptr %end) {
|
|
; COST-LABEL: define void @switch_multiple_common_dests(
|
|
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
|
|
; COST-NEXT: [[ENTRY:.*]]:
|
|
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; COST: [[LOOP_HEADER]]:
|
|
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
|
|
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
|
|
; COST-NEXT: i64 0, label %[[IF_THEN_1]]
|
|
; COST-NEXT: i64 13, label %[[IF_THEN_2:.*]]
|
|
; COST-NEXT: i64 14, label %[[IF_THEN_2]]
|
|
; COST-NEXT: i64 15, label %[[IF_THEN_2]]
|
|
; COST-NEXT: ]
|
|
; COST: [[IF_THEN_1]]:
|
|
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[IF_THEN_2]]:
|
|
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[DEFAULT]]:
|
|
; COST-NEXT: store i64 2, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[LOOP_LATCH]]:
|
|
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
|
|
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
|
|
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
|
|
; COST: [[EXIT]]:
|
|
; COST-NEXT: ret void
|
|
;
|
|
; FORCED-LABEL: define void @switch_multiple_common_dests(
|
|
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
|
|
; FORCED-NEXT: [[ENTRY:.*]]:
|
|
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
|
|
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
|
|
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
|
|
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
|
|
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
|
|
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
|
|
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
|
|
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; FORCED: [[VECTOR_PH]]:
|
|
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
|
|
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
|
|
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
|
|
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
|
|
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; FORCED: [[VECTOR_BODY]]:
|
|
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
|
|
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
|
|
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
|
|
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
|
|
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
|
|
; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
|
|
; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
|
|
; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
|
|
; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
|
|
; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
|
|
; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
|
|
; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 14)
|
|
; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 14)
|
|
; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 15)
|
|
; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 15)
|
|
; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP23]], [[TMP25]]
|
|
; FORCED-NEXT: [[TMP28:%.*]] = or <4 x i1> [[TMP24]], [[TMP26]]
|
|
; FORCED-NEXT: [[TMP21:%.*]] = or <4 x i1> [[TMP13]], [[TMP17]]
|
|
; FORCED-NEXT: [[TMP22:%.*]] = or <4 x i1> [[TMP14]], [[TMP18]]
|
|
; FORCED-NEXT: [[TMP35:%.*]] = or <4 x i1> [[TMP21]], [[TMP15]]
|
|
; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP22]], [[TMP16]]
|
|
; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP27]], [[TMP35]]
|
|
; FORCED-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP28]], [[TMP36]]
|
|
; FORCED-NEXT: [[TMP39:%.*]] = xor <4 x i1> [[TMP37]], splat (i1 true)
|
|
; FORCED-NEXT: [[TMP40:%.*]] = xor <4 x i1> [[TMP38]], splat (i1 true)
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP35]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP36]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP27]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP28]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP39]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP40]])
|
|
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
|
; FORCED-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; FORCED-NEXT: br i1 [[TMP41]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; FORCED: [[MIDDLE_BLOCK]]:
|
|
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
|
|
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; FORCED: [[SCALAR_PH]]:
|
|
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
|
|
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; FORCED: [[LOOP_HEADER]]:
|
|
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
|
|
; FORCED-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
|
|
; FORCED-NEXT: i64 0, label %[[IF_THEN_1]]
|
|
; FORCED-NEXT: i64 13, label %[[IF_THEN_2:.*]]
|
|
; FORCED-NEXT: i64 14, label %[[IF_THEN_2]]
|
|
; FORCED-NEXT: i64 15, label %[[IF_THEN_2]]
|
|
; FORCED-NEXT: ]
|
|
; FORCED: [[IF_THEN_1]]:
|
|
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[IF_THEN_2]]:
|
|
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[DEFAULT]]:
|
|
; FORCED-NEXT: store i64 2, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[LOOP_LATCH]]:
|
|
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
|
|
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
|
|
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
|
|
; FORCED: [[EXIT]]:
|
|
; FORCED-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %loop.header
|
|
|
|
loop.header:
|
|
; Pointer induction variable: starts at %start and is advanced in %loop.latch.
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
|
|
%l = load i64, ptr %ptr.iv, align 1
|
|
; Five case values fan in to only two destinations; every other value goes to
; %default.
switch i64 %l, label %default [
|
|
i64 -12, label %if.then.1
|
|
i64 0, label %if.then.1
|
|
i64 13, label %if.then.2
|
|
i64 14, label %if.then.2
|
|
i64 15, label %if.then.2
|
|
]
|
|
|
|
if.then.1:
|
|
store i64 42, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
if.then.2:
|
|
store i64 0, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
default:
|
|
store i64 2, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
loop.latch:
|
|
; Step to the next i64 element and leave the loop once the pointer equals %end.
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
|
|
%ec = icmp eq ptr %ptr.iv.next, %end
|
|
br i1 %ec, label %exit, label %loop.header
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Test: a switch where an explicit case (value 0) targets the same block as
; the default destination.
;   -12            -> %if.then.1 (stores 42)
;   13             -> %if.then.2 (stores 0)
;   0 and default  -> %default   (stores 2)
; The COST checks expect the loop to be left in its original scalar form.
; The FORCED checks (RUN line with -force-vector-width=4
; -force-vector-interleave=2) expect compares only against -12 and 13; the
; mask for %default is the negation of the or of those two compares, so no
; separate compare against 0 appears in the vector body.
define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
|
; COST-LABEL: define void @switch4_default_common_dest_with_case(
|
|
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
|
|
; COST-NEXT: [[ENTRY:.*]]:
|
|
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; COST: [[LOOP_HEADER]]:
|
|
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
|
|
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
|
|
; COST-NEXT: i64 13, label %[[IF_THEN_2:.*]]
|
|
; COST-NEXT: i64 0, label %[[DEFAULT]]
|
|
; COST-NEXT: ]
|
|
; COST: [[IF_THEN_1]]:
|
|
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[IF_THEN_2]]:
|
|
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[DEFAULT]]:
|
|
; COST-NEXT: store i64 2, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[LOOP_LATCH]]:
|
|
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
|
|
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
|
|
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
|
|
; COST: [[EXIT]]:
|
|
; COST-NEXT: ret void
|
|
;
|
|
; FORCED-LABEL: define void @switch4_default_common_dest_with_case(
|
|
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
|
|
; FORCED-NEXT: [[ENTRY:.*]]:
|
|
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
|
|
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
|
|
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
|
|
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
|
|
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
|
|
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
|
|
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
|
|
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; FORCED: [[VECTOR_PH]]:
|
|
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
|
|
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
|
|
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
|
|
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
|
|
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; FORCED: [[VECTOR_BODY]]:
|
|
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
|
|
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
|
|
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
|
|
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
|
|
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
|
|
; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
|
|
; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
|
|
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
|
|
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
|
|
; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP11]]
|
|
; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP12]]
|
|
; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true)
|
|
; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true)
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP11]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP15]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP20]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP21]])
|
|
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
|
; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; FORCED: [[MIDDLE_BLOCK]]:
|
|
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
|
|
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; FORCED: [[SCALAR_PH]]:
|
|
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
|
|
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; FORCED: [[LOOP_HEADER]]:
|
|
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
|
|
; FORCED-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
|
|
; FORCED-NEXT: i64 13, label %[[IF_THEN_2:.*]]
|
|
; FORCED-NEXT: i64 0, label %[[DEFAULT]]
|
|
; FORCED-NEXT: ]
|
|
; FORCED: [[IF_THEN_1]]:
|
|
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[IF_THEN_2]]:
|
|
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[DEFAULT]]:
|
|
; FORCED-NEXT: store i64 2, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[LOOP_LATCH]]:
|
|
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
|
|
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
|
|
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
|
|
; FORCED: [[EXIT]]:
|
|
; FORCED-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %loop.header
|
|
|
|
loop.header:
|
|
; Pointer induction variable: starts at %start and is advanced in %loop.latch.
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
|
|
%l = load i64, ptr %ptr.iv, align 1
|
|
; The explicit case for 0 names the same block as the default destination.
switch i64 %l, label %default [
|
|
i64 -12, label %if.then.1
|
|
i64 13, label %if.then.2
|
|
i64 0, label %default
|
|
]
|
|
|
|
if.then.1:
|
|
store i64 42, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
if.then.2:
|
|
store i64 0, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
default:
|
|
store i64 2, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
loop.latch:
|
|
; Step to the next i64 element and leave the loop once the pointer equals %end.
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
|
|
%ec = icmp eq ptr %ptr.iv.next, %end
|
|
br i1 %ec, label %exit, label %loop.header
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Test: the same switch shape as a case sharing the default destination
; (-12 -> %if.then.1, 13 -> %if.then.2, 0 and default -> %default), but the
; switch lives in %then, which is only reached when the loaded value is
; unsigned-less-or-equal to %x.
; Both check prefixes expect a vectorized loop here: COST with a single
; <4 x i64> part per iteration (index step 4), FORCED with two <4 x i64> parts
; (index step 8). In both, each case mask is the guard compare combined with
; the case compare through a select against zeroinitializer, and the default
; mask is the guard combined with the negation of the or of the case masks.
define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end, i64 %x) {
|
|
; COST-LABEL: define void @switch_under_br_default_common_dest_with_case(
|
|
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
|
|
; COST-NEXT: [[ENTRY:.*]]:
|
|
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
|
|
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
|
|
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
|
|
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
|
|
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
|
|
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
|
|
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
|
|
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; COST: [[VECTOR_PH]]:
|
|
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
|
|
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
|
|
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
|
|
; COST-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
|
|
; COST-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
|
|
; COST-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
|
|
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; COST: [[VECTOR_BODY]]:
|
|
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
|
|
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
|
|
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
|
|
; COST-NEXT: [[TMP7:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
|
|
; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
|
|
; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
|
|
; COST-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
|
|
; COST-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
|
|
; COST-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]]
|
|
; COST-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true)
|
|
; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
|
|
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP11]])
|
|
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP10]])
|
|
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP14]])
|
|
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; COST: [[MIDDLE_BLOCK]]:
|
|
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
|
|
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; COST: [[SCALAR_PH]]:
|
|
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
|
|
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; COST: [[LOOP_HEADER]]:
|
|
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: [[C:%.*]] = icmp ule i64 [[L]], [[X]]
|
|
; COST-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
|
|
; COST: [[THEN]]:
|
|
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
|
|
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
|
|
; COST-NEXT: i64 13, label %[[IF_THEN_2:.*]]
|
|
; COST-NEXT: i64 0, label %[[DEFAULT]]
|
|
; COST-NEXT: ]
|
|
; COST: [[IF_THEN_1]]:
|
|
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[IF_THEN_2]]:
|
|
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[DEFAULT]]:
|
|
; COST-NEXT: store i64 2, ptr [[PTR_IV]], align 1
|
|
; COST-NEXT: br label %[[LOOP_LATCH]]
|
|
; COST: [[LOOP_LATCH]]:
|
|
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
|
|
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
|
|
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; COST: [[EXIT]]:
|
|
; COST-NEXT: ret void
|
|
;
|
|
; FORCED-LABEL: define void @switch_under_br_default_common_dest_with_case(
|
|
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
|
|
; FORCED-NEXT: [[ENTRY:.*]]:
|
|
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
|
|
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
|
|
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
|
|
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
|
|
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
|
|
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
|
|
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
|
|
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; FORCED: [[VECTOR_PH]]:
|
|
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
|
|
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
|
|
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
|
|
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
|
|
; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
|
|
; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
|
|
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; FORCED: [[VECTOR_BODY]]:
|
|
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
|
|
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
|
|
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
|
|
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
|
|
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
|
|
; FORCED-NEXT: [[TMP9:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
|
|
; FORCED-NEXT: [[TMP10:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]]
|
|
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
|
|
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
|
|
; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
|
|
; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
|
|
; FORCED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP15]], [[TMP25]]
|
|
; FORCED-NEXT: [[TMP20:%.*]] = or <4 x i1> [[TMP16]], [[TMP26]]
|
|
; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP27]], splat (i1 true)
|
|
; FORCED-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP20]], splat (i1 true)
|
|
; FORCED-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP21]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP22]], <4 x i1> zeroinitializer
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP25]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP26]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP15]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP23]])
|
|
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]])
|
|
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
|
; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
|
|
; FORCED: [[MIDDLE_BLOCK]]:
|
|
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
|
|
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; FORCED: [[SCALAR_PH]]:
|
|
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
|
|
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; FORCED: [[LOOP_HEADER]]:
|
|
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: [[C:%.*]] = icmp ule i64 [[L]], [[X]]
|
|
; FORCED-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
|
|
; FORCED: [[THEN]]:
|
|
; FORCED-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
|
|
; FORCED-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
|
|
; FORCED-NEXT: i64 13, label %[[IF_THEN_2:.*]]
|
|
; FORCED-NEXT: i64 0, label %[[DEFAULT]]
|
|
; FORCED-NEXT: ]
|
|
; FORCED: [[IF_THEN_1]]:
|
|
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[IF_THEN_2]]:
|
|
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[DEFAULT]]:
|
|
; FORCED-NEXT: store i64 2, ptr [[PTR_IV]], align 1
|
|
; FORCED-NEXT: br label %[[LOOP_LATCH]]
|
|
; FORCED: [[LOOP_LATCH]]:
|
|
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
|
|
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
|
|
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]]
|
|
; FORCED: [[EXIT]]:
|
|
; FORCED-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %loop.header
|
|
|
|
loop.header:
|
|
; Pointer induction variable: starts at %start and is advanced in %loop.latch.
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
|
|
%l = load i64, ptr %ptr.iv, align 1
|
|
; Guard: the switch in %then only runs when %l is unsigned <= %x; otherwise the
; iteration goes straight to %loop.latch without storing.
%c = icmp ule i64 %l, %x
|
|
br i1 %c, label %then, label %loop.latch
|
|
|
|
then:
|
|
; The explicit case for 0 names the same block as the default destination.
switch i64 %l, label %default [
|
|
i64 -12, label %if.then.1
|
|
i64 13, label %if.then.2
|
|
i64 0, label %default
|
|
]
|
|
|
|
if.then.1:
|
|
store i64 42, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
if.then.2:
|
|
store i64 0, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
default:
|
|
store i64 2, ptr %ptr.iv, align 1
|
|
br label %loop.latch
|
|
|
|
loop.latch:
|
|
; Step to the next i64 element and leave the loop once the pointer equals %end.
%ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
|
|
%ec = icmp eq ptr %ptr.iv.next, %end
|
|
br i1 %ec, label %exit, label %loop.header
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; A conditional branch nested under a switch case, where the switch's default
; destination is also the target of an explicit case.
;
; The header switches on the loaded value %l: -12 goes to %if.then.1, 13 goes
; to %if.then.2, and both the explicit case 0 and the default edge go to
; %default. %if.then.1 branches on (%l ule %x) to %then or %if.then.2, and
; %then continues into %default after its store.
;
; The assertions for the cost-model-driven run below contain only the scalar
; loop. The assertions for the run with forced width 4 and interleave 2 contain
; a vector body in which each of the three stores becomes a pair of masked
; stores (one per unrolled part).
define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end, i64 %x) {
; COST-LABEL: define void @br_under_switch_default_common_dest_with_case(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT: i64 0, label %[[DEFAULT]]
; COST-NEXT: ]
; COST: [[IF_THEN_1]]:
; COST-NEXT: [[C:%.*]] = icmp ule i64 [[L]], [[X]]
; COST-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[IF_THEN_2]]
; COST: [[THEN]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[DEFAULT]]
; COST: [[IF_THEN_2]]:
; COST-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[DEFAULT]]:
; COST-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @br_under_switch_default_common_dest_with_case(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
; FORCED-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP25]]
; FORCED-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP10]], [[TMP26]]
; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true)
; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true)
; FORCED-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; FORCED-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]]
; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true)
; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true)
; FORCED-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP28:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP20]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP25]]
; FORCED-NEXT: [[TMP30:%.*]] = or <4 x i1> [[TMP28]], [[TMP26]]
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP29]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP30]])
; FORCED-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer
; FORCED-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP32]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP33]])
; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP32]], [[TMP15]]
; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP33]], [[TMP16]]
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP36]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP37]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT: i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT: i64 0, label %[[DEFAULT]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN_1]]:
; FORCED-NEXT: [[C:%.*]] = icmp ule i64 [[L]], [[X]]
; FORCED-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[IF_THEN_2]]
; FORCED: [[THEN]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[DEFAULT]]
; FORCED: [[IF_THEN_2]]:
; FORCED-NEXT: store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[DEFAULT]]:
; FORCED-NEXT: store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  ; -12 and 13 get their own blocks; the explicit case 0 shares %default with
  ; the default edge.
  switch i64 %l, label %default [
    i64 -12, label %if.then.1
    i64 13, label %if.then.2
    i64 0, label %default
  ]

if.then.1:
  ; When %c is false this path joins the 13 case in %if.then.2.
  %c = icmp ule i64 %l, %x
  br i1 %c, label %then, label %if.then.2

then:
  ; Continues into %default, so this path stores 42 and then 2.
  store i64 42, ptr %ptr.iv, align 1
  br label %default

if.then.2:
  store i64 0, ptr %ptr.iv, align 1
  br label %loop.latch

default:
  store i64 2, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  ; Advance by one i64 element; the loop exits when the pointer equals %end.
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}
|
|
|
|
; A switch with nine cases that all target the same block.
;
; Every listed case value (1, 3, 11, 99, 213, 238, 513, 791, 899) goes to
; %if.then, which stores 42; any other value goes straight to %loop.latch.
;
; The assertions for the cost-model-driven run below contain only the scalar
; loop. The assertions for the run with forced width 4 and interleave 2 contain
; one vector compare per case value per unrolled part, OR-ed together into the
; mask of a masked store.
define void @large_number_of_cases(ptr %start, ptr %end) {
; COST-LABEL: define void @large_number_of_cases(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT: [[ENTRY:.*]]:
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
; COST: [[LOOP_HEADER]]:
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT: switch i64 [[L]], label %[[LOOP_LATCH]] [
; COST-NEXT: i64 1, label %[[IF_THEN:.*]]
; COST-NEXT: i64 3, label %[[IF_THEN]]
; COST-NEXT: i64 11, label %[[IF_THEN]]
; COST-NEXT: i64 99, label %[[IF_THEN]]
; COST-NEXT: i64 213, label %[[IF_THEN]]
; COST-NEXT: i64 238, label %[[IF_THEN]]
; COST-NEXT: i64 513, label %[[IF_THEN]]
; COST-NEXT: i64 791, label %[[IF_THEN]]
; COST-NEXT: i64 899, label %[[IF_THEN]]
; COST-NEXT: ]
; COST: [[IF_THEN]]:
; COST-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT: br label %[[LOOP_LATCH]]
; COST: [[LOOP_LATCH]]:
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST: [[EXIT]]:
; COST-NEXT: ret void
;
; FORCED-LABEL: define void @large_number_of_cases(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT: [[ENTRY:.*]]:
; FORCED-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 1)
; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 1)
; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3)
; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 11)
; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 11)
; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 99)
; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 99)
; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 213)
; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 213)
; FORCED-NEXT: [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 238)
; FORCED-NEXT: [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 238)
; FORCED-NEXT: [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 513)
; FORCED-NEXT: [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 513)
; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 791)
; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 791)
; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 899)
; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 899)
; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
; FORCED-NEXT: [[TMP28:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
; FORCED-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP13]]
; FORCED-NEXT: [[TMP30:%.*]] = or <4 x i1> [[TMP28]], [[TMP14]]
; FORCED-NEXT: [[TMP31:%.*]] = or <4 x i1> [[TMP29]], [[TMP15]]
; FORCED-NEXT: [[TMP32:%.*]] = or <4 x i1> [[TMP30]], [[TMP16]]
; FORCED-NEXT: [[TMP33:%.*]] = or <4 x i1> [[TMP31]], [[TMP17]]
; FORCED-NEXT: [[TMP34:%.*]] = or <4 x i1> [[TMP32]], [[TMP18]]
; FORCED-NEXT: [[TMP35:%.*]] = or <4 x i1> [[TMP33]], [[TMP19]]
; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP34]], [[TMP20]]
; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP35]], [[TMP21]]
; FORCED-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP36]], [[TMP22]]
; FORCED-NEXT: [[TMP39:%.*]] = or <4 x i1> [[TMP37]], [[TMP23]]
; FORCED-NEXT: [[TMP40:%.*]] = or <4 x i1> [[TMP38]], [[TMP24]]
; FORCED-NEXT: [[TMP57:%.*]] = or <4 x i1> [[TMP39]], [[TMP25]]
; FORCED-NEXT: [[TMP58:%.*]] = or <4 x i1> [[TMP40]], [[TMP26]]
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP57]])
; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP58]])
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT: [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT: br i1 [[TMP59]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
; FORCED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED: [[SCALAR_PH]]:
; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT: br label %[[LOOP_HEADER:.*]]
; FORCED: [[LOOP_HEADER]]:
; FORCED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT: switch i64 [[L]], label %[[LOOP_LATCH]] [
; FORCED-NEXT: i64 1, label %[[IF_THEN:.*]]
; FORCED-NEXT: i64 3, label %[[IF_THEN]]
; FORCED-NEXT: i64 11, label %[[IF_THEN]]
; FORCED-NEXT: i64 99, label %[[IF_THEN]]
; FORCED-NEXT: i64 213, label %[[IF_THEN]]
; FORCED-NEXT: i64 238, label %[[IF_THEN]]
; FORCED-NEXT: i64 513, label %[[IF_THEN]]
; FORCED-NEXT: i64 791, label %[[IF_THEN]]
; FORCED-NEXT: i64 899, label %[[IF_THEN]]
; FORCED-NEXT: ]
; FORCED: [[IF_THEN]]:
; FORCED-NEXT: store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT: br label %[[LOOP_LATCH]]
; FORCED: [[LOOP_LATCH]]:
; FORCED-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]]
; FORCED: [[EXIT]]:
; FORCED-NEXT: ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  ; All nine cases share one destination; the default edge skips the store.
  switch i64 %l, label %loop.latch [
    i64 1, label %if.then
    i64 3, label %if.then
    i64 11, label %if.then
    i64 99, label %if.then
    i64 213, label %if.then
    i64 238, label %if.then
    i64 513, label %if.then
    i64 791, label %if.then
    i64 899, label %if.then
  ]

if.then:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  ; Advance by one i64 element; the loop exits when the pointer equals %end.
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}
|
|
|
|
;.
; COST: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; COST: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; COST: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; COST: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; COST: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; COST: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; COST: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; COST: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; COST: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; COST: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
;.
; FORCED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; FORCED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; FORCED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; FORCED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; FORCED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; FORCED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; FORCED: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; FORCED: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; FORCED: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; FORCED: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
; FORCED: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
; FORCED: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
; FORCED: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; FORCED: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
; FORCED: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
; FORCED: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
; FORCED: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
; FORCED: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
; FORCED: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
; FORCED: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]}
;.
|