
Dissolving the hierarchical VPlan CFG and converting abstract recipes to concrete ones can expose additional simplification opportunities, so do a final run of simplifyRecipes before executing the VPlan.
1204 lines
85 KiB
LLVM
1204 lines
85 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
|
|
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefixes=CHECK,IC1VF4 %s
|
|
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefixes=CHECK,IC4VF4 %s
|
|
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefixes=CHECK,IC4VF1 %s
|
|
|
|
; Scalar loop under test: %iv counts down from 19999 to 0 (inclusive) and
; %rdx, which starts at 331, is replaced by the current %iv whenever the loaded
; element a[%iv] is signed-greater-than 3. The function returns the final
; selected value, i.e. the index from the last (lowest-index) iteration whose
; element compared greater than 3, or 331 if no element did.
; The autogenerated assertions that follow describe the expected vectorizer
; output for each of the RUN configurations at the top of the file.
define i64 @select_decreasing_induction_icmp_const_start(ptr %a) {
|
|
; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
|
|
; IC1VF4-SAME: ptr [[A:%.*]]) {
|
|
; IC1VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC1VF4: [[VECTOR_PH]]:
|
|
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC1VF4: [[VECTOR_BODY]]:
|
|
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 19999, i64 19998, i64 19997, i64 19996>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
|
|
; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
|
; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0
|
|
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3
|
|
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
|
|
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
|
|
; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
|
|
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
|
|
; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
|
|
; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; IC1VF4: [[MIDDLE_BLOCK]]:
|
|
; IC1VF4-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP4]])
|
|
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], 9223372036854775807
|
|
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331
|
|
; IC1VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC1VF4: [[SCALAR_PH]]:
|
|
; IC1VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC1VF4: [[LOOP]]:
|
|
; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
|
|
; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
|
|
; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
|
|
; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
|
|
; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
|
|
; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; IC1VF4: [[EXIT]]:
|
|
; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
|
|
; IC4VF4-SAME: ptr [[A:%.*]]) {
|
|
; IC4VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC4VF4: [[VECTOR_PH]]:
|
|
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF4: [[VECTOR_BODY]]:
|
|
; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 19999, i64 19998, i64 19997, i64 19996>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4)
|
|
; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
|
|
; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
|
; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0
|
|
; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3
|
|
; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4
|
|
; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3
|
|
; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8
|
|
; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3
|
|
; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12
|
|
; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3
|
|
; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD6]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD8]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i64> [[REVERSE5]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE7]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[REVERSE9]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
|
|
; IC4VF4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
|
|
; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
|
|
; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
|
|
; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
|
|
; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4)
|
|
; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
|
|
; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; IC4VF4: [[MIDDLE_BLOCK]]:
|
|
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP15]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX10]], <4 x i64> [[TMP16]])
|
|
; IC4VF4-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX11]])
|
|
; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP18]], 9223372036854775807
|
|
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331
|
|
; IC4VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF4: [[SCALAR_PH]]:
|
|
; IC4VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF4: [[LOOP]]:
|
|
; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
|
|
; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
|
|
; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
|
|
; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
|
|
; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
|
|
; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; IC4VF4: [[EXIT]]:
|
|
; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
|
|
; IC4VF1-SAME: ptr [[A:%.*]]) {
|
|
; IC4VF1-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC4VF1: [[VECTOR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF1: [[VECTOR_BODY]]:
|
|
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
|
|
; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2
|
|
; IC4VF1-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3
|
|
; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
|
; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
|
|
; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
|
|
; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
|
|
; IC4VF1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP3]], align 8
|
|
; IC4VF1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
|
|
; IC4VF1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
|
|
; IC4VF1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
|
|
; IC4VF1-NEXT: [[TMP11:%.*]] = icmp sgt i64 [[TMP7]], 3
|
|
; IC4VF1-NEXT: [[TMP12:%.*]] = icmp sgt i64 [[TMP8]], 3
|
|
; IC4VF1-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[TMP9]], 3
|
|
; IC4VF1-NEXT: [[TMP14:%.*]] = icmp sgt i64 [[TMP10]], 3
|
|
; IC4VF1-NEXT: [[TMP15]] = select i1 [[TMP11]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]]
|
|
; IC4VF1-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[TMP0]], i64 [[VEC_PHI1]]
|
|
; IC4VF1-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI2]]
|
|
; IC4VF1-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI3]]
|
|
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; IC4VF1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
|
|
; IC4VF1-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; IC4VF1: [[MIDDLE_BLOCK]]:
|
|
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP15]], i64 [[TMP16]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX]], i64 [[TMP17]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP18]])
|
|
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], 9223372036854775807
|
|
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331
|
|
; IC4VF1-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF1: [[SCALAR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF1: [[LOOP]]:
|
|
; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
|
|
; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
|
|
; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
|
|
; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
|
|
; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
|
|
; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; IC4VF1: [[EXIT]]:
|
|
; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop: ; preds = %entry, %loop
|
|
; Down-counting induction variable, starting at 19999.
%iv = phi i64 [ 19999, %entry ], [ %iv.next, %loop ]
|
|
; Running result; it stays at the start value 331 until an element matches.
%rdx = phi i64 [ 331, %entry ], [ %spec.select, %loop ]
|
|
%gep.a.iv = getelementptr inbounds i64, ptr %a, i64 %iv
|
|
%ld.a = load i64, ptr %gep.a.iv, align 8
|
|
%cmp.a.3 = icmp sgt i64 %ld.a, 3
|
|
; Take the current index when the loaded element is > 3, otherwise keep the
; previous result.
%spec.select = select i1 %cmp.a.3, i64 %iv, i64 %rdx
|
|
%iv.next = add nsw i64 %iv, -1
|
|
; The exit test reads %iv rather than %iv.next, so the iteration with
; %iv == 0 still executes before the loop is left.
%exit.cond = icmp eq i64 %iv, 0
|
|
br i1 %exit.cond, label %exit, label %loop
|
|
|
|
exit: ; preds = %loop
|
|
ret i64 %spec.select
|
|
}
|
|
|
|
; Constant table of 13 i16 values in ascending order, declared with align 1.
; The assertions for @select_decreasing_induction_icmp_table_i16 index into it
; through getelementptr on @table.
@table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1
|
|
|
|
define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
|
|
; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
|
|
; IC1VF4-SAME: i16 noundef [[VAL:%.*]]) {
|
|
; IC1VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC1VF4: [[VECTOR_PH]]:
|
|
; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
|
|
; IC1VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
|
|
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC1VF4: [[VECTOR_BODY]]:
|
|
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
|
|
; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
|
|
; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
|
|
; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 0
|
|
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 -3
|
|
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 1
|
|
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC1VF4-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]]
|
|
; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
|
|
; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]]
|
|
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
|
|
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
|
|
; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
|
|
; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; IC1VF4: [[MIDDLE_BLOCK]]:
|
|
; IC1VF4-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]])
|
|
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767
|
|
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0
|
|
; IC1VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC1VF4: [[SCALAR_PH]]:
|
|
; IC1VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC1VF4: [[LOOP]]:
|
|
; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
|
|
; IC1VF4-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
|
|
; IC1VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
|
|
; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
|
|
; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
|
|
; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
|
|
; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; IC1VF4: [[EXIT]]:
|
|
; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC1VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
|
|
; IC4VF4-SAME: i16 noundef [[VAL:%.*]]) {
|
|
; IC4VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC4VF4: [[VECTOR_PH]]:
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
|
|
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF4: [[VECTOR_BODY]]:
|
|
; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
|
|
; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
|
|
; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
|
|
; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
|
|
; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
|
|
; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
|
|
; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
|
|
; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
|
|
; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
|
|
; IC4VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
|
|
; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
|
|
; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
|
|
; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
|
|
; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
|
|
; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF]]:
|
|
; IC4VF4-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0
|
|
; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]]
|
|
; IC4VF4-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
|
|
; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE]]:
|
|
; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
|
|
; IC4VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
|
|
; IC4VF4-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF15]]:
|
|
; IC4VF4-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1
|
|
; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]]
|
|
; IC4VF4-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 1
|
|
; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP13]], i32 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE16]]:
|
|
; IC4VF4-NEXT: [[TMP15:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ]
|
|
; IC4VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
|
|
; IC4VF4-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF17]]:
|
|
; IC4VF4-NEXT: [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2
|
|
; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]]
|
|
; IC4VF4-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP18]], align 1
|
|
; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP19]], i32 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE18]]:
|
|
; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
|
|
; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
|
|
; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF19]]:
|
|
; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
|
|
; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
|
|
; IC4VF4-NEXT: [[TMP25:%.*]] = load i16, ptr [[TMP24]], align 1
|
|
; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP25]], i32 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE20]]:
|
|
; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x i16> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
|
|
; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
|
|
; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF21]]:
|
|
; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
|
|
; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
|
|
; IC4VF4-NEXT: [[TMP31:%.*]] = load i16, ptr [[TMP30]], align 1
|
|
; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i32 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE22]]:
|
|
; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
|
|
; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
|
|
; IC4VF4-NEXT: br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF23]]:
|
|
; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
|
|
; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
|
|
; IC4VF4-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP36]], align 1
|
|
; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP37]], i32 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE24]]:
|
|
; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x i16> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
|
|
; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
|
|
; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF25]]:
|
|
; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
|
|
; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
|
|
; IC4VF4-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP42]], align 1
|
|
; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP43]], i32 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE26]]:
|
|
; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x i16> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
|
|
; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
|
|
; IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF27]]:
|
|
; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
|
|
; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
|
|
; IC4VF4-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP48]], align 1
|
|
; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x i16> [[TMP45]], i16 [[TMP49]], i32 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE28]]:
|
|
; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x i16> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
|
|
; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
|
|
; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF29]]:
|
|
; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
|
|
; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
|
|
; IC4VF4-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP54]], align 1
|
|
; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x i16> poison, i16 [[TMP55]], i32 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE30]]:
|
|
; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
|
|
; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
|
|
; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF31]]:
|
|
; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
|
|
; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
|
|
; IC4VF4-NEXT: [[TMP61:%.*]] = load i16, ptr [[TMP60]], align 1
|
|
; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x i16> [[TMP57]], i16 [[TMP61]], i32 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE32]]:
|
|
; IC4VF4-NEXT: [[TMP63:%.*]] = phi <4 x i16> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
|
|
; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
|
|
; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF33]]:
|
|
; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
|
|
; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
|
|
; IC4VF4-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP66]], align 1
|
|
; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement <4 x i16> [[TMP63]], i16 [[TMP67]], i32 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE34]]:
|
|
; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x i16> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
|
|
; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
|
|
; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF35]]:
|
|
; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
|
|
; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
|
|
; IC4VF4-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP72]], align 1
|
|
; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x i16> [[TMP69]], i16 [[TMP73]], i32 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE36]]:
|
|
; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x i16> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
|
|
; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
|
|
; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF37]]:
|
|
; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
|
|
; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
|
|
; IC4VF4-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP78]], align 1
|
|
; IC4VF4-NEXT: [[TMP80:%.*]] = insertelement <4 x i16> poison, i16 [[TMP79]], i32 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE38]]:
|
|
; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
|
|
; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
|
|
; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF39]]:
|
|
; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
|
|
; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
|
|
; IC4VF4-NEXT: [[TMP85:%.*]] = load i16, ptr [[TMP84]], align 1
|
|
; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x i16> [[TMP81]], i16 [[TMP85]], i32 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE40]]:
|
|
; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x i16> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
|
|
; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
|
|
; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF41]]:
|
|
; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
|
|
; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]]
|
|
; IC4VF4-NEXT: [[TMP91:%.*]] = load i16, ptr [[TMP90]], align 1
|
|
; IC4VF4-NEXT: [[TMP92:%.*]] = insertelement <4 x i16> [[TMP87]], i16 [[TMP91]], i32 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE42]]:
|
|
; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x i16> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ]
|
|
; IC4VF4-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
|
|
; IC4VF4-NEXT: br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]]
|
|
; IC4VF4: [[PRED_LOAD_IF43]]:
|
|
; IC4VF4-NEXT: [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15
|
|
; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]]
|
|
; IC4VF4-NEXT: [[TMP97:%.*]] = load i16, ptr [[TMP96]], align 1
|
|
; IC4VF4-NEXT: [[TMP98:%.*]] = insertelement <4 x i16> [[TMP93]], i16 [[TMP97]], i32 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE44]]:
|
|
; IC4VF4-NEXT: [[TMP99:%.*]] = phi <4 x i16> [ [[TMP93]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP98]], %[[PRED_LOAD_IF43]] ]
|
|
; IC4VF4-NEXT: [[TMP100:%.*]] = icmp ugt <4 x i16> [[TMP27]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP101:%.*]] = icmp ugt <4 x i16> [[TMP51]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP102:%.*]] = icmp ugt <4 x i16> [[TMP75]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP103:%.*]] = icmp ugt <4 x i16> [[TMP99]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
|
|
; IC4VF4-NEXT: [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1)
|
|
; IC4VF4-NEXT: [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1)
|
|
; IC4VF4-NEXT: [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1)
|
|
; IC4VF4-NEXT: [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]]
|
|
; IC4VF4-NEXT: [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]]
|
|
; IC4VF4-NEXT: [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]]
|
|
; IC4VF4-NEXT: [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]]
|
|
; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]]
|
|
; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
|
|
; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
|
|
; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
|
|
; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
|
|
; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
|
|
; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; IC4VF4: [[MIDDLE_BLOCK]]:
|
|
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]])
|
|
; IC4VF4-NEXT: [[TMP116:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[RDX_MINMAX46]])
|
|
; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767
|
|
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0
|
|
; IC4VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF4: [[SCALAR_PH]]:
|
|
; IC4VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF4: [[LOOP]]:
|
|
; IC4VF4-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
|
|
; IC4VF4-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
|
|
; IC4VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
|
|
; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
|
|
; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
|
|
; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
|
|
; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; IC4VF4: [[EXIT]]:
|
|
; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC4VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
|
|
; IC4VF1-SAME: i16 noundef [[VAL:%.*]]) {
|
|
; IC4VF1-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC4VF1: [[VECTOR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF1: [[VECTOR_BODY]]:
|
|
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
|
|
; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
|
|
; IC4VF1-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], -2
|
|
; IC4VF1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], -3
|
|
; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
|
|
; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP0]]
|
|
; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP1]]
|
|
; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP2]]
|
|
; IC4VF1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP3]], align 1
|
|
; IC4VF1-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 1
|
|
; IC4VF1-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP5]], align 1
|
|
; IC4VF1-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
|
|
; IC4VF1-NEXT: [[TMP11:%.*]] = icmp ugt i16 [[TMP7]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP12:%.*]] = icmp ugt i16 [[TMP8]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP13:%.*]] = icmp ugt i16 [[TMP9]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP14:%.*]] = icmp ugt i16 [[TMP10]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP15:%.*]] = add nsw i16 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP16:%.*]] = add nsw i16 [[TMP0]], -1
|
|
; IC4VF1-NEXT: [[TMP17:%.*]] = add nsw i16 [[TMP1]], -1
|
|
; IC4VF1-NEXT: [[TMP18:%.*]] = add nsw i16 [[TMP2]], -1
|
|
; IC4VF1-NEXT: [[TMP19]] = select i1 [[TMP11]], i16 [[TMP15]], i16 [[VEC_PHI]]
|
|
; IC4VF1-NEXT: [[TMP20]] = select i1 [[TMP12]], i16 [[TMP16]], i16 [[VEC_PHI1]]
|
|
; IC4VF1-NEXT: [[TMP21]] = select i1 [[TMP13]], i16 [[TMP17]], i16 [[VEC_PHI2]]
|
|
; IC4VF1-NEXT: [[TMP22]] = select i1 [[TMP14]], i16 [[TMP18]], i16 [[VEC_PHI3]]
|
|
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
|
|
; IC4VF1-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
|
|
; IC4VF1-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; IC4VF1: [[MIDDLE_BLOCK]]:
|
|
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP19]], i16 [[TMP20]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX]], i16 [[TMP21]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX4]], i16 [[TMP22]])
|
|
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767
|
|
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0
|
|
; IC4VF1-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF1: [[SCALAR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF1: [[LOOP]]:
|
|
; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
|
|
; IC4VF1-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
|
|
; IC4VF1-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
|
|
; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
|
|
; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
|
|
; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
|
|
; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; IC4VF1: [[EXIT]]:
|
|
; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC4VF1-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop: ; preds = %entry, %loop
|
|
%iv = phi i16 [ 12, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i16 [ 0, %entry ], [ %spec.select, %loop ]
|
|
%gep.table.iv = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 %iv
|
|
%ld.table = load i16, ptr %gep.table.iv, align 1
|
|
%cmp.table.val = icmp ugt i16 %ld.table, %val
|
|
%iv.next = add nsw i16 %iv, -1
|
|
%spec.select = select i1 %cmp.table.val, i16 %iv.next, i16 %rdx
|
|
%exit.cond = icmp eq i16 %iv.next, 0
|
|
br i1 %exit.cond, label %exit, label %loop
|
|
|
|
exit: ; preds = %loop
|
|
%spec.select.lcssa = phi i16 [ %spec.select, %loop ]
|
|
ret i16 %spec.select.lcssa
|
|
}
|
|
|
|
; Constant table of 13 half-precision values in ascending order (10.0 up to
; 10000.0), declared with 1-byte alignment.
; NOTE(review): the autogenerated assertions for
; @select_decreasing_induction_icmp_table_half below address their half loads
; through `[13 x i16], ptr @table`, not through @tablef. Confirm against that
; function's IR body whether @tablef was meant to be indexed there.
@tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1
|
|
|
|
define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) {
|
|
; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
|
|
; IC1VF4-SAME: half noundef [[VAL:%.*]]) {
|
|
; IC1VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC1VF4: [[VECTOR_PH]]:
|
|
; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0
|
|
; IC1VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer
|
|
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC1VF4: [[VECTOR_BODY]]:
|
|
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
|
|
; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
|
|
; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
|
|
; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 0
|
|
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i32 -3
|
|
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x half>, ptr [[TMP2]], align 1
|
|
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x half> [[WIDE_LOAD]], <4 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC1VF4-NEXT: [[TMP3:%.*]] = fcmp ugt <4 x half> [[REVERSE]], [[BROADCAST_SPLAT]]
|
|
; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
|
|
; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]]
|
|
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
|
|
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
|
|
; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
|
|
; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; IC1VF4: [[MIDDLE_BLOCK]]:
|
|
; IC1VF4-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]])
|
|
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767
|
|
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0
|
|
; IC1VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC1VF4: [[SCALAR_PH]]:
|
|
; IC1VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC1VF4: [[LOOP]]:
|
|
; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
|
|
; IC1VF4-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
|
|
; IC1VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
|
|
; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
|
|
; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
|
|
; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
|
|
; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; IC1VF4: [[EXIT]]:
|
|
; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC1VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
|
|
; IC4VF4-SAME: half noundef [[VAL:%.*]]) {
|
|
; IC4VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC4VF4: [[VECTOR_PH]]:
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer
|
|
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF4: [[VECTOR_BODY]]:
|
|
; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
|
|
; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
|
|
; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
|
|
; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
|
|
; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
|
|
; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
|
|
; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
|
|
; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
|
|
; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
|
|
; IC4VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
|
|
; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
|
|
; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
|
|
; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
|
|
; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
|
|
; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF]]:
|
|
; IC4VF4-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0
|
|
; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]]
|
|
; IC4VF4-NEXT: [[TMP7:%.*]] = load half, ptr [[TMP6]], align 1
|
|
; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x half> poison, half [[TMP7]], i32 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE]]:
|
|
; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x half> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
|
|
; IC4VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
|
|
; IC4VF4-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF15]]:
|
|
; IC4VF4-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1
|
|
; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]]
|
|
; IC4VF4-NEXT: [[TMP13:%.*]] = load half, ptr [[TMP12]], align 1
|
|
; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x half> [[TMP9]], half [[TMP13]], i32 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE16]]:
|
|
; IC4VF4-NEXT: [[TMP15:%.*]] = phi <4 x half> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ]
|
|
; IC4VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
|
|
; IC4VF4-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF17]]:
|
|
; IC4VF4-NEXT: [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2
|
|
; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]]
|
|
; IC4VF4-NEXT: [[TMP19:%.*]] = load half, ptr [[TMP18]], align 1
|
|
; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x half> [[TMP15]], half [[TMP19]], i32 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE18]]:
|
|
; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x half> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
|
|
; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
|
|
; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF19]]:
|
|
; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
|
|
; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
|
|
; IC4VF4-NEXT: [[TMP25:%.*]] = load half, ptr [[TMP24]], align 1
|
|
; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x half> [[TMP21]], half [[TMP25]], i32 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE20]]:
|
|
; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x half> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
|
|
; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
|
|
; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF21]]:
|
|
; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
|
|
; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
|
|
; IC4VF4-NEXT: [[TMP31:%.*]] = load half, ptr [[TMP30]], align 1
|
|
; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x half> poison, half [[TMP31]], i32 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE22]]:
|
|
; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
|
|
; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
|
|
; IC4VF4-NEXT: br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF23]]:
|
|
; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
|
|
; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
|
|
; IC4VF4-NEXT: [[TMP37:%.*]] = load half, ptr [[TMP36]], align 1
|
|
; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x half> [[TMP33]], half [[TMP37]], i32 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE24]]:
|
|
; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x half> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
|
|
; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
|
|
; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF25]]:
|
|
; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
|
|
; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
|
|
; IC4VF4-NEXT: [[TMP43:%.*]] = load half, ptr [[TMP42]], align 1
|
|
; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x half> [[TMP39]], half [[TMP43]], i32 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE26]]:
|
|
; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x half> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
|
|
; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
|
|
; IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF27]]:
|
|
; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
|
|
; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
|
|
; IC4VF4-NEXT: [[TMP49:%.*]] = load half, ptr [[TMP48]], align 1
|
|
; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x half> [[TMP45]], half [[TMP49]], i32 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE28]]:
|
|
; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x half> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
|
|
; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
|
|
; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF29]]:
|
|
; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
|
|
; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
|
|
; IC4VF4-NEXT: [[TMP55:%.*]] = load half, ptr [[TMP54]], align 1
|
|
; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x half> poison, half [[TMP55]], i32 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE30]]:
|
|
; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
|
|
; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
|
|
; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF31]]:
|
|
; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
|
|
; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
|
|
; IC4VF4-NEXT: [[TMP61:%.*]] = load half, ptr [[TMP60]], align 1
|
|
; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x half> [[TMP57]], half [[TMP61]], i32 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE32]]:
|
|
; IC4VF4-NEXT: [[TMP63:%.*]] = phi <4 x half> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
|
|
; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
|
|
; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF33]]:
|
|
; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
|
|
; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
|
|
; IC4VF4-NEXT: [[TMP67:%.*]] = load half, ptr [[TMP66]], align 1
|
|
; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement <4 x half> [[TMP63]], half [[TMP67]], i32 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE34]]:
|
|
; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x half> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
|
|
; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
|
|
; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF35]]:
|
|
; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
|
|
; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
|
|
; IC4VF4-NEXT: [[TMP73:%.*]] = load half, ptr [[TMP72]], align 1
|
|
; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x half> [[TMP69]], half [[TMP73]], i32 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE36]]:
|
|
; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x half> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
|
|
; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
|
|
; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF37]]:
|
|
; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
|
|
; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
|
|
; IC4VF4-NEXT: [[TMP79:%.*]] = load half, ptr [[TMP78]], align 1
|
|
; IC4VF4-NEXT: [[TMP80:%.*]] = insertelement <4 x half> poison, half [[TMP79]], i32 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE38]]:
|
|
; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
|
|
; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
|
|
; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF39]]:
|
|
; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
|
|
; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
|
|
; IC4VF4-NEXT: [[TMP85:%.*]] = load half, ptr [[TMP84]], align 1
|
|
; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x half> [[TMP81]], half [[TMP85]], i32 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE40]]:
|
|
; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x half> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
|
|
; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
|
|
; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF41]]:
|
|
; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
|
|
; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]]
|
|
; IC4VF4-NEXT: [[TMP91:%.*]] = load half, ptr [[TMP90]], align 1
|
|
; IC4VF4-NEXT: [[TMP92:%.*]] = insertelement <4 x half> [[TMP87]], half [[TMP91]], i32 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE42]]:
|
|
; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x half> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ]
|
|
; IC4VF4-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
|
|
; IC4VF4-NEXT: br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]]
|
|
; IC4VF4: [[PRED_LOAD_IF43]]:
|
|
; IC4VF4-NEXT: [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15
|
|
; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]]
|
|
; IC4VF4-NEXT: [[TMP97:%.*]] = load half, ptr [[TMP96]], align 1
|
|
; IC4VF4-NEXT: [[TMP98:%.*]] = insertelement <4 x half> [[TMP93]], half [[TMP97]], i32 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE44]]:
|
|
; IC4VF4-NEXT: [[TMP99:%.*]] = phi <4 x half> [ [[TMP93]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP98]], %[[PRED_LOAD_IF43]] ]
|
|
; IC4VF4-NEXT: [[TMP100:%.*]] = fcmp ugt <4 x half> [[TMP27]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP101:%.*]] = fcmp ugt <4 x half> [[TMP51]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP102:%.*]] = fcmp ugt <4 x half> [[TMP75]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP103:%.*]] = fcmp ugt <4 x half> [[TMP99]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
|
|
; IC4VF4-NEXT: [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1)
|
|
; IC4VF4-NEXT: [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1)
|
|
; IC4VF4-NEXT: [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1)
|
|
; IC4VF4-NEXT: [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]]
|
|
; IC4VF4-NEXT: [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]]
|
|
; IC4VF4-NEXT: [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]]
|
|
; IC4VF4-NEXT: [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]]
|
|
; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]]
|
|
; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
|
|
; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
|
|
; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
|
|
; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
|
|
; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
|
|
; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; IC4VF4: [[MIDDLE_BLOCK]]:
|
|
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]])
|
|
; IC4VF4-NEXT: [[TMP116:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[RDX_MINMAX46]])
|
|
; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767
|
|
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0
|
|
; IC4VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF4: [[SCALAR_PH]]:
|
|
; IC4VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF4: [[LOOP]]:
|
|
; IC4VF4-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
|
|
; IC4VF4-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
|
|
; IC4VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
|
|
; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
|
|
; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
|
|
; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
|
|
; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; IC4VF4: [[EXIT]]:
|
|
; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC4VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
|
|
; IC4VF1-SAME: half noundef [[VAL:%.*]]) {
|
|
; IC4VF1-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC4VF1: [[VECTOR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF1: [[VECTOR_BODY]]:
|
|
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
|
|
; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
|
|
; IC4VF1-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], -2
|
|
; IC4VF1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], -3
|
|
; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
|
|
; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP0]]
|
|
; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP1]]
|
|
; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP2]]
|
|
; IC4VF1-NEXT: [[TMP7:%.*]] = load half, ptr [[TMP3]], align 1
|
|
; IC4VF1-NEXT: [[TMP8:%.*]] = load half, ptr [[TMP4]], align 1
|
|
; IC4VF1-NEXT: [[TMP9:%.*]] = load half, ptr [[TMP5]], align 1
|
|
; IC4VF1-NEXT: [[TMP10:%.*]] = load half, ptr [[TMP6]], align 1
|
|
; IC4VF1-NEXT: [[TMP11:%.*]] = fcmp ugt half [[TMP7]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP12:%.*]] = fcmp ugt half [[TMP8]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP13:%.*]] = fcmp ugt half [[TMP9]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP14:%.*]] = fcmp ugt half [[TMP10]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP15:%.*]] = add nsw i16 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP16:%.*]] = add nsw i16 [[TMP0]], -1
|
|
; IC4VF1-NEXT: [[TMP17:%.*]] = add nsw i16 [[TMP1]], -1
|
|
; IC4VF1-NEXT: [[TMP18:%.*]] = add nsw i16 [[TMP2]], -1
|
|
; IC4VF1-NEXT: [[TMP19]] = select i1 [[TMP11]], i16 [[TMP15]], i16 [[VEC_PHI]]
|
|
; IC4VF1-NEXT: [[TMP20]] = select i1 [[TMP12]], i16 [[TMP16]], i16 [[VEC_PHI1]]
|
|
; IC4VF1-NEXT: [[TMP21]] = select i1 [[TMP13]], i16 [[TMP17]], i16 [[VEC_PHI2]]
|
|
; IC4VF1-NEXT: [[TMP22]] = select i1 [[TMP14]], i16 [[TMP18]], i16 [[VEC_PHI3]]
|
|
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
|
|
; IC4VF1-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
|
|
; IC4VF1-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; IC4VF1: [[MIDDLE_BLOCK]]:
|
|
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP19]], i16 [[TMP20]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX]], i16 [[TMP21]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX4]], i16 [[TMP22]])
|
|
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767
|
|
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0
|
|
; IC4VF1-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF1: [[SCALAR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF1: [[LOOP]]:
|
|
; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
|
|
; IC4VF1-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
|
|
; IC4VF1-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
|
|
; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
|
|
; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
|
|
; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
|
|
; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; IC4VF1: [[EXIT]]:
|
|
; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC4VF1-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop: ; preds = %entry, %loop
|
|
%iv = phi i16 [ 12, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i16 [ 0, %entry ], [ %spec.select, %loop ]
|
|
%gep.table.iv = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 %iv
|
|
%ld.table = load half, ptr %gep.table.iv, align 1
|
|
%cmp.table.val = fcmp ugt half %ld.table, %val
|
|
%iv.next = add nsw i16 %iv, -1
|
|
%spec.select = select i1 %cmp.table.val, i16 %iv.next, i16 %rdx
|
|
%exit.cond = icmp eq i16 %iv.next, 0
|
|
br i1 %exit.cond, label %exit, label %loop
|
|
|
|
exit: ; preds = %loop
|
|
%spec.select.lcssa = phi i16 [ %spec.select, %loop ]
|
|
ret i16 %spec.select.lcssa
|
|
}
|
|
|
|
; The signed sentinel value for decreasing-IV vectorization is LONG_MAX, and since
|
|
; the IV hits this value with smin vectorization, it needs to be vectorized with
|
|
; an unsigned sentinel and umin instead.
|
|
; Loop under test: walks %a from index 9223372036854775807 down to 0 and
; returns the value of %iv from the last iteration in which a[%iv] > 3 (signed),
; i.e. the smallest matching index, or 331 if no element matches.
; The autogenerated assertions below expect, for all three run lines, a umin
; reduction whose sentinel is the all-ones value (-1).
define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) {
|
|
; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned(
|
|
; IC1VF4-SAME: ptr [[A:%.*]]) {
|
|
; IC1VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC1VF4: [[VECTOR_PH]]:
|
|
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC1VF4: [[VECTOR_BODY]]:
|
|
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 9223372036854775807, i64 9223372036854775806, i64 9223372036854775805, i64 9223372036854775804>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
|
|
; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
|
; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0
|
|
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3
|
|
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
|
|
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
|
|
; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
|
|
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
|
|
; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
|
|
; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; IC1VF4: [[MIDDLE_BLOCK]]:
|
|
; IC1VF4-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP4]])
|
|
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -1
|
|
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331
|
|
; IC1VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC1VF4: [[SCALAR_PH]]:
|
|
; IC1VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC1VF4: [[LOOP]]:
|
|
; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
|
|
; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
|
|
; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
|
|
; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
|
|
; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
|
|
; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; IC1VF4: [[EXIT]]:
|
|
; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned(
|
|
; IC4VF4-SAME: ptr [[A:%.*]]) {
|
|
; IC4VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC4VF4: [[VECTOR_PH]]:
|
|
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF4: [[VECTOR_BODY]]:
|
|
; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 9223372036854775807, i64 9223372036854775806, i64 9223372036854775805, i64 9223372036854775804>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4)
|
|
; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
|
|
; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
|
; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0
|
|
; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3
|
|
; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4
|
|
; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3
|
|
; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8
|
|
; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3
|
|
; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12
|
|
; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3
|
|
; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD6]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD8]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i64> [[REVERSE5]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE7]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[REVERSE9]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
|
|
; IC4VF4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
|
|
; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
|
|
; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
|
|
; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
|
|
; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4)
|
|
; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
|
|
; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; IC4VF4: [[MIDDLE_BLOCK]]:
|
|
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP15]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX10]], <4 x i64> [[TMP16]])
|
|
; IC4VF4-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[RDX_MINMAX11]])
|
|
; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP18]], -1
|
|
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331
|
|
; IC4VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF4: [[SCALAR_PH]]:
|
|
; IC4VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF4: [[LOOP]]:
|
|
; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
|
|
; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
|
|
; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
|
|
; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
|
|
; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
|
|
; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; IC4VF4: [[EXIT]]:
|
|
; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned(
|
|
; IC4VF1-SAME: ptr [[A:%.*]]) {
|
|
; IC4VF1-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC4VF1: [[VECTOR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF1: [[VECTOR_BODY]]:
|
|
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
|
|
; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2
|
|
; IC4VF1-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3
|
|
; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
|
; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
|
|
; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
|
|
; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
|
|
; IC4VF1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP3]], align 8
|
|
; IC4VF1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
|
|
; IC4VF1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
|
|
; IC4VF1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
|
|
; IC4VF1-NEXT: [[TMP11:%.*]] = icmp sgt i64 [[TMP7]], 3
|
|
; IC4VF1-NEXT: [[TMP12:%.*]] = icmp sgt i64 [[TMP8]], 3
|
|
; IC4VF1-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[TMP9]], 3
|
|
; IC4VF1-NEXT: [[TMP14:%.*]] = icmp sgt i64 [[TMP10]], 3
|
|
; IC4VF1-NEXT: [[TMP15]] = select i1 [[TMP11]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]]
|
|
; IC4VF1-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[TMP0]], i64 [[VEC_PHI1]]
|
|
; IC4VF1-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI2]]
|
|
; IC4VF1-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI3]]
|
|
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; IC4VF1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
|
|
; IC4VF1-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; IC4VF1: [[MIDDLE_BLOCK]]:
|
|
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP15]], i64 [[TMP16]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX]], i64 [[TMP17]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP18]])
|
|
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -1
|
|
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331
|
|
; IC4VF1-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF1: [[SCALAR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF1: [[LOOP]]:
|
|
; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
|
|
; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
|
|
; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
|
|
; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
|
|
; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
|
|
; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; IC4VF1: [[EXIT]]:
|
|
; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop: ; preds = %entry, %loop
|
|
; %iv starts at 9223372036854775807, the largest signed i64 value, and is
; decremented by 1 each iteration.
%iv = phi i64 [ 9223372036854775807, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i64 [ 331, %entry ], [ %spec.select, %loop ]
|
|
%gep.a.iv = getelementptr inbounds i64, ptr %a, i64 %iv
|
|
%ld.a = load i64, ptr %gep.a.iv, align 8
|
|
%cmp.a.3 = icmp sgt i64 %ld.a, 3
|
|
; The reduction records %iv itself (not %iv.next), so the start value of %iv
; is one of the values that can be selected.
%spec.select = select i1 %cmp.a.3, i64 %iv, i64 %rdx
|
|
%iv.next = add nsw i64 %iv, -1
|
|
; The exit test reads %iv before the decrement, so the iteration with
; %iv == 0 is still executed.
%exit.cond = icmp eq i64 %iv, 0
|
|
br i1 %exit.cond, label %exit, label %loop
|
|
|
|
exit: ; preds = %loop
|
|
ret i64 %spec.select
|
|
}
|
|
|
|
; The unsigned sentinel value for decreasing-IV vectorization is ULONG_MAX,
|
|
; and since the IV hits this value, it is impossible to vectorize this case.
|
|
; In this test, %iv's range will include both signed and unsigned
|
|
; maximum (sentinel) values.
|
|
; Loop under test: %iv starts at -1 and is decremented each iteration; the
; decremented value indexes %a and %b and is selected into the reduction when
; a[i] > b[i] (signed i8 compare). The assertions below expect the scalar loop
; to be left unchanged (no vector blocks are matched).
define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr %a, ptr %b, i64 %rdx.start) {
|
|
; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
|
|
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
|
|
; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
|
|
; CHECK-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
|
|
; CHECK-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
|
|
; CHECK-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
|
|
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
|
|
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i64 [[COND_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
; %iv starts at -1, i.e. the i64 value with all bits set.
%iv = phi i64 [ -1, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i64 [ %rdx.start, %entry ], [ %cond, %loop ]
|
|
; Plain add with no nsw/nuw flags; %iv.next (not %iv) is what the loads and
; the select below use.
%iv.next = add i64 %iv, -1
|
|
%gep.a.iv = getelementptr inbounds i8, ptr %a, i64 %iv.next
|
|
%ld.a = load i8, ptr %gep.a.iv, align 1
|
|
%gep.b.iv = getelementptr inbounds i8, ptr %b, i64 %iv.next
|
|
%ld.b = load i8, ptr %gep.b.iv, align 1
|
|
%cmp.a.b = icmp sgt i8 %ld.a, %ld.b
|
|
%cond = select i1 %cmp.a.b, i64 %iv.next, i64 %rdx
|
|
; Exit once the decremented IV reaches 0.
%exit.cond = icmp eq i64 %iv.next, 0
|
|
br i1 %exit.cond, label %exit, label %loop
|
|
|
|
exit:
|
|
ret i64 %cond
|
|
}
|
|
|
|
; Loop under test: a decreasing induction variable whose start value is the
; runtime argument %n rather than a constant. The decremented IV indexes %a and
; %b and is selected into the reduction when a[i] > b[i] (signed i64 compare).
; The assertions below expect the scalar loop to be left unchanged (no vector
; blocks are matched).
; NOTE(review): presumably this is rejected because the IV range cannot be
; checked against a sentinel when the start is not constant - confirm against
; the vectorizer's legality code.
define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
|
|
; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
|
|
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
|
|
; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
|
|
; CHECK-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
|
|
; CHECK-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
|
|
; CHECK-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
|
|
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
|
|
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i64 [[COND_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop: ; preds = %entry, %loop
|
|
; Incoming values are listed back-edge first; the start value is the
; argument %n.
%iv = phi i64 [ %iv.next, %loop ], [ %n, %entry ]
|
|
%rdx = phi i64 [ %cond, %loop ], [ %rdx.start, %entry ]
|
|
%iv.next = add nsw i64 %iv, -1
|
|
%gep.a.iv = getelementptr inbounds i64, ptr %a, i64 %iv.next
|
|
%ld.a = load i64, ptr %gep.a.iv, align 8
|
|
%gep.b.iv = getelementptr inbounds i64, ptr %b, i64 %iv.next
|
|
%ld.b = load i64, ptr %gep.b.iv, align 8
|
|
%cmp.a.b = icmp sgt i64 %ld.a, %ld.b
|
|
%cond = select i1 %cmp.a.b, i64 %iv.next, i64 %rdx
|
|
; The back-edge is taken while %iv > 1 (unsigned compare), so the branch
; below lists %loop as the true target and %exit as the false target.
%exit.cond = icmp ugt i64 %iv, 1
|
|
br i1 %exit.cond, label %loop, label %exit
|
|
|
|
exit: ; preds = %loop
|
|
ret i64 %cond
|
|
}
|