This reverts commit e30f9c19464bcf1bf1e9f69b63884fb78ad2d05d. Re-land, now that the reported crash causing the revert has been fixed as part of 77fb84889 (#187504). Original message: Replace manual region dissolution code in simplifyBranchConditionForVFAndUF with using general removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates a (BranchOnCond true) or updates BranchOnTwoConds. The loop then gets automatically removed by running removeBranchOnConst. This removes a bunch of special logic to handle header phi replacements and CFG updates. With the new code, there's no restriction on what kind of header phi recipes the loop contains. Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is technically unrelated, but I could not find an independent test that would be impacted. The code to deal with epilogue resume values now needs updating, because we may simplify a reduction directly to the start value. PR: https://github.com/llvm/llvm-project/pull/181252
454 lines
23 KiB
LLVM
454 lines
23 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF8UF1 %s
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=VF8UF2 %s
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=16 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF16UF1 %s

; Each invocation above forces one vector width / interleave count pair and
; verifies the output under its own FileCheck prefix (the prefix name spells
; out the forced width and interleave count).

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; Check if the vector loop condition can be simplified to true for a given
; VF/IC combination.
; Scalar loop under test: %iv counts up from 0 and one byte of %A is loaded
; per iteration. The loop leaves early with result 0 as soon as a loaded byte
; is zero; otherwise it exits from the latch with result 1 once %iv.next
; reaches 16. %A is marked dereferenceable(16).
;
; Expected output, per the assertions below:
;  * width 8, interleave 1: the vector loop keeps its induction phi and the
;    back-edge guarded by a compare against 16.
;  * width 8, interleave 2 and width 16, interleave 1: the vector body has no
;    phi and branches directly to the early-exit block or the middle block.
define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosync nofree {
; VF8UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
; VF8UF1-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
; VF8UF1-NEXT: [[ENTRY:.*:]]
; VF8UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[P_SRC]], align 1
; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VF8UF1-NEXT: [[TMP2:%.*]] = freeze <8 x i1> [[TMP3]]
; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]])
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; VF8UF1: [[VECTOR_BODY_INTERIM]]:
; VF8UF1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: br label %[[EXIT:.*]]
; VF8UF1: [[VECTOR_EARLY_EXIT]]:
; VF8UF1-NEXT: br label %[[EXIT]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ]
; VF8UF1-NEXT: ret i8 [[RES]]
;
; VF8UF2-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
; VF8UF2-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
; VF8UF2-NEXT: [[ENTRY:.*:]]
; VF8UF2-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF2-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
; VF8UF2-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP1]]
; VF8UF2-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP2]]
; VF8UF2-NEXT: [[TMP3:%.*]] = or <8 x i1> [[TMP6]], [[TMP5]]
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
; VF8UF2: [[MIDDLE_SPLIT]]:
; VF8UF2-NEXT: br label %[[EXIT:.*]]
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
; VF8UF2-NEXT: br label %[[EXIT]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
; VF8UF2-NEXT: ret i8 [[RES]]
;
; VF16UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
; VF16UF1-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
; VF16UF1-NEXT: [[ENTRY:.*:]]
; VF16UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[A]], align 1
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
; VF16UF1-NEXT: [[TMP1:%.*]] = freeze <16 x i1> [[TMP3]]
; VF16UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]])
; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]]
; VF16UF1: [[MIDDLE_SPLIT]]:
; VF16UF1-NEXT: br label %[[EXIT:.*]]
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
; VF16UF1-NEXT: br label %[[EXIT]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
; VF16UF1-NEXT: ret i8 [[RES]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %p.src = getelementptr inbounds i8, ptr %A, i64 %iv
  %l = load i8, ptr %p.src, align 1
  ; Early exit: leave the loop as soon as a zero byte has been loaded.
  %c = icmp eq i8 %l, 0
  br i1 %c, label %exit, label %loop.latch

loop.latch:
  %iv.next = add nsw i64 %iv, 1
  ; Counted exit: taken once %iv.next reaches the constant bound 16.
  %cmp = icmp eq i64 %iv.next, 16
  br i1 %cmp, label %exit, label %loop.header

exit:
  ; 0 when leaving through the early exit, 1 when leaving through the latch.
  %res = phi i8 [ 0, %loop.header ], [ 1, %loop.latch ]
  ret i8 %res
}

; Same loop shape as a byte search over %A with a latch bound of 16, but the
; early-exit edge returns the current %iv (the latch edge returns the
; constant 1), so the induction variable is live outside the loop.
;
; Expected output, per the assertions below: the early-exit block derives the
; exit value from llvm.experimental.cttz.elts on the compare mask.
;  * width 8, interleave 1: the cttz result is added to the loop index.
;  * width 8, interleave 2: a cttz is computed for each of the two parts and a
;    select picks the first part's lane unless its cttz result equals 8, in
;    which case 8 plus the second part's lane is used.
;  * width 16, interleave 1: the cttz result feeds the exit phi directly.
define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr dereferenceable(16) %A) nosync nofree {
; VF8UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
; VF8UF1-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0]] {
; VF8UF1-NEXT: [[ENTRY:.*:]]
; VF8UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[P_SRC]], align 1
; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VF8UF1-NEXT: [[TMP2:%.*]] = freeze <8 x i1> [[TMP3]]
; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]])
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; VF8UF1: [[VECTOR_BODY_INTERIM]]:
; VF8UF1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: br label %[[EXIT:.*]]
; VF8UF1: [[VECTOR_EARLY_EXIT]]:
; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 false)
; VF8UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; VF8UF1-NEXT: br label %[[EXIT]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ]
; VF8UF1-NEXT: ret i64 [[RES]]
;
; VF8UF2-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
; VF8UF2-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0]] {
; VF8UF2-NEXT: [[ENTRY:.*:]]
; VF8UF2-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF2-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
; VF8UF2-NEXT: [[TMP13:%.*]] = freeze <8 x i1> [[TMP1]]
; VF8UF2-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP2]]
; VF8UF2-NEXT: [[TMP3:%.*]] = or <8 x i1> [[TMP13]], [[TMP6]]
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
; VF8UF2: [[MIDDLE_SPLIT]]:
; VF8UF2-NEXT: br label %[[EXIT:.*]]
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
; VF8UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false)
; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 8, [[TMP5]]
; VF8UF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP1]], i1 false)
; VF8UF2-NEXT: [[TMP9:%.*]] = add i64 0, [[TMP8]]
; VF8UF2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP8]], 8
; VF8UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[TMP7]]
; VF8UF2-NEXT: br label %[[EXIT]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
; VF8UF2-NEXT: ret i64 [[RES]]
;
; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
; VF16UF1-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0]] {
; VF16UF1-NEXT: [[ENTRY:.*:]]
; VF16UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[A]], align 1
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
; VF16UF1-NEXT: [[TMP1:%.*]] = freeze <16 x i1> [[TMP3]]
; VF16UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]])
; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]]
; VF16UF1: [[MIDDLE_SPLIT]]:
; VF16UF1-NEXT: br label %[[EXIT:.*]]
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 false)
; VF16UF1-NEXT: br label %[[EXIT]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[FIRST_ACTIVE_LANE]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
; VF16UF1-NEXT: ret i64 [[RES]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %p.src = getelementptr inbounds i8, ptr %A, i64 %iv
  %l = load i8, ptr %p.src, align 1
  ; Early exit: leave the loop as soon as a zero byte has been loaded.
  %c = icmp eq i8 %l, 0
  br i1 %c, label %exit, label %loop.latch

loop.latch:
  %iv.next = add nsw i64 %iv, 1
  ; Counted exit: taken once %iv.next reaches the constant bound 16.
  %cmp = icmp eq i64 %iv.next, 16
  br i1 %cmp, label %exit, label %loop.header

exit:
  ; The early-exit edge carries %iv out of the loop; the latch edge yields 1.
  %res = phi i64 [ %iv, %loop.header ], [ 1, %loop.latch ]
  ret i64 %res
}

; Byte search over %A whose latch exits once %iv.next reaches 17; %A is
; marked dereferenceable(17). The early exit yields 0, the latch exit 1.
;
; Expected output, per the assertions below: in every configuration the
; middle block branches to a scalar preheader and the original scalar loop is
; kept, resuming with %iv starting at 16.
;  * width 8, interleave 1: the vector loop keeps its induction phi and the
;    back-edge guarded by a compare against 16.
;  * width 8, interleave 2 and width 16, interleave 1: the vector body has no
;    phi and branches directly to the early-exit block or the middle block.
define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosync nofree {
; VF8UF1-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16(
; VF8UF1-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] {
; VF8UF1-NEXT: [[ENTRY:.*:]]
; VF8UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; VF8UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF1-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VF8UF1-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP1]]
; VF8UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP5]])
; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; VF8UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; VF8UF1: [[VECTOR_BODY_INTERIM]]:
; VF8UF1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: br label %[[SCALAR_PH:.*]]
; VF8UF1: [[VECTOR_EARLY_EXIT]]:
; VF8UF1-NEXT: br label %[[EXIT:.*]]
; VF8UF1: [[SCALAR_PH]]:
; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]]
; VF8UF1: [[LOOP_HEADER]]:
; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
; VF8UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
; VF8UF1: [[LOOP_LATCH]]:
; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
; VF8UF1-NEXT: ret i8 [[RES]]
;
; VF8UF2-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16(
; VF8UF2-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] {
; VF8UF2-NEXT: [[ENTRY:.*:]]
; VF8UF2-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
; VF8UF2-NEXT: [[TMP7:%.*]] = freeze <8 x i1> [[TMP2]]
; VF8UF2-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP3]]
; VF8UF2-NEXT: [[TMP4:%.*]] = or <8 x i1> [[TMP7]], [[TMP6]]
; VF8UF2-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP4]])
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: br label %[[SCALAR_PH:.*]]
; VF8UF2: [[SCALAR_PH]]:
; VF8UF2-NEXT: br label %[[SCALAR_PH_SPLIT:.*]]
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
; VF8UF2-NEXT: br label %[[EXIT:.*]]
; VF8UF2: [[SCALAR_PH_SPLIT]]:
; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]]
; VF8UF2: [[LOOP_HEADER]]:
; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
; VF8UF2: [[LOOP_LATCH]]:
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
; VF8UF2-NEXT: ret i8 [[RES]]
;
; VF16UF1-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16(
; VF16UF1-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] {
; VF16UF1-NEXT: [[ENTRY:.*:]]
; VF16UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[A]], align 1
; VF16UF1-NEXT: [[TMP1:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
; VF16UF1-NEXT: [[TMP3:%.*]] = freeze <16 x i1> [[TMP1]]
; VF16UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: br label %[[SCALAR_PH:.*]]
; VF16UF1: [[SCALAR_PH]]:
; VF16UF1-NEXT: br label %[[SCALAR_PH_SPLIT:.*]]
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
; VF16UF1-NEXT: br label %[[EXIT:.*]]
; VF16UF1: [[SCALAR_PH_SPLIT]]:
; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]]
; VF16UF1: [[LOOP_HEADER]]:
; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
; VF16UF1: [[LOOP_LATCH]]:
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
; VF16UF1-NEXT: ret i8 [[RES]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %p.src = getelementptr inbounds i8, ptr %A, i64 %iv
  %l = load i8, ptr %p.src, align 1
  ; Early exit: leave the loop as soon as a zero byte has been loaded.
  %c = icmp eq i8 %l, 0
  br i1 %c, label %exit, label %loop.latch

loop.latch:
  %iv.next = add nsw i64 %iv, 1
  ; Counted exit: the bound is 17 here, one more than in the tests whose
  ; latch compares against 16.
  %cmp = icmp eq i64 %iv.next, 17
  br i1 %cmp, label %exit, label %loop.header

exit:
  ; 0 when leaving through the early exit, 1 when leaving through the latch.
  %res = phi i8 [ 0, %loop.header ], [ 1, %loop.latch ]
  ret i8 %res
}

; Byte search over %A whose induction variable starts at 2 rather than 0 and
; whose latch exits once %iv.next reaches 18; %A is marked
; dereferenceable(32). The early exit yields false; the latch exit yields
; %c.2, the comparison of the zero-extended loaded byte against %iv.
;
; Expected output, per the assertions below: the middle block recomputes the
; zext/compare on the wide load and extracts the last lane as the result.
;  * width 8, interleave 1: the vector loop keeps a scalar index phi and a
;    vector induction phi starting at <2, ..., 9>; lane 7 is extracted.
;  * width 8, interleave 2: no phis remain; the second load is compared
;    against the constant <10, ..., 17> and lane 7 is extracted.
;  * width 16, interleave 1: no phis remain; the load is compared against the
;    constant <2, ..., 17> and lane 15 is extracted.
define i1 @test_early_exit_max_tc_less_than_16_non_canonical_iv(ptr dereferenceable(32) %A) nosync nofree {
; VF8UF1-LABEL: define i1 @test_early_exit_max_tc_less_than_16_non_canonical_iv(
; VF8UF1-SAME: ptr dereferenceable(32) [[A:%.*]]) #[[ATTR0]] {
; VF8UF1-NEXT: [[ENTRY:.*:]]
; VF8UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF1: [[VECTOR_PH]]:
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF1: [[VECTOR_BODY]]:
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; VF8UF1-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY_INTERIM]] ]
; VF8UF1-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
; VF8UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VF8UF1-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
; VF8UF1-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP4]])
; VF8UF1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; VF8UF1-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 8)
; VF8UF1-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; VF8UF1: [[VECTOR_BODY_INTERIM]]:
; VF8UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: [[TMP9:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i64>
; VF8UF1-NEXT: [[TMP2:%.*]] = icmp eq <8 x i64> [[TMP9]], [[VEC_IND]]
; VF8UF1-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP2]], i32 7
; VF8UF1-NEXT: br label %[[EXIT:.*]]
; VF8UF1: [[VECTOR_EARLY_EXIT]]:
; VF8UF1-NEXT: br label %[[EXIT]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: [[RES:%.*]] = phi i1 [ false, %[[VECTOR_EARLY_EXIT]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ]
; VF8UF1-NEXT: ret i1 [[RES]]
;
; VF8UF2-LABEL: define i1 @test_early_exit_max_tc_less_than_16_non_canonical_iv(
; VF8UF2-SAME: ptr dereferenceable(32) [[A:%.*]]) #[[ATTR0]] {
; VF8UF2-NEXT: [[ENTRY:.*:]]
; VF8UF2-NEXT: br label %[[VECTOR_PH:.*]]
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
; VF8UF2-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
; VF8UF2-NEXT: [[TMP7:%.*]] = freeze <8 x i1> [[TMP5]]
; VF8UF2-NEXT: [[TMP8:%.*]] = or <8 x i1> [[TMP6]], [[TMP7]]
; VF8UF2-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP8]])
; VF8UF2-NEXT: br i1 [[TMP9]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM:.*]]
; VF8UF2: [[VECTOR_BODY_INTERIM]]:
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[TMP10:%.*]] = zext <8 x i8> [[WIDE_LOAD1]] to <8 x i64>
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i64> [[TMP10]], <i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17>
; VF8UF2-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF2-NEXT: br label %[[EXIT:.*]]
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
; VF8UF2-NEXT: br label %[[EXIT]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: [[RES:%.*]] = phi i1 [ false, %[[VECTOR_EARLY_EXIT]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ]
; VF8UF2-NEXT: ret i1 [[RES]]
;
; VF16UF1-LABEL: define i1 @test_early_exit_max_tc_less_than_16_non_canonical_iv(
; VF16UF1-SAME: ptr dereferenceable(32) [[A:%.*]]) #[[ATTR0]] {
; VF16UF1-NEXT: [[ENTRY:.*:]]
; VF16UF1-NEXT: br label %[[VECTOR_PH:.*]]
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
; VF16UF1-NEXT: [[TMP4:%.*]] = freeze <16 x i1> [[TMP3]]
; VF16UF1-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP4]])
; VF16UF1-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM:.*]]
; VF16UF1: [[VECTOR_BODY_INTERIM]]:
; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i64>
; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq <16 x i64> [[TMP6]], <i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17>
; VF16UF1-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP2]], i32 15
; VF16UF1-NEXT: br label %[[EXIT:.*]]
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
; VF16UF1-NEXT: br label %[[EXIT]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: [[RES:%.*]] = phi i1 [ false, %[[VECTOR_EARLY_EXIT]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ]
; VF16UF1-NEXT: ret i1 [[RES]]
;
entry:
  br label %loop.header

loop.header:
  ; The induction variable starts at 2 instead of 0.
  %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop.latch ]
  %p.src = getelementptr inbounds i8, ptr %A, i64 %iv
  %l = load i8, ptr %p.src, align 1
  %l.ext = zext i8 %l to i64
  ; %c.2 is only consumed by the exit phi, on the edge from the latch.
  %c.2 = icmp eq i64 %l.ext, %iv
  ; Early exit: leave the loop as soon as a zero byte has been loaded.
  %c = icmp eq i8 %l, 0
  br i1 %c, label %exit, label %loop.latch

loop.latch:
  %iv.next = add nsw i64 %iv, 1
  ; Counted exit: taken once %iv.next reaches the constant bound 18.
  %cmp = icmp eq i64 %iv.next, 18
  br i1 %cmp, label %exit, label %loop.header

exit:
  ; false on the early-exit edge, %c.2 of the final iteration on the latch edge.
  %res = phi i1 [ 0, %loop.header ], [ %c.2, %loop.latch ]
  ret i1 %res
}
