No need to schedule entry nodes in which none of the instructions are memory read/write instructions and whose operands are either constants, arguments, phis, or instructions from other blocks, or whose users are phis or are in other blocks. The resulting vector instructions can be placed at the beginning of the basic block without scheduling (if the operands do not need to be scheduled) or at the end of the block (if the users are outside of the block). This may save some compile time and scheduling resources. Differential Revision: https://reviews.llvm.org/D121121
164 lines
9.0 KiB
LLVM
164 lines
9.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; The three opt invocations below all run the SLP vectorizer and are verified
; with separate FileCheck prefixes (DEFAULT, GATHER and MAX-COST). They differ
; only in the -slp-schedule-budget, -slp-min-tree-size and -slp-threshold
; options that are passed.
; NOTE(review): the exact effect of these options is defined by the SLP
; vectorizer pass, not by this file - confirm against the pass before relying
; on the prefix names as a description of what each configuration exercises.
|
|
; RUN: opt < %s -slp-vectorizer -S | FileCheck %s --check-prefix=DEFAULT
|
|
; RUN: opt < %s -slp-schedule-budget=0 -slp-min-tree-size=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=GATHER
|
|
; RUN: opt < %s -slp-schedule-budget=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=MAX-COST
|
|
|
|
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
target triple = "aarch64--linux-gnu"
|
|
|
|
; 80-byte zero-initialized global, 16-byte aligned. The functions in this file
; load the bytes at indices 1 through 8 from it.
@a = common global [80 x i8] zeroinitializer, align 16
|
|
|
|
; Add-reduction over eight selects whose conditions come from the entry block.
;
; The entry block loads the bytes at indices 1..8 of @a and compares each one
; with zero. The loop body maps every comparison result to -720 or -80 and adds
; the eight values, in order, onto the loop-carried phi %p17; the final sum
; %p34 is fed back into that phi.
;
; The loop has no exit (it branches back to itself unconditionally) and the %n
; argument is not used.
;
; All three check prefixes expect the same output: a single <8 x i8> load and a
; single vector icmp in the entry block, and in the loop a vector select that
; feeds llvm.vector.reduce.add followed by one scalar add with the phi.
; NOTE(review): the function name presumably refers to LLVM bug report 28330 -
; confirm.
define void @PR28330(i32 %n) {
|
|
; DEFAULT-LABEL: @PR28330(
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
|
|
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
|
|
; DEFAULT: for.body:
|
|
; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
|
|
; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
|
|
; DEFAULT-NEXT: br label [[FOR_BODY]]
|
|
;
|
|
; GATHER-LABEL: @PR28330(
|
|
; GATHER-NEXT: entry:
|
|
; GATHER-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
|
|
; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
|
|
; GATHER-NEXT: br label [[FOR_BODY:%.*]]
|
|
; GATHER: for.body:
|
|
; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
|
|
; GATHER-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
|
|
; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
|
|
; GATHER-NEXT: br label [[FOR_BODY]]
|
|
;
|
|
; MAX-COST-LABEL: @PR28330(
|
|
; MAX-COST-NEXT: entry:
|
|
; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
|
|
; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
|
|
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
|
|
; MAX-COST: for.body:
|
|
; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
|
|
; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
|
|
; MAX-COST-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
|
|
; MAX-COST-NEXT: br label [[FOR_BODY]]
|
|
;
|
|
; Scalar input, entry block: eight byte loads from @a at indices 1..8, each
; followed by a compare against zero. The compare results are only used in the
; loop block.
entry:
|
|
%p0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
|
|
%p1 = icmp eq i8 %p0, 0
|
|
%p2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
|
|
%p3 = icmp eq i8 %p2, 0
|
|
%p4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
|
|
%p5 = icmp eq i8 %p4, 0
|
|
%p6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
|
|
%p7 = icmp eq i8 %p6, 0
|
|
%p8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
|
|
%p9 = icmp eq i8 %p8, 0
|
|
%p10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
|
|
%p11 = icmp eq i8 %p10, 0
|
|
%p12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
|
|
%p13 = icmp eq i8 %p12, 0
|
|
%p14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
|
|
%p15 = icmp eq i8 %p14, 0
|
|
br label %for.body
|
|
|
|
|
|
; Scalar input, loop block: a linear chain of adds seeded by the phi %p17.
; Each step adds one select result; the last sum %p34 is the phi's back-edge
; value.
for.body:
|
|
%p17 = phi i32 [ %p34, %for.body ], [ 0, %entry ]
|
|
%p19 = select i1 %p1, i32 -720, i32 -80
|
|
%p20 = add i32 %p17, %p19
|
|
%p21 = select i1 %p3, i32 -720, i32 -80
|
|
%p22 = add i32 %p20, %p21
|
|
%p23 = select i1 %p5, i32 -720, i32 -80
|
|
%p24 = add i32 %p22, %p23
|
|
%p25 = select i1 %p7, i32 -720, i32 -80
|
|
%p26 = add i32 %p24, %p25
|
|
%p27 = select i1 %p9, i32 -720, i32 -80
|
|
%p28 = add i32 %p26, %p27
|
|
%p29 = select i1 %p11, i32 -720, i32 -80
|
|
%p30 = add i32 %p28, %p29
|
|
%p31 = select i1 %p13, i32 -720, i32 -80
|
|
%p32 = add i32 %p30, %p31
|
|
%p33 = select i1 %p15, i32 -720, i32 -80
|
|
%p34 = add i32 %p32, %p33
|
|
br label %for.body
|
|
}
|
|
|
|
; Add-reduction over eight selects where the chain starts from a constant.
;
; The entry block is the same as in @PR28330: it loads the bytes at indices
; 1..8 of @a and compares each one with zero. The loop body differs in one
; place: the first add (%p20) uses the constant -5 instead of the phi %p17, so
; the phi still receives %p34 on the back edge but has no other use in the
; input.
;
; The loop has no exit (it branches back to itself unconditionally) and the %n
; argument is not used.
;
; All three check prefixes expect the same output: a single <8 x i8> load and a
; single vector icmp in the entry block, and in the loop the phi, a vector
; select feeding llvm.vector.reduce.add, and one scalar add of the reduced
; value with -5.
; NOTE(review): the function name presumably refers to LLVM bug report 32038 -
; confirm.
define void @PR32038(i32 %n) {
|
|
; DEFAULT-LABEL: @PR32038(
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
|
|
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
|
|
; DEFAULT: for.body:
|
|
; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
|
|
; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5
|
|
; DEFAULT-NEXT: br label [[FOR_BODY]]
|
|
;
|
|
; GATHER-LABEL: @PR32038(
|
|
; GATHER-NEXT: entry:
|
|
; GATHER-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
|
|
; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
|
|
; GATHER-NEXT: br label [[FOR_BODY:%.*]]
|
|
; GATHER: for.body:
|
|
; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
|
|
; GATHER-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
|
|
; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5
|
|
; GATHER-NEXT: br label [[FOR_BODY]]
|
|
;
|
|
; MAX-COST-LABEL: @PR32038(
|
|
; MAX-COST-NEXT: entry:
|
|
; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
|
|
; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
|
|
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
|
|
; MAX-COST: for.body:
|
|
; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
|
|
; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
|
|
; MAX-COST-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5
|
|
; MAX-COST-NEXT: br label [[FOR_BODY]]
|
|
;
|
|
; Scalar input, entry block: eight byte loads from @a at indices 1..8, each
; followed by a compare against zero. The compare results are only used in the
; loop block.
entry:
|
|
%p0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
|
|
%p1 = icmp eq i8 %p0, 0
|
|
%p2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
|
|
%p3 = icmp eq i8 %p2, 0
|
|
%p4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
|
|
%p5 = icmp eq i8 %p4, 0
|
|
%p6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
|
|
%p7 = icmp eq i8 %p6, 0
|
|
%p8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
|
|
%p9 = icmp eq i8 %p8, 0
|
|
%p10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
|
|
%p11 = icmp eq i8 %p10, 0
|
|
%p12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
|
|
%p13 = icmp eq i8 %p12, 0
|
|
%p14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
|
|
%p15 = icmp eq i8 %p14, 0
|
|
br label %for.body
|
|
|
|
|
|
; Scalar input, loop block: a linear chain of adds seeded by the constant -5
; (see %p20). The phi %p17 is not part of the chain; it only receives %p34 on
; the back edge.
for.body:
|
|
%p17 = phi i32 [ %p34, %for.body ], [ 0, %entry ]
|
|
%p19 = select i1 %p1, i32 -720, i32 -80
|
|
%p20 = add i32 -5, %p19
|
|
%p21 = select i1 %p3, i32 -720, i32 -80
|
|
%p22 = add i32 %p20, %p21
|
|
%p23 = select i1 %p5, i32 -720, i32 -80
|
|
%p24 = add i32 %p22, %p23
|
|
%p25 = select i1 %p7, i32 -720, i32 -80
|
|
%p26 = add i32 %p24, %p25
|
|
%p27 = select i1 %p9, i32 -720, i32 -80
|
|
%p28 = add i32 %p26, %p27
|
|
%p29 = select i1 %p11, i32 -720, i32 -80
|
|
%p30 = add i32 %p28, %p29
|
|
%p31 = select i1 %p13, i32 -720, i32 -80
|
|
%p32 = add i32 %p30, %p31
|
|
%p33 = select i1 %p15, i32 -720, i32 -80
|
|
%p34 = add i32 %p32, %p33
|
|
br label %for.body
|
|
}
|