No need to schedule entry nodes where all instructions are not memory read/write instructions and their operands are either constants, or arguments, or phis, or instructions from other blocks, or their users are phis or from other blocks. The resulting vector instructions can be placed at the beginning of the basic block without scheduling (if the operands do not need to be scheduled) or at the end of the block (if the users are outside of the block). This may save some compile time and scheduling resources. Differential Revision: https://reviews.llvm.org/D121121
54 lines
2.2 KiB
LLVM
54 lines
2.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell | FileCheck %s
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
|
|
|
|
; Test input: `entry` jumps straight into `loop`, which branches back to itself
; unconditionally. Inside the loop, four i64 chains of the form
; `ashr exact (add 1, X), 32` (X = %0+1, %0+2, %0+3 and %fork = %0+0) are summed
; together with the constant 0 and with %fork itself. The autogenerated
; expectations below show these chains becoming <4 x i64> add/ashr operations
; that feed llvm.vector.reduce.add, with the two extra scalar operands added
; afterwards.
; NOTE(review): attribute group #0 is referenced here but is not defined in the
; visible part of the file - confirm whether a definition is needed.
define void @test() #0 {
|
|
; CHECK-LABEL: @test(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA1:%.*]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], <i64 3, i64 2, i64 1, i64 0>
|
|
; CHECK-NEXT: [[TMP3]] = extractelement <4 x i64> [[TMP2]], i32 3
|
|
; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0
|
|
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
|
|
; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP4]], 32
|
|
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP2]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32>
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]])
|
|
; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP7]], 0
|
|
; CHECK-NEXT: [[OP_EXTRA1]] = add i64 [[OP_EXTRA]], [[TMP3]]
|
|
; CHECK-NEXT: br label [[LOOP]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
; Loop-carried value: 1 on entry from %entry, afterwards the final sum %last.
; It has no other users inside this function.
%dummy_phi = phi i64 [ 1, %entry ], [ %last, %loop ]
|
|
; Loop-carried base value: 2 on entry, afterwards %fork (which is %0 + 0).
%0 = phi i64 [ 2, %entry ], [ %fork, %loop ]
|
|
; Chain 1: (%0 + 1) + 1, then exact arithmetic shift right by 32.
%inc1 = add i64 %0, 1
|
|
%inc2 = add i64 %0, 2
|
|
%inc11 = add i64 1, %inc1
|
|
%exact1 = ashr exact i64 %inc11, 32
|
|
%inc3 = add i64 %0, 3
|
|
; Has no users in this function; it is interleaved with the chain instructions.
%dummy_add = add i16 0, 0
|
|
; Chain 2: (%0 + 2) + 1, then exact arithmetic shift right by 32.
%inc12 = add i64 1, %inc2
|
|
%exact2 = ashr exact i64 %inc12, 32
|
|
; Result is unused, but its operand %inc3 also belongs to chain 3, so the
; expected output extracts lane 0 of the vectorized add to feed this shift.
%dummy_shl = shl i64 %inc3, 32
|
|
; Chain 3: (%0 + 3) + 1, then exact arithmetic shift right by 32.
%inc13 = add i64 1, %inc3
|
|
%exact3 = ashr exact i64 %inc13, 32
|
|
; %0 + 0: the base of chain 4, an extra operand of the sum (%join), and the
; value fed back into the %0 phi. The expected output takes it from lane 3.
%fork = add i64 %0, 0
|
|
; Scalar add chain that the expected output turns into a vector reduction.
%sum1 = add i64 %exact3, %exact2
|
|
%sum2 = add i64 %sum1, %exact1
|
|
; Adds the constant 0 in the middle of the sum; the expected output keeps this
; as a scalar add after the reduction call.
%zsum = add i64 %sum2, 0
|
|
; Chain 4: (%fork + 1), then exact arithmetic shift right by 32.
%sext22 = add i64 1, %fork
|
|
%exact4 = ashr exact i64 %sext22, 32
|
|
; Adds %fork itself into the sum as a further scalar operand.
%join = add i64 %fork, %zsum
|
|
; Final sum of the iteration; carried into %dummy_phi on the back edge.
%last = add i64 %join, %exact4
|
|
; Unconditional back edge: the loop has no exit.
br label %loop
|
|
}
|
|
|