
Epilogue vectorization currently relies on the resume phi for the canonical induction being always available, which is why VPPhi are considered to have side-effects, to prevent their removal. This patch adds a new ResumeForEpilogue opcode to mark the resume phi as used for epilogue vectorization. This allows treating VPPhis in general as not having side-effects, enabling removal of unused VPPhis.
122 lines
8.1 KiB
LLVM
122 lines
8.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -passes=loop-vectorize -enable-masked-interleaved-mem-accesses -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-vector-width=8 -S %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-apple-macosx13.0.0"
|
|
|
|
define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 {
|
|
; CHECK-LABEL: @test_pr59090(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 10000)
|
|
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[INDEX]], 6
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[B:%.*]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
|
|
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
|
; CHECK: pred.store.if:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
|
|
; CHECK: pred.store.continue:
|
|
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
|
|
; CHECK: pred.store.if1:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
|
|
; CHECK: pred.store.continue2:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
|
|
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
|
|
; CHECK: pred.store.if3:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
|
|
; CHECK: pred.store.continue4:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
|
|
; CHECK: pred.store.if5:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
|
|
; CHECK: pred.store.continue6:
|
|
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
|
|
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
|
|
; CHECK: pred.store.if7:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
|
|
; CHECK: pred.store.continue8:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
|
|
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
|
|
; CHECK: pred.store.if9:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
|
|
; CHECK: pred.store.continue10:
|
|
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
|
|
; CHECK: pred.store.if11:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
|
|
; CHECK: pred.store.continue12:
|
|
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]]
|
|
; CHECK: pred.store.if13:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
|
|
; CHECK: pred.store.continue14:
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[L_OUT:%.*]], i64 [[TMP2]]
|
|
; CHECK-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
|
; CHECK-NEXT: [[TMP15:%.*]] = and <48 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false>
|
|
; CHECK-NEXT: call void @llvm.masked.store.v48i8.p0(<48 x i8> <i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison>, ptr [[TMP13]], i32 1, <48 x i1> [[TMP15]])
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
|
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10008
|
|
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br label [[EXIT:%.*]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_MUL:%.*]] = mul nuw i64 [[IV]], 6
|
|
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: store i8 [[L]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: [[ARRAYIDX77:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[IV_MUL]]
|
|
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX77]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[IV_MUL]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX97:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[ADD_2]]
|
|
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX97]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 10000
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%iv.mul = mul nuw i64 %iv, 6
|
|
%l = load i8, ptr %b, align 1, !llvm.access.group !0
|
|
store i8 %l, ptr %b, align 1, !llvm.access.group !0
|
|
%arrayidx77 = getelementptr i8, ptr %l_out, i64 %iv.mul
|
|
store i8 0, ptr %arrayidx77, align 1, !llvm.access.group !0
|
|
%add.2 = add i64 %iv.mul, 2
|
|
%arrayidx97 = getelementptr i8, ptr %l_out, i64 %add.2
|
|
store i8 0, ptr %arrayidx97, align 1, !llvm.access.group !0
|
|
%iv.next = add nsw nuw i64 %iv, 1
|
|
%exitcond.not = icmp eq i64 %iv, 10000
|
|
br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !1
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { "target-cpu"="skx" }
|
|
|
|
!0 = distinct !{}
|
|
!1 = distinct !{!1, !3}
|
|
!3 = !{!"llvm.loop.parallel_accesses", !0}
|