llvm-project/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
David Sherwood bf2b14acf3
[LV] Enable auto-vectorisation of loops with uncountable exits (#133099)
Until now the feature to enable vectorisation of some early exit
loops with uncountable exits was controlled under a flag, off by
default. Now that we have efficient code generation for
vectorising such loops (see PR #130766) and we still have some
time from the next LLVM release it seems like a good time point
to enable the feature by default. If any issues arise post-commit
it can be easily reverted.

Using this patch I built and ran the LLVM test suite successfully,
which on neoverse-v1 led to the vectorisation of 114 additional
early exit loops. I also built and ran SPEC2017 successfully for
both neoverse-v1 and neoverse-v2.
2025-06-27 10:39:33 +01:00

257 lines
9.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S -debug %s 2>&1 | FileCheck %s
; REQUIRES: asserts
; External helper with no body in this file; every test function below calls it
; on its %src alloca before entering the loop.
declare void @init(ptr)
; Loop over a 128-element i32 array with two exiting blocks that branch to
; DIFFERENT exit blocks:
;   * %loop.header leaves to %e1 when the loaded element equals 10
;     (data-dependent early exit), and
;   * %loop.latch leaves to %e2 once %inc reaches 128 (counted exit).
; Both exit phis only take constants (0 and 1), i.e. the exit values are
; live-ins rather than values computed inside the loop.
;
; The FileCheck lines below pin the initial VPlan printed by -debug for VF=4:
; the vector loop ORs "any lane took the early exit" with the latch compare,
; middle.split dispatches to vector.early.exit (-> ir-bb<e1>) or middle.block
; (-> ir-bb<e2> / scalar.ph), and each IR exit phi gets one extra operand.
define i64 @multi_exiting_to_different_exits_live_in_exit_values() {
; CHECK: multi_exiting_to_different_exits_live_in_exit_values
; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %src = alloca [128 x i32], align 4
; CHECK-NEXT: IR call void @init(ptr %src)
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of ir<%c.1>
; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]>
; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.split
; CHECK-EMPTY:
; CHECK-NEXT: middle.split:
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<e2>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<e2>:
; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1> from middle.block)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: vector.early.exit:
; CHECK-NEXT: Successor(s): ir-bb<e1>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<e1>:
; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0> from vector.early.exit)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<[[RESUME:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: ir-bb<loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop.header>:
; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK: No successors
; CHECK-NEXT: }
; Input IR: allocate and initialise the array, then run the two-exit loop.
entry:
%src = alloca [128 x i32]
call void @init(ptr %src)
br label %loop.header
; Early exit: leave to %e1 as soon as an element equal to 10 is loaded.
loop.header:
%iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ]
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
%l = load i32, ptr %gep.src
%c.1 = icmp eq i32 %l, 10
br i1 %c.1, label %e1, label %loop.latch
; Counted exit: leave to %e2 after the 128th iteration.
loop.latch:
%inc = add nuw i64 %iv, 1
%c.2 = icmp eq i64 %inc, 128
br i1 %c.2, label %e2, label %loop.header
; Reached only from the early exit; returns the constant 0.
e1:
%p1 = phi i64 [ 0, %loop.header ]
ret i64 %p1
; Reached only from the latch exit; returns the constant 1.
e2:
%p2 = phi i64 [ 1, %loop.latch ]
ret i64 %p2
}
; Loop over a 128-element i32 array with two exiting blocks that branch to the
; SAME exit block %exit: %loop.header exits when the loaded element equals 10,
; %loop.latch exits once %inc reaches 128. The single exit phi %p takes the
; constant 0 from the header edge and the constant 1 from the latch edge.
;
; The FileCheck lines below pin the initial VPlan printed by -debug for VF=4;
; in particular ir-bb<exit> is a successor of both middle.block and
; vector.early.exit, and %p is expected to carry two extra operands
; (ir<1> from middle.block, ir<0> from vector.early.exit).
define i64 @multi_exiting_to_same_exit_live_in_exit_values() {
; CHECK: multi_exiting_to_same_exit_live_in_exit_values
; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %src = alloca [128 x i32], align 4
; CHECK-NEXT: IR call void @init(ptr %src)
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of ir<%c.1>
; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]>
; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.split
; CHECK-EMPTY:
; CHECK-NEXT: middle.split:
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.early.exit:
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from vector.early.exit)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<[[RESUME:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: ir-bb<loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop.header>:
; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK: No successors
; CHECK-NEXT: }
; Input IR: allocate and initialise the array, then run the two-exit loop.
entry:
%src = alloca [128 x i32]
call void @init(ptr %src)
br label %loop.header
; Early exit: leave to %exit as soon as an element equal to 10 is loaded.
loop.header:
%iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ]
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
%l = load i32, ptr %gep.src
%c.1 = icmp eq i32 %l, 10
br i1 %c.1, label %exit, label %loop.latch
; Counted exit: leave to %exit after the 128th iteration.
loop.latch:
%inc = add nuw i64 %iv, 1
%c.2 = icmp eq i64 %inc, 128
br i1 %c.2, label %exit, label %loop.header
; Shared exit block: returns 0 when coming from the header, 1 from the latch.
exit:
%p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ]
ret i64 %p
}
; Variant of the shared-exit loop: both %loop.header (element equals 10) and
; %loop.latch (%inc reaches 128) branch to the same block %exit, whose phi %p
; takes the constants 0 and 1. The difference in this function is the trailing
; uselistorder directive, which changes the order of the uses of label %exit.
;
; The FileCheck lines below pin the initial VPlan printed by -debug for VF=4
; and expect the extra operands of %p in the order
; "ir<1> from middle.block, ir<0> from vector.early.exit".
define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() {
; CHECK: multi_exiting_to_same_exit_live_in_exit_values_2
; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %src = alloca [128 x i32], align 4
; CHECK-NEXT: IR call void @init(ptr %src)
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of ir<%c.1>
; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]>
; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.split
; CHECK-EMPTY:
; CHECK-NEXT: middle.split:
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.early.exit:
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from vector.early.exit)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<[[RESUME:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: ir-bb<loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop.header>:
; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK: No successors
; CHECK-NEXT: }
; Input IR: allocate and initialise the array, then run the two-exit loop.
entry:
%src = alloca [128 x i32]
call void @init(ptr %src)
br label %loop.header
; Early exit: leave to %exit as soon as an element equal to 10 is loaded.
loop.header:
%iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ]
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
%l = load i32, ptr %gep.src
%c.1 = icmp eq i32 %l, 10
br i1 %c.1, label %exit, label %loop.latch
; Counted exit: leave to %exit after the 128th iteration.
loop.latch:
%inc = add nuw i64 %iv, 1
%c.2 = icmp eq i64 %inc, 128
br i1 %c.2, label %exit, label %loop.header
; Shared exit block: returns 0 when coming from the header, 1 from the latch.
exit:
%p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ]
ret i64 %p
; uselistorder directives
; NOTE(review): presumably this permutation makes the in-memory predecessor
; order of %exit differ from the textual branch order, so the test covers a
; predecessor order that does not match the phi's incoming order — confirm
; against the LangRef description of use-list order directives.
uselistorder label %exit, { 1, 0 }
}