
Until now, the feature that enables vectorisation of some early-exit loops with uncountable exits was controlled by a flag that was off by default. Now that we have efficient code generation for vectorising such loops (see PR #130766), and there is still some time before the next LLVM release, this seems like a good point to enable the feature by default. If any issues arise post-commit, it can easily be reverted. Using this patch I built and ran the LLVM test suite successfully, which on neoverse-v1 led to the vectorisation of 114 additional early-exit loops. I also built and ran SPEC2017 successfully for both neoverse-v1 and neoverse-v2.
257 lines
9.8 KiB
LLVM
257 lines
9.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S -debug %s 2>&1 | FileCheck %s
; REQUIRES: asserts

; Verifies the initial VPlan that the loop vectoriser prints in its debug
; output (stderr is merged into stdout by the command above) for loops that
; leave either through a data-dependent branch in the header or through the
; counted latch. The -debug output only exists in builds with assertions,
; which is why the test is restricted to such builds.

; External helper; every test hands it the freshly allocated buffer before
; entering the loop.
; NOTE(review): presumably this keeps the loaded values opaque to the
; optimiser - confirm.
declare void @init(ptr)

; Loop with two exiting blocks that branch to two *different* exit blocks.
;   - %loop.header leaves to %e1 as soon as a loaded element equals 10.
;   - %loop.latch leaves to %e2 once the induction variable reaches 128.
; Both exit phis only carry constants (0 in %e1, 1 in %e2), so the exit values
; are live-ins of the plan. The expected plan ORs the any-of of the early-exit
; compare with the latch compare to leave the vector loop, then uses
; middle.split to dispatch to vector.early.exit (-> %e1) or middle.block
; (-> %e2 or the scalar preheader).
define i64 @multi_exiting_to_different_exits_live_in_exit_values() {
; CHECK: multi_exiting_to_different_exits_live_in_exit_values
; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %src = alloca [128 x i32], align 4
; CHECK-NEXT: IR call void @init(ptr %src)
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of ir<%c.1>
; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]>
; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.split
; CHECK-EMPTY:
; CHECK-NEXT: middle.split:
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<e2>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<e2>:
; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1> from middle.block)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: vector.early.exit:
; CHECK-NEXT: Successor(s): ir-bb<e1>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<e1>:
; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0> from vector.early.exit)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<[[RESUME:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: ir-bb<loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop.header>:
; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK: No successors
; CHECK-NEXT: }
entry:
  ; 128-element i32 buffer, handed to @init before the loop reads it.
  %src = alloca [128 x i32]
  call void @init(ptr %src)
  br label %loop.header

loop.header:
  %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ]
  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src
  ; Early (uncountable) exit: depends on the loaded data, not on %iv.
  %c.1 = icmp eq i32 %l, 10
  br i1 %c.1, label %e1, label %loop.latch

loop.latch:
  %inc = add nuw i64 %iv, 1
  ; Counted exit: fixed trip count of 128.
  %c.2 = icmp eq i64 %inc, 128
  br i1 %c.2, label %e2, label %loop.header

e1:
  ; Reached only through the early exit; yields 0.
  %p1 = phi i64 [ 0, %loop.header ]
  ret i64 %p1

e2:
  ; Reached only through the latch exit; yields 1.
  %p2 = phi i64 [ 1, %loop.latch ]
  ret i64 %p2
}

; Loop with two exiting blocks that both branch to the *same* exit block.
;   - %loop.header leaves to %exit as soon as a loaded element equals 10.
;   - %loop.latch leaves to %exit once the induction variable reaches 128.
; The single phi in %exit merges the constants 0 (early exit) and 1 (latch
; exit). The expected plan therefore attaches two extra operands to that phi:
; ir<1> coming from middle.block and ir<0> coming from vector.early.exit.
define i64 @multi_exiting_to_same_exit_live_in_exit_values() {
; CHECK: multi_exiting_to_same_exit_live_in_exit_values
; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %src = alloca [128 x i32], align 4
; CHECK-NEXT: IR call void @init(ptr %src)
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of ir<%c.1>
; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]>
; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.split
; CHECK-EMPTY:
; CHECK-NEXT: middle.split:
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.early.exit:
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from vector.early.exit)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<[[RESUME:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: ir-bb<loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop.header>:
; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK: No successors
; CHECK-NEXT: }

entry:
  ; 128-element i32 buffer, handed to @init before the loop reads it.
  %src = alloca [128 x i32]
  call void @init(ptr %src)
  br label %loop.header

loop.header:
  %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ]
  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src
  ; Early (uncountable) exit: depends on the loaded data, not on %iv.
  %c.1 = icmp eq i32 %l, 10
  br i1 %c.1, label %exit, label %loop.latch

loop.latch:
  %inc = add nuw i64 %iv, 1
  ; Counted exit: fixed trip count of 128.
  %c.2 = icmp eq i64 %inc, 128
  br i1 %c.2, label %exit, label %loop.header

exit:
  ; 0 when the early exit was taken, 1 when the loop ran to completion.
  %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ]
  ret i64 %p
}

; Same loop shape as the single-exit-block test: %loop.header leaves to %exit
; when a loaded element equals 10, and %loop.latch leaves to %exit after 128
; iterations. The only difference in the IR is the trailing uselistorder
; directive, which permutes the use-list of label %exit. The expected plan is
; written out in full and must still give the exit phi the extra operands
; ir<1> from middle.block and ir<0> from vector.early.exit.
; NOTE(review): presumably the permuted use-list changes the order in which
; the predecessors of %exit are visited - confirm against the vectoriser.
define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() {
; CHECK: multi_exiting_to_same_exit_live_in_exit_values_2
; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %src = alloca [128 x i32], align 4
; CHECK-NEXT: IR call void @init(ptr %src)
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of ir<%c.1>
; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]>
; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.split
; CHECK-EMPTY:
; CHECK-NEXT: middle.split:
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.early.exit:
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from vector.early.exit)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<[[RESUME:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: ir-bb<loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop.header>:
; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK: No successors
; CHECK-NEXT: }

entry:
  ; 128-element i32 buffer, handed to @init before the loop reads it.
  %src = alloca [128 x i32]
  call void @init(ptr %src)
  br label %loop.header

loop.header:
  %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ]
  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src
  ; Early (uncountable) exit: depends on the loaded data, not on %iv.
  %c.1 = icmp eq i32 %l, 10
  br i1 %c.1, label %exit, label %loop.latch

loop.latch:
  %inc = add nuw i64 %iv, 1
  ; Counted exit: fixed trip count of 128.
  %c.2 = icmp eq i64 %inc, 128
  br i1 %c.2, label %exit, label %loop.header

exit:
  ; 0 when the early exit was taken, 1 when the loop ran to completion.
  %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ]
  ret i64 %p

; uselistorder directives
  ; Permutes the two uses of %exit (the branches in the header and the latch).
  uselistorder label %exit, { 1, 0 }
}