
PR #112138 introduced initial support for dispatching to multiple exit blocks via split middle blocks. This patch fixes a few issues so that we can enable more tests to use the new enable-early-exit-vectorization flag. Fixes are: 1. The code to bail out for any loop live-out values happens too late. This is because collectUsersInExitBlocks ignores induction variables, which get dealt with in fixupIVUsers. I've moved the check much earlier in processLoop by looking for outside users of loop-defined values. 2. We shouldn't yet be interleaving when vectorising loops with uncountable early exits, since we've not added support for this yet. 3. Similarly, we also shouldn't be creating vector epilogues. 4. Similarly, we shouldn't enable tail-folding. 5. The existing implementation doesn't yet support loops that require scalar epilogues, although I plan to add that as part of PR #88385. 6. The new split middle blocks weren't being added to the parent loop.
123 lines
4.8 KiB
LLVM
123 lines
4.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
|
|
|
|
declare void @init_mem(ptr, i64);
|
|
|
|
define i64 @one_uncountable_two_countable_same_exit_phi_of_consts() {
|
|
; CHECK-LABEL: define i64 @one_uncountable_two_countable_same_exit_phi_of_consts() {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
|
|
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
|
|
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
|
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]]
|
|
; CHECK: search:
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
|
|
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
|
|
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
|
|
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]]
|
|
; CHECK: loop.inc:
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
|
|
; CHECK: loop.end:
|
|
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[SEARCH]] ], [ 0, [[LOOP_INC]] ]
|
|
; CHECK-NEXT: ret i64 [[RETVAL]]
|
|
;
|
|
entry:
|
|
%p1 = alloca [1024 x i8]
|
|
%p2 = alloca [1024 x i8]
|
|
call void @init_mem(ptr %p1, i64 1024)
|
|
call void @init_mem(ptr %p2, i64 1024)
|
|
br label %loop
|
|
|
|
loop:
|
|
%index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
|
|
%cmp1 = icmp ne i64 %index, 64
|
|
br i1 %cmp1, label %search, label %loop.end
|
|
|
|
search:
|
|
%arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
|
|
%ld1 = load i8, ptr %arrayidx, align 1
|
|
%arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
|
|
%ld2 = load i8, ptr %arrayidx1, align 1
|
|
%cmp3 = icmp eq i8 %ld1, %ld2
|
|
br i1 %cmp3, label %loop.end, label %loop.inc
|
|
|
|
loop.inc:
|
|
%index.next = add i64 %index, 1
|
|
%exitcond = icmp ne i64 %index.next, 128
|
|
br i1 %exitcond, label %loop, label %loop.end
|
|
|
|
loop.end:
|
|
%retval = phi i64 [ 0, %loop ], [ 1, %search ], [ 0, %loop.inc ]
|
|
ret i64 %retval
|
|
}
|
|
|
|
|
|
define i64 @one_uncountable_two_countable_diff_exit_no_phis() {
|
|
; CHECK-LABEL: define i64 @one_uncountable_two_countable_diff_exit_no_phis() {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
|
|
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
|
|
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
|
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]]
|
|
; CHECK: search:
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
|
|
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
|
|
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
|
|
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END_EARLY:%.*]], label [[LOOP_INC]]
|
|
; CHECK: loop.inc:
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
|
|
; CHECK: loop.end.early:
|
|
; CHECK-NEXT: ret i64 1
|
|
; CHECK: loop.end:
|
|
; CHECK-NEXT: ret i64 0
|
|
;
|
|
entry:
|
|
%p1 = alloca [1024 x i8]
|
|
%p2 = alloca [1024 x i8]
|
|
call void @init_mem(ptr %p1, i64 1024)
|
|
call void @init_mem(ptr %p2, i64 1024)
|
|
br label %loop
|
|
|
|
loop:
|
|
%index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
|
|
%cmp1 = icmp ne i64 %index, 64
|
|
br i1 %cmp1, label %search, label %loop.end
|
|
|
|
search:
|
|
%arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
|
|
%ld1 = load i8, ptr %arrayidx, align 1
|
|
%arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
|
|
%ld2 = load i8, ptr %arrayidx1, align 1
|
|
%cmp3 = icmp eq i8 %ld1, %ld2
|
|
br i1 %cmp3, label %loop.end.early, label %loop.inc
|
|
|
|
loop.inc:
|
|
%index.next = add i64 %index, 1
|
|
%exitcond = icmp ne i64 %index.next, 128
|
|
br i1 %exitcond, label %loop, label %loop.end
|
|
|
|
loop.end.early:
|
|
ret i64 1
|
|
|
|
loop.end:
|
|
ret i64 0
|
|
}
|