
Until now, the feature that enables vectorisation of some early exit loops with uncountable exits has been controlled by a flag that is off by default. Now that we have efficient code generation for vectorising such loops (see PR #130766), and there is still some time before the next LLVM release, this seems like a good point to enable the feature by default. If any issues arise post-commit, it can easily be reverted. Using this patch, I built and ran the LLVM test suite successfully, which on neoverse-v1 led to the vectorisation of 114 additional early exit loops. I also built and ran SPEC2017 successfully for both neoverse-v1 and neoverse-v2.
123 lines
4.8 KiB
LLVM
123 lines
4.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
;
; Each function shown here contains a loop with three exits: two that compare
; the index against a constant and one that depends on bytes loaded inside the
; loop. The expected output for each of them is the same scalar loop that was
; fed in.
;
; The command below pipes this file through opt with only the loop-vectorize
; pass enabled and hands the resulting textual IR to FileCheck.
; RUN: opt -S < %s -p loop-vectorize | FileCheck %s

; External helper; its body is not part of this file. The tests pass it a
; stack buffer together with that buffer's size in bytes.
; NOTE(review): presumably it fills the buffer with data - confirm.
declare void @init_mem(ptr, i64);

; Loop with three exits that all branch to the same block, %loop.end, whose
; phi selects between constants only:
;   * %loop     -> taken when %index == 64                        (result 0)
;   * %search   -> taken when the bytes loaded from %p1 and %p2
;                  at %index compare equal                        (result 1)
;   * %loop.inc -> taken when %index.next == 128                  (result 0)
; Going by the function name, the load-dependent exit in %search is the
; uncountable one and the two index comparisons are the countable ones.
; The autogenerated assertions match the same scalar loop that is fed in,
; i.e. the expected output contains no vector code.
define i64 @one_uncountable_two_countable_same_exit_phi_of_consts() {
; CHECK-LABEL: define i64 @one_uncountable_two_countable_same_exit_phi_of_consts() {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64
; CHECK-NEXT:    br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]]
; CHECK:       search:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]]
; CHECK:       loop.inc:
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
; CHECK:       loop.end:
; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[SEARCH]] ], [ 0, [[LOOP_INC]] ]
; CHECK-NEXT:    ret i64 [[RETVAL]]
;
entry:
  ; Two 1024-byte stack buffers, each handed to @init_mem before the loop.
  %p1 = alloca [1024 x i8]
  %p2 = alloca [1024 x i8]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  br label %loop

loop:
  ; The index starts at 3 and advances by 1 per iteration (see %loop.inc).
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  ; First index-based exit: leave the loop once %index equals 64.
  %cmp1 = icmp ne i64 %index, 64
  br i1 %cmp1, label %search, label %loop.end

search:
  ; Load-dependent exit: leave the loop when p1[index] == p2[index].
  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
  %ld1 = load i8, ptr %arrayidx, align 1
  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
  %ld2 = load i8, ptr %arrayidx1, align 1
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.end, label %loop.inc

loop.inc:
  ; Second index-based exit: leave the loop once the next index equals 128.
  %index.next = add i64 %index, 1
  %exitcond = icmp ne i64 %index.next, 128
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  ; All three exits meet here; only the exit from %search yields 1.
  %retval = phi i64 [ 0, %loop ], [ 1, %search ], [ 0, %loop.inc ]
  ret i64 %retval
}

; Loop with three exits spread over two exit blocks, neither of which has a
; phi:
;   * %loop     -> %loop.end       when %index == 64              (returns 0)
;   * %search   -> %loop.end.early when the bytes loaded from %p1
;                                  and %p2 at %index compare equal
;                                                                 (returns 1)
;   * %loop.inc -> %loop.end       when %index.next == 128        (returns 0)
; Going by the function name, the load-dependent exit in %search is the
; uncountable one and the two index comparisons are the countable ones.
; The autogenerated assertions match the same scalar loop that is fed in,
; i.e. the expected output contains no vector code.
define i64 @one_uncountable_two_countable_diff_exit_no_phis() {
; CHECK-LABEL: define i64 @one_uncountable_two_countable_diff_exit_no_phis() {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64
; CHECK-NEXT:    br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]]
; CHECK:       search:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_END_EARLY:%.*]], label [[LOOP_INC]]
; CHECK:       loop.inc:
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
; CHECK:       loop.end.early:
; CHECK-NEXT:    ret i64 1
; CHECK:       loop.end:
; CHECK-NEXT:    ret i64 0
;
entry:
  ; Two 1024-byte stack buffers, each handed to @init_mem before the loop.
  %p1 = alloca [1024 x i8]
  %p2 = alloca [1024 x i8]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  br label %loop

loop:
  ; The index starts at 3 and advances by 1 per iteration (see %loop.inc).
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  ; First index-based exit: leave the loop once %index equals 64.
  %cmp1 = icmp ne i64 %index, 64
  br i1 %cmp1, label %search, label %loop.end

search:
  ; Load-dependent exit: go to the dedicated early-exit block when
  ; p1[index] == p2[index].
  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
  %ld1 = load i8, ptr %arrayidx, align 1
  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
  %ld2 = load i8, ptr %arrayidx1, align 1
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.end.early, label %loop.inc

loop.inc:
  ; Second index-based exit: leave the loop once the next index equals 128.
  %index.next = add i64 %index, 1
  %exitcond = icmp ne i64 %index.next, 128
  br i1 %exitcond, label %loop, label %loop.end

loop.end.early:
  ; Reached only from %search.
  ret i64 1

loop.end:
  ; Reached from either index-based exit.
  ret i64 0
}