
Until now, the feature that enables vectorisation of some early exit loops with uncountable exits has been controlled by a flag that is off by default. Now that we have efficient code generation for vectorising such loops (see PR #130766), and there is still some time before the next LLVM release, it seems like a good point to enable the feature by default. If any issues arise post-commit, it can easily be reverted. Using this patch I built and ran the LLVM test suite successfully, which on neoverse-v1 led to the vectorisation of 114 additional early exit loops. I also built and ran SPEC2017 successfully for both neoverse-v1 and neoverse-v2.
574 lines
18 KiB
LLVM
574 lines
18 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; REQUIRES: asserts
|
|
; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -disable-output 2>&1 | FileCheck %s
|
|
|
|
; External helper taking a pointer and an i64; the tests below call it on their
; stack buffers before entering the loop. Its body is not part of this file --
; presumably it fills the buffer, to be confirmed against the real definition.
; The trailing ';' starts an (empty) IR comment and is harmless.
declare void @init_mem(ptr, i64);
|
|
|
|
; == SOME LEGAL EXAMPLES ==
|
|
|
|
; The form of the induction variables requires SCEV predicates.
|
|
define i32 @diff_exit_block_needs_scev_check(i32 %end) {
|
|
; CHECK-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check'
|
|
; CHECK: Found an early exit loop with symbolic max backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32)))<nsw>
|
|
; CHECK-NEXT: LV: We can vectorize this loop!
|
|
; CHECK-NOT: LV: Not vectorizing:
|
|
; Two 1024 x i32 stack buffers are handed to init_mem before the loop starts.
entry:
|
|
%p1 = alloca [1024 x i32]
|
|
%p2 = alloca [1024 x i32]
|
|
call void @init_mem(ptr %p1, i64 1024)
|
|
call void @init_mem(ptr %p2, i64 1024)
|
|
; Only the low 10 bits of %end are kept as the bound for the counted exit.
%end.clamped = and i32 %end, 1023
|
|
br label %for.body
|
|
|
|
; Loop header: %ind is an i8 counter and %gep.ind an i64 index; both start at 0
; and are advanced by 1 in for.inc.
for.body:
|
|
%ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
|
|
%gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
|
|
%arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
|
|
%0 = load i32, ptr %arrayidx1, align 4
|
|
%arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
|
|
%1 = load i32, ptr %arrayidx2, align 4
|
|
; Early exit: leave for %found as soon as the two loaded elements are equal.
%cmp.early = icmp eq i32 %0, %1
|
|
br i1 %cmp.early, label %found, label %for.inc
|
|
|
|
; Latch: the i8 counter is zero-extended to i32 and compared (unsigned) against
; the clamped bound to decide whether to iterate again.
for.inc:
|
|
%ind.next = add i8 %ind, 1
|
|
%conv = zext i8 %ind.next to i32
|
|
%gep.ind.next = add i64 %gep.ind, 1
|
|
%cmp = icmp ult i32 %conv, %end.clamped
|
|
br i1 %cmp, label %for.body, label %exit
|
|
|
|
; Target of the early exit; returns 1.
found:
|
|
ret i32 1
|
|
|
|
; Target of the counted exit (a different block from %found); returns 0.
exit:
|
|
ret i32 0
|
|
}
|
|
|
|
|
|
; Legal case: both buffers are 1024-byte allocas, the index starts at 3 and the
; counted exit fires once the incremented index reaches 67. The early exit
; leaves the loop when the two loaded bytes differ. Both exits share one exit
; block whose phi yields the pre-increment index on the early path.
define i64 @same_exit_block_pre_inc_use1() {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1'
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
; CHECK-NEXT: LV: We can vectorize this loop!
; CHECK-NOT: LV: Not vectorizing
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %addr.a = getelementptr inbounds i8, ptr %buf.a, i64 %iv
  %val.a = load i8, ptr %addr.a, align 1
  %addr.b = getelementptr inbounds i8, ptr %buf.b, i64 %iv
  %val.b = load i8, ptr %addr.b, align 1
  ; Matching bytes keep the loop running; a mismatch takes the early exit.
  %same = icmp eq i8 %val.a, %val.b
  br i1 %same, label %latch, label %done

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

done:
  %result = phi i64 [ %iv, %header ], [ 67, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; Legal case: the loop body calls the llvm.sqrt.f32 intrinsic on every loaded
; float, and the directives below expect the legality check to accept it.
; The second buffer is allocated and passed to init_mem but never read.
define i64 @loop_contains_safe_call() {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_safe_call'
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
; CHECK-NEXT: LV: We can vectorize this loop!
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %addr = getelementptr inbounds float, ptr %buf.a, i64 %iv
  %val = load float, ptr %addr, align 1
  %root = tail call fast float @llvm.sqrt.f32(float %val)
  ; Continue while the square root compares "ult" 3.0; otherwise exit early.
  %below = fcmp fast ult float %root, 3.0e+00
  br i1 %below, label %latch, label %done

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

done:
  %result = phi i64 [ %iv, %header ], [ 67, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; Legal case: the loop divides each loaded i32 by the constant 20000, and the
; directives below expect the legality check to accept it.
; The second buffer is allocated and passed to init_mem but never read.
define i64 @loop_contains_safe_div() {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_safe_div'
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
; CHECK-NEXT: LV: We can vectorize this loop!
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %addr = getelementptr inbounds i32, ptr %buf.a, i64 %iv
  %val = load i32, ptr %addr, align 1
  ; The divisor is a non-zero constant.
  %quot = udiv i32 %val, 20000
  %is.one = icmp eq i32 %quot, 1
  br i1 %is.one, label %latch, label %done

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

done:
  %result = phi i64 [ %iv, %header ], [ 67, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; Legal case: a second load, from the dereferenceable(1024) align(8) argument
; %p2, sits in the latch block, i.e. after the early-exit branch. Its value
; feeds the exit phi on the counted-exit path.
define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(8) %p2) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit'
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
; CHECK-NEXT: LV: We can vectorize this loop!
; CHECK-NOT: LV: Not vectorizing
entry:
  %buf = alloca [1024 x i8]
  call void @init_mem(ptr %buf, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %addr = getelementptr inbounds i32, ptr %buf, i64 %iv
  %val = load i32, ptr %addr, align 1
  ; Leave early as soon as the loaded element is not 1.
  %is.one = icmp eq i32 %val, 1
  br i1 %is.one, label %latch, label %done

latch:
  ; This load only executes once the early exit has not been taken.
  %addr.p2 = getelementptr inbounds i64, ptr %p2, i64 %iv
  %val.p2 = load i64, ptr %addr.p2, align 8
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

done:
  %result = phi i64 [ %iv, %header ], [ %val.p2, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; Three exits share one exit block: a counted exit in the header (index == 64),
; an uncountable exit in the middle block (loaded bytes equal) and a counted
; exit in the latch (incremented index == 128). The exit phi merges constants
; only. The directives below expect legality to pass and the scalar-epilogue
; restriction to be reported straight afterwards.
define i64 @one_uncountable_two_countable_same_exit_phi_of_consts() {
; CHECK-LABEL: LV: Checking a loop in 'one_uncountable_two_countable_same_exit_phi_of_consts'
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 61
; CHECK-NEXT: LV: We can vectorize this loop!
; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of early exit loops requiring a scalar epilogue is unsupported.
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  ; First counted exit: stop when the index reaches 64.
  %not.at.64 = icmp ne i64 %iv, 64
  br i1 %not.at.64, label %probe, label %done

probe:
  %addr.a = getelementptr inbounds i8, ptr %buf.a, i64 %iv
  %val.a = load i8, ptr %addr.a, align 1
  %addr.b = getelementptr inbounds i8, ptr %buf.b, i64 %iv
  %val.b = load i8, ptr %addr.b, align 1
  ; Uncountable exit: equal bytes leave the loop.
  %same = icmp eq i8 %val.a, %val.b
  br i1 %same, label %done, label %latch

latch:
  %iv.next = add i64 %iv, 1
  ; Second counted exit: stop when the incremented index reaches 128.
  %more = icmp ne i64 %iv.next, 128
  br i1 %more, label %header, label %done

done:
  %result = phi i64 [ 0, %header ], [ 1, %probe ], [ 0, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; == SOME ILLEGAL EXAMPLES ==
|
|
|
|
|
|
; Illegal case: the buffers are only 42 bytes, yet the loop indexes them from 3
; until the incremented index reaches 67, so the directives below expect the
; "may fault" diagnostic.
;
; The length handed to init_mem matches the 42-byte allocation; it was 1024,
; which is larger than the buffers it was given.
define i64 @same_exit_block_pre_inc_use1_too_small_allocas() {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_allocas'
; CHECK: LV: Not vectorizing: Loop may fault.
entry:
  %p1 = alloca [42 x i8]
  %p2 = alloca [42 x i8]
  call void @init_mem(ptr %p1, i64 42)
  call void @init_mem(ptr %p2, i64 42)
  br label %loop

loop:
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
  %ld1 = load i8, ptr %arrayidx, align 1
  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
  %ld2 = load i8, ptr %arrayidx1, align 1
  ; Early exit: leave the loop on the first pair of bytes that differ.
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.inc, label %loop.end

loop.inc:
  %index.next = add i64 %index, 1
  ; Counted exit at 67, well past the end of the 42-byte buffers.
  %exitcond = icmp ne i64 %index.next, 67
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
  ret i64 %retval
}
|
|
|
|
|
|
; Illegal case: both pointer arguments are only dereferenceable(42), while the
; loop indexes them from 3 until the incremented index reaches 67. The
; directive below expects the "may fault" diagnostic.
define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_deref_ptrs'
; CHECK: LV: Not vectorizing: Loop may fault.
entry:
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %addr.a = getelementptr inbounds i8, ptr %p1, i64 %iv
  %val.a = load i8, ptr %addr.a, align 1
  %addr.b = getelementptr inbounds i8, ptr %p2, i64 %iv
  %val.b = load i8, ptr %addr.b, align 1
  ; A mismatch between the two bytes takes the early exit.
  %same = icmp eq i8 %val.a, %val.b
  br i1 %same, label %latch, label %done

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

done:
  %result = phi i64 [ %iv, %header ], [ 67, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; Illegal case: the pointer arguments carry no dereferenceable attribute at
; all, and the directive below expects the "may fault" diagnostic.
define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_unknown_ptrs'
; CHECK: LV: Not vectorizing: Loop may fault.
entry:
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %addr.a = getelementptr inbounds i8, ptr %p1, i64 %iv
  %val.a = load i8, ptr %addr.a, align 1
  %addr.b = getelementptr inbounds i8, ptr %p2, i64 %iv
  %val.b = load i8, ptr %addr.b, align 1
  ; A mismatch between the two bytes takes the early exit.
  %same = icmp eq i8 %val.a, %val.b
  br i1 %same, label %latch, label %done

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

done:
  %result = phi i64 [ %iv, %header ], [ 67, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; Here the uncountable exit (the one with an unknown exit-not-taken count)
; lives in the latch block, while the counted exit sits in the header. This
; arrangement is not supported yet.
define i64 @uncountable_exit_on_last_block() {
; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_on_last_block'
; CHECK: LV: Not vectorizing: Early exit is not the latch predecessor.
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %iv.next = add i64 %iv, 1
  ; Counted exit, evaluated before the data-dependent compare.
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %latch, label %done

latch:
  %addr.a = getelementptr inbounds i8, ptr %buf.a, i64 %iv
  %val.a = load i8, ptr %addr.a, align 1
  %addr.b = getelementptr inbounds i8, ptr %buf.b, i64 %iv
  %val.b = load i8, ptr %addr.b, align 1
  ; Uncountable exit: equal bytes leave the loop, otherwise branch back.
  %same = icmp eq i8 %val.a, %val.b
  br i1 %same, label %done, label %header

done:
  %result = phi i64 [ 64, %header ], [ %iv, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; More than one uncountable early exit is not supported at the moment. This
; loop has two data-dependent exits (one per probe block) plus the counted
; exit in the latch.
define i64 @multiple_uncountable_exits() {
; CHECK-LABEL: LV: Checking a loop in 'multiple_uncountable_exits'
; CHECK: LV: Not vectorizing: Loop has too many uncountable exits.
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %first.probe

first.probe:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %addr.a = getelementptr inbounds i8, ptr %buf.a, i64 %iv
  %val.a = load i8, ptr %addr.a, align 1
  %addr.b = getelementptr inbounds i8, ptr %buf.b, i64 %iv
  %val.b = load i8, ptr %addr.b, align 1
  ; First uncountable exit: the two bytes are equal.
  %same = icmp eq i8 %val.a, %val.b
  br i1 %same, label %done, label %second.probe

second.probe:
  ; Second uncountable exit: the byte from the first buffer is below 34.
  %small = icmp ult i8 %val.a, 34
  br i1 %small, label %done, label %latch

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %first.probe, label %done

done:
  %result = phi i64 [ %iv, %first.probe ], [ 100, %second.probe ], [ 43, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; The latch branches back unconditionally, so the only way out of the loop is
; the data-dependent early exit. The latch still computes a compare against
; 67, but nothing uses it.
define i64 @uncountable_exit_infinite_loop() {
; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_infinite_loop'
; CHECK: LV: Not vectorizing: Cannot vectorize uncountable loop.
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %addr.a = getelementptr inbounds i8, ptr %buf.a, i64 %iv
  %val.a = load i8, ptr %addr.a, align 1
  %addr.b = getelementptr inbounds i8, ptr %buf.b, i64 %iv
  %val.b = load i8, ptr %addr.b, align 1
  %same = icmp eq i8 %val.a, %val.b
  br i1 %same, label %latch, label %done

latch:
  %iv.next = add i64 %iv, 1
  ; Result deliberately left unused: the back edge below is unconditional.
  %unused.cond = icmp ne i64 %iv.next, 67
  br label %header

done:
  %result = phi i64 [ %iv, %header ]
  ret i64 %result
}
|
|
|
|
|
|
; Illegal case: the loop calls @foo (carrying attribute group #0) on each
; loaded value, and the directive below expects the legality check to reject it
; as not speculatively executable.
; The second buffer is allocated and passed to init_mem but never read.
define i64 @loop_contains_unsafe_call() {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_unsafe_call'
; CHECK: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed.
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %addr = getelementptr inbounds i32, ptr %buf.a, i64 %iv
  %val = load i32, ptr %addr, align 1
  %foo.ret = call i32 @foo(i32 %val) #0
  ; Continue only while the call returns 34.
  %is.34 = icmp eq i32 %foo.ret, 34
  br i1 %is.34, label %latch, label %done

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

done:
  %result = phi i64 [ %iv, %header ], [ 67, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; Illegal case: the loaded value is the *divisor* of a udiv, and the directive
; below expects the legality check to reject it as not speculatively
; executable. The second buffer is allocated and passed to init_mem but never
; read.
define i64 @loop_contains_unsafe_div() {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_unsafe_div'
; CHECK: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed.
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  ; The address is computed in i8 units although an i32 is loaded from it.
  %addr = getelementptr inbounds i8, ptr %buf.a, i64 %iv
  %val = load i32, ptr %addr, align 1
  ; Constant dividend, loaded divisor.
  %quot = udiv i32 20000, %val
  %is.one = icmp eq i32 %quot, 1
  br i1 %is.one, label %latch, label %done

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

done:
  %result = phi i64 [ %iv, %header ], [ 67, %latch ]
  ret i64 %result
}
|
|
|
|
; Single-block loop whose data-dependent condition and counted condition are
; merged by a select (a logical "or") into one exiting branch. The loop also
; stores back to %array on every iteration. The directive below expects the
; loop to be reported as uncountable.
define void @exit_conditions_combined_in_single_branch(ptr noalias dereferenceable(40) %array, ptr readonly align 2 dereferenceable(40) %pred) {
; CHECK-LABEL: LV: Checking a loop in 'exit_conditions_combined_in_single_branch'
; CHECK: LV: Not vectorizing: Cannot vectorize uncountable loop.
entry:
  br label %body

body:                                             ; preds = %body, %entry
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %body ]
  ; Increment the i16 element of %array in place.
  %slot = getelementptr inbounds nuw i16, ptr %array, i64 %idx
  %old = load i16, ptr %slot, align 2
  %bumped = add nsw i16 %old, 1
  store i16 %bumped, ptr %slot, align 2
  ; Data-dependent stop condition: the %pred element is greater than 500.
  %pred.slot = getelementptr inbounds nuw i16, ptr %pred, i64 %idx
  %pred.val = load i16, ptr %pred.slot, align 2
  %stop.early = icmp sgt i16 %pred.val, 500
  ; Counted stop condition: 20 iterations have completed.
  %idx.next = add nuw nsw i64 %idx, 1
  %stop.counted = icmp eq i64 %idx.next, 20
  %stop = select i1 %stop.early, i1 true, i1 %stop.counted
  br i1 %stop, label %out, label %body

out:                                              ; preds = %body
  ret void
}
|
|
|
|
; The uncountable exit sits in a block that only runs when the corresponding
; byte of %mask is non-zero; iterations with a zero mask byte skip straight to
; the latch.
define i64 @uncountable_exit_in_conditional_block(ptr %mask) {
; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_in_conditional_block'
; CHECK: LV: Not vectorizing: Early exit is not the latch predecessor.
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %mask.addr = getelementptr inbounds i8, ptr %mask, i64 %iv
  %mask.val = load i8, ptr %mask.addr, align 1
  %active = icmp ne i8 %mask.val, 0
  br i1 %active, label %guarded, label %latch

guarded:
  %addr.a = getelementptr inbounds i8, ptr %buf.a, i64 %iv
  %val.a = load i8, ptr %addr.a, align 1
  %addr.b = getelementptr inbounds i8, ptr %buf.b, i64 %iv
  %val.b = load i8, ptr %addr.b, align 1
  ; Uncountable exit: a mismatch between the two bytes leaves the loop.
  %same = icmp eq i8 %val.a, %val.b
  br i1 %same, label %latch, label %done

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

done:
  %result = phi i64 [ %iv, %guarded ], [ 67, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; Same search loop as the plain pre-increment case, but it also accumulates a
; running sum of the zero-extended bytes from the second buffer, and that sum
; is used after the loop. The directive below expects the loop to be rejected
; because of the reduction.
define i64 @same_exit_block_pre_inc_use1_with_reduction() {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_with_reduction'
; CHECK: LV: Not vectorizing: Found reductions or recurrences in early-exit loop.
entry:
  %buf.a = alloca [1024 x i8]
  %buf.b = alloca [1024 x i8]
  call void @init_mem(ptr %buf.a, i64 1024)
  call void @init_mem(ptr %buf.b, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %acc = phi i64 [ %acc.next, %latch ], [ 0, %entry ]
  %addr.a = getelementptr inbounds i8, ptr %buf.a, i64 %iv
  %val.a = load i8, ptr %addr.a, align 1
  %addr.b = getelementptr inbounds i8, ptr %buf.b, i64 %iv
  %val.b = load i8, ptr %addr.b, align 1
  ; Fold the current byte into the sum before testing the exit condition.
  %val.b.wide = zext i8 %val.b to i64
  %acc.next = add i64 %acc, %val.b.wide
  %same = icmp eq i8 %val.a, %val.b
  br i1 %same, label %latch, label %done

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

done:
  %last.iv = phi i64 [ %iv, %header ], [ 67, %latch ]
  ; The updated sum is consumed outside the loop.
  %result = add i64 %acc.next, %last.iv
  ret i64 %result
}
|
|
|
|
|
|
; The data-dependent exit is a switch with two different targets outside the
; loop: value 2 goes to the shared exit block, value 3 to a separate
; returning block, and everything else continues to the latch.
define i64 @uncountable_exit_has_multiple_outside_successors() {
; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_has_multiple_outside_successors'
; CHECK: LV: Not vectorizing: Loop contains an unsupported switch
entry:
  %buf = alloca [1024 x i8]
  call void @init_mem(ptr %buf, i64 1024)
  br label %header

header:
  %iv = phi i64 [ %iv.next, %latch ], [ 3, %entry ]
  %addr = getelementptr inbounds i8, ptr %buf, i64 %iv
  %val = load i8, ptr %addr, align 1
  switch i8 %val, label %latch [
    i8 2, label %done
    i8 3, label %other.exit
  ]

latch:
  %iv.next = add i64 %iv, 1
  %more = icmp ne i64 %iv.next, 67
  br i1 %more, label %header, label %done

other.exit:
  ret i64 3

done:
  %result = phi i64 [ %iv, %header ], [ 67, %latch ]
  ret i64 %result
}
|
|
|
|
|
|
; Scalar callee used by @loop_contains_unsafe_call; declared readonly.
declare i32 @foo(i32) readonly
|
|
; Scalable-vector function named as the target of the mapping in attribute
; group #0 below.
declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
|
|
|
|
; Attribute group #0 (attached to the @foo call site) carries a
; "vector-function-abi-variant" string that names foo_vec as a vector variant
; of foo.
attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
|