On RISC-V, some loops that the loop vectorizer vectorizes pre-LTO may turn out to have their exact trip count exposed after LTO; see #164762.

If the trip count is small enough, we can fold away the `@llvm.experimental.get.vector.length` intrinsic based on this corollary from the LangRef:

> If %cnt is less than or equal to %max_lanes, the return value is equal to %cnt.

This on its own doesn't remove the `@llvm.experimental.get.vector.length` in #164762, since we also need to teach computeKnownBits about `@llvm.experimental.get.vector.length` and the sub recurrence, but this PR is a starting point.

I've added this in InstCombine rather than InstSimplify since we may need to insert a truncation (`@llvm.experimental.get.vector.length` can take an i64 %cnt argument, but the result is always i32).

Note that something similar was done in VPlan in #167647 for the case where the loop vectorizer knows the trip count.
90 lines
3.0 KiB
LLVM
90 lines
3.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=instcombine,verify -S | FileCheck %s

; Constant count within a fixed-width bound: %cnt = 1 does not exceed
; %max_lanes = 2 (VF 2, not scalable), so the call folds to the constant %cnt.
define i32 @cnt_known_lt() {
; CHECK-LABEL: define i32 @cnt_known_lt() {
; CHECK-NEXT:    ret i32 1
;
  %x = call i32 @llvm.experimental.get.vector.length(i32 1, i32 2, i1 false)
  ret i32 %x
}

; Negative test: %cnt = 2 exceeds %max_lanes = 1 (VF 1, not scalable), so the
; result is not known to equal %cnt and the call must be left in place.
define i32 @cnt_not_known_lt() {
; CHECK-LABEL: define i32 @cnt_not_known_lt() {
; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 2, i32 1, i1 false)
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 false)
  ret i32 %x
}

; Scalable bound: vscale_range(2, 4) guarantees vscale >= 2, so
; %max_lanes = 1 * vscale is at least 2 and covers %cnt = 2; the call folds to
; the constant %cnt.
define i32 @cnt_known_lt_scalable() vscale_range(2, 4) {
; CHECK-LABEL: define i32 @cnt_known_lt_scalable(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    ret i32 2
;
  %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 true)
  ret i32 %x
}

; Negative scalable test: without a vscale_range attribute there is no proof
; that %max_lanes = 1 * vscale reaches %cnt = 2, so the call must be left in
; place.
define i32 @cnt_not_known_lt_scalable() {
; CHECK-LABEL: define i32 @cnt_not_known_lt_scalable() {
; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 2, i32 1, i1 true)
; CHECK-NEXT:    ret i32 [[X]]
;
  %x = call i32 @llvm.experimental.get.vector.length(i32 2, i32 1, i1 true)
  ret i32 %x
}

; Runtime count bounded by an assumption: the assume establishes %x <= 3, which
; matches %max_lanes = 3 (VF 3, not scalable), so the call folds to %x itself.
; The expected output shows the compare canonicalized from `ule 3` to `ult 4`.
define i32 @cnt_known_lt_runtime(i32 %x) {
; CHECK-LABEL: define i32 @cnt_known_lt_runtime(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i32 [[X]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[ICMP]])
; CHECK-NEXT:    ret i32 [[X]]
;
  %icmp = icmp ule i32 %x, 3
  call void @llvm.assume(i1 %icmp)
  %y = call i32 @llvm.experimental.get.vector.length(i32 %x, i32 3, i1 false)
  ret i32 %y
}

; Same as the i32 runtime case, but with an i64 count. The intrinsic always
; returns i32, so replacing the call with %x requires a truncation; the
; expected trunc carries nuw/nsw because %x is known to be below 4.
define i32 @cnt_known_lt_runtime_trunc(i64 %x) {
; CHECK-LABEL: define i32 @cnt_known_lt_runtime_trunc(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[X]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[ICMP]])
; CHECK-NEXT:    [[Y:%.*]] = trunc nuw nsw i64 [[X]] to i32
; CHECK-NEXT:    ret i32 [[Y]]
;
  %icmp = icmp ule i64 %x, 3
  call void @llvm.assume(i1 %icmp)
  %y = call i32 @llvm.experimental.get.vector.length(i64 %x, i32 3, i1 false)
  ret i32 %y
}

; The assume establishes %x <= 2, which matches %max_lanes = 2, so folding the
; call to %x would be valid. The expected output records the current
; behaviour, where the call is kept.
; FIXME: We should be able to deduce the constant range from AssumptionCache
; rather than relying on KnownBits, which in this case only knows x <= 3.
define i32 @cnt_known_lt_runtime_assumption(i32 %x) {
; CHECK-LABEL: define i32 @cnt_known_lt_runtime_assumption(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i32 [[X]], 3
; CHECK-NEXT:    call void @llvm.assume(i1 [[ICMP]])
; CHECK-NEXT:    [[Y:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[X]], i32 2, i1 false)
; CHECK-NEXT:    ret i32 [[Y]]
;
  %icmp = icmp ule i32 %x, 2
  call void @llvm.assume(i1 %icmp)
  %y = call i32 @llvm.experimental.get.vector.length(i32 %x, i32 2, i1 false)
  ret i32 %y
}

; Count type narrower than the result: the count is an i16 constant 1, within
; %max_lanes = 2 (VF 2, not scalable), and the call folds to the i32
; constant 1.
define i32 @cnt_known_lt_i16() {
; CHECK-LABEL: define i32 @cnt_known_lt_i16() {
; CHECK-NEXT:    ret i32 1
;
  %x = call i32 @llvm.experimental.get.vector.length(i16 1, i32 2, i1 false)
  ret i32 %x
}