
Currently we fail to detect the case where BTC + 1 wraps, i.e. the vector trip count is 0. In those cases, the minimum iteration count check will fail, and the vector code will never be executed. Explicitly check for this condition in computeMaxVF and avoid trying to vectorize altogether. Note that a number of tests needed to be updated, because the vector loop would never be executed given the input IR. Fixes https://github.com/llvm/llvm-project/issues/122558.
61 lines
2.9 KiB
LLVM
61 lines
2.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=2 -S | FileCheck %s

; Loop-vectorizer test for a loop that calls llvm.is.fpclass.
;
; Scalar input: %i starts at 0, is incremented by 1 each iteration, and the
; loop exits once the incremented value %i7 equals 128. Every iteration loads
; a float from the null pointer, passes it to llvm.is.fpclass.f32 with an
; i32 0 operand, selects between two 0.0 constants, and stores the result
; through a float-typed getelementptr based on @d at index %i.
;
; The autogenerated FileCheck assertions inside the function expect the opt
; invocation at the top of this file (vector width forced to 2) to produce a
; vector body that advances its index by 2 until it reaches 128, broadcasts
; the loaded float into a <2 x float>, and calls llvm.is.fpclass.v2f32, while
; keeping the original scalar loop as the remainder loop.
define void @d() {
; CHECK-LABEL: define void @d() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr @d, i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> [[BROADCAST_SPLAT]], i32 0)
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x float> zeroinitializer, <2 x float> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[TMP4]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I7:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[I3:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[I4:%.*]] = getelementptr float, ptr @d, i64 [[I]]
; CHECK-NEXT: [[I5:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[I3]], i32 0)
; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], float 0.000000e+00, float 0.000000e+00
; CHECK-NEXT: store float [[I6]], ptr [[I4]], align 4
; CHECK-NEXT: [[I7]] = add i64 [[I]], 1
; CHECK-NEXT: [[I8:%.*]] = icmp eq i64 [[I7]], 128
; CHECK-NEXT: br i1 [[I8]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i7, %loop ]
  %i3 = load float, ptr null, align 4
  %i4 = getelementptr float, ptr @d, i64 %i
  %i5 = tail call i1 @llvm.is.fpclass.f32(float %i3, i32 0)
  %i6 = select i1 %i5, float 0.0, float 0.0
  store float %i6, ptr %i4, align 4
  %i7 = add i64 %i, 1
  %i8 = icmp eq i64 %i7, 128
  br i1 %i8, label %exit, label %loop

exit:
  ret void
}

; Scalar float overload of the llvm.is.fpclass intrinsic called from the loop
; in @d; its i32 operand carries the immarg attribute.
declare i1 @llvm.is.fpclass.f32(float, i32 immarg)