
Update optimizeForVFAndUF to completely remove the vector loop region when possible. At the moment, the region cannot be removed if it contains (1) widened IVs, because the recipe is needed to generate the step vector, or (2) reductions, because ComputeReductionResults requires the reduction phi recipe for codegen. Both cases can be addressed by more explicit modeling. The patch also includes a number of updates to allow executing VPlans without a vector loop region. Depends on https://github.com/llvm/llvm-project/pull/110004
61 lines
2.3 KiB
LLVM
61 lines
2.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -passes=loop-vectorize -S -mtriple=x86_64-- -o - %s | FileCheck %s
|
|
|
|
; Testcase that verifies that we don't get a faulty bitcast that casts between
|
|
; different sizes.
|
|
|
|
%rec8 = type { i16 }
|
|
|
|
@a = global [1 x %rec8] zeroinitializer
|
|
@b = global [2 x ptr] zeroinitializer
|
|
|
|
|
|
; @f1: a two-iteration counted loop that stores the address of element 0 of @a
; into consecutive slots of @b. The autogenerated assertions below expect the
; vectorizer to emit one <2 x ptr> store in vector.body, which then branches
; straight to middle.block (no back-edge), followed by the original scalar loop.
define void @f1() {
|
|
; CHECK-LABEL: @f1(
|
|
; CHECK-NEXT: bb1:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = sext i16 0 to i64
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [2 x ptr], ptr @b, i16 0, i64 [[TMP0]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[TMP1]], i32 0
|
|
; CHECK-NEXT: store <2 x ptr> <ptr @a, ptr @a>, ptr [[TMP2]], align 8
|
|
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br i1 true, label [[BB3:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[BB1:%.*]] ]
|
|
; CHECK-NEXT: br label [[BB2:%.*]]
|
|
; CHECK: bb2:
|
|
; CHECK-NEXT: [[C_1_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[_TMP9:%.*]], [[BB2]] ]
|
|
; CHECK-NEXT: [[_TMP1:%.*]] = zext i16 0 to i64
|
|
; CHECK-NEXT: [[_TMP2:%.*]] = getelementptr [1 x %rec8], ptr @a, i16 0, i64 [[_TMP1]]
|
|
; CHECK-NEXT: [[_TMP6:%.*]] = sext i16 [[C_1_0]] to i64
|
|
; CHECK-NEXT: [[_TMP7:%.*]] = getelementptr [2 x ptr], ptr @b, i16 0, i64 [[_TMP6]]
|
|
; CHECK-NEXT: store ptr [[_TMP2]], ptr [[_TMP7]], align 8
|
|
; CHECK-NEXT: [[_TMP9]] = add nsw i16 [[C_1_0]], 1
|
|
; CHECK-NEXT: [[_TMP11:%.*]] = icmp slt i16 [[_TMP9]], 2
|
|
; CHECK-NEXT: br i1 [[_TMP11]], label [[BB2]], label [[BB3]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: bb3:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
|
|
bb1:
|
|
br label %bb2
|
|
|
|
bb2:
|
|
; Loop counter: 0 when entered from bb1, otherwise the incremented value
; carried around the back-edge.
%c.1.0 = phi i16 [ 0, %bb1 ], [ %_tmp9, %bb2 ]
|
|
%_tmp1 = zext i16 0 to i64
|
|
; Address of element 0 of @a; the index is the constant 0 widened above.
%_tmp2 = getelementptr [1 x %rec8], ptr @a, i16 0, i64 %_tmp1
|
|
%_tmp6 = sext i16 %c.1.0 to i64
|
|
; Slot of @b selected by the sign-extended loop counter.
%_tmp7 = getelementptr [2 x ptr], ptr @b, i16 0, i64 %_tmp6
|
|
; Store the pointer to @a's element 0 into that slot of @b.
store ptr %_tmp2, ptr %_tmp7
|
|
%_tmp9 = add nsw i16 %c.1.0, 1
|
|
; Keep looping while the incremented counter is below 2 (signed compare),
; so the body runs for counter values 0 and 1.
%_tmp11 = icmp slt i16 %_tmp9, 2
|
|
br i1 %_tmp11, label %bb2, label %bb3
|
|
|
|
bb3:
|
|
ret void
|
|
}
|