
This PR enables scalable loop vectorization for f16 with zvfhmin and bf16 with zvfbfmin. Enabling this was dependent on filling out the gaps for scalable zvfhmin/zvfbfmin codegen, but everything that the loop vectorizer might emit should now be handled. It does this by marking f16 and bf16 as legal in `isLegalElementTypeForRVV`.

Several users of `isLegalElementTypeForRVV` have already been enabled in other PRs:

- `isLegalStridedLoadStore` (#115264)
- `isLegalInterleavedAccessType` (#115257)
- `isLegalMaskedLoadStore` (#115145)
- `isLegalMaskedGatherScatter` (#114945)

The remaining user is `isLegalToVectorizeReduction`. We can't promote f16/bf16 reductions to f32, so we need to disable them for scalable vectors. The cost model actually marks these as invalid, but for out-of-tree reductions `ComputeReductionResult` doesn't get costed, and the vectorizer would end up emitting a reduction intrinsic regardless — so we still need to mark them as illegal. We might be able to lift this restriction later for fmax and fmin reductions.
95 lines
5.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=NO-ZVFHMIN
; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -S | FileCheck %s -check-prefix=ZVFHMIN

; Without zvfhmin, half is not a legal RVV element type, so the loop must stay
; scalar. With zvfhmin, the loop vectorizer should emit <vscale x 8 x half>
; loads/stores and fadd (scalable vectorization of f16).
define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
; NO-ZVFHMIN-LABEL: define void @fadd(
; NO-ZVFHMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-ZVFHMIN-NEXT: [[ENTRY:.*]]:
; NO-ZVFHMIN-NEXT: br label %[[LOOP:.*]]
; NO-ZVFHMIN: [[LOOP]]:
; NO-ZVFHMIN-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; NO-ZVFHMIN-NEXT: [[A_GEP:%.*]] = getelementptr half, ptr [[A]], i64 [[I]]
; NO-ZVFHMIN-NEXT: [[B_GEP:%.*]] = getelementptr half, ptr [[B]], i64 [[I]]
; NO-ZVFHMIN-NEXT: [[X:%.*]] = load half, ptr [[A_GEP]], align 2
; NO-ZVFHMIN-NEXT: [[Y:%.*]] = load half, ptr [[B_GEP]], align 2
; NO-ZVFHMIN-NEXT: [[Z:%.*]] = fadd half [[X]], [[Y]]
; NO-ZVFHMIN-NEXT: store half [[Z]], ptr [[A_GEP]], align 2
; NO-ZVFHMIN-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; NO-ZVFHMIN-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; NO-ZVFHMIN-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-ZVFHMIN: [[EXIT]]:
; NO-ZVFHMIN-NEXT: ret void
;
; ZVFHMIN-LABEL: define void @fadd(
; ZVFHMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; ZVFHMIN-NEXT: [[ENTRY:.*]]:
; ZVFHMIN-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; ZVFHMIN-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8
; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP8]]
; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; ZVFHMIN: [[VECTOR_PH]]:
; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]]
; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; ZVFHMIN-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; ZVFHMIN-NEXT: [[TMP5:%.*]] = mul i64 [[TMP12]], 8
; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
; ZVFHMIN: [[VECTOR_BODY]]:
; ZVFHMIN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFHMIN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; ZVFHMIN-NEXT: [[TMP1:%.*]] = getelementptr half, ptr [[A]], i64 [[TMP0]]
; ZVFHMIN-NEXT: [[TMP2:%.*]] = getelementptr half, ptr [[B]], i64 [[TMP0]]
; ZVFHMIN-NEXT: [[TMP3:%.*]] = getelementptr half, ptr [[TMP1]], i32 0
; ZVFHMIN-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP3]], align 2
; ZVFHMIN-NEXT: [[TMP4:%.*]] = getelementptr half, ptr [[TMP2]], i32 0
; ZVFHMIN-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x half>, ptr [[TMP4]], align 2
; ZVFHMIN-NEXT: [[TMP11:%.*]] = fadd <vscale x 8 x half> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; ZVFHMIN-NEXT: store <vscale x 8 x half> [[TMP11]], ptr [[TMP3]], align 2
; ZVFHMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; ZVFHMIN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; ZVFHMIN-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; ZVFHMIN: [[MIDDLE_BLOCK]]:
; ZVFHMIN-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; ZVFHMIN-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; ZVFHMIN: [[SCALAR_PH]]:
; ZVFHMIN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; ZVFHMIN-NEXT: br label %[[LOOP:.*]]
; ZVFHMIN: [[LOOP]]:
; ZVFHMIN-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; ZVFHMIN-NEXT: [[A_GEP:%.*]] = getelementptr half, ptr [[A]], i64 [[I]]
; ZVFHMIN-NEXT: [[B_GEP:%.*]] = getelementptr half, ptr [[B]], i64 [[I]]
; ZVFHMIN-NEXT: [[X:%.*]] = load half, ptr [[A_GEP]], align 2
; ZVFHMIN-NEXT: [[Y:%.*]] = load half, ptr [[B_GEP]], align 2
; ZVFHMIN-NEXT: [[Z:%.*]] = fadd half [[X]], [[Y]]
; ZVFHMIN-NEXT: store half [[Z]], ptr [[A_GEP]], align 2
; ZVFHMIN-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; ZVFHMIN-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; ZVFHMIN-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; ZVFHMIN: [[EXIT]]:
; ZVFHMIN-NEXT: ret void
;
entry:
  br label %loop

loop:
  %i = phi i64 [0, %entry], [%i.next, %loop]
  %a.gep = getelementptr half, ptr %a, i64 %i
  %b.gep = getelementptr half, ptr %b, i64 %i
  %x = load half, ptr %a.gep
  %y = load half, ptr %b.gep
  %z = fadd half %x, %y
  store half %z, ptr %a.gep
  %i.next = add i64 %i, 1
  %done = icmp eq i64 %i.next, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
;.
; ZVFHMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; ZVFHMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; ZVFHMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; ZVFHMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.