
This PR enables scalable loop vectorization for f16 with zvfhmin and bf16 with zvfbfmin.

Enabling this was dependent on filling out the gaps for scalable zvfhmin/zvfbfmin codegen, but everything that the loop vectorizer might emit should now be handled.

It does this by marking f16 and bf16 as legal in `isLegalElementTypeForRVV`. There are a few users of `isLegalElementTypeForRVV` that have already been enabled in other PRs:

- `isLegalStridedLoadStore` #115264
- `isLegalInterleavedAccessType` #115257
- `isLegalMaskedLoadStore` #115145
- `isLegalMaskedGatherScatter` #114945

The remaining user is `isLegalToVectorizeReduction`. We can't promote f16/bf16 reductions to f32 so we need to disable them for scalable vectors. The cost model actually marks these as invalid, but for out-of-tree reductions `ComputeReductionResult` doesn't get costed and it will end up emitting a reduction intrinsic regardless, so we still need to mark them as illegal. We might be able to remove this restriction later for fmax and fmin reductions.
234 lines
14 KiB
LLVM
234 lines
14 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=NO-ZVFBFMIN
; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -S | FileCheck %s -check-prefix=ZVFBFMIN

; Runs the loop vectorizer on bfloat loops for riscv64 twice: once with only
; the vector extension enabled and once with zvfbfmin added, using a separate
; check prefix for each configuration.

; Scalar loop that loads one bfloat from %a and one from %b per iteration,
; adds them, and stores the sum back to %a; it exits once %i.next equals %n.
; Expected output per the assertions below: with only +v the loop is left
; scalar, while with +v,+zvfbfmin it becomes a <vscale x 8 x bfloat> vector
; body followed by the original scalar loop as the remainder.
define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
; NO-ZVFBFMIN-LABEL: define void @fadd(
; NO-ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-ZVFBFMIN-NEXT:  [[ENTRY:.*]]:
; NO-ZVFBFMIN-NEXT:    br label %[[LOOP:.*]]
; NO-ZVFBFMIN:       [[LOOP]]:
; NO-ZVFBFMIN-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; NO-ZVFBFMIN-NEXT:    [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
; NO-ZVFBFMIN-NEXT:    [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
; NO-ZVFBFMIN-NEXT:    [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2
; NO-ZVFBFMIN-NEXT:    [[Y:%.*]] = load bfloat, ptr [[B_GEP]], align 2
; NO-ZVFBFMIN-NEXT:    [[Z:%.*]] = fadd bfloat [[X]], [[Y]]
; NO-ZVFBFMIN-NEXT:    store bfloat [[Z]], ptr [[A_GEP]], align 2
; NO-ZVFBFMIN-NEXT:    [[I_NEXT]] = add i64 [[I]], 1
; NO-ZVFBFMIN-NEXT:    [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; NO-ZVFBFMIN-NEXT:    br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-ZVFBFMIN:       [[EXIT]]:
; NO-ZVFBFMIN-NEXT:    ret void
;
; ZVFBFMIN-LABEL: define void @fadd(
; ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; ZVFBFMIN-NEXT:  [[ENTRY:.*]]:
; ZVFBFMIN-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 8
; ZVFBFMIN-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP8]]
; ZVFBFMIN-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; ZVFBFMIN:       [[VECTOR_PH]]:
; ZVFBFMIN-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; ZVFBFMIN-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]]
; ZVFBFMIN-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; ZVFBFMIN-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP12]], 8
; ZVFBFMIN-NEXT:    br label %[[VECTOR_BODY:.*]]
; ZVFBFMIN:       [[VECTOR_BODY]]:
; ZVFBFMIN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; ZVFBFMIN-NEXT:    [[TMP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[TMP0]]
; ZVFBFMIN-NEXT:    [[TMP2:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[TMP0]]
; ZVFBFMIN-NEXT:    [[TMP3:%.*]] = getelementptr bfloat, ptr [[TMP1]], i32 0
; ZVFBFMIN-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x bfloat>, ptr [[TMP3]], align 2
; ZVFBFMIN-NEXT:    [[TMP4:%.*]] = getelementptr bfloat, ptr [[TMP2]], i32 0
; ZVFBFMIN-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 8 x bfloat>, ptr [[TMP4]], align 2
; ZVFBFMIN-NEXT:    [[TMP11:%.*]] = fadd <vscale x 8 x bfloat> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; ZVFBFMIN-NEXT:    store <vscale x 8 x bfloat> [[TMP11]], ptr [[TMP3]], align 2
; ZVFBFMIN-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; ZVFBFMIN-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; ZVFBFMIN-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; ZVFBFMIN:       [[MIDDLE_BLOCK]]:
; ZVFBFMIN-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; ZVFBFMIN-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; ZVFBFMIN:       [[SCALAR_PH]]:
; ZVFBFMIN-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; ZVFBFMIN-NEXT:    br label %[[LOOP:.*]]
; ZVFBFMIN:       [[LOOP]]:
; ZVFBFMIN-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; ZVFBFMIN-NEXT:    [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
; ZVFBFMIN-NEXT:    [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
; ZVFBFMIN-NEXT:    [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2
; ZVFBFMIN-NEXT:    [[Y:%.*]] = load bfloat, ptr [[B_GEP]], align 2
; ZVFBFMIN-NEXT:    [[Z:%.*]] = fadd bfloat [[X]], [[Y]]
; ZVFBFMIN-NEXT:    store bfloat [[Z]], ptr [[A_GEP]], align 2
; ZVFBFMIN-NEXT:    [[I_NEXT]] = add i64 [[I]], 1
; ZVFBFMIN-NEXT:    [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; ZVFBFMIN-NEXT:    br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; ZVFBFMIN:       [[EXIT]]:
; ZVFBFMIN-NEXT:    ret void
;
entry:
  br label %loop
loop:
  %i = phi i64 [0, %entry], [%i.next, %loop]
  %a.gep = getelementptr bfloat, ptr %a, i64 %i
  %b.gep = getelementptr bfloat, ptr %b, i64 %i
  %x = load bfloat, ptr %a.gep
  %y = load bfloat, ptr %b.gep
  %z = fadd bfloat %x, %y
  store bfloat %z, ptr %a.gep
  %i.next = add i64 %i, 1
  %done = icmp eq i64 %i.next, %n
  br i1 %done, label %exit, label %loop
exit:
  ret void
}

; Scalar loop that, per iteration, loads a bfloat from %a and from %b, extends
; both to float, and stores llvm.fmuladd of the two extended values and the
; float loaded from %c back to %c; it exits once %i.next equals %n.
; Expected output per the assertions below: with only +v the loop is
; vectorized with fixed <8 x ...> vectors, while with +v,+zvfbfmin it uses
; scalable <vscale x 4 x ...> vectors. Both keep the scalar loop as remainder.
; NOTE(review): the function name presumably refers to the RVV widening bf16
; multiply-accumulate instruction this pattern is meant to map to - confirm.
define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
; NO-ZVFBFMIN-LABEL: define void @vfwmaccbf16.vv(
; NO-ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-ZVFBFMIN-NEXT:  [[ENTRY:.*]]:
; NO-ZVFBFMIN-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
; NO-ZVFBFMIN-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-ZVFBFMIN:       [[VECTOR_PH]]:
; NO-ZVFBFMIN-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; NO-ZVFBFMIN-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-ZVFBFMIN-NEXT:    br label %[[VECTOR_BODY:.*]]
; NO-ZVFBFMIN:       [[VECTOR_BODY]]:
; NO-ZVFBFMIN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NO-ZVFBFMIN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; NO-ZVFBFMIN-NEXT:    [[TMP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[TMP0]]
; NO-ZVFBFMIN-NEXT:    [[TMP2:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[TMP0]]
; NO-ZVFBFMIN-NEXT:    [[TMP3:%.*]] = getelementptr float, ptr [[C]], i64 [[TMP0]]
; NO-ZVFBFMIN-NEXT:    [[TMP4:%.*]] = getelementptr bfloat, ptr [[TMP1]], i32 0
; NO-ZVFBFMIN-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x bfloat>, ptr [[TMP4]], align 2
; NO-ZVFBFMIN-NEXT:    [[TMP5:%.*]] = getelementptr bfloat, ptr [[TMP2]], i32 0
; NO-ZVFBFMIN-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x bfloat>, ptr [[TMP5]], align 2
; NO-ZVFBFMIN-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP3]], i32 0
; NO-ZVFBFMIN-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP6]], align 4
; NO-ZVFBFMIN-NEXT:    [[TMP7:%.*]] = fpext <8 x bfloat> [[WIDE_LOAD]] to <8 x float>
; NO-ZVFBFMIN-NEXT:    [[TMP8:%.*]] = fpext <8 x bfloat> [[WIDE_LOAD1]] to <8 x float>
; NO-ZVFBFMIN-NEXT:    [[TMP9:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[TMP7]], <8 x float> [[TMP8]], <8 x float> [[WIDE_LOAD2]])
; NO-ZVFBFMIN-NEXT:    store <8 x float> [[TMP9]], ptr [[TMP6]], align 4
; NO-ZVFBFMIN-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; NO-ZVFBFMIN-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-ZVFBFMIN-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; NO-ZVFBFMIN:       [[MIDDLE_BLOCK]]:
; NO-ZVFBFMIN-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-ZVFBFMIN-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; NO-ZVFBFMIN:       [[SCALAR_PH]]:
; NO-ZVFBFMIN-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; NO-ZVFBFMIN-NEXT:    br label %[[LOOP:.*]]
; NO-ZVFBFMIN:       [[LOOP]]:
; NO-ZVFBFMIN-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; NO-ZVFBFMIN-NEXT:    [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
; NO-ZVFBFMIN-NEXT:    [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
; NO-ZVFBFMIN-NEXT:    [[C_GEP:%.*]] = getelementptr float, ptr [[C]], i64 [[I]]
; NO-ZVFBFMIN-NEXT:    [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2
; NO-ZVFBFMIN-NEXT:    [[Y:%.*]] = load bfloat, ptr [[B_GEP]], align 2
; NO-ZVFBFMIN-NEXT:    [[Z:%.*]] = load float, ptr [[C_GEP]], align 4
; NO-ZVFBFMIN-NEXT:    [[X_EXT:%.*]] = fpext bfloat [[X]] to float
; NO-ZVFBFMIN-NEXT:    [[Y_EXT:%.*]] = fpext bfloat [[Y]] to float
; NO-ZVFBFMIN-NEXT:    [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X_EXT]], float [[Y_EXT]], float [[Z]])
; NO-ZVFBFMIN-NEXT:    store float [[FMULADD]], ptr [[C_GEP]], align 4
; NO-ZVFBFMIN-NEXT:    [[I_NEXT]] = add i64 [[I]], 1
; NO-ZVFBFMIN-NEXT:    [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; NO-ZVFBFMIN-NEXT:    br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; NO-ZVFBFMIN:       [[EXIT]]:
; NO-ZVFBFMIN-NEXT:    ret void
;
; ZVFBFMIN-LABEL: define void @vfwmaccbf16.vv(
; ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; ZVFBFMIN-NEXT:  [[ENTRY:.*]]:
; ZVFBFMIN-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; ZVFBFMIN-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; ZVFBFMIN-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; ZVFBFMIN:       [[VECTOR_PH]]:
; ZVFBFMIN-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; ZVFBFMIN-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; ZVFBFMIN-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; ZVFBFMIN-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; ZVFBFMIN-NEXT:    br label %[[VECTOR_BODY:.*]]
; ZVFBFMIN:       [[VECTOR_BODY]]:
; ZVFBFMIN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; ZVFBFMIN-NEXT:    [[TMP7:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[TMP6]]
; ZVFBFMIN-NEXT:    [[TMP8:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[TMP6]]
; ZVFBFMIN-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[C]], i64 [[TMP6]]
; ZVFBFMIN-NEXT:    [[TMP10:%.*]] = getelementptr bfloat, ptr [[TMP7]], i32 0
; ZVFBFMIN-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x bfloat>, ptr [[TMP10]], align 2
; ZVFBFMIN-NEXT:    [[TMP11:%.*]] = getelementptr bfloat, ptr [[TMP8]], i32 0
; ZVFBFMIN-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x bfloat>, ptr [[TMP11]], align 2
; ZVFBFMIN-NEXT:    [[TMP12:%.*]] = getelementptr float, ptr [[TMP9]], i32 0
; ZVFBFMIN-NEXT:    [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
; ZVFBFMIN-NEXT:    [[TMP13:%.*]] = fpext <vscale x 4 x bfloat> [[WIDE_LOAD]] to <vscale x 4 x float>
; ZVFBFMIN-NEXT:    [[TMP14:%.*]] = fpext <vscale x 4 x bfloat> [[WIDE_LOAD1]] to <vscale x 4 x float>
; ZVFBFMIN-NEXT:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[TMP13]], <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[WIDE_LOAD2]])
; ZVFBFMIN-NEXT:    store <vscale x 4 x float> [[TMP15]], ptr [[TMP12]], align 4
; ZVFBFMIN-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; ZVFBFMIN-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; ZVFBFMIN-NEXT:    br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; ZVFBFMIN:       [[MIDDLE_BLOCK]]:
; ZVFBFMIN-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; ZVFBFMIN-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; ZVFBFMIN:       [[SCALAR_PH]]:
; ZVFBFMIN-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; ZVFBFMIN-NEXT:    br label %[[LOOP:.*]]
; ZVFBFMIN:       [[LOOP]]:
; ZVFBFMIN-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; ZVFBFMIN-NEXT:    [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
; ZVFBFMIN-NEXT:    [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
; ZVFBFMIN-NEXT:    [[C_GEP:%.*]] = getelementptr float, ptr [[C]], i64 [[I]]
; ZVFBFMIN-NEXT:    [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2
; ZVFBFMIN-NEXT:    [[Y:%.*]] = load bfloat, ptr [[B_GEP]], align 2
; ZVFBFMIN-NEXT:    [[Z:%.*]] = load float, ptr [[C_GEP]], align 4
; ZVFBFMIN-NEXT:    [[X_EXT:%.*]] = fpext bfloat [[X]] to float
; ZVFBFMIN-NEXT:    [[Y_EXT:%.*]] = fpext bfloat [[Y]] to float
; ZVFBFMIN-NEXT:    [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X_EXT]], float [[Y_EXT]], float [[Z]])
; ZVFBFMIN-NEXT:    store float [[FMULADD]], ptr [[C_GEP]], align 4
; ZVFBFMIN-NEXT:    [[I_NEXT]] = add i64 [[I]], 1
; ZVFBFMIN-NEXT:    [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; ZVFBFMIN-NEXT:    br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; ZVFBFMIN:       [[EXIT]]:
; ZVFBFMIN-NEXT:    ret void
;
entry:
  br label %loop
loop:
  %i = phi i64 [0, %entry], [%i.next, %loop]
  %a.gep = getelementptr bfloat, ptr %a, i64 %i
  %b.gep = getelementptr bfloat, ptr %b, i64 %i
  %c.gep = getelementptr float, ptr %c, i64 %i
  %x = load bfloat, ptr %a.gep
  %y = load bfloat, ptr %b.gep
  %z = load float, ptr %c.gep
  %x.ext = fpext bfloat %x to float
  %y.ext = fpext bfloat %y to float
  %fmuladd = call float @llvm.fmuladd.f32(float %x.ext, float %y.ext, float %z)
  store float %fmuladd, ptr %c.gep
  %i.next = add i64 %i, 1
  %done = icmp eq i64 %i.next, %n
  br i1 %done, label %exit, label %loop
exit:
  ret void
}

;.
; NO-ZVFBFMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; NO-ZVFBFMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; NO-ZVFBFMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; NO-ZVFBFMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.
; ZVFBFMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; ZVFBFMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; ZVFBFMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; ZVFBFMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; ZVFBFMIN: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; ZVFBFMIN: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
;.