We have disabled unrolling for vectorized loops in #151525 but this PR only checked the instruction type. For some loops, there is no instruction with vector type but they are still vector operations (just like the memset zero test in the precommit test). Here we check the operands as well to cover these cases.
726 lines
50 KiB
LLVM
726 lines
50 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -p loop-unroll -mtriple riscv64 -mattr=+v,+f -S %s | FileCheck %s --check-prefixes=COMMON,CHECK
|
|
; RUN: opt -p loop-unroll -mtriple=riscv64 -mcpu=sifive-p870 -S %s | FileCheck %s --check-prefixes=COMMON,SIFIVE
|
|
|
|
define void @reverse(ptr %dst, ptr %src, i64 %len) {
|
|
; CHECK-LABEL: define void @reverse(
|
|
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i64 [[LEN]], [[IV]]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP0]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV]]
|
|
; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[ARRAYIDX2]], align 16
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[LEN]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; SIFIVE-LABEL: define void @reverse(
|
|
; SIFIVE-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; SIFIVE-NEXT: [[ENTRY:.*]]:
|
|
; SIFIVE-NEXT: [[TMP2:%.*]] = add i64 [[LEN]], -1
|
|
; SIFIVE-NEXT: [[XTRAITER:%.*]] = and i64 [[LEN]], 7
|
|
; SIFIVE-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7
|
|
; SIFIVE-NEXT: br i1 [[TMP3]], label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
|
|
; SIFIVE: [[ENTRY_NEW]]:
|
|
; SIFIVE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[LEN]], [[XTRAITER]]
|
|
; SIFIVE-NEXT: br label %[[FOR_BODY:.*]]
|
|
; SIFIVE: [[FOR_BODY]]:
|
|
; SIFIVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
|
|
; SIFIVE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], %[[FOR_BODY]] ]
|
|
; SIFIVE-NEXT: [[TMP0:%.*]] = sub nsw i64 [[LEN]], [[IV]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP0]]
|
|
; SIFIVE-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP1]], ptr [[ARRAYIDX2]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; SIFIVE-NEXT: [[TMP4:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP4]]
|
|
; SIFIVE-NEXT: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX_1]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP5]], ptr [[ARRAYIDX2_1]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; SIFIVE-NEXT: [[TMP6:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_1]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP6]]
|
|
; SIFIVE-NEXT: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX_2]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_1]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP7]], ptr [[ARRAYIDX2_2]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; SIFIVE-NEXT: [[TMP8:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_2]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP8]]
|
|
; SIFIVE-NEXT: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX_3]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_2]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP9]], ptr [[ARRAYIDX2_3]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4
|
|
; SIFIVE-NEXT: [[TMP10:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_3]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP10]]
|
|
; SIFIVE-NEXT: [[TMP11:%.*]] = load <4 x float>, ptr [[ARRAYIDX_4]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_3]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP11]], ptr [[ARRAYIDX2_4]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5
|
|
; SIFIVE-NEXT: [[TMP12:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_4]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP12]]
|
|
; SIFIVE-NEXT: [[TMP13:%.*]] = load <4 x float>, ptr [[ARRAYIDX_5]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_4]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP13]], ptr [[ARRAYIDX2_5]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6
|
|
; SIFIVE-NEXT: [[TMP14:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_5]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP14]]
|
|
; SIFIVE-NEXT: [[TMP15:%.*]] = load <4 x float>, ptr [[ARRAYIDX_6]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_5]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP15]], ptr [[ARRAYIDX2_6]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7
|
|
; SIFIVE-NEXT: [[TMP16:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_6]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP16]]
|
|
; SIFIVE-NEXT: [[TMP17:%.*]] = load <4 x float>, ptr [[ARRAYIDX_7]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_7:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_6]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP17]], ptr [[ARRAYIDX2_7]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
|
|
; SIFIVE-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
|
|
; SIFIVE-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
|
|
; SIFIVE-NEXT: br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA:.*]], label %[[FOR_BODY]]
|
|
; SIFIVE: [[EXIT_UNR_LCSSA]]:
|
|
; SIFIVE-NEXT: [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[FOR_BODY]] ]
|
|
; SIFIVE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
|
|
; SIFIVE-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY_EPIL_PREHEADER]], label %[[EXIT:.*]]
|
|
; SIFIVE: [[FOR_BODY_EPIL_PREHEADER]]:
|
|
; SIFIVE-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR1]], %[[EXIT_UNR_LCSSA]] ]
|
|
; SIFIVE-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
|
|
; SIFIVE-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
|
|
; SIFIVE-NEXT: br label %[[FOR_BODY_EPIL:.*]]
|
|
; SIFIVE: [[FOR_BODY_EPIL]]:
|
|
; SIFIVE-NEXT: [[TMP18:%.*]] = sub nsw i64 [[LEN]], [[IV_UNR]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP18]]
|
|
; SIFIVE-NEXT: [[TMP19:%.*]] = load <4 x float>, ptr [[ARRAYIDX_EPIL]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_UNR]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP19]], ptr [[ARRAYIDX2_EPIL]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[IV_UNR]], 1
|
|
; SIFIVE-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 1, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_BODY_EPIL_1:.*]], label %[[EXIT_EPILOG_LCSSA:.*]]
|
|
; SIFIVE: [[FOR_BODY_EPIL_1]]:
|
|
; SIFIVE-NEXT: [[TMP20:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_EPIL]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP20]]
|
|
; SIFIVE-NEXT: [[TMP21:%.*]] = load <4 x float>, ptr [[ARRAYIDX_EPIL_1]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_EPIL]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP21]], ptr [[ARRAYIDX2_EPIL_1]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[IV_UNR]], 2
|
|
; SIFIVE-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i64 2, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_BODY_EPIL_2:.*]], label %[[EXIT_EPILOG_LCSSA]]
|
|
; SIFIVE: [[FOR_BODY_EPIL_2]]:
|
|
; SIFIVE-NEXT: [[TMP22:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_EPIL_1]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP22]]
|
|
; SIFIVE-NEXT: [[TMP23:%.*]] = load <4 x float>, ptr [[ARRAYIDX_EPIL_2]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_EPIL_1]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP23]], ptr [[ARRAYIDX2_EPIL_2]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_EPIL_2:%.*]] = add nuw nsw i64 [[IV_UNR]], 3
|
|
; SIFIVE-NEXT: [[EPIL_ITER_CMP_2:%.*]] = icmp ne i64 3, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[EPIL_ITER_CMP_2]], label %[[FOR_BODY_EPIL_3:.*]], label %[[EXIT_EPILOG_LCSSA]]
|
|
; SIFIVE: [[FOR_BODY_EPIL_3]]:
|
|
; SIFIVE-NEXT: [[TMP24:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_EPIL_2]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_EPIL_3:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP24]]
|
|
; SIFIVE-NEXT: [[TMP25:%.*]] = load <4 x float>, ptr [[ARRAYIDX_EPIL_3]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_EPIL_3:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_EPIL_2]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP25]], ptr [[ARRAYIDX2_EPIL_3]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_EPIL_3:%.*]] = add nuw nsw i64 [[IV_UNR]], 4
|
|
; SIFIVE-NEXT: [[EPIL_ITER_CMP_3:%.*]] = icmp ne i64 4, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[EPIL_ITER_CMP_3]], label %[[FOR_BODY_EPIL_4:.*]], label %[[EXIT_EPILOG_LCSSA]]
|
|
; SIFIVE: [[FOR_BODY_EPIL_4]]:
|
|
; SIFIVE-NEXT: [[TMP26:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_EPIL_3]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_EPIL_4:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP26]]
|
|
; SIFIVE-NEXT: [[TMP27:%.*]] = load <4 x float>, ptr [[ARRAYIDX_EPIL_4]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_EPIL_4:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_EPIL_3]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP27]], ptr [[ARRAYIDX2_EPIL_4]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_EPIL_4:%.*]] = add nuw nsw i64 [[IV_UNR]], 5
|
|
; SIFIVE-NEXT: [[EPIL_ITER_CMP_4:%.*]] = icmp ne i64 5, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[EPIL_ITER_CMP_4]], label %[[FOR_BODY_EPIL_5:.*]], label %[[EXIT_EPILOG_LCSSA]]
|
|
; SIFIVE: [[FOR_BODY_EPIL_5]]:
|
|
; SIFIVE-NEXT: [[TMP28:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_EPIL_4]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_EPIL_5:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP28]]
|
|
; SIFIVE-NEXT: [[TMP29:%.*]] = load <4 x float>, ptr [[ARRAYIDX_EPIL_5]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_EPIL_5:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_EPIL_4]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP29]], ptr [[ARRAYIDX2_EPIL_5]], align 16
|
|
; SIFIVE-NEXT: [[IV_NEXT_EPIL_5:%.*]] = add nuw nsw i64 [[IV_UNR]], 6
|
|
; SIFIVE-NEXT: [[EPIL_ITER_CMP_5:%.*]] = icmp ne i64 6, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[EPIL_ITER_CMP_5]], label %[[FOR_BODY_EPIL_6:.*]], label %[[EXIT_EPILOG_LCSSA]]
|
|
; SIFIVE: [[FOR_BODY_EPIL_6]]:
|
|
; SIFIVE-NEXT: [[TMP30:%.*]] = sub nsw i64 [[LEN]], [[IV_NEXT_EPIL_5]]
|
|
; SIFIVE-NEXT: [[ARRAYIDX_EPIL_6:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP30]]
|
|
; SIFIVE-NEXT: [[TMP31:%.*]] = load <4 x float>, ptr [[ARRAYIDX_EPIL_6]], align 16
|
|
; SIFIVE-NEXT: [[ARRAYIDX2_EPIL_6:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[DST]], i64 [[IV_NEXT_EPIL_5]]
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP31]], ptr [[ARRAYIDX2_EPIL_6]], align 16
|
|
; SIFIVE-NEXT: br label %[[EXIT_EPILOG_LCSSA]]
|
|
; SIFIVE: [[EXIT_EPILOG_LCSSA]]:
|
|
; SIFIVE-NEXT: br label %[[EXIT]]
|
|
; SIFIVE: [[EXIT]]:
|
|
; SIFIVE-NEXT: ret void
|
|
;
|
|
entry: ; preds = %entry
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%1 = sub nsw i64 %len, %iv
|
|
%arrayidx = getelementptr inbounds <4 x float>, ptr %src, i64 %1
|
|
%2 = load <4 x float>, ptr %arrayidx, align 16
|
|
%arrayidx2 = getelementptr inbounds nuw <4 x float>, ptr %dst, i64 %iv
|
|
store <4 x float> %2, ptr %arrayidx2, align 16
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond.not = icmp eq i64 %iv.next, %len
|
|
br i1 %exitcond.not, label %exit, label %for.body
|
|
|
|
exit: ; preds = %for.body, %entry
|
|
ret void
|
|
}
|
|
|
|
|
|
define void @saxpy_tripcount8_full_unroll(ptr %dst, ptr %src, float %a) {
|
|
; COMMON-LABEL: define void @saxpy_tripcount8_full_unroll(
|
|
; COMMON-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], float [[A:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; COMMON-NEXT: [[ENTRY:.*:]]
|
|
; COMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A]], i64 0
|
|
; COMMON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
|
; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; COMMON: [[VECTOR_BODY]]:
|
|
; COMMON-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[SRC]], align 4
|
|
; COMMON-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[DST]], align 4
|
|
; COMMON-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD12]])
|
|
; COMMON-NEXT: store <4 x float> [[TMP0]], ptr [[DST]], align 4
|
|
; COMMON-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 4
|
|
; COMMON-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; COMMON-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 4
|
|
; COMMON-NEXT: [[WIDE_LOAD12_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
|
|
; COMMON-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_1]], <4 x float> [[WIDE_LOAD12_1]])
|
|
; COMMON-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4
|
|
; COMMON-NEXT: ret void
|
|
;
|
|
entry:
|
|
%broadcast.splatinsert = insertelement <4 x float> poison, float %a, i64 0
|
|
%broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
|
|
br label %vector.body
|
|
|
|
vector.body: ; preds = %vector.body, %entry
|
|
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
|
|
%0 = getelementptr inbounds nuw float, ptr %src, i64 %index
|
|
%wide.load = load <4 x float>, ptr %0, align 4
|
|
%1 = getelementptr inbounds nuw float, ptr %dst, i64 %index
|
|
%wide.load12 = load <4 x float>, ptr %1, align 4
|
|
%2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12)
|
|
store <4 x float> %2, ptr %1, align 4
|
|
%index.next = add nuw i64 %index, 4
|
|
%3 = icmp eq i64 %index.next, 8
|
|
br i1 %3, label %exit, label %vector.body
|
|
|
|
exit: ; preds = %vector.body
|
|
ret void
|
|
}
|
|
|
|
|
|
define void @saxpy_tripcount1K_av0(ptr %dst, ptr %src, float %a) {
|
|
; CHECK-LABEL: define void @saxpy_tripcount1K_av0(
|
|
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], float [[A:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD12]])
|
|
; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
|
; CHECK-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; SIFIVE-LABEL: define void @saxpy_tripcount1K_av0(
|
|
; SIFIVE-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], float [[A:%.*]]) #[[ATTR0]] {
|
|
; SIFIVE-NEXT: [[ENTRY:.*]]:
|
|
; SIFIVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A]], i64 0
|
|
; SIFIVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
|
; SIFIVE-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; SIFIVE: [[VECTOR_BODY]]:
|
|
; SIFIVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT_15:%.*]], %[[VECTOR_BODY]] ]
|
|
; SIFIVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
|
|
; SIFIVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; SIFIVE-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD12]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP2]], ptr [[TMP1]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT1:%.*]] = add nuw nsw i64 [[INDEX]], 4
|
|
; SIFIVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT1]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP12]], align 4
|
|
; SIFIVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT1]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
|
|
; SIFIVE-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_1]], <4 x float> [[WIDE_LOAD12_1]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP5]], ptr [[TMP4]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX]], 8
|
|
; SIFIVE-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_1]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP6]], align 4
|
|
; SIFIVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_1]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_2:%.*]] = load <4 x float>, ptr [[TMP7]], align 4
|
|
; SIFIVE-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_2]], <4 x float> [[WIDE_LOAD12_2]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP8]], ptr [[TMP7]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX]], 12
|
|
; SIFIVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_2]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP9]], align 4
|
|
; SIFIVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_2]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_3:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
|
|
; SIFIVE-NEXT: [[TMP11:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_3]], <4 x float> [[WIDE_LOAD12_3]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
|
|
; SIFIVE-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP49]], align 4
|
|
; SIFIVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_4:%.*]] = load <4 x float>, ptr [[TMP13]], align 4
|
|
; SIFIVE-NEXT: [[TMP14:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_4]], <4 x float> [[WIDE_LOAD12_4]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP14]], ptr [[TMP13]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX]], 20
|
|
; SIFIVE-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_4]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP15]], align 4
|
|
; SIFIVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_4]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_5:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
|
|
; SIFIVE-NEXT: [[TMP17:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_5]], <4 x float> [[WIDE_LOAD12_5]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP17]], ptr [[TMP16]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX]], 24
|
|
; SIFIVE-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_5]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP18]], align 4
|
|
; SIFIVE-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_5]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_6:%.*]] = load <4 x float>, ptr [[TMP19]], align 4
|
|
; SIFIVE-NEXT: [[TMP20:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_6]], <4 x float> [[WIDE_LOAD12_6]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP20]], ptr [[TMP19]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_6:%.*]] = add nuw nsw i64 [[INDEX]], 28
|
|
; SIFIVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_6]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP21]], align 4
|
|
; SIFIVE-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_6]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4
|
|
; SIFIVE-NEXT: [[TMP23:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_7]], <4 x float> [[WIDE_LOAD12_7]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP22]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX]], 32
|
|
; SIFIVE-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_7]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP24]], align 4
|
|
; SIFIVE-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_7]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_8:%.*]] = load <4 x float>, ptr [[TMP25]], align 4
|
|
; SIFIVE-NEXT: [[TMP26:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_8]], <4 x float> [[WIDE_LOAD12_8]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP26]], ptr [[TMP25]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_8:%.*]] = add nuw nsw i64 [[INDEX]], 36
|
|
; SIFIVE-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_8]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4
|
|
; SIFIVE-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_8]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_9:%.*]] = load <4 x float>, ptr [[TMP28]], align 4
|
|
; SIFIVE-NEXT: [[TMP29:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_9]], <4 x float> [[WIDE_LOAD12_9]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP29]], ptr [[TMP28]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_9:%.*]] = add nuw nsw i64 [[INDEX]], 40
|
|
; SIFIVE-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_9]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x float>, ptr [[TMP30]], align 4
|
|
; SIFIVE-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_9]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_10:%.*]] = load <4 x float>, ptr [[TMP31]], align 4
|
|
; SIFIVE-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_10]], <4 x float> [[WIDE_LOAD12_10]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP32]], ptr [[TMP31]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_10:%.*]] = add nuw nsw i64 [[INDEX]], 44
|
|
; SIFIVE-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_10]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x float>, ptr [[TMP33]], align 4
|
|
; SIFIVE-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_10]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4
|
|
; SIFIVE-NEXT: [[TMP35:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_11]], <4 x float> [[WIDE_LOAD12_11]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP35]], ptr [[TMP34]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_11:%.*]] = add nuw nsw i64 [[INDEX]], 48
|
|
; SIFIVE-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_11]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x float>, ptr [[TMP36]], align 4
|
|
; SIFIVE-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_11]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_12:%.*]] = load <4 x float>, ptr [[TMP37]], align 4
|
|
; SIFIVE-NEXT: [[TMP38:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_12]], <4 x float> [[WIDE_LOAD12_12]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP38]], ptr [[TMP37]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_12:%.*]] = add nuw nsw i64 [[INDEX]], 52
|
|
; SIFIVE-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_12]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x float>, ptr [[TMP39]], align 4
|
|
; SIFIVE-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_12]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_13:%.*]] = load <4 x float>, ptr [[TMP40]], align 4
|
|
; SIFIVE-NEXT: [[TMP41:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_13]], <4 x float> [[WIDE_LOAD12_13]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP41]], ptr [[TMP40]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_13:%.*]] = add nuw nsw i64 [[INDEX]], 56
|
|
; SIFIVE-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_13]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x float>, ptr [[TMP42]], align 4
|
|
; SIFIVE-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_13]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_14:%.*]] = load <4 x float>, ptr [[TMP43]], align 4
|
|
; SIFIVE-NEXT: [[TMP44:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_14]], <4 x float> [[WIDE_LOAD12_14]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP44]], ptr [[TMP43]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_14:%.*]] = add nuw nsw i64 [[INDEX]], 60
|
|
; SIFIVE-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_14]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x float>, ptr [[TMP45]], align 4
|
|
; SIFIVE-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_14]]
|
|
; SIFIVE-NEXT: [[WIDE_LOAD12_15:%.*]] = load <4 x float>, ptr [[TMP46]], align 4
|
|
; SIFIVE-NEXT: [[TMP47:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_15]], <4 x float> [[WIDE_LOAD12_15]])
|
|
; SIFIVE-NEXT: store <4 x float> [[TMP47]], ptr [[TMP46]], align 4
|
|
; SIFIVE-NEXT: [[INDEX_NEXT_15]] = add nuw nsw i64 [[INDEX]], 64
|
|
; SIFIVE-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT_15]], 1024
|
|
; SIFIVE-NEXT: br i1 [[TMP48]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
|
|
; SIFIVE: [[EXIT]]:
|
|
; SIFIVE-NEXT: ret void
|
|
;
|
|
entry:
|
|
%broadcast.splatinsert = insertelement <4 x float> poison, float %a, i64 0
|
|
%broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
|
|
br label %vector.body
|
|
|
|
vector.body: ; preds = %vector.body, %entry
|
|
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
|
|
%0 = getelementptr inbounds nuw float, ptr %src, i64 %index
|
|
%wide.load = load <4 x float>, ptr %0, align 4
|
|
%1 = getelementptr inbounds nuw float, ptr %dst, i64 %index
|
|
%wide.load12 = load <4 x float>, ptr %1, align 4
|
|
%2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12)
|
|
store <4 x float> %2, ptr %1, align 4
|
|
%index.next = add nuw i64 %index, 4
|
|
%3 = icmp eq i64 %index.next, 1024
|
|
br i1 %3, label %exit, label %vector.body
|
|
|
|
exit: ; preds = %vector.body
|
|
ret void
|
|
}
|
|
|
|
|
|
define void @saxpy_tripcount1K_av1(ptr %dst, ptr %src, float %a) {
|
|
; COMMON-LABEL: define void @saxpy_tripcount1K_av1(
|
|
; COMMON-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], float [[A:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[ENTRY:.*]]:
|
|
; COMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A]], i64 0
|
|
; COMMON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
|
; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; COMMON: [[VECTOR_BODY]]:
|
|
; COMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; COMMON-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]]
|
|
; COMMON-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
|
|
; COMMON-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX]]
|
|
; COMMON-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; COMMON-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD12]])
|
|
; COMMON-NEXT: store <4 x float> [[TMP2]], ptr [[TMP1]], align 4
|
|
; COMMON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; COMMON-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
|
; COMMON-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; COMMON: [[EXIT]]:
|
|
; COMMON-NEXT: ret void
|
|
;
|
|
entry:
|
|
%broadcast.splatinsert = insertelement <4 x float> poison, float %a, i64 0
|
|
%broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
|
|
br label %vector.body
|
|
|
|
vector.body: ; preds = %vector.body, %entry
|
|
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
|
|
%0 = getelementptr inbounds nuw float, ptr %src, i64 %index
|
|
%wide.load = load <4 x float>, ptr %0, align 4
|
|
%1 = getelementptr inbounds nuw float, ptr %dst, i64 %index
|
|
%wide.load12 = load <4 x float>, ptr %1, align 4
|
|
%2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12)
|
|
store <4 x float> %2, ptr %1, align 4
|
|
%index.next = add nuw i64 %index, 4
|
|
%3 = icmp eq i64 %index.next, 1024
|
|
br i1 %3, label %exit, label %vector.body, !llvm.loop !0
|
|
|
|
exit: ; preds = %vector.body
|
|
ret void
|
|
}
|
|
|
|
; On SiFive we should runtime unroll the scalar epilogue loop, but not the
|
|
; vector loop.
|
|
define void @scalar_epilogue(ptr %p, i8 %splat.scalar, i64 %n) {
|
|
; CHECK-LABEL: define void @scalar_epilogue(
|
|
; CHECK-SAME: ptr [[P:%.*]], i8 [[SPLAT_SCALAR:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_REMAINDER_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -32
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[SPLAT_SCALAR]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[IV_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[GEP_P_IV:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[IV]]
|
|
; CHECK-NEXT: [[GEP_P_IV_16:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_P_IV]], i64 16
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[GEP_P_IV]], align 1
|
|
; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <16 x i8>, ptr [[GEP_P_IV_16]], align 1
|
|
; CHECK-NEXT: [[ADD_BROADCAST:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[ADD_BROADCAST_2:%.*]] = add <16 x i8> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: store <16 x i8> [[ADD_BROADCAST]], ptr [[GEP_P_IV]], align 1
|
|
; CHECK-NEXT: store <16 x i8> [[ADD_BROADCAST_2]], ptr [[GEP_P_IV_16]], align 1
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 32
|
|
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER]]
|
|
; CHECK: [[SCALAR_REMAINDER_PREHEADER]]:
|
|
; CHECK-NEXT: [[IV_SCALAR_LOOP_PH:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[SCALAR_REMAINDER:.*]]
|
|
; CHECK: [[SCALAR_REMAINDER]]:
|
|
; CHECK-NEXT: [[IV_SCALAR_LOOP:%.*]] = phi i64 [ [[INC:%.*]], %[[SCALAR_REMAINDER]] ], [ [[IV_SCALAR_LOOP_PH]], %[[SCALAR_REMAINDER_PREHEADER]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[IV_SCALAR_LOOP]]
|
|
; CHECK-NEXT: [[SCALAR_LOAD:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
|
|
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SCALAR_LOAD]], [[SPLAT_SCALAR]]
|
|
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
|
|
; CHECK-NEXT: [[INC]] = add nuw i64 [[IV_SCALAR_LOOP]], 1
|
|
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_REMAINDER]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: [[EXIT_LOOPEXIT]]:
|
|
; CHECK-NEXT: br label %[[EXIT]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; SIFIVE-LABEL: define void @scalar_epilogue(
|
|
; SIFIVE-SAME: ptr [[P:%.*]], i8 [[SPLAT_SCALAR:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SIFIVE-NEXT: [[ENTRY:.*]]:
|
|
; SIFIVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32
|
|
; SIFIVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_REMAINDER_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
|
|
; SIFIVE: [[VECTOR_PH]]:
|
|
; SIFIVE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -32
|
|
; SIFIVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[SPLAT_SCALAR]], i64 0
|
|
; SIFIVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
|
|
; SIFIVE-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; SIFIVE: [[VECTOR_BODY]]:
|
|
; SIFIVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[IV_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; SIFIVE-NEXT: [[GEP_P_IV:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[IV]]
|
|
; SIFIVE-NEXT: [[GEP_P_IV_16:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_P_IV]], i64 16
|
|
; SIFIVE-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[GEP_P_IV]], align 1
|
|
; SIFIVE-NEXT: [[WIDE_LOAD_2:%.*]] = load <16 x i8>, ptr [[GEP_P_IV_16]], align 1
|
|
; SIFIVE-NEXT: [[ADD_BROADCAST:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
|
|
; SIFIVE-NEXT: [[ADD_BROADCAST_2:%.*]] = add <16 x i8> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
|
|
; SIFIVE-NEXT: store <16 x i8> [[ADD_BROADCAST]], ptr [[GEP_P_IV]], align 1
|
|
; SIFIVE-NEXT: store <16 x i8> [[ADD_BROADCAST_2]], ptr [[GEP_P_IV_16]], align 1
|
|
; SIFIVE-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 32
|
|
; SIFIVE-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
|
|
; SIFIVE-NEXT: br i1 [[EXIT_COND]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
|
|
; SIFIVE: [[MIDDLE_BLOCK]]:
|
|
; SIFIVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; SIFIVE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PREHEADER]]:
|
|
; SIFIVE-NEXT: [[IV_SCALAR_LOOP:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
|
|
; SIFIVE-NEXT: [[TMP0:%.*]] = sub i64 [[N]], [[IV_SCALAR_LOOP]]
|
|
; SIFIVE-NEXT: [[TMP1:%.*]] = add i64 [[N]], -1
|
|
; SIFIVE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[IV_SCALAR_LOOP]]
|
|
; SIFIVE-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
|
|
; SIFIVE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
|
|
; SIFIVE-NEXT: br i1 [[LCMP_MOD]], label %[[SCALAR_REMAINDER_PROL_PREHEADER:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT:.*]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PROL_PREHEADER]]:
|
|
; SIFIVE-NEXT: br label %[[SCALAR_REMAINDER_PROL:.*]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PROL]]:
|
|
; SIFIVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[IV_SCALAR_LOOP]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
|
|
; SIFIVE-NEXT: [[ADD:%.*]] = add i8 [[SCALAR_LOAD]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
|
|
; SIFIVE-NEXT: [[INC:%.*]] = add nuw i64 [[IV_SCALAR_LOOP]], 1
|
|
; SIFIVE-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 1, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[PROL_ITER_CMP]], label %[[SCALAR_REMAINDER_PROL_1:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA:.*]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PROL_1]]:
|
|
; SIFIVE-NEXT: [[ARRAYIDX_PROL_1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_PROL_1:%.*]] = load i8, ptr [[ARRAYIDX_PROL_1]], align 1
|
|
; SIFIVE-NEXT: [[ADD_PROL_1:%.*]] = add i8 [[SCALAR_LOAD_PROL_1]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_PROL_1]], ptr [[ARRAYIDX_PROL_1]], align 1
|
|
; SIFIVE-NEXT: [[INC_PROL_1:%.*]] = add nuw i64 [[IV_SCALAR_LOOP]], 2
|
|
; SIFIVE-NEXT: [[PROL_ITER_CMP_1:%.*]] = icmp ne i64 2, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[PROL_ITER_CMP_1]], label %[[SCALAR_REMAINDER_PROL_2:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PROL_2]]:
|
|
; SIFIVE-NEXT: [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL_1]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_PROL_2:%.*]] = load i8, ptr [[ARRAYIDX_PROL_2]], align 1
|
|
; SIFIVE-NEXT: [[ADD_PROL_2:%.*]] = add i8 [[SCALAR_LOAD_PROL_2]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_PROL_2]], ptr [[ARRAYIDX_PROL_2]], align 1
|
|
; SIFIVE-NEXT: [[INC_PROL_2:%.*]] = add nuw i64 [[IV_SCALAR_LOOP]], 3
|
|
; SIFIVE-NEXT: [[PROL_ITER_CMP_2:%.*]] = icmp ne i64 3, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[PROL_ITER_CMP_2]], label %[[SCALAR_REMAINDER_PROL_3:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PROL_3]]:
|
|
; SIFIVE-NEXT: [[ARRAYIDX_PROL_3:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL_2]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_PROL_3:%.*]] = load i8, ptr [[ARRAYIDX_PROL_3]], align 1
|
|
; SIFIVE-NEXT: [[ADD_PROL_3:%.*]] = add i8 [[SCALAR_LOAD_PROL_3]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_PROL_3]], ptr [[ARRAYIDX_PROL_3]], align 1
|
|
; SIFIVE-NEXT: [[INC_PROL_3:%.*]] = add nuw i64 [[IV_SCALAR_LOOP]], 4
|
|
; SIFIVE-NEXT: [[PROL_ITER_CMP_3:%.*]] = icmp ne i64 4, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[PROL_ITER_CMP_3]], label %[[SCALAR_REMAINDER_PROL_4:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PROL_4]]:
|
|
; SIFIVE-NEXT: [[ARRAYIDX_PROL_4:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL_3]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_PROL_4:%.*]] = load i8, ptr [[ARRAYIDX_PROL_4]], align 1
|
|
; SIFIVE-NEXT: [[ADD_PROL_4:%.*]] = add i8 [[SCALAR_LOAD_PROL_4]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_PROL_4]], ptr [[ARRAYIDX_PROL_4]], align 1
|
|
; SIFIVE-NEXT: [[INC_PROL_4:%.*]] = add nuw i64 [[IV_SCALAR_LOOP]], 5
|
|
; SIFIVE-NEXT: [[PROL_ITER_CMP_4:%.*]] = icmp ne i64 5, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[PROL_ITER_CMP_4]], label %[[SCALAR_REMAINDER_PROL_5:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PROL_5]]:
|
|
; SIFIVE-NEXT: [[ARRAYIDX_PROL_5:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL_4]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_PROL_5:%.*]] = load i8, ptr [[ARRAYIDX_PROL_5]], align 1
|
|
; SIFIVE-NEXT: [[ADD_PROL_5:%.*]] = add i8 [[SCALAR_LOAD_PROL_5]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_PROL_5]], ptr [[ARRAYIDX_PROL_5]], align 1
|
|
; SIFIVE-NEXT: [[INC_PROL_5:%.*]] = add nuw i64 [[IV_SCALAR_LOOP]], 6
|
|
; SIFIVE-NEXT: [[PROL_ITER_CMP_5:%.*]] = icmp ne i64 6, [[XTRAITER]]
|
|
; SIFIVE-NEXT: br i1 [[PROL_ITER_CMP_5]], label %[[SCALAR_REMAINDER_PROL_6:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PROL_6]]:
|
|
; SIFIVE-NEXT: [[ARRAYIDX_PROL_6:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL_5]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_PROL_6:%.*]] = load i8, ptr [[ARRAYIDX_PROL_6]], align 1
|
|
; SIFIVE-NEXT: [[ADD_PROL_6:%.*]] = add i8 [[SCALAR_LOAD_PROL_6]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_PROL_6]], ptr [[ARRAYIDX_PROL_6]], align 1
|
|
; SIFIVE-NEXT: [[INC_PROL_6:%.*]] = add nuw i64 [[IV_SCALAR_LOOP]], 7
|
|
; SIFIVE-NEXT: br label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]:
|
|
; SIFIVE-NEXT: [[IV_SCALAR_LOOP_UNR_PH:%.*]] = phi i64 [ [[INC]], %[[SCALAR_REMAINDER_PROL]] ], [ [[INC_PROL_1]], %[[SCALAR_REMAINDER_PROL_1]] ], [ [[INC_PROL_2]], %[[SCALAR_REMAINDER_PROL_2]] ], [ [[INC_PROL_3]], %[[SCALAR_REMAINDER_PROL_3]] ], [ [[INC_PROL_4]], %[[SCALAR_REMAINDER_PROL_4]] ], [ [[INC_PROL_5]], %[[SCALAR_REMAINDER_PROL_5]] ], [ [[INC_PROL_6]], %[[SCALAR_REMAINDER_PROL_6]] ]
|
|
; SIFIVE-NEXT: br label %[[SCALAR_REMAINDER_PROL_LOOPEXIT]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PROL_LOOPEXIT]]:
|
|
; SIFIVE-NEXT: [[IV_SCALAR_LOOP_UNR:%.*]] = phi i64 [ [[IV_SCALAR_LOOP]], %[[SCALAR_REMAINDER_PREHEADER]] ], [ [[IV_SCALAR_LOOP_UNR_PH]], %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]] ]
|
|
; SIFIVE-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7
|
|
; SIFIVE-NEXT: br i1 [[TMP3]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER_NEW:.*]]
|
|
; SIFIVE: [[SCALAR_REMAINDER_PREHEADER_NEW]]:
|
|
; SIFIVE-NEXT: br label %[[SCALAR_REMAINDER:.*]]
|
|
; SIFIVE: [[SCALAR_REMAINDER]]:
|
|
; SIFIVE-NEXT: [[IV_SCALAR_LOOP1:%.*]] = phi i64 [ [[IV_SCALAR_LOOP_UNR]], %[[SCALAR_REMAINDER_PREHEADER_NEW]] ], [ [[INC_7:%.*]], %[[SCALAR_REMAINDER]] ]
|
|
; SIFIVE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[IV_SCALAR_LOOP1]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
|
|
; SIFIVE-NEXT: [[ADD1:%.*]] = add i8 [[SCALAR_LOAD1]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD1]], ptr [[ARRAYIDX1]], align 1
|
|
; SIFIVE-NEXT: [[INC1:%.*]] = add nuw i64 [[IV_SCALAR_LOOP1]], 1
|
|
; SIFIVE-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC1]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_1:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
|
|
; SIFIVE-NEXT: [[ADD_1:%.*]] = add i8 [[SCALAR_LOAD_1]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_1]], ptr [[ARRAYIDX_1]], align 1
|
|
; SIFIVE-NEXT: [[INC_1:%.*]] = add nuw i64 [[IV_SCALAR_LOOP1]], 2
|
|
; SIFIVE-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_1]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_2:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
|
|
; SIFIVE-NEXT: [[ADD_2:%.*]] = add i8 [[SCALAR_LOAD_2]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_2]], ptr [[ARRAYIDX_2]], align 1
|
|
; SIFIVE-NEXT: [[INC_2:%.*]] = add nuw i64 [[IV_SCALAR_LOOP1]], 3
|
|
; SIFIVE-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_2]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_3:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
|
|
; SIFIVE-NEXT: [[ADD_3:%.*]] = add i8 [[SCALAR_LOAD_3]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_3]], ptr [[ARRAYIDX_3]], align 1
|
|
; SIFIVE-NEXT: [[INC_3:%.*]] = add nuw i64 [[IV_SCALAR_LOOP1]], 4
|
|
; SIFIVE-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_3]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_4:%.*]] = load i8, ptr [[ARRAYIDX_4]], align 1
|
|
; SIFIVE-NEXT: [[ADD_4:%.*]] = add i8 [[SCALAR_LOAD_4]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_4]], ptr [[ARRAYIDX_4]], align 1
|
|
; SIFIVE-NEXT: [[INC_4:%.*]] = add nuw i64 [[IV_SCALAR_LOOP1]], 5
|
|
; SIFIVE-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_4]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_5:%.*]] = load i8, ptr [[ARRAYIDX_5]], align 1
|
|
; SIFIVE-NEXT: [[ADD_5:%.*]] = add i8 [[SCALAR_LOAD_5]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_5]], ptr [[ARRAYIDX_5]], align 1
|
|
; SIFIVE-NEXT: [[INC_5:%.*]] = add nuw i64 [[IV_SCALAR_LOOP1]], 6
|
|
; SIFIVE-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_5]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_6:%.*]] = load i8, ptr [[ARRAYIDX_6]], align 1
|
|
; SIFIVE-NEXT: [[ADD_6:%.*]] = add i8 [[SCALAR_LOAD_6]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_6]], ptr [[ARRAYIDX_6]], align 1
|
|
; SIFIVE-NEXT: [[INC_6:%.*]] = add nuw i64 [[IV_SCALAR_LOOP1]], 7
|
|
; SIFIVE-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_6]]
|
|
; SIFIVE-NEXT: [[SCALAR_LOAD_7:%.*]] = load i8, ptr [[ARRAYIDX_7]], align 1
|
|
; SIFIVE-NEXT: [[ADD_7:%.*]] = add i8 [[SCALAR_LOAD_7]], [[SPLAT_SCALAR]]
|
|
; SIFIVE-NEXT: store i8 [[ADD_7]], ptr [[ARRAYIDX_7]], align 1
|
|
; SIFIVE-NEXT: [[INC_7]] = add nuw i64 [[IV_SCALAR_LOOP1]], 8
|
|
; SIFIVE-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INC_7]], [[N]]
|
|
; SIFIVE-NEXT: br i1 [[EXITCOND_NOT_7]], label %[[EXIT_LOOPEXIT_UNR_LCSSA:.*]], label %[[SCALAR_REMAINDER]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; SIFIVE: [[EXIT_LOOPEXIT_UNR_LCSSA]]:
|
|
; SIFIVE-NEXT: br label %[[EXIT_LOOPEXIT]]
|
|
; SIFIVE: [[EXIT_LOOPEXIT]]:
|
|
; SIFIVE-NEXT: br label %[[EXIT]]
|
|
; SIFIVE: [[EXIT]]:
|
|
; SIFIVE-NEXT: ret void
|
|
;
|
|
entry:
|
|
%min.iters.check = icmp ult i64 %n, 32
|
|
br i1 %min.iters.check, label %scalar.remainder, label %vector.ph
|
|
|
|
vector.ph:
|
|
%n.vec = and i64 %n, -32
|
|
%broadcast.splatinsert = insertelement <16 x i8> poison, i8 %splat.scalar, i64 0
|
|
%broadcast.splat = shufflevector <16 x i8> %broadcast.splatinsert, <16 x i8> poison, <16 x i32> zeroinitializer
|
|
br label %vector.body
|
|
|
|
vector.body:
|
|
%iv = phi i64 [ 0, %vector.ph ], [ %iv.next, %vector.body ]
|
|
%gep.p.iv = getelementptr inbounds nuw i8, ptr %p, i64 %iv
|
|
%gep.p.iv.16 = getelementptr inbounds nuw i8, ptr %gep.p.iv, i64 16
|
|
%wide.load = load <16 x i8>, ptr %gep.p.iv, align 1
|
|
%wide.load.2 = load <16 x i8>, ptr %gep.p.iv.16, align 1
|
|
%add.broadcast = add <16 x i8> %wide.load, %broadcast.splat
|
|
%add.broadcast.2 = add <16 x i8> %wide.load.2, %broadcast.splat
|
|
store <16 x i8> %add.broadcast, ptr %gep.p.iv, align 1
|
|
store <16 x i8> %add.broadcast.2, ptr %gep.p.iv.16, align 1
|
|
%iv.next = add nuw i64 %iv, 32
|
|
%exit.cond = icmp eq i64 %iv.next, %n.vec
|
|
br i1 %exit.cond, label %middle.block, label %vector.body, !llvm.loop !2
|
|
|
|
middle.block:
|
|
%cmp.n = icmp eq i64 %n, %n.vec
|
|
br i1 %cmp.n, label %exit, label %scalar.remainder
|
|
|
|
scalar.remainder:
|
|
%iv.scalar.loop = phi i64 [ %inc, %scalar.remainder ], [ %n.vec, %middle.block ], [ 0, %entry ]
|
|
%arrayidx = getelementptr inbounds nuw i8, ptr %p, i64 %iv.scalar.loop
|
|
%scalar.load = load i8, ptr %arrayidx, align 1
|
|
%add = add i8 %scalar.load, %splat.scalar
|
|
store i8 %add, ptr %arrayidx, align 1
|
|
%inc = add nuw i64 %iv.scalar.loop, 1
|
|
%exitcond.not = icmp eq i64 %inc, %n
|
|
br i1 %exitcond.not, label %exit, label %scalar.remainder, !llvm.loop !3
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
define void @vector_operands(ptr %p, i64 %n) {
|
|
; COMMON-LABEL: define void @vector_operands(
|
|
; COMMON-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; COMMON-NEXT: [[ENTRY:.*]]:
|
|
; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; COMMON: [[VECTOR_BODY]]:
|
|
; COMMON-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; COMMON-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[ENTRY]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; COMMON-NEXT: [[VL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
|
|
; COMMON-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]]
|
|
; COMMON-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR]], <vscale x 2 x i1> splat (i1 true), i32 [[VL]])
|
|
; COMMON-NEXT: [[VL_ZEXT:%.*]] = zext i32 [[VL]] to i64
|
|
; COMMON-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[VL_ZEXT]], [[EVL_BASED_IV]]
|
|
; COMMON-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[VL_ZEXT]]
|
|
; COMMON-NEXT: [[TMP0:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
|
|
; COMMON-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
|
|
; COMMON: [[EXIT]]:
|
|
; COMMON-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %vector.body
|
|
|
|
vector.body:
|
|
%evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
|
|
%avl = phi i64 [ %n, %entry ], [ %avl.next, %vector.body ]
|
|
%vl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
|
|
%addr = getelementptr i64, ptr %p, i64 %evl.based.iv
|
|
call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> splat (i64 0), ptr align 8 %addr, <vscale x 2 x i1> splat (i1 true), i32 %vl)
|
|
%vl.zext = zext i32 %vl to i64
|
|
%index.evl.next = add nuw i64 %vl.zext, %evl.based.iv
|
|
%avl.next = sub nuw i64 %avl, %vl.zext
|
|
%3 = icmp eq i64 %avl.next, 0
|
|
br i1 %3, label %exit, label %vector.body, !llvm.loop !2
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
!0 = !{!0, !1}
|
|
!1 = !{!"llvm.loop.isvectorized", i32 1}
|
|
!2 = distinct !{!2, !1}
|
|
!3 = distinct !{!3, !1}
|
|
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
|
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
|
|
;.
|
|
; SIFIVE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
|
|
; SIFIVE: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; SIFIVE: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
|
|
; SIFIVE: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
|
|
;.
|