172 lines
5.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
; TODO: Loading an element and splatting it to a vector could be lowered to vldrepl.
; A load that has more than one user shouldn't be lowered to vldrepl.
; Here the loaded i64 is both stored to %dst and splatted, so the expected
; code keeps the scalar ld.d, broadcasts it with vreplgr2vr.d, and stores the
; same scalar register.
define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
; CHECK-LABEL: should_not_be_optimized:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld.d $a0, $a0, 0
; CHECK-NEXT:    vreplgr2vr.d $vr0, $a0
; CHECK-NEXT:    st.d $a0, $a1, 0
; CHECK-NEXT:    ret
  %elt = load i64, ptr %ptr
  store i64 %elt, ptr %dst
  %ins = insertelement <2 x i64> zeroinitializer, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
; Splat of an i64 loaded from %ptr + 4 bytes (one i32 element past %ptr).
; The expected code materialises the address with addi.d and uses vldrepl.d
; with a zero immediate instead of folding the offset.
; NOTE(review): presumably because the vldrepl.d immediate must be a multiple
; of 8 -- confirm against the ISA manual.
define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_d_unaligned_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi.d $a0, $a0, 4
; CHECK-NEXT:    vldrepl.d $vr0, $a0, 0
; CHECK-NEXT:    ret
  %addr = getelementptr i32, ptr %ptr, i32 1
  %elt = load i64, ptr %addr
  %ins = insertelement <2 x i64> zeroinitializer, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
; Splat of an i8 loaded directly from %ptr into all 16 lanes; expected to
; select a single vldrepl.b with a zero offset.
define <16 x i8> @vldrepl_b(ptr %ptr) {
; CHECK-LABEL: vldrepl_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.b $vr0, $a0, 0
; CHECK-NEXT:    ret
  %elt = load i8, ptr %ptr
  %ins = insertelement <16 x i8> zeroinitializer, i8 %elt, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
; Splat of an i8 loaded from %ptr + 33 bytes; the 33-byte offset is expected
; to be folded into the vldrepl.b immediate.
define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_b_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.b $vr0, $a0, 33
; CHECK-NEXT:    ret
  %addr = getelementptr i8, ptr %ptr, i64 33
  %elt = load i8, ptr %addr
  %ins = insertelement <16 x i8> zeroinitializer, i8 %elt, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
; Splat of an i16 loaded directly from %ptr into all 8 lanes; expected to
; select a single vldrepl.h with a zero offset.
define <8 x i16> @vldrepl_h(ptr %ptr) {
; CHECK-LABEL: vldrepl_h:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.h $vr0, $a0, 0
; CHECK-NEXT:    ret
  %elt = load i16, ptr %ptr
  %ins = insertelement <8 x i16> zeroinitializer, i16 %elt, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
; Splat of an i16 loaded 33 i16 elements past %ptr; the resulting 66-byte
; offset is expected to be folded into the vldrepl.h immediate.
define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_h_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.h $vr0, $a0, 66
; CHECK-NEXT:    ret
  %addr = getelementptr i16, ptr %ptr, i64 33
  %elt = load i16, ptr %addr
  %ins = insertelement <8 x i16> zeroinitializer, i16 %elt, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
; Splat of an i32 loaded directly from %ptr into all 4 lanes; expected to
; select a single vldrepl.w with a zero offset.
define <4 x i32> @vldrepl_w(ptr %ptr) {
; CHECK-LABEL: vldrepl_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.w $vr0, $a0, 0
; CHECK-NEXT:    ret
  %elt = load i32, ptr %ptr
  %ins = insertelement <4 x i32> zeroinitializer, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
; Splat of an i32 loaded 33 i32 elements past %ptr; the resulting 132-byte
; offset is expected to be folded into the vldrepl.w immediate.
define <4 x i32> @vldrepl_w_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_w_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.w $vr0, $a0, 132
; CHECK-NEXT:    ret
  %addr = getelementptr i32, ptr %ptr, i64 33
  %elt = load i32, ptr %addr
  %ins = insertelement <4 x i32> zeroinitializer, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
; Splat of an i64 loaded directly from %ptr into both lanes; expected to
; select a single vldrepl.d with a zero offset.
define <2 x i64> @vldrepl_d(ptr %ptr) {
; CHECK-LABEL: vldrepl_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.d $vr0, $a0, 0
; CHECK-NEXT:    ret
  %elt = load i64, ptr %ptr
  %ins = insertelement <2 x i64> zeroinitializer, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
; Splat of an i64 loaded 33 i64 elements past %ptr; the resulting 264-byte
; offset is expected to be folded into the vldrepl.d immediate.
define <2 x i64> @vldrepl_d_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_d_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.d $vr0, $a0, 264
; CHECK-NEXT:    ret
  %addr = getelementptr i64, ptr %ptr, i64 33
  %elt = load i64, ptr %addr
  %ins = insertelement <2 x i64> zeroinitializer, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
; Floating-point variant: splat of a float loaded directly from %ptr into all
; 4 lanes; expected to select vldrepl.w with a zero offset.
define <4 x float> @vldrepl_w_flt(ptr %ptr) {
; CHECK-LABEL: vldrepl_w_flt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.w $vr0, $a0, 0
; CHECK-NEXT:    ret
  %elt = load float, ptr %ptr
  %ins = insertelement <4 x float> zeroinitializer, float %elt, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
; Floating-point variant with an offset: splat of a float loaded from
; %ptr + 264 bytes. The address uses an i64-typed GEP (33 * 8 bytes) even
; though the loaded element is a 4-byte float; the offset is expected to be
; folded into the vldrepl.w immediate.
define <4 x float> @vldrepl_w_flt_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_w_flt_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.w $vr0, $a0, 264
; CHECK-NEXT:    ret
  %addr = getelementptr i64, ptr %ptr, i64 33
  %elt = load float, ptr %addr
  %ins = insertelement <4 x float> zeroinitializer, float %elt, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
; Floating-point variant: splat of a double loaded directly from %ptr into
; both lanes; expected to select vldrepl.d with a zero offset.
define <2 x double> @vldrepl_d_dbl(ptr %ptr) {
; CHECK-LABEL: vldrepl_d_dbl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.d $vr0, $a0, 0
; CHECK-NEXT:    ret
  %elt = load double, ptr %ptr
  %ins = insertelement <2 x double> zeroinitializer, double %elt, i32 0
  %splat = shufflevector <2 x double> %ins, <2 x double> poison, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
; Floating-point variant with an offset: splat of a double loaded 33 i64-sized
; elements (264 bytes) past %ptr; the offset is expected to be folded into the
; vldrepl.d immediate.
define <2 x double> @vldrepl_d_dbl_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_d_dbl_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.d $vr0, $a0, 264
; CHECK-NEXT:    ret
  %addr = getelementptr i64, ptr %ptr, i64 33
  %elt = load double, ptr %addr
  %ins = insertelement <2 x double> zeroinitializer, double %elt, i32 0
  %splat = shufflevector <2 x double> %ins, <2 x double> poison, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}