
The SelectionDAG scheduling preference now becomes source-order scheduling (the machine scheduler generates better code, even without a machine model defined for LoongArch yet). Most of the test changes are trivial instruction reorderings and different register allocations, with no obvious performance impact. This is similar to commit 3d0fbafd0bce43bb9106230a45d1130f7a40e5ec
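
For context: a target opts into source-order SelectionDAG scheduling through
the standard TargetLowering hook. A minimal sketch of the relevant call
follows; its placement inside LoongArchTargetLowering's constructor is an
assumption for illustration, not a quote of the actual change:

  // Hypothetical excerpt from the LoongArchTargetLowering constructor:
  // emit the SelectionDAG schedule in source order and leave real
  // scheduling decisions to the later MachineScheduler pass, which
  // produces better code here even without a LoongArch machine model.
  setSchedulingPreference(Sched::Source);

Sched::Source is the stock preference defined alongside TargetLowering, so no
LoongArch-specific scheduling hooks are needed for this change.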

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
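
; Each function below computes a high-half multiply: the operands are
; sign- or zero-extended to twice their element width, multiplied, shifted
; right by the original element size, and truncated back to the original
; type. This pattern should select to a single LSX vmuh.{b,h,w,d} (signed)
; or vmuh.{b,h,w,d}u (unsigned) instruction, as the CHECK lines verify.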

define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v16i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    vmuh.b $vr0, $vr0, $vr1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <16 x i8>, ptr %a0
  %v1 = load <16 x i8>, ptr %a1
  %v0s = sext <16 x i8> %v0 to <16 x i16>
  %v1s = sext <16 x i8> %v1 to <16 x i16>
  %m = mul <16 x i16> %v0s, %v1s
  %s = ashr <16 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %v2 = trunc <16 x i16> %s to <16 x i8>
  store <16 x i8> %v2, ptr %res
  ret void
}

define void @mulhu_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v16i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    vmuh.bu $vr0, $vr0, $vr1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <16 x i8>, ptr %a0
  %v1 = load <16 x i8>, ptr %a1
  %v0z = zext <16 x i8> %v0 to <16 x i16>
  %v1z = zext <16 x i8> %v1 to <16 x i16>
  %m = mul <16 x i16> %v0z, %v1z
  %s = lshr <16 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %v2 = trunc <16 x i16> %s to <16 x i8>
  store <16 x i8> %v2, ptr %res
  ret void
}

define void @mulhs_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    vmuh.h $vr0, $vr0, $vr1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <8 x i16>, ptr %a0
  %v1 = load <8 x i16>, ptr %a1
  %v0s = sext <8 x i16> %v0 to <8 x i32>
  %v1s = sext <8 x i16> %v1 to <8 x i32>
  %m = mul <8 x i32> %v0s, %v1s
  %s = ashr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %v2 = trunc <8 x i32> %s to <8 x i16>
  store <8 x i16> %v2, ptr %res
  ret void
}

define void @mulhu_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    vmuh.hu $vr0, $vr0, $vr1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <8 x i16>, ptr %a0
  %v1 = load <8 x i16>, ptr %a1
  %v0z = zext <8 x i16> %v0 to <8 x i32>
  %v1z = zext <8 x i16> %v1 to <8 x i32>
  %m = mul <8 x i32> %v0z, %v1z
  %s = lshr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %v2 = trunc <8 x i32> %s to <8 x i16>
  store <8 x i16> %v2, ptr %res
  ret void
}

define void @mulhs_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    vmuh.w $vr0, $vr0, $vr1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <4 x i32>, ptr %a0
  %v1 = load <4 x i32>, ptr %a1
  %v0s = sext <4 x i32> %v0 to <4 x i64>
  %v1s = sext <4 x i32> %v1 to <4 x i64>
  %m = mul <4 x i64> %v0s, %v1s
  %s = ashr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
  %v2 = trunc <4 x i64> %s to <4 x i32>
  store <4 x i32> %v2, ptr %res
  ret void
}

define void @mulhu_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    vmuh.wu $vr0, $vr0, $vr1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <4 x i32>, ptr %a0
  %v1 = load <4 x i32>, ptr %a1
  %v0z = zext <4 x i32> %v0 to <4 x i64>
  %v1z = zext <4 x i32> %v1 to <4 x i64>
  %m = mul <4 x i64> %v0z, %v1z
  %s = lshr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
  %v2 = trunc <4 x i64> %s to <4 x i32>
  store <4 x i32> %v2, ptr %res
  ret void
}

define void @mulhs_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    vmuh.d $vr0, $vr0, $vr1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <2 x i64>, ptr %a0
  %v1 = load <2 x i64>, ptr %a1
  %v0s = sext <2 x i64> %v0 to <2 x i128>
  %v1s = sext <2 x i64> %v1 to <2 x i128>
  %m = mul <2 x i128> %v0s, %v1s
  %s = ashr <2 x i128> %m, <i128 64, i128 64>
  %v2 = trunc <2 x i128> %s to <2 x i64>
  store <2 x i64> %v2, ptr %res
  ret void
}

define void @mulhu_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    vmuh.du $vr0, $vr0, $vr1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <2 x i64>, ptr %a0
  %v1 = load <2 x i64>, ptr %a1
  %v0z = zext <2 x i64> %v0 to <2 x i128>
  %v1z = zext <2 x i64> %v1 to <2 x i128>
  %m = mul <2 x i128> %v0z, %v1z
  %s = lshr <2 x i128> %m, <i128 64, i128 64>
  %v2 = trunc <2 x i128> %s to <2 x i64>
  store <2 x i64> %v2, ptr %res
  ret void
}