wanglei a5c90e48b6
[LoongArch] Switch to the Machine Scheduler (#83759)
The SelectionDAG scheduling preference now becomes source order
scheduling (machine scheduler generates better code -- even without
there being a machine model defined for LoongArch yet).

Most of the test changes are trivial instruction reorderings and
differing register allocations, without any obvious performance impact.

This is similar to commit: 3d0fbafd0bce43bb9106230a45d1130f7a40e5ec
2024-03-05 09:15:44 +08:00

163 lines
5.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
; Signed multiply-high on <16 x i8>: loads two vectors, computes the upper
; 8 bits of each signed 16-bit lane product, and stores the result to %res.
; The assertions below expect the whole sequence to select to one vmuh.b.
; Naming caveat: the IR arguments %a0/%a1 are the 2nd and 3rd parameters, so
; the expected assembly loads them via $a1/$a2 and stores through $a0 (%res).
define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vmuh.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <16 x i8>, ptr %a0
%v1 = load <16 x i8>, ptr %a1
; Sign-extend each lane to i16 so the full i8*i8 product is representable.
%v0s = sext <16 x i8> %v0 to <16 x i16>
%v1s = sext <16 x i8> %v1 to <16 x i16>
%m = mul <16 x i16> %v0s, %v1s
; Arithmetic shift by the narrow lane width (8) moves the high half down.
%s = ashr <16 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; Truncation keeps only that high half as the i8 result lane.
%v2 = trunc <16 x i16> %s to <16 x i8>
store <16 x i8> %v2, ptr %res
ret void
}
; Unsigned multiply-high on <16 x i8>: loads two vectors, computes the upper
; 8 bits of each unsigned 16-bit lane product, and stores the result to %res.
; The assertions below expect the whole sequence to select to one vmuh.bu.
; Naming caveat: the IR arguments %a0/%a1 are the 2nd and 3rd parameters, so
; the expected assembly loads them via $a1/$a2 and stores through $a0 (%res).
define void @mulhu_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vmuh.bu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <16 x i8>, ptr %a0
%v1 = load <16 x i8>, ptr %a1
; Zero-extend each lane to i16 so the full i8*i8 product is representable.
%v0z = zext <16 x i8> %v0 to <16 x i16>
%v1z = zext <16 x i8> %v1 to <16 x i16>
%m = mul <16 x i16> %v0z, %v1z
; Logical shift by the narrow lane width (8) moves the high half down.
%s = lshr <16 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; Truncation keeps only that high half as the i8 result lane.
%v2 = trunc <16 x i16> %s to <16 x i8>
store <16 x i8> %v2, ptr %res
ret void
}
; Signed multiply-high on <8 x i16>: loads two vectors, computes the upper
; 16 bits of each signed 32-bit lane product, and stores the result to %res.
; The assertions below expect the whole sequence to select to one vmuh.h.
; Naming caveat: the IR arguments %a0/%a1 are the 2nd and 3rd parameters, so
; the expected assembly loads them via $a1/$a2 and stores through $a0 (%res).
define void @mulhs_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vmuh.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x i16>, ptr %a0
%v1 = load <8 x i16>, ptr %a1
; Sign-extend each lane to i32 so the full i16*i16 product is representable.
%v0s = sext <8 x i16> %v0 to <8 x i32>
%v1s = sext <8 x i16> %v1 to <8 x i32>
%m = mul <8 x i32> %v0s, %v1s
; Arithmetic shift by the narrow lane width (16) moves the high half down.
%s = ashr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; Truncation keeps only that high half as the i16 result lane.
%v2 = trunc <8 x i32> %s to <8 x i16>
store <8 x i16> %v2, ptr %res
ret void
}
; Unsigned multiply-high on <8 x i16>: loads two vectors, computes the upper
; 16 bits of each unsigned 32-bit lane product, and stores the result to %res.
; The assertions below expect the whole sequence to select to one vmuh.hu.
; Naming caveat: the IR arguments %a0/%a1 are the 2nd and 3rd parameters, so
; the expected assembly loads them via $a1/$a2 and stores through $a0 (%res).
define void @mulhu_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vmuh.hu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x i16>, ptr %a0
%v1 = load <8 x i16>, ptr %a1
; Zero-extend each lane to i32 so the full i16*i16 product is representable.
%v0z = zext <8 x i16> %v0 to <8 x i32>
%v1z = zext <8 x i16> %v1 to <8 x i32>
%m = mul <8 x i32> %v0z, %v1z
; Logical shift by the narrow lane width (16) moves the high half down.
%s = lshr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; Truncation keeps only that high half as the i16 result lane.
%v2 = trunc <8 x i32> %s to <8 x i16>
store <8 x i16> %v2, ptr %res
ret void
}
; Signed multiply-high on <4 x i32>: loads two vectors, computes the upper
; 32 bits of each signed 64-bit lane product, and stores the result to %res.
; The assertions below expect the whole sequence to select to one vmuh.w.
; Naming caveat: the IR arguments %a0/%a1 are the 2nd and 3rd parameters, so
; the expected assembly loads them via $a1/$a2 and stores through $a0 (%res).
define void @mulhs_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vmuh.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x i32>, ptr %a0
%v1 = load <4 x i32>, ptr %a1
; Sign-extend each lane to i64 so the full i32*i32 product is representable.
%v0s = sext <4 x i32> %v0 to <4 x i64>
%v1s = sext <4 x i32> %v1 to <4 x i64>
%m = mul <4 x i64> %v0s, %v1s
; Arithmetic shift by the narrow lane width (32) moves the high half down.
%s = ashr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
; Truncation keeps only that high half as the i32 result lane.
%v2 = trunc <4 x i64> %s to <4 x i32>
store <4 x i32> %v2, ptr %res
ret void
}
; Unsigned multiply-high on <4 x i32>: loads two vectors, computes the upper
; 32 bits of each unsigned 64-bit lane product, and stores the result to %res.
; The assertions below expect the whole sequence to select to one vmuh.wu.
; Naming caveat: the IR arguments %a0/%a1 are the 2nd and 3rd parameters, so
; the expected assembly loads them via $a1/$a2 and stores through $a0 (%res).
define void @mulhu_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vmuh.wu $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x i32>, ptr %a0
%v1 = load <4 x i32>, ptr %a1
; Zero-extend each lane to i64 so the full i32*i32 product is representable.
%v0z = zext <4 x i32> %v0 to <4 x i64>
%v1z = zext <4 x i32> %v1 to <4 x i64>
%m = mul <4 x i64> %v0z, %v1z
; Logical shift by the narrow lane width (32) moves the high half down.
%s = lshr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
; Truncation keeps only that high half as the i32 result lane.
%v2 = trunc <4 x i64> %s to <4 x i32>
store <4 x i32> %v2, ptr %res
ret void
}
; Signed multiply-high on <2 x i64>: loads two vectors, computes the upper
; 64 bits of each signed 128-bit lane product, and stores the result to %res.
; The assertions below expect the whole sequence to select to one vmuh.d,
; i.e. the i128 intermediate is not expected to appear in the output.
; Naming caveat: the IR arguments %a0/%a1 are the 2nd and 3rd parameters, so
; the expected assembly loads them via $a1/$a2 and stores through $a0 (%res).
define void @mulhs_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vmuh.d $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <2 x i64>, ptr %a0
%v1 = load <2 x i64>, ptr %a1
; Sign-extend each lane to i128 so the full i64*i64 product is representable.
%v0s = sext <2 x i64> %v0 to <2 x i128>
%v1s = sext <2 x i64> %v1 to <2 x i128>
%m = mul <2 x i128> %v0s, %v1s
; Arithmetic shift by the narrow lane width (64) moves the high half down.
%s = ashr <2 x i128> %m, <i128 64, i128 64>
; Truncation keeps only that high half as the i64 result lane.
%v2 = trunc <2 x i128> %s to <2 x i64>
store <2 x i64> %v2, ptr %res
ret void
}
; Unsigned multiply-high on <2 x i64>: loads two vectors, computes the upper
; 64 bits of each unsigned 128-bit lane product, and stores the result to %res.
; The assertions below expect the whole sequence to select to one vmuh.du,
; i.e. the i128 intermediate is not expected to appear in the output.
; Naming caveat: the IR arguments %a0/%a1 are the 2nd and 3rd parameters, so
; the expected assembly loads them via $a1/$a2 and stores through $a0 (%res).
define void @mulhu_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vmuh.du $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <2 x i64>, ptr %a0
%v1 = load <2 x i64>, ptr %a1
; Zero-extend each lane to i128 so the full i64*i64 product is representable.
%v0z = zext <2 x i64> %v0 to <2 x i128>
%v1z = zext <2 x i64> %v1 to <2 x i128>
%m = mul <2 x i128> %v0z, %v1z
; Logical shift by the narrow lane width (64) moves the high half down.
%s = lshr <2 x i128> %m, <i128 64, i128 64>
; Truncation keeps only that high half as the i64 result lane.
%v2 = trunc <2 x i128> %s to <2 x i64>
store <2 x i64> %v2, ptr %res
ret void
}