llvm-project/llvm/test/CodeGen/AArch64/mla_mls_merge.ll
Sander de Smalen 61510b51c3 Revert "[AArch64] Enable subreg liveness tracking by default."
This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7.

Some issues were discovered with the bootstrap builds, which
seem like they were caused by this commit. I'm reverting to investigate.
2024-12-12 17:22:15 +00:00

206 lines
8.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s

; Each function below combines two NEON widening-multiply intrinsic calls with
; a vector add (test_mla*) or sub (test_mls*) and returns the low lanes of the
; result. The expected assembly is one umull/smull followed by a single
; accumulating umlal/smlal or umlsl/smlsl instruction.
; test_mla0: umull(%c, %d) + umull(%a, %b) on <8 x i8> inputs, keeping the low
; four i16 lanes of the sum. The expected output is a umull of the c/d pair
; followed by a umlal that accumulates the a/b product, then a copy of the low
; 64 bits into d0.
; The shuffle mask only selects lanes of the first operand, so the unused
; second operand is written as poison rather than the deprecated undef.
define <4 x i16> @test_mla0(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_mla0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v2.8h, v2.8b, v3.8b
; CHECK-NEXT: umlal v2.8h, v0.8b, v1.8b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %c, <8 x i8> %d)
  %add.i = add <8 x i16> %vmull.i.i, %vmull.i
  %shuffle.i = shufflevector <8 x i16> %add.i, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %shuffle.i
}
; test_mla1: smull(%c, %d) + smull(%a, %b) on <8 x i8> inputs, keeping the low
; four i16 lanes of the sum. The expected output is a smull of the c/d pair
; followed by a smlal that accumulates the a/b product, then a copy of the low
; 64 bits into d0.
; The shuffle mask only selects lanes of the first operand, so the unused
; second operand is written as poison rather than the deprecated undef.
define <4 x i16> @test_mla1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_mla1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v2.8h, v2.8b, v3.8b
; CHECK-NEXT: smlal v2.8h, v0.8b, v1.8b
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %c, <8 x i8> %d)
  %add.i = add <8 x i16> %vmull.i.i, %vmull.i
  %shuffle.i = shufflevector <8 x i16> %add.i, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %shuffle.i
}
; test_mla2: umull(%c, %d) + umull(%a, %b) on <4 x i16> inputs, keeping the low
; two i32 lanes of the sum. The expected output is a umull of the c/d pair
; followed by a umlal that accumulates the a/b product, then a copy of the low
; 64 bits into d0.
; The shuffle mask only selects lanes of the first operand, so the unused
; second operand is written as poison rather than the deprecated undef.
define <2 x i32> @test_mla2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: test_mla2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v2.4s, v2.4h, v3.4h
; CHECK-NEXT: umlal v2.4s, v0.4h, v1.4h
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %c, <4 x i16> %d)
  %add.i = add <4 x i32> %vmull2.i.i, %vmull2.i
  %shuffle.i = shufflevector <4 x i32> %add.i, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i32> %shuffle.i
}
; test_mla3: smull(%c, %d) + smull(%a, %b) on <4 x i16> inputs, keeping the low
; two i32 lanes of the sum. The expected output is a smull of the c/d pair
; followed by a smlal that accumulates the a/b product, then a copy of the low
; 64 bits into d0.
; The shuffle mask only selects lanes of the first operand, so the unused
; second operand is written as poison rather than the deprecated undef.
define <2 x i32> @test_mla3(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: test_mla3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v2.4s, v2.4h, v3.4h
; CHECK-NEXT: smlal v2.4s, v0.4h, v1.4h
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %c, <4 x i16> %d)
  %add.i = add <4 x i32> %vmull2.i.i, %vmull2.i
  %shuffle.i = shufflevector <4 x i32> %add.i, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i32> %shuffle.i
}
; test_mla4: umull(%c, %d) + umull(%a, %b) on <2 x i32> inputs, keeping lane 0
; of the i64 sum. The expected output is a umull of the c/d pair followed by a
; umlal that accumulates the a/b product, then a copy of the low 64 bits into
; d0.
; The all-zero shuffle mask only selects lane 0 of the first operand, so the
; unused second operand is written as poison rather than the deprecated undef.
define <1 x i64> @test_mla4(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: test_mla4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v2.2d, v2.2s, v3.2s
; CHECK-NEXT: umlal v2.2d, v0.2s, v1.2s
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %c, <2 x i32> %d)
  %add.i = add <2 x i64> %vmull2.i.i, %vmull2.i
  %shuffle.i = shufflevector <2 x i64> %add.i, <2 x i64> poison, <1 x i32> zeroinitializer
  ret <1 x i64> %shuffle.i
}
; test_mla5: smull(%c, %d) + smull(%a, %b) on <2 x i32> inputs, keeping lane 0
; of the i64 sum. The expected output is a smull of the c/d pair followed by a
; smlal that accumulates the a/b product, then a copy of the low 64 bits into
; d0.
; The all-zero shuffle mask only selects lane 0 of the first operand, so the
; unused second operand is written as poison rather than the deprecated undef.
define <1 x i64> @test_mla5(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: test_mla5:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v2.2d, v2.2s, v3.2s
; CHECK-NEXT: smlal v2.2d, v0.2s, v1.2s
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %c, <2 x i32> %d)
  %add.i = add <2 x i64> %vmull2.i.i, %vmull2.i
  %shuffle.i = shufflevector <2 x i64> %add.i, <2 x i64> poison, <1 x i32> zeroinitializer
  ret <1 x i64> %shuffle.i
}
; test_mls0: umull(%a, %b) - umull(%c, %d) on <8 x i8> inputs, keeping the low
; four i16 lanes of the difference. The expected output is a umull of the a/b
; pair followed by a umlsl that subtracts the c/d product in place in v0, so
; no register copy precedes the return.
; The shuffle mask only selects lanes of the first operand, so the unused
; second operand is written as poison rather than the deprecated undef.
define <4 x i16> @test_mls0(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_mls0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
; CHECK-NEXT: umlsl v0.8h, v2.8b, v3.8b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
  %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %c, <8 x i8> %d)
  %sub.i = sub <8 x i16> %vmull.i, %vmull.i.i
  %shuffle.i = shufflevector <8 x i16> %sub.i, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %shuffle.i
}
; test_mls1: smull(%a, %b) - smull(%c, %d) on <8 x i8> inputs, keeping the low
; four i16 lanes of the difference. The expected output is a smull of the a/b
; pair followed by a smlsl that subtracts the c/d product in place in v0, so
; no register copy precedes the return.
; The shuffle mask only selects lanes of the first operand, so the unused
; second operand is written as poison rather than the deprecated undef.
define <4 x i16> @test_mls1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_mls1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
; CHECK-NEXT: smlsl v0.8h, v2.8b, v3.8b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
  %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %c, <8 x i8> %d)
  %sub.i = sub <8 x i16> %vmull.i, %vmull.i.i
  %shuffle.i = shufflevector <8 x i16> %sub.i, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %shuffle.i
}
; test_mls2: umull(%a, %b) - umull(%c, %d) on <4 x i16> inputs, keeping the low
; two i32 lanes of the difference. The expected output is a umull of the a/b
; pair followed by a umlsl that subtracts the c/d product in place in v0, so
; no register copy precedes the return.
; The shuffle mask only selects lanes of the first operand, so the unused
; second operand is written as poison rather than the deprecated undef.
define <2 x i32> @test_mls2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: test_mls2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: umlsl v0.4s, v2.4h, v3.4h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %c, <4 x i16> %d)
  %sub.i = sub <4 x i32> %vmull2.i, %vmull2.i.i
  %shuffle.i = shufflevector <4 x i32> %sub.i, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i32> %shuffle.i
}
; test_mls3: smull(%a, %b) - smull(%c, %d) on <4 x i16> inputs, keeping the low
; two i32 lanes of the difference. The expected output is a smull of the a/b
; pair followed by a smlsl that subtracts the c/d product in place in v0, so
; no register copy precedes the return.
; The shuffle mask only selects lanes of the first operand, so the unused
; second operand is written as poison rather than the deprecated undef.
define <2 x i32> @test_mls3(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: test_mls3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: smlsl v0.4s, v2.4h, v3.4h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %c, <4 x i16> %d)
  %sub.i = sub <4 x i32> %vmull2.i, %vmull2.i.i
  %shuffle.i = shufflevector <4 x i32> %sub.i, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i32> %shuffle.i
}
; test_mls4: umull(%a, %b) - umull(%c, %d) on <2 x i32> inputs, keeping lane 0
; of the i64 difference. The expected output is a umull of the a/b pair
; followed by a umlsl that subtracts the c/d product in place in v0, so no
; register copy precedes the return.
; The all-zero shuffle mask only selects lane 0 of the first operand, so the
; unused second operand is written as poison rather than the deprecated undef.
define <1 x i64> @test_mls4(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: test_mls4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: umlsl v0.2d, v2.2s, v3.2s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %c, <2 x i32> %d)
  %sub.i = sub <2 x i64> %vmull2.i, %vmull2.i.i
  %shuffle.i = shufflevector <2 x i64> %sub.i, <2 x i64> poison, <1 x i32> zeroinitializer
  ret <1 x i64> %shuffle.i
}
; test_mls5: smull(%a, %b) - smull(%c, %d) on <2 x i32> inputs, keeping lane 0
; of the i64 difference. The expected output is a smull of the a/b pair
; followed by a smlsl that subtracts the c/d product in place in v0, so no
; register copy precedes the return.
; The all-zero shuffle mask only selects lane 0 of the first operand, so the
; unused second operand is written as poison rather than the deprecated undef.
define <1 x i64> @test_mls5(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: test_mls5:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: smlsl v0.2d, v2.2s, v3.2s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %c, <2 x i32> %d)
  %sub.i = sub <2 x i64> %vmull2.i, %vmull2.i.i
  %shuffle.i = shufflevector <2 x i64> %sub.i, <2 x i64> poison, <1 x i32> zeroinitializer
  ret <1 x i64> %shuffle.i
}
; Declarations of the AArch64 NEON umull/smull intrinsics called by the test
; functions in this file. Each takes two 64-bit vectors and returns a 128-bit
; vector with the same lane count and lanes twice as wide
; (i8 -> i16, i16 -> i32, i32 -> i64).
declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>)
declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>)
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)