llvm-project/llvm/test/CodeGen/AArch64/pull-negations-after-concat-of-truncates.ll
Harvin Iriawan db158c7c83 [AArch64] Update generic sched model to A510
Refresh of the generic scheduling model to use A510 instead of A55.
  Main benefits are to the little core, and introducing SVE scheduling information.
  Changes tested on various OoO cores, no performance degradation is seen.

  Differential Revision: https://reviews.llvm.org/D156799
2023-08-21 12:25:15 +01:00

68 lines
3.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
; Base case: two <4 x i32> inputs are each bitwise-inverted, truncated to
; <4 x i16>, and concatenated into one <8 x i16>. The expected assembly below
; contains a single mvn on the full 128-bit result after the uzp1, rather than
; one inversion per input.
define <8 x i16> @not_not_trunc_concat(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: not_not_trunc_concat:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: ret
; xor with an all-ones vector is a bitwise NOT of every i32 lane.
%notx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%trnx = trunc <4 x i32> %notx to <4 x i16>
%noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%trny = trunc <4 x i32> %noty to <4 x i16>
; Mask 0..7 takes every lane of both operands in order, i.e. a plain concat.
%r = shufflevector <4 x i16> %trnx, <4 x i16> %trny, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %r
}
; Chained case: two levels of not -> truncate -> concat. Four <4 x i32> inputs
; are inverted and narrowed pairwise to <8 x i16>, narrowed again to <8 x i8>,
; and concatenated into <16 x i8>. The chain should flatten out so that only a
; single bitwise NOT (one mvn in the expected assembly) remains, applied to the
; final result.
define <16 x i8> @not_not_trunc_concat_chain(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: not_not_trunc_concat_chain:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: ret
; First half: NOT + truncate %a and %b, concat to <8 x i16>, truncate to i8.
%nota = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
%trna = trunc <4 x i32> %nota to <4 x i16>
%notb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
%trnb = trunc <4 x i32> %notb to <4 x i16>
%concat_a = shufflevector <4 x i16> %trna, <4 x i16> %trnb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%trun_concat_a = trunc <8 x i16> %concat_a to <8 x i8>
; Second half: the same sequence applied to %x and %y.
%notx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%trnx = trunc <4 x i32> %notx to <4 x i16>
%noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%trny = trunc <4 x i32> %noty to <4 x i16>
%concat_b = shufflevector <4 x i16> %trnx, <4 x i16> %trny, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%trun_concat_b = trunc <8 x i16> %concat_b to <8 x i8>
; Mask 0..15 concatenates the two <8 x i8> halves in order.
%r = shufflevector <8 x i8> %trun_concat_a, <8 x i8> %trun_concat_b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %r
}
; Negative test: the truncated NOT values are used by the concat and also by a
; separate add, so they have more than one user. The combine should not fire
; here, otherwise slightly worse code will be emitted; the expected assembly
; keeps one mvn per 64-bit half.
define <8 x i16> @not_not_trunc_concat_multiple_uses(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: not_not_trunc_concat_multiple_uses:
; CHECK: // %bb.0:
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: xtn v1.4h, v1.4s
; CHECK-NEXT: mvn v0.8b, v0.8b
; CHECK-NEXT: mvn v1.8b, v1.8b
; CHECK-NEXT: add v2.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: mov v2.d[1], v2.d[0]
; CHECK-NEXT: add v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ret
%notx = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%trnx = trunc <4 x i32> %notx to <4 x i16>
%noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%trny = trunc <4 x i32> %noty to <4 x i16>
; First user of %trnx / %trny: the concat.
%concat = shufflevector <4 x i16> %trnx, <4 x i16> %trny, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Second user of %trnx / %trny: a lane-wise add of the two halves.
%add = add <4 x i16> %trnx, %trny
; Both shuffle operands are %add; mask 0..7 therefore yields %add followed by
; a second copy of %add, widening it to <8 x i16>.
%extend_add = shufflevector <4 x i16> %add, <4 x i16> %add, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%r = add <8 x i16> %concat, %extend_add
ret <8 x i16> %r
}