Paul Walker 41a6bb4c05
[LLVM][CodeGen][SVE] Prefer NEON instructions when zeroing Z registers. (#133929)
Several implementations have zero-latency instructions to zero
registers. To date, no implementation has a dedicated SVE instruction, but
we can use the NEON equivalent because it is defined to zero bits
128..VL regardless of the immediate used.

NOTE: The relevant instruction is not available in streaming mode, where
the original SVE DUP instruction remains in use.
2025-04-03 13:15:05 +01:00

146 lines
4.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Every function in this file returns a zeroinitializer of one scalable type;
; the two llc invocations below check the instructions generated for it.
; Non-streaming configuration, built with -mattr=+sve.
; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,SVE
; Streaming configuration, built with -mattr=+sme and -force-streaming.
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING-SVE
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-none-linux-gnu"
; Zero-initialised <vscale x 2 x i64> return value. The -mattr=+sve run expects
; a NEON movi on v0; the -force-streaming run expects mov z0.d, #0 instead.
define <vscale x 2 x i64> @test_zeroinit_2xi64() {
; SVE-LABEL: test_zeroinit_2xi64:
; SVE: // %bb.0:
; SVE-NEXT: movi v0.2d, #0000000000000000
; SVE-NEXT: ret
;
; STREAMING-SVE-LABEL: test_zeroinit_2xi64:
; STREAMING-SVE: // %bb.0:
; STREAMING-SVE-NEXT: mov z0.d, #0 // =0x0
; STREAMING-SVE-NEXT: ret
ret <vscale x 2 x i64> zeroinitializer
}
; Zero-initialised <vscale x 4 x i32> return value. The -mattr=+sve run expects
; a NEON movi on v0; the -force-streaming run expects mov z0.s, #0 instead.
define <vscale x 4 x i32> @test_zeroinit_4xi32() {
; SVE-LABEL: test_zeroinit_4xi32:
; SVE: // %bb.0:
; SVE-NEXT: movi v0.2d, #0000000000000000
; SVE-NEXT: ret
;
; STREAMING-SVE-LABEL: test_zeroinit_4xi32:
; STREAMING-SVE: // %bb.0:
; STREAMING-SVE-NEXT: mov z0.s, #0 // =0x0
; STREAMING-SVE-NEXT: ret
ret <vscale x 4 x i32> zeroinitializer
}
; Zero-initialised <vscale x 8 x i16> return value. The -mattr=+sve run expects
; a NEON movi on v0; the -force-streaming run expects mov z0.h, #0 instead.
define <vscale x 8 x i16> @test_zeroinit_8xi16() {
; SVE-LABEL: test_zeroinit_8xi16:
; SVE: // %bb.0:
; SVE-NEXT: movi v0.2d, #0000000000000000
; SVE-NEXT: ret
;
; STREAMING-SVE-LABEL: test_zeroinit_8xi16:
; STREAMING-SVE: // %bb.0:
; STREAMING-SVE-NEXT: mov z0.h, #0 // =0x0
; STREAMING-SVE-NEXT: ret
ret <vscale x 8 x i16> zeroinitializer
}
; Zero-initialised <vscale x 16 x i8> return value. The -mattr=+sve run expects
; a NEON movi on v0; the -force-streaming run expects mov z0.b, #0 instead.
define <vscale x 16 x i8> @test_zeroinit_16xi8() {
; SVE-LABEL: test_zeroinit_16xi8:
; SVE: // %bb.0:
; SVE-NEXT: movi v0.2d, #0000000000000000
; SVE-NEXT: ret
;
; STREAMING-SVE-LABEL: test_zeroinit_16xi8:
; STREAMING-SVE: // %bb.0:
; STREAMING-SVE-NEXT: mov z0.b, #0 // =0x0
; STREAMING-SVE-NEXT: ret
ret <vscale x 16 x i8> zeroinitializer
}
; Zero-initialised <vscale x 2 x double> return value. The -mattr=+sve run
; expects a NEON movi on v0; the -force-streaming run expects mov z0.d, #0.
define <vscale x 2 x double> @test_zeroinit_2xf64() {
; SVE-LABEL: test_zeroinit_2xf64:
; SVE: // %bb.0:
; SVE-NEXT: movi v0.2d, #0000000000000000
; SVE-NEXT: ret
;
; STREAMING-SVE-LABEL: test_zeroinit_2xf64:
; STREAMING-SVE: // %bb.0:
; STREAMING-SVE-NEXT: mov z0.d, #0 // =0x0
; STREAMING-SVE-NEXT: ret
ret <vscale x 2 x double> zeroinitializer
}
; Zero-initialised <vscale x 4 x float> return value. The -mattr=+sve run
; expects a NEON movi on v0; the -force-streaming run expects mov z0.s, #0.
define <vscale x 4 x float> @test_zeroinit_4xf32() {
; SVE-LABEL: test_zeroinit_4xf32:
; SVE: // %bb.0:
; SVE-NEXT: movi v0.2d, #0000000000000000
; SVE-NEXT: ret
;
; STREAMING-SVE-LABEL: test_zeroinit_4xf32:
; STREAMING-SVE: // %bb.0:
; STREAMING-SVE-NEXT: mov z0.s, #0 // =0x0
; STREAMING-SVE-NEXT: ret
ret <vscale x 4 x float> zeroinitializer
}
; Zero-initialised <vscale x 8 x half> return value. The -mattr=+sve run
; expects a NEON movi on v0; the -force-streaming run expects mov z0.h, #0.
define <vscale x 8 x half> @test_zeroinit_8xf16() {
; SVE-LABEL: test_zeroinit_8xf16:
; SVE: // %bb.0:
; SVE-NEXT: movi v0.2d, #0000000000000000
; SVE-NEXT: ret
;
; STREAMING-SVE-LABEL: test_zeroinit_8xf16:
; STREAMING-SVE: // %bb.0:
; STREAMING-SVE-NEXT: mov z0.h, #0 // =0x0
; STREAMING-SVE-NEXT: ret
ret <vscale x 8 x half> zeroinitializer
}
; Zero-initialised <vscale x 1 x i1> return value. Both llc invocations share
; the same expectations here, a single pfalse on p0.
define <vscale x 1 x i1> @test_zeroinit_1xi1() {
; CHECK-LABEL: test_zeroinit_1xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p0.b
; CHECK-NEXT: ret
ret <vscale x 1 x i1> zeroinitializer
}
; Zero-initialised <vscale x 2 x i1> return value. Both llc invocations share
; the same expectations here, a single pfalse on p0.
define <vscale x 2 x i1> @test_zeroinit_2xi1() {
; CHECK-LABEL: test_zeroinit_2xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p0.b
; CHECK-NEXT: ret
ret <vscale x 2 x i1> zeroinitializer
}
; Zero-initialised <vscale x 4 x i1> return value. Both llc invocations share
; the same expectations here, a single pfalse on p0.
define <vscale x 4 x i1> @test_zeroinit_4xi1() {
; CHECK-LABEL: test_zeroinit_4xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p0.b
; CHECK-NEXT: ret
ret <vscale x 4 x i1> zeroinitializer
}
; Zero-initialised <vscale x 8 x i1> return value. Both llc invocations share
; the same expectations here, a single pfalse on p0.
define <vscale x 8 x i1> @test_zeroinit_8xi1() {
; CHECK-LABEL: test_zeroinit_8xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p0.b
; CHECK-NEXT: ret
ret <vscale x 8 x i1> zeroinitializer
}
; Zero-initialised <vscale x 16 x i1> return value. Both llc invocations share
; the same expectations here, a single pfalse on p0.
define <vscale x 16 x i1> @test_zeroinit_16xi1() {
; CHECK-LABEL: test_zeroinit_16xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p0.b
; CHECK-NEXT: ret
ret <vscale x 16 x i1> zeroinitializer
}
; Zero-initialised target("aarch64.svcount") return value; this function
; carries its own "+sme2" target-features attribute. Both llc invocations
; share the same expectations here, a single pfalse on p0.
define target("aarch64.svcount") @test_zeroinit_svcount() "target-features"="+sme2" {
; CHECK-LABEL: test_zeroinit_svcount:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p0.b
; CHECK-NEXT: ret
ret target("aarch64.svcount") zeroinitializer
}