[AArch64] Don't emit Neon in streaming[-compatible] functions with -fzero-call-used-regs (#116995)

Previously, with `-fzero-call-used-regs`, clang/LLVM would incorrectly
emit Neon instructions in streaming functions and in streaming-compatible
functions without SVE.

With this change:

* In streaming functions, Z/P registers will be zeroed
* In streaming-compatible functions without SVE, D registers will be zeroed
  - (Neon vector instructions, including `movi v..`, are illegal there)
This commit is contained in:
Benjamin Maxwell 2024-11-21 11:02:07 +00:00 committed by GitHub
parent 5bdee35544
commit 83c7784c35
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 361 additions and 194 deletions

View File

@ -1010,7 +1010,7 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
BitVector GPRsToZero(TRI.getNumRegs());
BitVector FPRsToZero(TRI.getNumRegs());
bool HasSVE = STI.hasSVE();
bool HasSVE = STI.isSVEorStreamingSVEAvailable();
for (MCRegister Reg : RegsToZero.set_bits()) {
if (TRI.isGeneralPurposeRegister(MF, Reg)) {
// For GPRs, we only care to clear out the 64-bit register.

View File

@ -9700,13 +9700,20 @@ void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
if (TRI.isGeneralPurposeRegister(MF, Reg)) {
BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
} else if (STI.hasSVE()) {
} else if (STI.isSVEorStreamingSVEAvailable()) {
BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
.addImm(0)
.addImm(0);
} else {
} else if (STI.isNeonAvailable()) {
BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
.addImm(0);
} else {
// This is a streaming-compatible function without SVE. We don't have full
// Neon (just FPRs), so we can at most use the first 64-bit sub-register.
// Since `movi v..` would be illegal here, use `fmov d..` instead.
assert(STI.hasNEON() && "Expected to have NEON.");
Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
}
}

View File

@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,DEFAULT
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE-OR-SME
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE-OR-SME
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
target triple = "aarch64-unknown-linux-gnu"
@result = dso_local global i32 0, align 4
@ -156,32 +160,55 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo
; DEFAULT-NEXT: movi v7.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
; SVE-LABEL: all_arg:
; SVE: // %bb.0: // %entry
; SVE-NEXT: mul w8, w1, w0
; SVE-NEXT: mov x1, #0 // =0x0
; SVE-NEXT: mov x3, #0 // =0x0
; SVE-NEXT: mov x4, #0 // =0x0
; SVE-NEXT: mov x5, #0 // =0x0
; SVE-NEXT: mov x6, #0 // =0x0
; SVE-NEXT: mov x7, #0 // =0x0
; SVE-NEXT: mov x18, #0 // =0x0
; SVE-NEXT: mov z0.d, #0 // =0x0
; SVE-NEXT: orr w0, w8, w2
; SVE-NEXT: mov x2, #0 // =0x0
; SVE-NEXT: mov x8, #0 // =0x0
; SVE-NEXT: mov z1.d, #0 // =0x0
; SVE-NEXT: mov z2.d, #0 // =0x0
; SVE-NEXT: mov z3.d, #0 // =0x0
; SVE-NEXT: mov z4.d, #0 // =0x0
; SVE-NEXT: mov z5.d, #0 // =0x0
; SVE-NEXT: mov z6.d, #0 // =0x0
; SVE-NEXT: mov z7.d, #0 // =0x0
; SVE-NEXT: pfalse p0.b
; SVE-NEXT: pfalse p1.b
; SVE-NEXT: pfalse p2.b
; SVE-NEXT: pfalse p3.b
; SVE-NEXT: ret
; SVE-OR-SME-LABEL: all_arg:
; SVE-OR-SME: // %bb.0: // %entry
; SVE-OR-SME-NEXT: mul w8, w1, w0
; SVE-OR-SME-NEXT: mov x1, #0 // =0x0
; SVE-OR-SME-NEXT: mov x3, #0 // =0x0
; SVE-OR-SME-NEXT: mov x4, #0 // =0x0
; SVE-OR-SME-NEXT: mov x5, #0 // =0x0
; SVE-OR-SME-NEXT: mov x6, #0 // =0x0
; SVE-OR-SME-NEXT: mov x7, #0 // =0x0
; SVE-OR-SME-NEXT: mov x18, #0 // =0x0
; SVE-OR-SME-NEXT: mov z0.d, #0 // =0x0
; SVE-OR-SME-NEXT: orr w0, w8, w2
; SVE-OR-SME-NEXT: mov x2, #0 // =0x0
; SVE-OR-SME-NEXT: mov x8, #0 // =0x0
; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0
; SVE-OR-SME-NEXT: pfalse p0.b
; SVE-OR-SME-NEXT: pfalse p1.b
; SVE-OR-SME-NEXT: pfalse p2.b
; SVE-OR-SME-NEXT: pfalse p3.b
; SVE-OR-SME-NEXT: ret
;
; STREAMING-COMPAT-LABEL: all_arg:
; STREAMING-COMPAT: // %bb.0: // %entry
; STREAMING-COMPAT-NEXT: mul w8, w1, w0
; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0
; STREAMING-COMPAT-NEXT: fmov d0, xzr
; STREAMING-COMPAT-NEXT: orr w0, w8, w2
; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0
; STREAMING-COMPAT-NEXT: fmov d1, xzr
; STREAMING-COMPAT-NEXT: fmov d2, xzr
; STREAMING-COMPAT-NEXT: fmov d3, xzr
; STREAMING-COMPAT-NEXT: fmov d4, xzr
; STREAMING-COMPAT-NEXT: fmov d5, xzr
; STREAMING-COMPAT-NEXT: fmov d6, xzr
; STREAMING-COMPAT-NEXT: fmov d7, xzr
; STREAMING-COMPAT-NEXT: ret
entry:
%mul = mul nsw i32 %b, %a
@ -238,69 +265,117 @@ define dso_local i32 @all(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_
; DEFAULT-NEXT: movi v31.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
; SVE-LABEL: all:
; SVE: // %bb.0: // %entry
; SVE-NEXT: mul w8, w1, w0
; SVE-NEXT: mov x1, #0 // =0x0
; SVE-NEXT: mov x3, #0 // =0x0
; SVE-NEXT: mov x4, #0 // =0x0
; SVE-NEXT: mov x5, #0 // =0x0
; SVE-NEXT: mov x6, #0 // =0x0
; SVE-NEXT: mov x7, #0 // =0x0
; SVE-NEXT: mov x9, #0 // =0x0
; SVE-NEXT: mov x10, #0 // =0x0
; SVE-NEXT: orr w0, w8, w2
; SVE-NEXT: mov x2, #0 // =0x0
; SVE-NEXT: mov x8, #0 // =0x0
; SVE-NEXT: mov x11, #0 // =0x0
; SVE-NEXT: mov x12, #0 // =0x0
; SVE-NEXT: mov x13, #0 // =0x0
; SVE-NEXT: mov x14, #0 // =0x0
; SVE-NEXT: mov x15, #0 // =0x0
; SVE-NEXT: mov x16, #0 // =0x0
; SVE-NEXT: mov x17, #0 // =0x0
; SVE-NEXT: mov x18, #0 // =0x0
; SVE-NEXT: mov z0.d, #0 // =0x0
; SVE-NEXT: mov z1.d, #0 // =0x0
; SVE-NEXT: mov z2.d, #0 // =0x0
; SVE-NEXT: mov z3.d, #0 // =0x0
; SVE-NEXT: mov z4.d, #0 // =0x0
; SVE-NEXT: mov z5.d, #0 // =0x0
; SVE-NEXT: mov z6.d, #0 // =0x0
; SVE-NEXT: mov z7.d, #0 // =0x0
; SVE-NEXT: mov z16.d, #0 // =0x0
; SVE-NEXT: mov z17.d, #0 // =0x0
; SVE-NEXT: mov z18.d, #0 // =0x0
; SVE-NEXT: mov z19.d, #0 // =0x0
; SVE-NEXT: mov z20.d, #0 // =0x0
; SVE-NEXT: mov z21.d, #0 // =0x0
; SVE-NEXT: mov z22.d, #0 // =0x0
; SVE-NEXT: mov z23.d, #0 // =0x0
; SVE-NEXT: mov z24.d, #0 // =0x0
; SVE-NEXT: mov z25.d, #0 // =0x0
; SVE-NEXT: mov z26.d, #0 // =0x0
; SVE-NEXT: mov z27.d, #0 // =0x0
; SVE-NEXT: mov z28.d, #0 // =0x0
; SVE-NEXT: mov z29.d, #0 // =0x0
; SVE-NEXT: mov z30.d, #0 // =0x0
; SVE-NEXT: mov z31.d, #0 // =0x0
; SVE-NEXT: pfalse p0.b
; SVE-NEXT: pfalse p1.b
; SVE-NEXT: pfalse p2.b
; SVE-NEXT: pfalse p3.b
; SVE-NEXT: pfalse p4.b
; SVE-NEXT: pfalse p5.b
; SVE-NEXT: pfalse p6.b
; SVE-NEXT: pfalse p7.b
; SVE-NEXT: pfalse p8.b
; SVE-NEXT: pfalse p9.b
; SVE-NEXT: pfalse p10.b
; SVE-NEXT: pfalse p11.b
; SVE-NEXT: pfalse p12.b
; SVE-NEXT: pfalse p13.b
; SVE-NEXT: pfalse p14.b
; SVE-NEXT: pfalse p15.b
; SVE-NEXT: ret
; SVE-OR-SME-LABEL: all:
; SVE-OR-SME: // %bb.0: // %entry
; SVE-OR-SME-NEXT: mul w8, w1, w0
; SVE-OR-SME-NEXT: mov x1, #0 // =0x0
; SVE-OR-SME-NEXT: mov x3, #0 // =0x0
; SVE-OR-SME-NEXT: mov x4, #0 // =0x0
; SVE-OR-SME-NEXT: mov x5, #0 // =0x0
; SVE-OR-SME-NEXT: mov x6, #0 // =0x0
; SVE-OR-SME-NEXT: mov x7, #0 // =0x0
; SVE-OR-SME-NEXT: mov x9, #0 // =0x0
; SVE-OR-SME-NEXT: mov x10, #0 // =0x0
; SVE-OR-SME-NEXT: orr w0, w8, w2
; SVE-OR-SME-NEXT: mov x2, #0 // =0x0
; SVE-OR-SME-NEXT: mov x8, #0 // =0x0
; SVE-OR-SME-NEXT: mov x11, #0 // =0x0
; SVE-OR-SME-NEXT: mov x12, #0 // =0x0
; SVE-OR-SME-NEXT: mov x13, #0 // =0x0
; SVE-OR-SME-NEXT: mov x14, #0 // =0x0
; SVE-OR-SME-NEXT: mov x15, #0 // =0x0
; SVE-OR-SME-NEXT: mov x16, #0 // =0x0
; SVE-OR-SME-NEXT: mov x17, #0 // =0x0
; SVE-OR-SME-NEXT: mov x18, #0 // =0x0
; SVE-OR-SME-NEXT: mov z0.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z16.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z17.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z18.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z19.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z20.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z21.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z22.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z23.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z24.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z25.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z26.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z27.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z28.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z29.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z30.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z31.d, #0 // =0x0
; SVE-OR-SME-NEXT: pfalse p0.b
; SVE-OR-SME-NEXT: pfalse p1.b
; SVE-OR-SME-NEXT: pfalse p2.b
; SVE-OR-SME-NEXT: pfalse p3.b
; SVE-OR-SME-NEXT: pfalse p4.b
; SVE-OR-SME-NEXT: pfalse p5.b
; SVE-OR-SME-NEXT: pfalse p6.b
; SVE-OR-SME-NEXT: pfalse p7.b
; SVE-OR-SME-NEXT: pfalse p8.b
; SVE-OR-SME-NEXT: pfalse p9.b
; SVE-OR-SME-NEXT: pfalse p10.b
; SVE-OR-SME-NEXT: pfalse p11.b
; SVE-OR-SME-NEXT: pfalse p12.b
; SVE-OR-SME-NEXT: pfalse p13.b
; SVE-OR-SME-NEXT: pfalse p14.b
; SVE-OR-SME-NEXT: pfalse p15.b
; SVE-OR-SME-NEXT: ret
;
; STREAMING-COMPAT-LABEL: all:
; STREAMING-COMPAT: // %bb.0: // %entry
; STREAMING-COMPAT-NEXT: mul w8, w1, w0
; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x9, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x10, #0 // =0x0
; STREAMING-COMPAT-NEXT: orr w0, w8, w2
; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x11, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x12, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x13, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x14, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x16, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x17, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0
; STREAMING-COMPAT-NEXT: fmov d0, xzr
; STREAMING-COMPAT-NEXT: fmov d1, xzr
; STREAMING-COMPAT-NEXT: fmov d2, xzr
; STREAMING-COMPAT-NEXT: fmov d3, xzr
; STREAMING-COMPAT-NEXT: fmov d4, xzr
; STREAMING-COMPAT-NEXT: fmov d5, xzr
; STREAMING-COMPAT-NEXT: fmov d6, xzr
; STREAMING-COMPAT-NEXT: fmov d7, xzr
; STREAMING-COMPAT-NEXT: fmov d16, xzr
; STREAMING-COMPAT-NEXT: fmov d17, xzr
; STREAMING-COMPAT-NEXT: fmov d18, xzr
; STREAMING-COMPAT-NEXT: fmov d19, xzr
; STREAMING-COMPAT-NEXT: fmov d20, xzr
; STREAMING-COMPAT-NEXT: fmov d21, xzr
; STREAMING-COMPAT-NEXT: fmov d22, xzr
; STREAMING-COMPAT-NEXT: fmov d23, xzr
; STREAMING-COMPAT-NEXT: fmov d24, xzr
; STREAMING-COMPAT-NEXT: fmov d25, xzr
; STREAMING-COMPAT-NEXT: fmov d26, xzr
; STREAMING-COMPAT-NEXT: fmov d27, xzr
; STREAMING-COMPAT-NEXT: fmov d28, xzr
; STREAMING-COMPAT-NEXT: fmov d29, xzr
; STREAMING-COMPAT-NEXT: fmov d30, xzr
; STREAMING-COMPAT-NEXT: fmov d31, xzr
; STREAMING-COMPAT-NEXT: ret
entry:
%mul = mul nsw i32 %b, %a
@ -355,12 +430,19 @@ define dso_local double @used_arg_float(double noundef %a, float noundef %b) loc
; DEFAULT-NEXT: movi v1.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
; SVE-LABEL: used_arg_float:
; SVE: // %bb.0: // %entry
; SVE-NEXT: fcvt d1, s1
; SVE-NEXT: fmul d0, d1, d0
; SVE-NEXT: mov z1.d, #0 // =0x0
; SVE-NEXT: ret
; SVE-OR-SME-LABEL: used_arg_float:
; SVE-OR-SME: // %bb.0: // %entry
; SVE-OR-SME-NEXT: fcvt d1, s1
; SVE-OR-SME-NEXT: fmul d0, d1, d0
; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
; SVE-OR-SME-NEXT: ret
;
; STREAMING-COMPAT-LABEL: used_arg_float:
; STREAMING-COMPAT: // %bb.0: // %entry
; STREAMING-COMPAT-NEXT: fcvt d1, s1
; STREAMING-COMPAT-NEXT: fmul d0, d1, d0
; STREAMING-COMPAT-NEXT: fmov d1, xzr
; STREAMING-COMPAT-NEXT: ret
entry:
%conv = fpext float %b to double
@ -376,12 +458,19 @@ define dso_local double @used_float(double noundef %a, float noundef %b) local_u
; DEFAULT-NEXT: movi v1.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
; SVE-LABEL: used_float:
; SVE: // %bb.0: // %entry
; SVE-NEXT: fcvt d1, s1
; SVE-NEXT: fmul d0, d1, d0
; SVE-NEXT: mov z1.d, #0 // =0x0
; SVE-NEXT: ret
; SVE-OR-SME-LABEL: used_float:
; SVE-OR-SME: // %bb.0: // %entry
; SVE-OR-SME-NEXT: fcvt d1, s1
; SVE-OR-SME-NEXT: fmul d0, d1, d0
; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
; SVE-OR-SME-NEXT: ret
;
; STREAMING-COMPAT-LABEL: used_float:
; STREAMING-COMPAT: // %bb.0: // %entry
; STREAMING-COMPAT-NEXT: fcvt d1, s1
; STREAMING-COMPAT-NEXT: fmul d0, d1, d0
; STREAMING-COMPAT-NEXT: fmov d1, xzr
; STREAMING-COMPAT-NEXT: ret
entry:
%conv = fpext float %b to double
@ -468,32 +557,55 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca
; DEFAULT-NEXT: movi v7.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
; SVE-LABEL: all_arg_float:
; SVE: // %bb.0: // %entry
; SVE-NEXT: fcvt d1, s1
; SVE-NEXT: fmul d0, d1, d0
; SVE-NEXT: mov x0, #0 // =0x0
; SVE-NEXT: mov x1, #0 // =0x0
; SVE-NEXT: mov x2, #0 // =0x0
; SVE-NEXT: mov x3, #0 // =0x0
; SVE-NEXT: mov x4, #0 // =0x0
; SVE-NEXT: mov x5, #0 // =0x0
; SVE-NEXT: mov x6, #0 // =0x0
; SVE-NEXT: mov x7, #0 // =0x0
; SVE-NEXT: mov x8, #0 // =0x0
; SVE-NEXT: mov x18, #0 // =0x0
; SVE-NEXT: mov z1.d, #0 // =0x0
; SVE-NEXT: mov z2.d, #0 // =0x0
; SVE-NEXT: mov z3.d, #0 // =0x0
; SVE-NEXT: mov z4.d, #0 // =0x0
; SVE-NEXT: mov z5.d, #0 // =0x0
; SVE-NEXT: mov z6.d, #0 // =0x0
; SVE-NEXT: mov z7.d, #0 // =0x0
; SVE-NEXT: pfalse p0.b
; SVE-NEXT: pfalse p1.b
; SVE-NEXT: pfalse p2.b
; SVE-NEXT: pfalse p3.b
; SVE-NEXT: ret
; SVE-OR-SME-LABEL: all_arg_float:
; SVE-OR-SME: // %bb.0: // %entry
; SVE-OR-SME-NEXT: fcvt d1, s1
; SVE-OR-SME-NEXT: fmul d0, d1, d0
; SVE-OR-SME-NEXT: mov x0, #0 // =0x0
; SVE-OR-SME-NEXT: mov x1, #0 // =0x0
; SVE-OR-SME-NEXT: mov x2, #0 // =0x0
; SVE-OR-SME-NEXT: mov x3, #0 // =0x0
; SVE-OR-SME-NEXT: mov x4, #0 // =0x0
; SVE-OR-SME-NEXT: mov x5, #0 // =0x0
; SVE-OR-SME-NEXT: mov x6, #0 // =0x0
; SVE-OR-SME-NEXT: mov x7, #0 // =0x0
; SVE-OR-SME-NEXT: mov x8, #0 // =0x0
; SVE-OR-SME-NEXT: mov x18, #0 // =0x0
; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0
; SVE-OR-SME-NEXT: pfalse p0.b
; SVE-OR-SME-NEXT: pfalse p1.b
; SVE-OR-SME-NEXT: pfalse p2.b
; SVE-OR-SME-NEXT: pfalse p3.b
; SVE-OR-SME-NEXT: ret
;
; STREAMING-COMPAT-LABEL: all_arg_float:
; STREAMING-COMPAT: // %bb.0: // %entry
; STREAMING-COMPAT-NEXT: fcvt d1, s1
; STREAMING-COMPAT-NEXT: fmul d0, d1, d0
; STREAMING-COMPAT-NEXT: mov x0, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0
; STREAMING-COMPAT-NEXT: fmov d1, xzr
; STREAMING-COMPAT-NEXT: fmov d2, xzr
; STREAMING-COMPAT-NEXT: fmov d3, xzr
; STREAMING-COMPAT-NEXT: fmov d4, xzr
; STREAMING-COMPAT-NEXT: fmov d5, xzr
; STREAMING-COMPAT-NEXT: fmov d6, xzr
; STREAMING-COMPAT-NEXT: fmov d7, xzr
; STREAMING-COMPAT-NEXT: ret
entry:
%conv = fpext float %b to double
@ -550,69 +662,117 @@ define dso_local double @all_float(double noundef %a, float noundef %b) local_un
; DEFAULT-NEXT: movi v31.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
; SVE-LABEL: all_float:
; SVE: // %bb.0: // %entry
; SVE-NEXT: fcvt d1, s1
; SVE-NEXT: fmul d0, d1, d0
; SVE-NEXT: mov x0, #0 // =0x0
; SVE-NEXT: mov x1, #0 // =0x0
; SVE-NEXT: mov x2, #0 // =0x0
; SVE-NEXT: mov x3, #0 // =0x0
; SVE-NEXT: mov x4, #0 // =0x0
; SVE-NEXT: mov x5, #0 // =0x0
; SVE-NEXT: mov x6, #0 // =0x0
; SVE-NEXT: mov x7, #0 // =0x0
; SVE-NEXT: mov x8, #0 // =0x0
; SVE-NEXT: mov x9, #0 // =0x0
; SVE-NEXT: mov x10, #0 // =0x0
; SVE-NEXT: mov x11, #0 // =0x0
; SVE-NEXT: mov x12, #0 // =0x0
; SVE-NEXT: mov x13, #0 // =0x0
; SVE-NEXT: mov x14, #0 // =0x0
; SVE-NEXT: mov x15, #0 // =0x0
; SVE-NEXT: mov x16, #0 // =0x0
; SVE-NEXT: mov x17, #0 // =0x0
; SVE-NEXT: mov x18, #0 // =0x0
; SVE-NEXT: mov z1.d, #0 // =0x0
; SVE-NEXT: mov z2.d, #0 // =0x0
; SVE-NEXT: mov z3.d, #0 // =0x0
; SVE-NEXT: mov z4.d, #0 // =0x0
; SVE-NEXT: mov z5.d, #0 // =0x0
; SVE-NEXT: mov z6.d, #0 // =0x0
; SVE-NEXT: mov z7.d, #0 // =0x0
; SVE-NEXT: mov z16.d, #0 // =0x0
; SVE-NEXT: mov z17.d, #0 // =0x0
; SVE-NEXT: mov z18.d, #0 // =0x0
; SVE-NEXT: mov z19.d, #0 // =0x0
; SVE-NEXT: mov z20.d, #0 // =0x0
; SVE-NEXT: mov z21.d, #0 // =0x0
; SVE-NEXT: mov z22.d, #0 // =0x0
; SVE-NEXT: mov z23.d, #0 // =0x0
; SVE-NEXT: mov z24.d, #0 // =0x0
; SVE-NEXT: mov z25.d, #0 // =0x0
; SVE-NEXT: mov z26.d, #0 // =0x0
; SVE-NEXT: mov z27.d, #0 // =0x0
; SVE-NEXT: mov z28.d, #0 // =0x0
; SVE-NEXT: mov z29.d, #0 // =0x0
; SVE-NEXT: mov z30.d, #0 // =0x0
; SVE-NEXT: mov z31.d, #0 // =0x0
; SVE-NEXT: pfalse p0.b
; SVE-NEXT: pfalse p1.b
; SVE-NEXT: pfalse p2.b
; SVE-NEXT: pfalse p3.b
; SVE-NEXT: pfalse p4.b
; SVE-NEXT: pfalse p5.b
; SVE-NEXT: pfalse p6.b
; SVE-NEXT: pfalse p7.b
; SVE-NEXT: pfalse p8.b
; SVE-NEXT: pfalse p9.b
; SVE-NEXT: pfalse p10.b
; SVE-NEXT: pfalse p11.b
; SVE-NEXT: pfalse p12.b
; SVE-NEXT: pfalse p13.b
; SVE-NEXT: pfalse p14.b
; SVE-NEXT: pfalse p15.b
; SVE-NEXT: ret
; SVE-OR-SME-LABEL: all_float:
; SVE-OR-SME: // %bb.0: // %entry
; SVE-OR-SME-NEXT: fcvt d1, s1
; SVE-OR-SME-NEXT: fmul d0, d1, d0
; SVE-OR-SME-NEXT: mov x0, #0 // =0x0
; SVE-OR-SME-NEXT: mov x1, #0 // =0x0
; SVE-OR-SME-NEXT: mov x2, #0 // =0x0
; SVE-OR-SME-NEXT: mov x3, #0 // =0x0
; SVE-OR-SME-NEXT: mov x4, #0 // =0x0
; SVE-OR-SME-NEXT: mov x5, #0 // =0x0
; SVE-OR-SME-NEXT: mov x6, #0 // =0x0
; SVE-OR-SME-NEXT: mov x7, #0 // =0x0
; SVE-OR-SME-NEXT: mov x8, #0 // =0x0
; SVE-OR-SME-NEXT: mov x9, #0 // =0x0
; SVE-OR-SME-NEXT: mov x10, #0 // =0x0
; SVE-OR-SME-NEXT: mov x11, #0 // =0x0
; SVE-OR-SME-NEXT: mov x12, #0 // =0x0
; SVE-OR-SME-NEXT: mov x13, #0 // =0x0
; SVE-OR-SME-NEXT: mov x14, #0 // =0x0
; SVE-OR-SME-NEXT: mov x15, #0 // =0x0
; SVE-OR-SME-NEXT: mov x16, #0 // =0x0
; SVE-OR-SME-NEXT: mov x17, #0 // =0x0
; SVE-OR-SME-NEXT: mov x18, #0 // =0x0
; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z16.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z17.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z18.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z19.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z20.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z21.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z22.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z23.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z24.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z25.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z26.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z27.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z28.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z29.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z30.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z31.d, #0 // =0x0
; SVE-OR-SME-NEXT: pfalse p0.b
; SVE-OR-SME-NEXT: pfalse p1.b
; SVE-OR-SME-NEXT: pfalse p2.b
; SVE-OR-SME-NEXT: pfalse p3.b
; SVE-OR-SME-NEXT: pfalse p4.b
; SVE-OR-SME-NEXT: pfalse p5.b
; SVE-OR-SME-NEXT: pfalse p6.b
; SVE-OR-SME-NEXT: pfalse p7.b
; SVE-OR-SME-NEXT: pfalse p8.b
; SVE-OR-SME-NEXT: pfalse p9.b
; SVE-OR-SME-NEXT: pfalse p10.b
; SVE-OR-SME-NEXT: pfalse p11.b
; SVE-OR-SME-NEXT: pfalse p12.b
; SVE-OR-SME-NEXT: pfalse p13.b
; SVE-OR-SME-NEXT: pfalse p14.b
; SVE-OR-SME-NEXT: pfalse p15.b
; SVE-OR-SME-NEXT: ret
;
; STREAMING-COMPAT-LABEL: all_float:
; STREAMING-COMPAT: // %bb.0: // %entry
; STREAMING-COMPAT-NEXT: fcvt d1, s1
; STREAMING-COMPAT-NEXT: fmul d0, d1, d0
; STREAMING-COMPAT-NEXT: mov x0, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x9, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x10, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x11, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x12, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x13, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x14, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x16, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x17, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0
; STREAMING-COMPAT-NEXT: fmov d1, xzr
; STREAMING-COMPAT-NEXT: fmov d2, xzr
; STREAMING-COMPAT-NEXT: fmov d3, xzr
; STREAMING-COMPAT-NEXT: fmov d4, xzr
; STREAMING-COMPAT-NEXT: fmov d5, xzr
; STREAMING-COMPAT-NEXT: fmov d6, xzr
; STREAMING-COMPAT-NEXT: fmov d7, xzr
; STREAMING-COMPAT-NEXT: fmov d16, xzr
; STREAMING-COMPAT-NEXT: fmov d17, xzr
; STREAMING-COMPAT-NEXT: fmov d18, xzr
; STREAMING-COMPAT-NEXT: fmov d19, xzr
; STREAMING-COMPAT-NEXT: fmov d20, xzr
; STREAMING-COMPAT-NEXT: fmov d21, xzr
; STREAMING-COMPAT-NEXT: fmov d22, xzr
; STREAMING-COMPAT-NEXT: fmov d23, xzr
; STREAMING-COMPAT-NEXT: fmov d24, xzr
; STREAMING-COMPAT-NEXT: fmov d25, xzr
; STREAMING-COMPAT-NEXT: fmov d26, xzr
; STREAMING-COMPAT-NEXT: fmov d27, xzr
; STREAMING-COMPAT-NEXT: fmov d28, xzr
; STREAMING-COMPAT-NEXT: fmov d29, xzr
; STREAMING-COMPAT-NEXT: fmov d30, xzr
; STREAMING-COMPAT-NEXT: fmov d31, xzr
; STREAMING-COMPAT-NEXT: ret
entry:
%conv = fpext float %b to double