llvm-project/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll
Archibald Elliott 20b2d11896 [ARM] Fix Crash in 't'/'w' handling without fp16/bf16
After https://reviews.llvm.org/rGff4027d152d0 and
https://reviews.llvm.org/rG7d15212b8c0c we saw crashes in SelectionDAG
when trying to use these constraints when you don't have the fp16 or
bf16 extensions.

However, it is still possible to move 16-bit floating point values into
the right place in S registers with a normal `vmov`, even if we don't
have fp16 instructions we can use within the inline assembly string.
This patch therefore fixes the crash.

I think the reason we weren't getting this crash before is that the
__fp16 and __bf16 types got an error diagnostic in the Clang frontend
when you didn't have the right architectural extensions to use them.
This restriction was recently relaxed.

The approach for bf16 needs a bit more explanation. Exactly how BF16 is
legalized was changed in rGb769eb02b526e3966847351e15d283514c2ec767 -
effectively, whether you have the right instructions to get a bf16 value
into/out of a S register with MoveTo/FromHPR depends on hasFullFP16, but
whether you use a HPR for a value of type MVT::bf16 depends on hasBF16.
This is why the tests are not changed by `+bf16` vs `-bf16`, but I've
left both sets of RUN lines in case this changes in the future.

Test Changes:
- Added more tests for inline asm (the core part)
- fp16-promote.ll and pr47454.ll show improvements where unnecessary
  fp16-fp32 up/down-casts are no longer emitted. This results in fewer
  libcalls where those casts would be done with a libcall.
- aes-erratum-fix.ll is fairly noisy, and I need to revisit this test so
  that the IR is more minimal than it is right now, because most of the
  changes in this commit do not relate to what AES is actually trying to
  verify.

Differential Revision: https://reviews.llvm.org/D143711
2023-03-06 11:55:08 +00:00

174 lines
6.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; No FP16/BF16
; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFTFP
; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFTFP
; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD
; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD
; With FP16, Without BF16
; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP
; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP
; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD
; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD
; With FP16/BF16
; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP,BF16-SOFTFP
; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP,BF16-SOFTFP
; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD,BF16-HARD
; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD,BF16-HARD
; This test ensures that we can use `w` and `t` constraints to allocate
; S-registers for 16-bit FP inputs and outputs for inline assembly, with either
; the softfp or hard float ABIs. (With the soft ABI, no S-regs are available.)
; `half` routed through inline asm with the `t` constraint on both the output
; and the input operand. The asm template is a bare `vmov $0, $1`, which the
; expected output shows printed as `vmov.f32 s0, s0`. The softfp runs expect a
; move from r0 into s0 before the asm (`vmov.f16` when fullfp16 is enabled,
; plain `vmov` otherwise) and a move from s0 back to r0 after it; the hard-float
; runs expect only the asm body followed by the return.
define half @half_t(half %x) nounwind {
; NO-FP16-SOFTFP-LABEL: half_t:
; NO-FP16-SOFTFP:       @ %bb.0: @ %entry
; NO-FP16-SOFTFP-NEXT:    vmov s0, r0
; NO-FP16-SOFTFP-NEXT:    @APP
; NO-FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
; NO-FP16-SOFTFP-NEXT:    @NO_APP
; NO-FP16-SOFTFP-NEXT:    vmov r0, s0
; NO-FP16-SOFTFP-NEXT:    bx lr
;
; NO-FP16-HARD-LABEL: half_t:
; NO-FP16-HARD:       @ %bb.0: @ %entry
; NO-FP16-HARD-NEXT:    @APP
; NO-FP16-HARD-NEXT:    vmov.f32 s0, s0
; NO-FP16-HARD-NEXT:    @NO_APP
; NO-FP16-HARD-NEXT:    bx lr
;
; FP16-SOFTFP-LABEL: half_t:
; FP16-SOFTFP:       @ %bb.0: @ %entry
; FP16-SOFTFP-NEXT:    vmov.f16 s0, r0
; FP16-SOFTFP-NEXT:    @APP
; FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
; FP16-SOFTFP-NEXT:    @NO_APP
; FP16-SOFTFP-NEXT:    vmov r0, s0
; FP16-SOFTFP-NEXT:    bx lr
;
; FP16-HARD-LABEL: half_t:
; FP16-HARD:       @ %bb.0: @ %entry
; FP16-HARD-NEXT:    @APP
; FP16-HARD-NEXT:    vmov.f32 s0, s0
; FP16-HARD-NEXT:    @NO_APP
; FP16-HARD-NEXT:    bx lr
entry:
  %res = tail call half asm "vmov $0, $1", "=t,t"(half %x)
  ret half %res
}
; `half` routed through inline asm with the `w` constraint on both the output
; and the input operand. The asm template is a bare `vmov $0, $1`, which the
; expected output shows printed as `vmov.f32 s0, s0`. The softfp runs expect a
; move from r0 into s0 before the asm (`vmov.f16` when fullfp16 is enabled,
; plain `vmov` otherwise) and a move from s0 back to r0 after it; the hard-float
; runs expect only the asm body followed by the return.
define half @half_w(half %x) nounwind {
; NO-FP16-SOFTFP-LABEL: half_w:
; NO-FP16-SOFTFP:       @ %bb.0: @ %entry
; NO-FP16-SOFTFP-NEXT:    vmov s0, r0
; NO-FP16-SOFTFP-NEXT:    @APP
; NO-FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
; NO-FP16-SOFTFP-NEXT:    @NO_APP
; NO-FP16-SOFTFP-NEXT:    vmov r0, s0
; NO-FP16-SOFTFP-NEXT:    bx lr
;
; NO-FP16-HARD-LABEL: half_w:
; NO-FP16-HARD:       @ %bb.0: @ %entry
; NO-FP16-HARD-NEXT:    @APP
; NO-FP16-HARD-NEXT:    vmov.f32 s0, s0
; NO-FP16-HARD-NEXT:    @NO_APP
; NO-FP16-HARD-NEXT:    bx lr
;
; FP16-SOFTFP-LABEL: half_w:
; FP16-SOFTFP:       @ %bb.0: @ %entry
; FP16-SOFTFP-NEXT:    vmov.f16 s0, r0
; FP16-SOFTFP-NEXT:    @APP
; FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
; FP16-SOFTFP-NEXT:    @NO_APP
; FP16-SOFTFP-NEXT:    vmov r0, s0
; FP16-SOFTFP-NEXT:    bx lr
;
; FP16-HARD-LABEL: half_w:
; FP16-HARD:       @ %bb.0: @ %entry
; FP16-HARD-NEXT:    @APP
; FP16-HARD-NEXT:    vmov.f32 s0, s0
; FP16-HARD-NEXT:    @NO_APP
; FP16-HARD-NEXT:    bx lr
entry:
  %res = tail call half asm "vmov $0, $1", "=w,w"(half %x)
  ret half %res
}
; `bfloat` routed through inline asm with the `t` constraint on both the output
; and the input operand. The asm template is a bare `vmov $0, $1`, which the
; expected output shows printed as `vmov.f32 s0, s0`. The softfp runs expect a
; plain `vmov` from r0 into s0 before the asm and from s0 back to r0 after it,
; with the same sequence expected whether or not fullfp16 is enabled; the
; hard-float runs expect only the asm body followed by the return.
define bfloat @bf16_t(bfloat %x) nounwind {
; NO-FP16-SOFTFP-LABEL: bf16_t:
; NO-FP16-SOFTFP:       @ %bb.0: @ %entry
; NO-FP16-SOFTFP-NEXT:    vmov s0, r0
; NO-FP16-SOFTFP-NEXT:    @APP
; NO-FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
; NO-FP16-SOFTFP-NEXT:    @NO_APP
; NO-FP16-SOFTFP-NEXT:    vmov r0, s0
; NO-FP16-SOFTFP-NEXT:    bx lr
;
; NO-FP16-HARD-LABEL: bf16_t:
; NO-FP16-HARD:       @ %bb.0: @ %entry
; NO-FP16-HARD-NEXT:    @APP
; NO-FP16-HARD-NEXT:    vmov.f32 s0, s0
; NO-FP16-HARD-NEXT:    @NO_APP
; NO-FP16-HARD-NEXT:    bx lr
;
; FP16-SOFTFP-LABEL: bf16_t:
; FP16-SOFTFP:       @ %bb.0: @ %entry
; FP16-SOFTFP-NEXT:    vmov s0, r0
; FP16-SOFTFP-NEXT:    @APP
; FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
; FP16-SOFTFP-NEXT:    @NO_APP
; FP16-SOFTFP-NEXT:    vmov r0, s0
; FP16-SOFTFP-NEXT:    bx lr
;
; FP16-HARD-LABEL: bf16_t:
; FP16-HARD:       @ %bb.0: @ %entry
; FP16-HARD-NEXT:    @APP
; FP16-HARD-NEXT:    vmov.f32 s0, s0
; FP16-HARD-NEXT:    @NO_APP
; FP16-HARD-NEXT:    bx lr
entry:
  %res = tail call bfloat asm "vmov $0, $1", "=t,t"(bfloat %x)
  ret bfloat %res
}
; `bfloat` routed through inline asm with the `w` constraint on both the output
; and the input operand. The asm template is a bare `vmov $0, $1`, which the
; expected output shows printed as `vmov.f32 s0, s0`. The softfp runs expect a
; plain `vmov` from r0 into s0 before the asm and from s0 back to r0 after it,
; with the same sequence expected whether or not fullfp16 is enabled; the
; hard-float runs expect only the asm body followed by the return.
define bfloat @bf16_w(bfloat %x) nounwind {
; NO-FP16-SOFTFP-LABEL: bf16_w:
; NO-FP16-SOFTFP:       @ %bb.0: @ %entry
; NO-FP16-SOFTFP-NEXT:    vmov s0, r0
; NO-FP16-SOFTFP-NEXT:    @APP
; NO-FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
; NO-FP16-SOFTFP-NEXT:    @NO_APP
; NO-FP16-SOFTFP-NEXT:    vmov r0, s0
; NO-FP16-SOFTFP-NEXT:    bx lr
;
; NO-FP16-HARD-LABEL: bf16_w:
; NO-FP16-HARD:       @ %bb.0: @ %entry
; NO-FP16-HARD-NEXT:    @APP
; NO-FP16-HARD-NEXT:    vmov.f32 s0, s0
; NO-FP16-HARD-NEXT:    @NO_APP
; NO-FP16-HARD-NEXT:    bx lr
;
; FP16-SOFTFP-LABEL: bf16_w:
; FP16-SOFTFP:       @ %bb.0: @ %entry
; FP16-SOFTFP-NEXT:    vmov s0, r0
; FP16-SOFTFP-NEXT:    @APP
; FP16-SOFTFP-NEXT:    vmov.f32 s0, s0
; FP16-SOFTFP-NEXT:    @NO_APP
; FP16-SOFTFP-NEXT:    vmov r0, s0
; FP16-SOFTFP-NEXT:    bx lr
;
; FP16-HARD-LABEL: bf16_w:
; FP16-HARD:       @ %bb.0: @ %entry
; FP16-HARD-NEXT:    @APP
; FP16-HARD-NEXT:    vmov.f32 s0, s0
; FP16-HARD-NEXT:    @NO_APP
; FP16-HARD-NEXT:    bx lr
entry:
  %res = tail call bfloat asm "vmov $0, $1", "=w,w"(bfloat %x)
  ret bfloat %res
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; BF16-HARD: {{.*}}
; BF16-SOFTFP: {{.*}}