
Lack of these entries caused clang to crash on the following code:

```c
__m256bh fun(__m256bh arg) { return arg; }
__m256bh run() { __m256bh arg= {0}; fun(arg); }
```

This caused FastISel to fail, since it handles call lowering based on the X86CallingConv table. Curiously, if FastISel fails somewhere down the line and SelectionDAGISel takes over as the fallback, the crash does not occur. The following code _does not_ crash:

```c
__m256bh fun(__m256bh arg) { return arg; }
__m256bh run() { __m256bh arg= {0}; return fun(arg); }
```

This is puzzling to me. Obviously, if FastISel fails, the compiler falls back to something else to lower these calls -- but since the X86CallingConv table _doesn't_ have entries for vNbf16, how does this other mechanism manage not to crash? It has to use some other mechanism, one which doesn't use the table. This raises the following questions:

- How is this lowering accomplished without, presumably, using the CallingConv entries?
- Why is the table not used? This points to some logic duplication (the FastISel way vs. the other, bug-free way).
- How to properly test this? There is a test for vNbf16 values, but it also must not be using the FastISel path?

This duplication of logic makes this hard to test, since we don't have direct control over whether the FastISel path or the other one is used.

Nonetheless, this PR fixes the crash, though I didn't create a test for it, since I am not yet sure what it should look like. I would like to learn how the working non-FastISel mechanism works; I tried looking for it, but haven't yet managed to find anything.
1163 lines
54 KiB
LLVM
1163 lines
54 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -fast-isel=false -mtriple=x86_64-linux-unknown -mattr=+sse2 < %s | FileCheck -check-prefixes=SSE2 %s
; RUN: llc -fast-isel -mtriple=x86_64-linux-unknown -mattr=+sse2 < %s | FileCheck -check-prefixes=FAST_ISEL_SSE2 %s
; RUN: llc -fast-isel=false -mtriple=x86_64-linux-unknown -mattr=+avx512bf16,avx512vl < %s | FileCheck -check-prefixes=AVX512BF16 %s
; RUN: llc -fast-isel -mtriple=x86_64-linux-unknown -mattr=+avx512bf16,avx512vl < %s | FileCheck -check-prefixes=FAST_ISEL_AVX512BF16 %s
; RUN: llc -fast-isel=false -mtriple=x86_64-linux-unknown -mattr=+avxneconvert < %s | FileCheck -check-prefixes=AVXNECONVERT %s
; RUN: llc -fast-isel -mtriple=x86_64-linux-unknown -mattr=+avxneconvert < %s | FileCheck -check-prefixes=FAST_ISEL_AVXNECONVERT %s

; Returns its scalar bfloat argument unchanged.
; The -fast-isel=false runs expect a bare retq. The -fast-isel runs expect the
; value to be widened (shift left by 16) and truncated back to bfloat, via a
; __truncsfbf2 libcall on SSE2 and via vcvtneps2bf16 on the two AVX configs.
define bfloat @return_arg_bf16(bfloat %x) #0 {
; SSE2-LABEL: return_arg_bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: retq
;
; FAST_ISEL_SSE2-LABEL: return_arg_bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: pushq %rax
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm0
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: popq %rax
; FAST_ISEL_SSE2-NEXT: retq
;
; AVX512BF16-LABEL: return_arg_bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: retq
;
; FAST_ISEL_AVX512BF16-LABEL: return_arg_bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: vpextrw $0, %xmm0, %eax
; FAST_ISEL_AVX512BF16-NEXT: shll $16, %eax
; FAST_ISEL_AVX512BF16-NEXT: vmovd %eax, %xmm0
; FAST_ISEL_AVX512BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; FAST_ISEL_AVX512BF16-NEXT: retq
;
; AVXNECONVERT-LABEL: return_arg_bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: retq
;
; FAST_ISEL_AVXNECONVERT-LABEL: return_arg_bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: vpextrw $0, %xmm0, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: shll $16, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %eax, %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: retq
  ret bfloat %x
}

; Returns its <2 x bfloat> argument unchanged.
; Every configuration except the SSE2 -fast-isel run expects a bare retq; that
; run expects each of the two elements to be widened and truncated back with
; its own __truncsfbf2 call, then re-packed with punpcklwd.
define <2 x bfloat> @return_arg_v2bf16(<2 x bfloat> %x) #0 {
; SSE2-LABEL: return_arg_v2bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: retq
;
; FAST_ISEL_SSE2-LABEL: return_arg_v2bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: subq $40, %rsp
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: movd %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm0
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_SSE2-NEXT: movdqa %xmm1, %xmm0
; FAST_ISEL_SSE2-NEXT: addq $40, %rsp
; FAST_ISEL_SSE2-NEXT: retq
;
; AVX512BF16-LABEL: return_arg_v2bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: retq
;
; FAST_ISEL_AVX512BF16-LABEL: return_arg_v2bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: retq
;
; AVXNECONVERT-LABEL: return_arg_v2bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: retq
;
; FAST_ISEL_AVXNECONVERT-LABEL: return_arg_v2bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: retq
  ret <2 x bfloat> %x
}

; Returns its <3 x bfloat> argument unchanged.
; The -fast-isel=false runs expect a bare retq. All three -fast-isel runs
; expect the three elements to be extracted, widened, truncated back and
; re-assembled (three __truncsfbf2 calls on SSE2, three vcvtneps2bf16 on the
; AVX configs).
define <3 x bfloat> @return_arg_v3bf16(<3 x bfloat> %x) #0 {
; SSE2-LABEL: return_arg_v3bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: retq
;
; FAST_ISEL_SSE2-LABEL: return_arg_v3bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: subq $40, %rsp
; FAST_ISEL_SSE2-NEXT: pextrw $2, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: movd %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm0
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; FAST_ISEL_SSE2-NEXT: movaps %xmm1, %xmm0
; FAST_ISEL_SSE2-NEXT: addq $40, %rsp
; FAST_ISEL_SSE2-NEXT: retq
;
; AVX512BF16-LABEL: return_arg_v3bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: retq
;
; FAST_ISEL_AVX512BF16-LABEL: return_arg_v3bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: vpextrw $2, %xmm0, %eax
; FAST_ISEL_AVX512BF16-NEXT: shll $16, %eax
; FAST_ISEL_AVX512BF16-NEXT: vmovd %eax, %xmm1
; FAST_ISEL_AVX512BF16-NEXT: vpextrw $1, %xmm0, %eax
; FAST_ISEL_AVX512BF16-NEXT: shll $16, %eax
; FAST_ISEL_AVX512BF16-NEXT: vmovd %eax, %xmm2
; FAST_ISEL_AVX512BF16-NEXT: vmovd %xmm0, %eax
; FAST_ISEL_AVX512BF16-NEXT: shll $16, %eax
; FAST_ISEL_AVX512BF16-NEXT: vmovd %eax, %xmm0
; FAST_ISEL_AVX512BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; FAST_ISEL_AVX512BF16-NEXT: vmovd %xmm1, %eax
; FAST_ISEL_AVX512BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; FAST_ISEL_AVX512BF16-NEXT: vcvtneps2bf16 %xmm2, %xmm1
; FAST_ISEL_AVX512BF16-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; FAST_ISEL_AVX512BF16-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; FAST_ISEL_AVX512BF16-NEXT: retq
;
; AVXNECONVERT-LABEL: return_arg_v3bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: retq
;
; FAST_ISEL_AVXNECONVERT-LABEL: return_arg_v3bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: vpextrw $2, %xmm0, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: shll $16, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %eax, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: vpextrw $1, %xmm0, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: shll $16, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %eax, %xmm2
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %xmm0, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: shll $16, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %eax, %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: {vex} vcvtneps2bf16 %xmm1, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %xmm1, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: {vex} vcvtneps2bf16 %xmm2, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; FAST_ISEL_AVXNECONVERT-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: vmovq %xmm1, %rax
; FAST_ISEL_AVXNECONVERT-NEXT: movl %eax, %ecx
; FAST_ISEL_AVXNECONVERT-NEXT: shrl $16, %ecx
; FAST_ISEL_AVXNECONVERT-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; FAST_ISEL_AVXNECONVERT-NEXT: shrq $32, %rax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %eax, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: vpbroadcastw %xmm1, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; FAST_ISEL_AVXNECONVERT-NEXT: retq
  ret <3 x bfloat> %x
}

; Returns its <4 x bfloat> argument unchanged.
; Every configuration except the SSE2 -fast-isel run expects a bare retq; that
; run expects four per-element __truncsfbf2 calls followed by punpcklwd and
; punpckldq to rebuild the vector.
define <4 x bfloat> @return_arg_v4bf16(<4 x bfloat> %x) #0 {
; SSE2-LABEL: return_arg_v4bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: retq
;
; FAST_ISEL_SSE2-LABEL: return_arg_v4bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: subq $56, %rsp
; FAST_ISEL_SSE2-NEXT: pextrw $3, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $2, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: movd %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm0
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; FAST_ISEL_SSE2-NEXT: addq $56, %rsp
; FAST_ISEL_SSE2-NEXT: retq
;
; AVX512BF16-LABEL: return_arg_v4bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: retq
;
; FAST_ISEL_AVX512BF16-LABEL: return_arg_v4bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: retq
;
; AVXNECONVERT-LABEL: return_arg_v4bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: retq
;
; FAST_ISEL_AVXNECONVERT-LABEL: return_arg_v4bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: retq
  ret <4 x bfloat> %x
}

; Returns its <8 x bfloat> argument unchanged.
; Every configuration except the SSE2 -fast-isel run expects a bare retq; that
; run expects eight per-element __truncsfbf2 calls, with the results merged
; through %rbx/%r14 into two 64-bit halves joined by punpcklqdq.
define <8 x bfloat> @return_arg_v8bf16(<8 x bfloat> %x) #0 {
; SSE2-LABEL: return_arg_v8bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: retq
;
; FAST_ISEL_SSE2-LABEL: return_arg_v8bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: pushq %r14
; FAST_ISEL_SSE2-NEXT: pushq %rbx
; FAST_ISEL_SSE2-NEXT: subq $56, %rsp
; FAST_ISEL_SSE2-NEXT: pextrw $7, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $6, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $5, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $4, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $3, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $2, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm1
; FAST_ISEL_SSE2-NEXT: movd %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: movdqa %xmm1, %xmm0
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm0
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm1
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; FAST_ISEL_SSE2-NEXT: addq $56, %rsp
; FAST_ISEL_SSE2-NEXT: popq %rbx
; FAST_ISEL_SSE2-NEXT: popq %r14
; FAST_ISEL_SSE2-NEXT: retq
;
; AVX512BF16-LABEL: return_arg_v8bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: retq
;
; FAST_ISEL_AVX512BF16-LABEL: return_arg_v8bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: retq
;
; AVXNECONVERT-LABEL: return_arg_v8bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: retq
;
; FAST_ISEL_AVXNECONVERT-LABEL: return_arg_v8bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: retq
  ret <8 x bfloat> %x
}

; Returns its <16 x bfloat> argument unchanged.
; Every configuration except the SSE2 -fast-isel run expects a bare retq; that
; run expects the elements of both %xmm0 and %xmm1 to be extracted and sent
; through sixteen __truncsfbf2 calls before the two result registers are
; rebuilt.
define <16 x bfloat> @return_arg_v16bf16(<16 x bfloat> %x) #0 {
;
; SSE2-LABEL: return_arg_v16bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: retq
;
; FAST_ISEL_SSE2-LABEL: return_arg_v16bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: pushq %r14
; FAST_ISEL_SSE2-NEXT: pushq %rbx
; FAST_ISEL_SSE2-NEXT: subq $104, %rsp
; FAST_ISEL_SSE2-NEXT: pextrw $7, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $6, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $5, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $4, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $3, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $2, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: movd %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $7, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $6, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $5, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $4, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $3, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $2, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm1
; FAST_ISEL_SSE2-NEXT: movd %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: movdqa %xmm1, %xmm0
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm0
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm0
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; FAST_ISEL_SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm0
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm0
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; FAST_ISEL_SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: addq $104, %rsp
; FAST_ISEL_SSE2-NEXT: popq %rbx
; FAST_ISEL_SSE2-NEXT: popq %r14
; FAST_ISEL_SSE2-NEXT: retq
;
; AVX512BF16-LABEL: return_arg_v16bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: retq
;
; FAST_ISEL_AVX512BF16-LABEL: return_arg_v16bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: retq
;
; AVXNECONVERT-LABEL: return_arg_v16bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: retq
;
; FAST_ISEL_AVXNECONVERT-LABEL: return_arg_v16bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: retq
  ret <16 x bfloat> %x
}

; External callees used by the call_ret_* tests: one per bfloat scalar/vector
; width, each taking and returning the same type.
declare bfloat @returns_bf16(bfloat)
declare <2 x bfloat> @returns_v2bf16(<2 x bfloat>)
declare <3 x bfloat> @returns_v3bf16(<3 x bfloat>)
declare <4 x bfloat> @returns_v4bf16(<4 x bfloat>)
declare <8 x bfloat> @returns_v8bf16(<8 x bfloat>)
declare <16 x bfloat> @returns_v16bf16(<16 x bfloat>)

; Loads a bfloat from %ptr and passes it to @returns_bf16; the call result is
; not used and the function ends in unreachable, so the expected output stops
; at the callq.
; The -fast-isel=false runs expect the argument to be loaded straight into
; %xmm0 with (v)pinsrw. The -fast-isel runs expect an integer load, a shift
; left by 16, and a conversion back to bfloat before the call.
define bfloat @call_ret_bf16(ptr %ptr) #0 {
;
; SSE2-LABEL: call_ret_bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
; SSE2-NEXT: callq returns_bf16@PLT
;
; FAST_ISEL_SSE2-LABEL: call_ret_bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: pushq %rax
; FAST_ISEL_SSE2-NEXT: movzwl (%rdi), %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm0
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: callq returns_bf16@PLT
;
; AVX512BF16-LABEL: call_ret_bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: pushq %rax
; AVX512BF16-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
; AVX512BF16-NEXT: callq returns_bf16@PLT
;
; FAST_ISEL_AVX512BF16-LABEL: call_ret_bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: pushq %rax
; FAST_ISEL_AVX512BF16-NEXT: movzwl (%rdi), %eax
; FAST_ISEL_AVX512BF16-NEXT: shll $16, %eax
; FAST_ISEL_AVX512BF16-NEXT: vmovd %eax, %xmm0
; FAST_ISEL_AVX512BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; FAST_ISEL_AVX512BF16-NEXT: callq returns_bf16@PLT
;
; AVXNECONVERT-LABEL: call_ret_bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: pushq %rax
; AVXNECONVERT-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
; AVXNECONVERT-NEXT: callq returns_bf16@PLT
;
; FAST_ISEL_AVXNECONVERT-LABEL: call_ret_bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: pushq %rax
; FAST_ISEL_AVXNECONVERT-NEXT: movzwl (%rdi), %eax
; FAST_ISEL_AVXNECONVERT-NEXT: shll $16, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %eax, %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: callq returns_bf16@PLT
  %val = load bfloat, ptr %ptr
  call bfloat @returns_bf16(bfloat %val)
  unreachable
}

; Loads a <2 x bfloat> from %ptr and passes it to @returns_v2bf16; the call
; result is not used and the function ends in unreachable, so the expected
; output stops at the callq.
; Every configuration except the SSE2 -fast-isel run expects a single
; (v)movss load into %xmm0 before the call; that run expects both elements to
; go through __truncsfbf2 and be re-packed with punpcklwd first.
define <2 x bfloat> @call_ret_v2bf16(ptr %ptr) #0 {
;
; SSE2-LABEL: call_ret_v2bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: callq returns_v2bf16@PLT
;
; FAST_ISEL_SSE2-LABEL: call_ret_v2bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: subq $40, %rsp
; FAST_ISEL_SSE2-NEXT: movl (%rdi), %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, (%rsp)
; FAST_ISEL_SSE2-NEXT: movdqa (%rsp), %xmm0
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: movd %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm0
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_SSE2-NEXT: movdqa %xmm1, %xmm0
; FAST_ISEL_SSE2-NEXT: callq returns_v2bf16@PLT
;
; AVX512BF16-LABEL: call_ret_v2bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: pushq %rax
; AVX512BF16-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BF16-NEXT: callq returns_v2bf16@PLT
;
; FAST_ISEL_AVX512BF16-LABEL: call_ret_v2bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: pushq %rax
; FAST_ISEL_AVX512BF16-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_AVX512BF16-NEXT: callq returns_v2bf16@PLT
;
; AVXNECONVERT-LABEL: call_ret_v2bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: pushq %rax
; AVXNECONVERT-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVXNECONVERT-NEXT: callq returns_v2bf16@PLT
;
; FAST_ISEL_AVXNECONVERT-LABEL: call_ret_v2bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: pushq %rax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_AVXNECONVERT-NEXT: callq returns_v2bf16@PLT
  %val = load <2 x bfloat>, ptr %ptr
  call <2 x bfloat> @returns_v2bf16(<2 x bfloat> %val)
  unreachable
}

; Call lowering test for <3 x bfloat>: loads the vector from %ptr and passes it
; to @returns_v3bf16. The call result is left unused and the block ends in
; `unreachable`, so the assertions cover only argument setup and the call.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
define <3 x bfloat> @call_ret_v3bf16(ptr %ptr) #0 {
;
; SSE2-LABEL: call_ret_v3bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movl 4(%rdi), %eax
; SSE2-NEXT: pinsrw $0, %eax, %xmm1
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: callq returns_v3bf16@PLT
;
; FAST_ISEL_SSE2-LABEL: call_ret_v3bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: subq $40, %rsp
; FAST_ISEL_SSE2-NEXT: movq (%rdi), %rax
; FAST_ISEL_SSE2-NEXT: movl %eax, %ecx
; FAST_ISEL_SSE2-NEXT: andl $-65536, %ecx # imm = 0xFFFF0000
; FAST_ISEL_SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: movl %eax, %ecx
; FAST_ISEL_SSE2-NEXT: shll $16, %ecx
; FAST_ISEL_SSE2-NEXT: movd %ecx, %xmm0
; FAST_ISEL_SSE2-NEXT: shrq $32, %rax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; FAST_ISEL_SSE2-NEXT: movaps %xmm1, %xmm0
; FAST_ISEL_SSE2-NEXT: callq returns_v3bf16@PLT
;
; AVX512BF16-LABEL: call_ret_v3bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: pushq %rax
; AVX512BF16-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512BF16-NEXT: callq returns_v3bf16@PLT
;
; FAST_ISEL_AVX512BF16-LABEL: call_ret_v3bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: pushq %rax
; FAST_ISEL_AVX512BF16-NEXT: movq (%rdi), %rax
; FAST_ISEL_AVX512BF16-NEXT: movl %eax, %ecx
; FAST_ISEL_AVX512BF16-NEXT: andl $-65536, %ecx # imm = 0xFFFF0000
; FAST_ISEL_AVX512BF16-NEXT: vmovd %ecx, %xmm0
; FAST_ISEL_AVX512BF16-NEXT: movl %eax, %ecx
; FAST_ISEL_AVX512BF16-NEXT: shll $16, %ecx
; FAST_ISEL_AVX512BF16-NEXT: vmovd %ecx, %xmm1
; FAST_ISEL_AVX512BF16-NEXT: shrq $32, %rax
; FAST_ISEL_AVX512BF16-NEXT: shll $16, %eax
; FAST_ISEL_AVX512BF16-NEXT: vmovd %eax, %xmm2
; FAST_ISEL_AVX512BF16-NEXT: vcvtneps2bf16 %xmm2, %xmm2
; FAST_ISEL_AVX512BF16-NEXT: vmovd %xmm2, %eax
; FAST_ISEL_AVX512BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; FAST_ISEL_AVX512BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; FAST_ISEL_AVX512BF16-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_AVX512BF16-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; FAST_ISEL_AVX512BF16-NEXT: callq returns_v3bf16@PLT
;
; AVXNECONVERT-LABEL: call_ret_v3bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: pushq %rax
; AVXNECONVERT-NEXT: movl 4(%rdi), %eax
; AVXNECONVERT-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVXNECONVERT-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVXNECONVERT-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVXNECONVERT-NEXT: callq returns_v3bf16@PLT
;
; FAST_ISEL_AVXNECONVERT-LABEL: call_ret_v3bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: pushq %rax
; FAST_ISEL_AVXNECONVERT-NEXT: movq (%rdi), %rax
; FAST_ISEL_AVXNECONVERT-NEXT: movl %eax, %ecx
; FAST_ISEL_AVXNECONVERT-NEXT: andl $-65536, %ecx # imm = 0xFFFF0000
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %ecx, %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: movl %eax, %ecx
; FAST_ISEL_AVXNECONVERT-NEXT: shll $16, %ecx
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %ecx, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: shrq $32, %rax
; FAST_ISEL_AVXNECONVERT-NEXT: shll $16, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %eax, %xmm2
; FAST_ISEL_AVXNECONVERT-NEXT: {vex} vcvtneps2bf16 %xmm2, %xmm2
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %xmm2, %eax
; FAST_ISEL_AVXNECONVERT-NEXT: {vex} vcvtneps2bf16 %xmm1, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_AVXNECONVERT-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: vmovq %xmm0, %rax
; FAST_ISEL_AVXNECONVERT-NEXT: movl %eax, %ecx
; FAST_ISEL_AVXNECONVERT-NEXT: shrl $16, %ecx
; FAST_ISEL_AVXNECONVERT-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_AVXNECONVERT-NEXT: shrq $32, %rax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovd %eax, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: vpbroadcastw %xmm1, %xmm1
; FAST_ISEL_AVXNECONVERT-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; FAST_ISEL_AVXNECONVERT-NEXT: callq returns_v3bf16@PLT
  %val = load <3 x bfloat>, ptr %ptr
  call <3 x bfloat> @returns_v3bf16(<3 x bfloat> %val)
  unreachable
}
|
|
|
|
; Call lowering test for <4 x bfloat>: loads the vector from %ptr and passes it
; to @returns_v4bf16. The call result is left unused and the block ends in
; `unreachable`, so the assertions cover only argument setup and the call.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
define <4 x bfloat> @call_ret_v4bf16(ptr %ptr) #0 {
;
; SSE2-LABEL: call_ret_v4bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: callq returns_v4bf16@PLT
;
; FAST_ISEL_SSE2-LABEL: call_ret_v4bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: subq $56, %rsp
; FAST_ISEL_SSE2-NEXT: movq (%rdi), %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; FAST_ISEL_SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0
; FAST_ISEL_SSE2-NEXT: pextrw $3, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $2, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: movd %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm0
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; FAST_ISEL_SSE2-NEXT: callq returns_v4bf16@PLT
;
; AVX512BF16-LABEL: call_ret_v4bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: pushq %rax
; AVX512BF16-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512BF16-NEXT: callq returns_v4bf16@PLT
;
; FAST_ISEL_AVX512BF16-LABEL: call_ret_v4bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: pushq %rax
; FAST_ISEL_AVX512BF16-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; FAST_ISEL_AVX512BF16-NEXT: callq returns_v4bf16@PLT
;
; AVXNECONVERT-LABEL: call_ret_v4bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: pushq %rax
; AVXNECONVERT-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVXNECONVERT-NEXT: callq returns_v4bf16@PLT
;
; FAST_ISEL_AVXNECONVERT-LABEL: call_ret_v4bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: pushq %rax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; FAST_ISEL_AVXNECONVERT-NEXT: callq returns_v4bf16@PLT
  %val = load <4 x bfloat>, ptr %ptr
  call <4 x bfloat> @returns_v4bf16(<4 x bfloat> %val)
  unreachable
}
|
|
|
|
; Call lowering test for <8 x bfloat>: loads the vector from %ptr and passes it
; to @returns_v8bf16. The call result is left unused and the block ends in
; `unreachable`, so the assertions cover only argument setup and the call.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
define <8 x bfloat> @call_ret_v8bf16(ptr %ptr) #0 {
;
; SSE2-LABEL: call_ret_v8bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movaps (%rdi), %xmm0
; SSE2-NEXT: callq returns_v8bf16@PLT
;
; FAST_ISEL_SSE2-LABEL: call_ret_v8bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: pushq %r14
; FAST_ISEL_SSE2-NEXT: pushq %rbx
; FAST_ISEL_SSE2-NEXT: subq $56, %rsp
; FAST_ISEL_SSE2-NEXT: movdqa (%rdi), %xmm1
; FAST_ISEL_SSE2-NEXT: pextrw $7, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $6, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $5, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $4, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $3, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $2, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm0
; FAST_ISEL_SSE2-NEXT: movd %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm0
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm1
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; FAST_ISEL_SSE2-NEXT: callq returns_v8bf16@PLT
;
; AVX512BF16-LABEL: call_ret_v8bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: pushq %rax
; AVX512BF16-NEXT: vmovaps (%rdi), %xmm0
; AVX512BF16-NEXT: callq returns_v8bf16@PLT
;
; FAST_ISEL_AVX512BF16-LABEL: call_ret_v8bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: pushq %rax
; FAST_ISEL_AVX512BF16-NEXT: vmovaps (%rdi), %xmm0
; FAST_ISEL_AVX512BF16-NEXT: callq returns_v8bf16@PLT
;
; AVXNECONVERT-LABEL: call_ret_v8bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: pushq %rax
; AVXNECONVERT-NEXT: vmovaps (%rdi), %xmm0
; AVXNECONVERT-NEXT: callq returns_v8bf16@PLT
;
; FAST_ISEL_AVXNECONVERT-LABEL: call_ret_v8bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: pushq %rax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovaps (%rdi), %xmm0
; FAST_ISEL_AVXNECONVERT-NEXT: callq returns_v8bf16@PLT
  %val = load <8 x bfloat>, ptr %ptr
  call <8 x bfloat> @returns_v8bf16(<8 x bfloat> %val)
  unreachable
}
|
|
|
|
; Call lowering test for <16 x bfloat>: loads the vector from %ptr and passes it
; to @returns_v16bf16. The call result is left unused and the block ends in
; `unreachable`, so the assertions cover only argument setup and the call.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
define <16 x bfloat> @call_ret_v16bf16(ptr %ptr) #0 {
;
; SSE2-LABEL: call_ret_v16bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movaps (%rdi), %xmm0
; SSE2-NEXT: movaps 16(%rdi), %xmm1
; SSE2-NEXT: callq returns_v16bf16@PLT
;
; FAST_ISEL_SSE2-LABEL: call_ret_v16bf16:
; FAST_ISEL_SSE2: # %bb.0:
; FAST_ISEL_SSE2-NEXT: pushq %r14
; FAST_ISEL_SSE2-NEXT: pushq %rbx
; FAST_ISEL_SSE2-NEXT: subq $104, %rsp
; FAST_ISEL_SSE2-NEXT: movdqa (%rdi), %xmm1
; FAST_ISEL_SSE2-NEXT: movdqa 16(%rdi), %xmm0
; FAST_ISEL_SSE2-NEXT: pextrw $7, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $6, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $5, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $4, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $3, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $2, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: movd %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $7, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $6, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $5, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $4, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $3, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $2, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: pextrw $1, %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movd %eax, %xmm0
; FAST_ISEL_SSE2-NEXT: movd %xmm1, %eax
; FAST_ISEL_SSE2-NEXT: shll $16, %eax
; FAST_ISEL_SSE2-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm0
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm0
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; FAST_ISEL_SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm0
; FAST_ISEL_SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %r14d
; FAST_ISEL_SSE2-NEXT: orl %ebx, %r14d
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %ebx
; FAST_ISEL_SSE2-NEXT: shll $16, %ebx
; FAST_ISEL_SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; FAST_ISEL_SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; FAST_ISEL_SSE2-NEXT: callq __truncsfbf2@PLT
; FAST_ISEL_SSE2-NEXT: pextrw $0, %xmm0, %eax
; FAST_ISEL_SSE2-NEXT: movzwl %ax, %eax
; FAST_ISEL_SSE2-NEXT: orl %ebx, %eax
; FAST_ISEL_SSE2-NEXT: shlq $32, %rax
; FAST_ISEL_SSE2-NEXT: orq %r14, %rax
; FAST_ISEL_SSE2-NEXT: movq %rax, %xmm0
; FAST_ISEL_SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; FAST_ISEL_SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; FAST_ISEL_SSE2-NEXT: callq returns_v16bf16@PLT
;
; AVX512BF16-LABEL: call_ret_v16bf16:
; AVX512BF16: # %bb.0:
; AVX512BF16-NEXT: pushq %rax
; AVX512BF16-NEXT: vmovaps (%rdi), %ymm0
; AVX512BF16-NEXT: callq returns_v16bf16@PLT
;
; FAST_ISEL_AVX512BF16-LABEL: call_ret_v16bf16:
; FAST_ISEL_AVX512BF16: # %bb.0:
; FAST_ISEL_AVX512BF16-NEXT: pushq %rax
; FAST_ISEL_AVX512BF16-NEXT: vmovaps (%rdi), %ymm0
; FAST_ISEL_AVX512BF16-NEXT: callq returns_v16bf16@PLT
;
; AVXNECONVERT-LABEL: call_ret_v16bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: pushq %rax
; AVXNECONVERT-NEXT: vmovaps (%rdi), %ymm0
; AVXNECONVERT-NEXT: callq returns_v16bf16@PLT
;
; FAST_ISEL_AVXNECONVERT-LABEL: call_ret_v16bf16:
; FAST_ISEL_AVXNECONVERT: # %bb.0:
; FAST_ISEL_AVXNECONVERT-NEXT: pushq %rax
; FAST_ISEL_AVXNECONVERT-NEXT: vmovaps (%rdi), %ymm0
; FAST_ISEL_AVXNECONVERT-NEXT: callq returns_v16bf16@PLT
  %val = load <16 x bfloat>, ptr %ptr
  call <16 x bfloat> @returns_v16bf16(<16 x bfloat> %val)
  unreachable
}
|
|
|
|
; Attribute group #0, referenced by the call_ret_* test functions in this file.
; It marks them nounwind.
; NOTE(review): presumably used to keep unwind/CFI directives out of the
; generated assembly that the assertions match -- confirm.
attributes #0 = { nounwind }
|