Simon Pilgrim a2a0089ac3
[X86] movsd/movss/movd/movq - add support for constant comments (#78601)
If we're loading a constant value, print the constant (and the zero upper elements) instead of just the shuffle mask.

This did require me to move the shuffle mask handling into addConstantComments as we can't handle this in the MC layer.
2024-01-19 14:21:26 +00:00

170 lines
8.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
define <8 x half> @cvt_and_clamp2(<8 x float>) nounwind {
; CHECK-LABEL: cvt_and_clamp2:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $104, %rsp
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3],xmm1[3,3]
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: callq fmaxf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: callq fmaxf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: callq fmaxf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: callq fmaxf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movss (%rsp), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: callq fmaxf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: callq fmaxf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: callq fmaxf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: callq fmaxf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq fminf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq fminf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq fminf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movd (%rsp), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq fminf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq fminf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq fminf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq fminf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq fminf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: addq $104, %rsp
; CHECK-NEXT: retq
%2 = fptrunc <8 x float> %0 to <8 x half>
%3 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> zeroinitializer, <8 x half> %2)
%4 = call <8 x half> @llvm.minnum.v8f16(<8 x half> %3, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>)
ret <8 x half> %4
}
declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>)
declare <8 x half> @llvm.minnum.v8f16(<8 x half>, <8 x half>)