Matthias Braun 189900eb14 X86: Stop assigning register costs for longer encodings.
This stops reporting CostPerUse 1 for `R8`-`R15` and `XMM8`-`XMM31`.
This was previously done because instruction encodings require a REX
prefix when using them, resulting in longer instruction encodings. I
found that this regresses the quality of the register allocation as the
costs impose an ordering on eviction candidates. I also feel that there
is a bit of an impedance mismatch as the actual costs occur when
encoding instructions using those registers, but the order of VReg
assignments is not primarily ordered by number of Defs+Uses.

I did extensive measurements with the llvm-test-suite with SPEC2006 +
SPEC2017 included; internal services showed similar patterns. Generally
there are a lot of improvements but also a lot of regressions. But on
average the allocation quality seems to improve at the cost of a small
code size regression.

Results for measuring static and dynamic instruction counts:

Dynamic Counts (scaled by execution frequency) / Optimization Remarks:
    Spills+FoldedSpills   -5.6%
    Reloads+FoldedReloads -4.2%
    Copies                -0.1%

Static / LLVM Statistics:
    regalloc.NumSpills    mean -1.6%, geomean -2.8%
    regalloc.NumReloads   mean -1.7%, geomean -3.1%
    size..text            mean +0.4%, geomean +0.4%

Static / LLVM Statistics:
    regalloc.NumSpills    mean -2.2%, geomean -3.1%
    regalloc.NumReloads   mean -2.6%, geomean -3.9%
    size..text            mean +0.6%, geomean +0.6%

Static / LLVM Statistics:
    regalloc.NumSpills   mean -3.0%
    regalloc.NumReloads  mean -3.3%
    size..text           mean +0.3%, geomean +0.3%

Differential Revision: https://reviews.llvm.org/D133902
2022-09-30 16:01:33 -07:00

321 lines
10 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
; add: load two bfloat values from %pa and %pb, add them, and store the
; bfloat sum to %pc.
; Expected lowering (see the assertions below): each 16-bit operand is
; zero-extended and shifted left by 16 to form a float bit pattern, the add is
; done with addss, and the result is narrowed by calling __truncsfbf2. The
; destination pointer is copied from %rdx to %rbx before the call and the low
; 16 bits of the result are stored through %rbx afterwards.
define void @add(ptr %pa, ptr %pb, ptr %pc) nounwind {
; CHECK-LABEL: add:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdx, %rbx
; CHECK-NEXT: movzwl (%rsi), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movw %ax, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%a = load bfloat, ptr %pa
%b = load bfloat, ptr %pb
%add = fadd bfloat %a, %b
store bfloat %add, ptr %pc
ret void
}
; add2: by-value variant of the bfloat add; both operands and the result are
; passed as bfloat rather than through memory.
; Expected lowering: the operands are read from %xmm0/%xmm1, shifted left by 16
; to form float bit patterns, added with addss, and narrowed via __truncsfbf2.
; Nothing is moved after the call, so the libcall's %xmm0 is the return value.
; NOTE(review): the pushq/popq of %rax presumably only keeps the stack aligned
; for the call - confirm.
define bfloat @add2(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: add2:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movd %xmm1, %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: movd %ecx, %xmm1
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
%add = fadd bfloat %a, %b
ret bfloat %add
}
; add_double: load two doubles from %pa and %pb, truncate each to bfloat, add
; the bfloat values, extend the sum back to double, and store it to %pc.
; Expected lowering: each double -> bfloat truncation is a call to
; __truncdfbf2, the add is done in float (shift left by 16 + addss) followed by
; __truncsfbf2, and the final extension is a shift left by 16 plus cvtss2sd.
; %rbx, %r14 and %rbp are pushed on entry and hold the destination pointer, the
; second source pointer and the first truncated operand across the calls.
define void @add_double(ptr %pa, ptr %pb, ptr %pc) nounwind {
; CHECK-LABEL: add_double:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdx, %rbx
; CHECK-NEXT: movq %rsi, %r14
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq __truncdfbf2@PLT
; CHECK-NEXT: movd %xmm0, %ebp
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq __truncdfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: shll $16, %ebp
; CHECK-NEXT: movd %ebp, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: movsd %xmm0, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
; Narrow both double inputs to bfloat before adding.
%la = load double, ptr %pa
%a = fptrunc double %la to bfloat
%lb = load double, ptr %pb
%b = fptrunc double %lb to bfloat
%add = fadd bfloat %a, %b
; Widen the bfloat sum back to double for the store.
%dadd = fpext bfloat %add to double
store double %dadd, ptr %pc
ret void
}
; add_double2: by-value variant of add_double; truncates both double arguments
; to bfloat, adds them, and returns the sum extended back to double.
; Expected lowering: two __truncdfbf2 calls, a float add (shift left by 16 +
; addss) narrowed by __truncsfbf2, then shift left by 16 + cvtss2sd for the
; result. The second argument (%xmm1) is spilled to the stack before the first
; call and reloaded for the second; the first truncated operand is held in
; %ebx across the second call.
define double @add_double2(double %da, double %db) nounwind {
; CHECK-LABEL: add_double2:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $16, %rsp
; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: callq __truncdfbf2@PLT
; CHECK-NEXT: movd %xmm0, %ebx
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq __truncdfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: shll $16, %ebx
; CHECK-NEXT: movd %ebx, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: addq $16, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
; Narrow both double arguments to bfloat before adding.
%a = fptrunc double %da to bfloat
%b = fptrunc double %db to bfloat
%add = fadd bfloat %a, %b
; Widen the bfloat sum back to double for the return value.
%dadd = fpext bfloat %add to double
ret double %dadd
}
; add_constant: load a bfloat from %pa, add the constant 1.0, and store the
; bfloat result to %pc.
; Expected lowering: the loaded value is zero-extended and shifted left by 16,
; the constant operand is folded into addss as a RIP-relative load from a
; constant-pool label (LCPI*), and the sum is narrowed via __truncsfbf2. The
; destination pointer is kept in %rbx across the call.
define void @add_constant(ptr %pa, ptr %pc) nounwind {
; CHECK-LABEL: add_constant:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movw %ax, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%a = load bfloat, ptr %pa
%add = fadd bfloat %a, 1.0
store bfloat %add, ptr %pc
ret void
}
; add_constant2: by-value variant of add_constant; returns %a + 1.0 as bfloat.
; Expected lowering: the argument is read from %xmm0, shifted left by 16, added
; to a constant-pool operand with addss, and narrowed via __truncsfbf2. Nothing
; is moved after the call, so the libcall's %xmm0 is the return value.
; NOTE(review): the pushq/popq of %rax presumably only keeps the stack aligned
; for the call - confirm.
define bfloat @add_constant2(bfloat %a) nounwind {
; CHECK-LABEL: add_constant2:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
%add = fadd bfloat %a, 1.0
ret bfloat %add
}
; store_constant: store the bfloat constant 1.0 to %pc.
; Expected lowering: a single 16-bit immediate store of 0x3F80 (16256) through
; %rdi, with no libcall and no constant-pool load.
define void @store_constant(ptr %pc) nounwind {
; CHECK-LABEL: store_constant:
; CHECK: # %bb.0:
; CHECK-NEXT: movw $16256, (%rdi) # imm = 0x3F80
; CHECK-NEXT: retq
store bfloat 1.0, ptr %pc
ret void
}
; fold_ext_trunc: load a bfloat from %pa, extend it to float, truncate it
; straight back to bfloat, and store it to %pc.
; Expected lowering: the extend/truncate pair disappears entirely; only a
; 16-bit load from (%rdi) and a 16-bit store to (%rsi) remain, with no call to
; __truncsfbf2.
define void @fold_ext_trunc(ptr %pa, ptr %pc) nounwind {
; CHECK-LABEL: fold_ext_trunc:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq
%a = load bfloat, ptr %pa
; Round trip through float: widen, then immediately narrow again.
%ext = fpext bfloat %a to float
%trunc = fptrunc float %ext to bfloat
store bfloat %trunc, ptr %pc
ret void
}
; fold_ext_trunc2: by-value variant of fold_ext_trunc; extends the bfloat
; argument to float and truncates it straight back.
; Expected lowering: the function body is a bare retq, i.e. the argument is
; returned unchanged with no conversion code and no libcall.
define bfloat @fold_ext_trunc2(bfloat %a) nounwind {
; CHECK-LABEL: fold_ext_trunc2:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
; Round trip through float: widen, then immediately narrow again.
%ext = fpext bfloat %a to float
%trunc = fptrunc float %ext to bfloat
ret bfloat %trunc
}
; addv: element-wise add of two <8 x bfloat> vectors, returned by value.
; Expected lowering (see the assertions below): the add is fully scalarized
; into eight addss + __truncsfbf2 sequences.
;   * Both inputs are moved from %xmm0/%xmm1 into general-purpose registers as
;     64-bit halves (the upper halves via pshufd), and individual lanes are
;     isolated with shrq $32 / shrq $48, shll $16 or andl $0xFFFF0000.
;   * All six pushed registers (%rbp, %r15, %r14, %r13, %r12, %rbx) are used,
;     and further lane values are spilled to the 56-byte stack area and
;     reloaded later, since each libcall sits between the extraction and use of
;     the remaining lanes.
;   * Pairs of 16-bit results are merged with shll $16 / movzwl / orl, pairs of
;     32-bit values with shlq $32 / orq, and the two 64-bit halves are combined
;     with punpcklqdq into the %xmm0 result.
; The exact registers and spill slots asserted here depend on register
; allocation decisions, so these lines are expected to change whenever
; allocation heuristics change.
define <8 x bfloat> @addv(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: addv:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $56, %rsp
; CHECK-NEXT: movq %xmm0, %rcx
; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movq %xmm1, %rdx
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: shrq $48, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: shrq $48, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movq %xmm0, %r12
; CHECK-NEXT: movq %r12, %rax
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movq %rax, (%rsp) # 8-byte Spill
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm0, %r14
; CHECK-NEXT: movq %r14, %rbp
; CHECK-NEXT: shrq $32, %rbp
; CHECK-NEXT: movq %r12, %r15
; CHECK-NEXT: shrq $48, %r15
; CHECK-NEXT: movq %r14, %r13
; CHECK-NEXT: shrq $48, %r13
; CHECK-NEXT: movl %r14d, %eax
; CHECK-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movl %r12d, %eax
; CHECK-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %ebx
; CHECK-NEXT: shll $16, %ebx
; CHECK-NEXT: shll $16, %r14d
; CHECK-NEXT: movd %r14d, %xmm1
; CHECK-NEXT: shll $16, %r12d
; CHECK-NEXT: movd %r12d, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %r12d
; CHECK-NEXT: orl %ebx, %r12d
; CHECK-NEXT: shll $16, %r13d
; CHECK-NEXT: movd %r13d, %xmm1
; CHECK-NEXT: shll $16, %r15d
; CHECK-NEXT: movd %r15d, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %r14d
; CHECK-NEXT: shll $16, %r14d
; CHECK-NEXT: shll $16, %ebp
; CHECK-NEXT: movd %ebp, %xmm1
; CHECK-NEXT: movq (%rsp), %rax # 8-byte Reload
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %ebx
; CHECK-NEXT: orl %r14d, %ebx
; CHECK-NEXT: shlq $32, %rbx
; CHECK-NEXT: orq %r12, %rbx
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
; CHECK-NEXT: movl %r15d, %eax
; CHECK-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
; CHECK-NEXT: movl %r14d, %eax
; CHECK-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %ebp
; CHECK-NEXT: shll $16, %ebp
; CHECK-NEXT: movq %r15, %rax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movq %r14, %rax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %r14d
; CHECK-NEXT: orl %ebp, %r14d
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %ebp
; CHECK-NEXT: shll $16, %ebp
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: orl %ebp, %eax
; CHECK-NEXT: shlq $32, %rax
; CHECK-NEXT: orq %r14, %rax
; CHECK-NEXT: movq %rax, %xmm0
; CHECK-NEXT: movq %rbx, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%add = fadd <8 x bfloat> %a, %b
ret <8 x bfloat> %add
}