llvm-project/llvm/test/CodeGen/AArch64/jump-table-exynos.ll
David Sherwood bdc0afc871
[CodeGen][AArch64] Set min jump table entries to 13 for AArch64 targets (#71166)
There are some workloads that are negatively impacted by using jump
tables when the number of entries is small. The SPEC2017 perlbench
benchmark is one example of this, where increasing the threshold to
around 13 gives a ~1.5% improvement on neoverse-v1. I chose the minimum
threshold based on empirical evidence rather than science, and just
manually increased the threshold until I got the best performance
without impacting other workloads. For neoverse-v1 I saw around ~0.2%
improvement in the SPEC2017 integer geomean, and no overall change for
neoverse-n1. If we find issues with this threshold later on we can
always revisit this.

The most significant SPEC2017 score changes on neoverse-v1 were:

500.perlbench_r: +1.6%
520.omnetpp_r: +0.6%

and the rest saw changes < 0.5%.

I updated CodeGen/AArch64/min-jump-table.ll to reflect the new
threshold. For most of the affected tests I manually set the min number
of entries back to 4 on the RUN line because the tests seem to rely upon
this behaviour.
2023-11-14 13:00:28 +00:00

76 lines
1.7 KiB
LLVM

; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mattr=+force-32bit-jump-tables -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=exynos-m3 -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
; Exynos doesn't want jump tables to be compressed for now.
define i32 @test_jumptable(i32 %in) {
switch i32 %in, label %def [
i32 0, label %lbl1
i32 1, label %lbl2
i32 2, label %lbl3
i32 4, label %lbl4
]
; CHECK-LABEL: test_jumptable:
; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0
; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI0_0
; CHECK: [[PCREL_LBL:.Ltmp.*]]:
; CHECK-NEXT: adr [[PCBASE:x[0-9]+]], [[PCREL_LBL]]
; CHECK: ldrsw x[[OFFSET:[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #2]
; CHECK: add [[DEST:x[0-9]+]], [[PCBASE]], x[[OFFSET]]
; CHECK: br [[DEST]]
; CHECK: .LJTI0_0:
; CHECK-NEXT: .word .LBB{{.*}}-[[PCREL_LBL]]
def:
ret i32 0
lbl1:
ret i32 1
lbl2:
ret i32 2
lbl3:
ret i32 4
lbl4:
ret i32 8
}
define i32 @test_jumptable_minsize(i32 %in) minsize {
switch i32 %in, label %def [
i32 0, label %lbl1
i32 1, label %lbl2
i32 2, label %lbl3
i32 4, label %lbl4
]
; CHECK-LABEL: test_jumptable_minsize:
; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI1_0
; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI1_0
; CHECK: adr [[PCBASE:x[0-9]+]], [[JTBASE:.LBB[0-9]+_[0-9]+]]
; CHECK: ldrb w[[OFFSET:[0-9]+]], [x[[JT]], {{x[0-9]+}}]
; CHECK: add [[DEST:x[0-9]+]], [[PCBASE]], x[[OFFSET]], lsl #2
; CHECK: br [[DEST]]
def:
ret i32 0
lbl1:
ret i32 1
lbl2:
ret i32 2
lbl3:
ret i32 4
lbl4:
ret i32 8
}