David Sherwood bdc0afc871
[CodeGen][AArch64] Set min jump table entries to 13 for AArch64 targets (#71166)
There are some workloads that are negatively impacted by using jump
tables when the number of entries is small. The SPEC2017 perlbench
benchmark is one example of this, where increasing the threshold to
around 13 gives a ~1.5% improvement on neoverse-v1. I chose the minimum
threshold based on empirical evidence rather than science, and just
manually increased the threshold until I got the best performance
without impacting other workloads. For neoverse-v1 I saw a ~0.2%
improvement in the SPEC2017 integer geomean, and no overall change for
neoverse-n1. If we find issues with this threshold later on we can
always revisit this.

The most significant SPEC2017 score changes on neoverse-v1 were:

500.perlbench_r: +1.6%
520.omnetpp_r: +0.6%

and the rest saw changes < 0.5%.

I updated CodeGen/AArch64/min-jump-table.ll to reflect the new
threshold. For most of the affected tests I manually set the min number
of entries back to 4 on the RUN line because the tests seem to rely upon
this behaviour.
2023-11-14 13:00:28 +00:00

263 lines
12 KiB
LLVM

; REQUIRES: arm-registered-target
; REQUIRES: aarch64-registered-target
; REQUIRES: x86-registered-target
; RUN: llc -mtriple=i686-windows < %s | FileCheck %s --check-prefixes=CHECK,I686,NOTA32
; RUN: llc -mtriple=x86_64-windows < %s | FileCheck %s --check-prefixes=CHECK,X64,NOTA32
; RUN: llc -mtriple=aarch64-windows -aarch64-min-jump-table-entries=4 < %s | FileCheck %s --check-prefixes=CHECK,A64,NOTA32
; RUN: llc -mtriple=thumbv7a-windows < %s | FileCheck %s --check-prefixes=CHECK,A32
; RUN: llc -mtriple=x86_64-windows -filetype=obj < %s | llvm-readobj - --codeview | FileCheck %s --check-prefixes=CV
; Generated by clang++ -S -c -std=c++11 -emit-llvm -g from the following C++11 source:
; extern "C" void f1();
; extern "C" void f2();
; extern "C" void f3();
; extern "C" void f4();
; extern "C" void f5();
; extern "C" void func(int i){
; switch (i) {
; case 0: f1(); break;
; case 1: f2(); break;
; case 2: f3(); break;
; case 3: f4(); break;
; }
; switch (i) {
; case 1: f2(); break;
; case 2: f3(); break;
; case 3: f4(); break;
; case 4: f5(); break;
; case 5: f1(); break;
; }
; }
; i686 entries are absolute addresses (Base = 0, SwitchType = Pointer).
; x86_64 entries are fixed-size and relative to the jump table (Base = Table,
; SwitchType = Int32).
; aarch64 entries are variable-sized and relative to the first entry's BB if
; compressed (Base = Branch+0x4, SwitchType = UInt8ShiftLeft/UInt16ShiftLeft)
; otherwise relative to the ADR instruction (Base = Branch-0xc, SwitchType =
; Int32).
; thumbv7a entries are either absolute addresses (Base = 0, SwitchType =
; Pointer) OR variable-sized and relative to *after* the branch instruction
; (Base = Branch+0x4, SwitchType = UInt8ShiftLeft/UInt16ShiftLeft/UInt32) but
; there appears to be a bug where the offsets are always 0.
; Verify branch labels match what's in the CodeView
; X64: .Ltmp1:
; X64-NEXT: jmpq *%{{.*}}
; X64: .Ltmp4:
; X64-NEXT: jmpq *%{{.*}}
; A32: .LCPI0_0:
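; NOTE(review): the check prefix on the next line is not followed by ':', so
; FileCheck treats it as an ordinary comment and never matches it. Confirm the
; instruction thumbv7a actually emits after the label before enabling it.
; A32-NEXT add pc, r{{.*}}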
; NOTE: thumbv7a places the jump tables just after the branch, so verify the other branch below
; A64: .Ltmp1:
; A64-NEXT: br x{{.*}}
; A64: .Ltmp4:
; A64-NEXT: br x{{.*}}
; Verify jump tables have the same entry size, base offset and shift as what's in the CodeView
; CHECK: {{\.?}}LJTI0_0:
; I686-NEXT: .long LBB0_[[#]]
; X64-NEXT: .long .LBB0_[[#]]-.LJTI0_0
; A32-NEXT: .byte (($MBB0_[[#]])-(.LCPI0_0+4))/2
; A64-NEXT: .byte (.LBB0_[[FIRSTBLOCK:[0-9]+]]-.LBB0_[[FIRSTBLOCK]])>>2
; NOTE: thumbv7a places the jump tables just after the branch, so check for the other branch now
; A32: .LCPI0_1:
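; NOTE(review): the check prefix on the next line is not followed by ':', so
; FileCheck treats it as an ordinary comment and never matches it. Confirm the
; instruction thumbv7a actually emits after the label before enabling it.
; A32-NEXT add pc, r{{.*}}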
; CHECK: {{\.?}}LJTI0_1:
; I686-NEXT: .long LBB0_[[#]]
; X64-NEXT: .long .LBB0_[[#]]-.LJTI0_1
; A32-NEXT: .byte (($MBB0_[[#]])-(.LCPI0_1+4))/2
; A64-NEXT: .byte (.LBB0_[[SECONDBLOCK:[0-9]+]]-.LBB0_[[SECONDBLOCK]])>>2
; Verify CodeView
; CHECK: [[INT16:\.short|\.hword]] 4441 [[COMMENT:#|//|@]] Record kind: S_ARMSWITCHTABLE
; I686-NEXT: .long 0 [[COMMENT]] Base offset
; I686-NEXT: .short 0 [[COMMENT]] Base section index
; X64-NEXT: .secrel32 .LJTI0_0 [[COMMENT]] Base offset
; X64-NEXT: .secidx .LJTI0_0 [[COMMENT]] Base section index
; A32-NEXT: .secrel32 .LCPI0_0+4 [[COMMENT]] Base offset
; A32-NEXT: .secidx .LCPI0_0 [[COMMENT]] Base section index
; A64-NEXT: .secrel32 .LBB0_[[FIRSTBLOCK]] [[COMMENT]] Base offset
; A64-NEXT: .secidx .LBB0_[[FIRSTBLOCK]] [[COMMENT]] Base section index
; I686-NEXT: .short 6 [[COMMENT]] Switch type
; X64-NEXT: .short 4 [[COMMENT]] Switch type
; A32-NEXT: .short 7 [[COMMENT]] Switch type
; A64-NEXT: .hword 7 [[COMMENT]] Switch type
; NOTA32-NEXT: .secrel32 {{\.?}}Ltmp1 [[COMMENT]] Branch offset
; A32-NEXT: .secrel32 .LCPI0_0 [[COMMENT]] Branch offset
; CHECK-NEXT: .secrel32 {{\.?}}LJTI0_0 [[COMMENT]] Table offset
; NOTA32-NEXT: .secidx {{\.?}}Ltmp1 [[COMMENT]] Branch section index
; A32-NEXT: .secidx .LCPI0_0 [[COMMENT]] Branch section index
; CHECK-NEXT: .secidx {{\.?}}LJTI0_0 [[COMMENT]] Table section index
; CHECK-NEXT: [[INT32:\.long|\.word]] 4 [[COMMENT]] Entries count
; CHECK: [[INT16]] 4441 [[COMMENT]] Record kind: S_ARMSWITCHTABLE
; I686-NEXT: .long 0 [[COMMENT]] Base offset
; I686-NEXT: .short 0 [[COMMENT]] Base section index
; X64-NEXT: .secrel32 .LJTI0_1 [[COMMENT]] Base offset
; X64-NEXT: .secidx .LJTI0_1 [[COMMENT]] Base section index
; A32-NEXT: .secrel32 .LCPI0_1+4 [[COMMENT]] Base offset
; A32-NEXT: .secidx .LCPI0_1 [[COMMENT]] Base section index
; A64-NEXT: .secrel32 .LBB0_[[SECONDBLOCK]] [[COMMENT]] Base offset
; A64-NEXT: .secidx .LBB0_[[SECONDBLOCK]] [[COMMENT]] Base section index
; I686-NEXT: .short 6 [[COMMENT]] Switch type
; X64-NEXT: .short 4 [[COMMENT]] Switch type
; A32-NEXT: .short 7 [[COMMENT]] Switch type
; A64-NEXT: .hword 7 [[COMMENT]] Switch type
; NOTA32-NEXT: .secrel32 {{\.?}}Ltmp4 [[COMMENT]] Branch offset
; A32-NEXT: .secrel32 .LCPI0_1 [[COMMENT]] Branch offset
; CHECK-NEXT: .secrel32 {{\.?}}LJTI0_1 [[COMMENT]] Table offset
; NOTA32-NEXT: .secidx {{\.?}}Ltmp4 [[COMMENT]] Branch section index
; A32-NEXT: .secidx .LCPI0_1 [[COMMENT]] Branch section index
; CHECK-NEXT: .secidx {{\.?}}LJTI0_1 [[COMMENT]] Table section index
; CHECK-NEXT: [[INT32]] 5 [[COMMENT]] Entries count
; CHECK-NOT: [[INT16]] 4441 [[COMMENT]] Record kind: S_ARMSWITCHTABLE
; Verify CodeView as dumped by llvm-readobj
; CV: Subsection [
; CV: SubSectionType: Symbols (0xF1)
; CV: GlobalProcIdSym {
; CV: DisplayName: func
; CV-NOT: GlobalProcIdSym
; CV: JumpTableSym {
; CV-NEXT: Kind: S_ARMSWITCHTABLE (0x1159)
; CV-NEXT: BaseOffset: 0x0
; CV-NEXT: BaseSegment: 0
; CV-NEXT: SwitchType: Int32 (0x4)
; CV-NEXT: BranchOffset: 0x23
; CV-NEXT: TableOffset: 0x0
; CV-NEXT: BranchSegment: 0
; CV-NEXT: TableSegment: 0
; CV-NEXT: EntriesCount: 4
; CV-NEXT: }
; CV-NEXT: JumpTableSym {
; CV-NEXT: Kind: S_ARMSWITCHTABLE (0x1159)
; CV-NEXT: BaseOffset: 0x10
; CV-NEXT: BaseSegment: 0
; CV-NEXT: SwitchType: Int32 (0x4)
; CV-NEXT: BranchOffset: 0x5A
; CV-NEXT: TableOffset: 0x10
; CV-NEXT: BranchSegment: 0
; CV-NEXT: TableSegment: 0
; CV-NEXT: EntriesCount: 5
; CV-NEXT: }
; CV-NOT: JumpTableSym {
source_filename = ".\\jump-table.cpp"
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc19.35.32216"
; Function Attrs: mustprogress noinline optnone uwtable
; @func is the IR for the C++ function `func(int i)` quoted in the comment
; header of this file. It contains two back-to-back switch instructions on the
; same argument; each arm calls one of the external functions f1..f5 and then
; falls through to a common join block. The function returns nothing.
define dso_local void @func(i32 noundef %0) #0 !dbg !8 {
; Spill the incoming argument to a stack slot and attach the local-variable
; description !14 to that slot.
%2 = alloca i32, align 4
store i32 %0, ptr %2, align 4
call void @llvm.dbg.declare(metadata ptr %2, metadata !14, metadata !DIExpression()), !dbg !15
; First switch, on the reloaded argument. It has four case labels (0..3),
; which matches the entry count of 4 that the checks expect for the first
; jump table. Any other value goes directly to the join block %8.
%3 = load i32, ptr %2, align 4, !dbg !16
switch i32 %3, label %8 [
i32 0, label %4
i32 1, label %5
i32 2, label %6
i32 3, label %7
], !dbg !16
; Arms of the first switch: one call each, then branch to the join block %8.
4: ; preds = %1
call void @f1(), !dbg !17
br label %8, !dbg !17
5: ; preds = %1
call void @f2(), !dbg !19
br label %8, !dbg !19
6: ; preds = %1
call void @f3(), !dbg !20
br label %8, !dbg !20
7: ; preds = %1
call void @f4(), !dbg !21
br label %8, !dbg !21
; Join block of the first switch. The argument is reloaded from its stack slot
; and dispatched by the second switch, which has five case labels (1..5),
; matching the entry count of 5 expected for the second jump table. Any other
; value goes directly to the exit block %15.
8: ; preds = %1, %7, %6, %5, %4
%9 = load i32, ptr %2, align 4, !dbg !22
switch i32 %9, label %15 [
i32 1, label %10
i32 2, label %11
i32 3, label %12
i32 4, label %13
i32 5, label %14
], !dbg !22
; Arms of the second switch: one call each, then branch to the exit block %15.
10: ; preds = %8
call void @f2(), !dbg !23
br label %15, !dbg !23
11: ; preds = %8
call void @f3(), !dbg !25
br label %15, !dbg !25
12: ; preds = %8
call void @f4(), !dbg !26
br label %15, !dbg !26
13: ; preds = %8
call void @f5(), !dbg !27
br label %15, !dbg !27
14: ; preds = %8
call void @f1(), !dbg !28
br label %15, !dbg !28
; Exit block: reached from the second switch's default edge and from all of
; its arms.
15: ; preds = %8, %14, %13, %12, %11, %10
ret void, !dbg !29
}
; Debug-info intrinsic; @func calls it once to attach variable metadata to the
; stack slot that holds its argument.
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; External functions called from the switch arms of @func. They are only
; declared in this module, never defined.
declare dso_local void @f1() #2
declare dso_local void @f2() #2
declare dso_local void @f3() #2
declare dso_local void @f4() #2
declare dso_local void @f5() #2
; Attribute groups: #0 is used by @func, #1 by @llvm.dbg.declare, and #2 by
; the f1..f5 declarations. Groups #0 and #2 name an x86-64 target-cpu and
; x86 target-features even though the RUN lines at the top of this file also
; compile the module for i686, aarch64 and thumbv7a triples.
attributes #0 = { mustprogress noinline optnone uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
attributes #2 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
; Named metadata: the single compile unit (!0), the module flags (!2..!6) and
; the producer identification string (!7).
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !6}
!llvm.ident = !{!7}
; Compile unit for the C++11 source, with full debug info, and the file it
; refers to.
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_11, file: !1, producer: "clang version 15.0.1", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "jump-table.cpp", directory: "C:\\llvm", checksumkind: CSK_MD5, checksum: "35610c7104c8080f83e2bf6a02dabfc9")
; Module flags. !2 sets the "CodeView" flag to 1; the checks in this file all
; inspect CodeView records, so this flag is presumably what selects that
; debug-info format.
!2 = !{i32 2, !"CodeView", i32 1}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 2}
!5 = !{i32 7, !"PIC Level", i32 2}
!6 = !{i32 7, !"uwtable", i32 2}
!7 = !{!"clang version 15.0.1"}
; Subprogram for @func (declared at source line 6) and the nodes it refers to.
; !9 names the same file as !1 (identical checksum) but spells the filename
; with a leading ".\".
!8 = distinct !DISubprogram(name: "func", scope: !9, file: !9, line: 6, type: !10, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !13)
!9 = !DIFile(filename: ".\\jump-table.cpp", directory: "C:\\llvm", checksumkind: CSK_MD5, checksum: "35610c7104c8080f83e2bf6a02dabfc9")
; Function type: the type list !11 holds a null entry followed by the 32-bit
; signed "int" basic type !12.
!10 = !DISubroutineType(types: !11)
!11 = !{null, !12}
!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!13 = !{}
; The argument "i" (argument number 1); attached to its stack slot by the
; llvm.dbg.declare call in @func.
!14 = !DILocalVariable(name: "i", arg: 1, scope: !8, file: !9, line: 6, type: !12)
; Source locations referenced by the !dbg attachments in @func. Line 6 is the
; function header and line 7 the first switch; lines 8-11 are its arms, scoped
; to lexical block !18.
!15 = !DILocation(line: 6, scope: !8)
!16 = !DILocation(line: 7, scope: !8)
!17 = !DILocation(line: 8, scope: !18)
!18 = distinct !DILexicalBlock(scope: !8, file: !9, line: 7)
!19 = !DILocation(line: 9, scope: !18)
!20 = !DILocation(line: 10, scope: !18)
!21 = !DILocation(line: 11, scope: !18)
; Line 13 is the second switch; lines 14-18 are its arms, scoped to lexical
; block !24. Line 20 is attached to the return instruction.
!22 = !DILocation(line: 13, scope: !8)
!23 = !DILocation(line: 14, scope: !24)
!24 = distinct !DILexicalBlock(scope: !8, file: !9, line: 13)
!25 = !DILocation(line: 15, scope: !24)
!26 = !DILocation(line: 16, scope: !24)
!27 = !DILocation(line: 17, scope: !24)
!28 = !DILocation(line: 18, scope: !24)
!29 = !DILocation(line: 20, scope: !8)