llvm-project/llvm/test/CodeGen/AArch64/cgdata-read-priority.ll
Kyungwoo Lee 0f52545289
[CGData][MachineOutliner] Global Outlining (#90074)
This commit introduces support for outlining functions across modules
using codegen data generated from previous codegen. The codegen data
currently manages the outlined hash tree, which records outlining
instances that occurred locally in the past.
    
The machine outliner now operates in one of three modes:

1. CGDataMode::None: This is the default outliner mode that uses the
suffix tree to identify (local) outlining candidates within a module.
This mode is also used by (full)LTO to maintain optimal behavior with
the combined module.
2. CGDataMode::Write (`-codegen-data-generate`): This mode is identical
to the default mode, but it also publishes the stable hash sequences of
instructions in the outlined functions into a local outlined hash tree.
It then encodes this into the `__llvm_outline` section, which will be
dead-stripped at link time.
3. CGDataMode::Read (`-codegen-data-use-path={.cgdata}`): This mode
reads a codegen data file (.cgdata) and initializes a global outlined
hash tree. This tree is used to generate global outlining candidates.
Note that the codegen data file has been post-processed with the raw
`__llvm_outline` sections from all native objects using the
`llvm-cgdata` tool (or a linker, `LLD`, or a new ThinLTO pipeline
later).

This depends on https://github.com/llvm/llvm-project/pull/105398. After
this PR, LLD (https://github.com/llvm/llvm-project/pull/90166) and Clang
(https://github.com/llvm/llvm-project/pull/90304) will follow to add
client-side support.
This is a patch for
https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-2-thinlto-nolto/78753.
2024-09-10 06:56:31 -07:00

69 lines
2.4 KiB
LLVM

; This test verifies whether we can outline a singleton instance (i.e., an instance that does not repeat)
; using codegen data that has been read from a previous codegen run.
; When multiple matches occur, we prioritize the candidates using the global frequency.
;
; The file is split into three modules (write1.ll, write2.ll, read.ll), which
; appear after the directive lines below.
; RUN: split-file %s %t
; First, we generate the cgdata file from local outline instances present in write1.ll and write2.ll
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/write1.ll -o %t_write1
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/write2.ll -o %t_write2
; The two objects are merged into a single cgdata file, whose outlined hash tree
; summary is then checked.
; NOTE(review): the expected 8 nodes are presumably 1 root + 3 nodes for the
; write1.ll sequence + 4 nodes for the write2.ll sequence, with one terminal per
; sequence -- confirm against the llvm-cgdata output format.
; RUN: llvm-cgdata --merge %t_write1 %t_write2 -o %t_cgdata
; RUN: llvm-cgdata --show %t_cgdata | FileCheck %s --check-prefix=SHOW
; SHOW: Outlined hash tree:
; SHOW-NEXT: Total Node Count: 8
; SHOW-NEXT: Terminal Node Count: 2
; SHOW-NEXT: Depth: 4
; Now, we read the cgdata in the machine outliner, enabling us to optimistically
; outline a singleton instance in read.ll that matches against the cgdata.
; There are two matches -- (1) (mov #1, mov #2, mov #3, b) and (2) (mov #2, mov #3, b).
; Even though sequence (1) is longer than sequence (2), the latter is outlined because it occurs more frequently in the outlined hash tree.
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata -filetype=obj %t/read.ll -o %t_read
; The disassembly must show an outlined function made of exactly two movs and a
; branch, i.e. the shorter sequence (2).
; RUN: llvm-objdump -d %t_read | FileCheck %s
; CHECK: _OUTLINED_FUNCTION
; CHECK-NEXT: mov
; CHECK-NEXT: mov
; CHECK-NEXT: b
;--- write1.ll
; The sequence (mov #2, mov #3, b) is repeated 4 times: each function below
; passes the same two trailing arguments (2, 3) to @g, while the two leading
; arguments differ from function to function.
; @g is declared with four i32 parameters so that the declaration agrees with
; the four-argument call sites.
declare i32 @g(i32, i32, i32, i32)

define i32 @f1() minsize {
  %1 = call i32 @g(i32 10, i32 50, i32 2, i32 3)
  ret i32 %1
}

define i32 @f2() minsize {
  %1 = call i32 @g(i32 20, i32 60, i32 2, i32 3)
  ret i32 %1
}

define i32 @f3() minsize {
  %1 = call i32 @g(i32 30, i32 70, i32 2, i32 3)
  ret i32 %1
}

define i32 @f4() minsize {
  %1 = call i32 @g(i32 40, i32 80, i32 2, i32 3)
  ret i32 %1
}
;--- write2.ll
; The sequence (mov #1, mov #2, mov #3, b) is repeated 2 times: both functions
; below pass the same three trailing arguments (1, 2, 3) to @g and differ only
; in the first argument.
; @g is declared with four i32 parameters so that the declaration agrees with
; the four-argument call sites.
declare i32 @g(i32, i32, i32, i32)

define i32 @f6() minsize {
  %1 = call i32 @g(i32 10, i32 1, i32 2, i32 3)
  ret i32 %1
}

define i32 @f7() minsize {
  %1 = call i32 @g(i32 20, i32 1, i32 2, i32 3)
  ret i32 %1
}
;--- read.ll
; A single (non-repeating) call whose trailing arguments (1, 2, 3) match both
; sequences recorded by the write modules; it is compiled with the merged
; codegen data supplied via -codegen-data-use-path.
; @g is declared with four i32 parameters so that the declaration agrees with
; the four-argument call site.
declare i32 @g(i32, i32, i32, i32)

define i32 @f3() minsize {
  %1 = call i32 @g(i32 30, i32 1, i32 2, i32 3)
  ret i32 %1
}