llvm-project/llvm/test/tools/llvm-split/AMDGPU/kernels-load-balancing.ll
Pierre van Houtryve 43fd244b3d Reland "[AMDGPU] Add AMDGPU-specific module splitting (#89245)"
(with fix for ubsan)

This enables the --lto-partitions option to work more consistently.

This module splitting logic is fully aware of AMDGPU modules and their
specificities, and takes advantage of them to split modules in a way
that avoids compilation issues (such as resource usage being incorrectly
represented).

This also includes a logging system that's more elaborate than just
LLVM_DEBUG: it allows printing logs to uniquely named files, optionally
with all value names hidden so they can be safely shared without leaking
information about the source. Logs can also be enabled through an
environment variable, which avoids the sometimes complicated process of
passing a -mllvm option all the way from the clang driver to the offload
linker that handles full LTO codegen.
2024-05-27 10:43:00 +02:00

76 lines
1.7 KiB
LLVM

; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s
; Test load balancing logic with 6 kernels.
;
; Kernels go from most expensive (A == 6) to least expensive (F == 1)
;
; Load balancing should work like this (current partition cost is in parens)
;
; Initial -> [P0(0), P1(0), P2(0)]
;
; A(6) goes in 2 -> [P2(6), P0(0), P1(0)]
; B(5) goes in 1 -> [P2(6), P1(5), P0(0)]
; C(4) goes in 0 -> [P2(6), P1(5), P0(4)]
; D(3) goes in 0 -> [P0(7), P2(6), P1(5)]
; E(2) goes in 1 -> [P0(7), P1(7), P2(6)]
; F(1) goes in 2 -> [P0(7), P1(7), P2(7)]
; CHECK0-NOT: define
; CHECK0: define amdgpu_kernel void @C
; CHECK0: define amdgpu_kernel void @D
; CHECK0-NOT: define
; CHECK1-NOT: define
; CHECK1: define amdgpu_kernel void @B
; CHECK1: define amdgpu_kernel void @E
; CHECK1-NOT: define
; CHECK2-NOT: define
; CHECK2: define amdgpu_kernel void @A
; CHECK2: define amdgpu_kernel void @F
; CHECK2-NOT: define
; Kernel A: five stores through %x followed by ret, i.e. 6 instructions, which
; lines up with the "A == 6" cost quoted in the header comment. The header's
; load-balancing walkthrough places it first, in partition 2.
; NOTE(review): the cost presumably derives from the instruction count, so do
; not add or remove instructions without updating the header - confirm against
; the splitting pass.
define amdgpu_kernel void @A(ptr %x) {
; Every store targets the same pointer; the values look like arbitrary filler.
store i64 42, ptr %x
store i64 43, ptr %x
store i64 44, ptr %x
store i64 45, ptr %x
store i64 46, ptr %x
ret void
}
; Kernel B: four stores through %x followed by ret, i.e. 5 instructions,
; consistent with the B(5) entry in the header walkthrough, which assigns it
; to partition 1.
; NOTE(review): presumably the instruction count is what sets the cost; keep
; it at 5 unless the header is updated too - confirm against the splitting pass.
define amdgpu_kernel void @B(ptr %x) {
store i64 42, ptr %x
store i64 43, ptr %x
store i64 44, ptr %x
store i64 45, ptr %x
ret void
}
; Kernel C: three stores through %x followed by ret, i.e. 4 instructions,
; consistent with the C(4) entry in the header walkthrough, which assigns it
; to partition 0.
; NOTE(review): presumably the instruction count is what sets the cost; keep
; it at 4 unless the header is updated too - confirm against the splitting pass.
define amdgpu_kernel void @C(ptr %x) {
store i64 42, ptr %x
store i64 43, ptr %x
store i64 44, ptr %x
ret void
}
; Kernel D: two stores through %x followed by ret, i.e. 3 instructions,
; consistent with the D(3) entry in the header walkthrough, which puts it in
; partition 0 alongside C.
; NOTE(review): presumably the instruction count is what sets the cost; keep
; it at 3 unless the header is updated too - confirm against the splitting pass.
define amdgpu_kernel void @D(ptr %x) {
store i64 42, ptr %x
store i64 43, ptr %x
ret void
}
; Kernel E: a single store through %x followed by ret, i.e. 2 instructions,
; consistent with the E(2) entry in the header walkthrough, which puts it in
; partition 1 alongside B.
; NOTE(review): presumably the instruction count is what sets the cost; keep
; it at 2 unless the header is updated too - confirm against the splitting pass.
define amdgpu_kernel void @E(ptr %x) {
store i64 42, ptr %x
ret void
}
; Kernel F: takes no arguments and contains only ret, i.e. 1 instruction,
; consistent with the F(1) entry in the header walkthrough, which puts it in
; partition 2 alongside A.
; NOTE(review): presumably the instruction count is what sets the cost; keep
; the body empty unless the header is updated too - confirm against the
; splitting pass.
define amdgpu_kernel void @F() {
ret void
}