The Attributor has logic to run only on assumed-live functions, and this is now exposed to users. OpenMP-opt will now (mostly) ignore dead internal functions, but it runs the same deduction as before if an internal function is marked live. This should lower compile time, since we run on less code and delete more code early on. For the full OpenMC module compiled with noinline and JITed at runtime, we save ~25%, or ~10s on my machine, during JITing.
53 lines
1.8 KiB
LLVM
53 lines
1.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
|
|
; RUN: opt < %s -S -passes=openmp-opt | FileCheck %s --check-prefix=MODULE
|
|
; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s --check-prefix=CGSCC
|
|
|
|
; Target triple for this module: 64-bit NVPTX with the CUDA OS component.
target triple = "nvptx64-nvidia-cuda"
|
|
|
|
; External declaration (no body in this file). The returned pointer carries the
; `noalias` return attribute. Its only use here is the call in @nblist.
declare noalias ptr @alloc()
|
|
|
|
; Internal-linkage function of the reproducer: obtains a pointer from @alloc,
; passes it to the self-recursive @rec with index 0, and returns 0.
; No call to @nblist appears in this file. The assertions at the end of the
; file contain a CGSCC-LABEL for @nblist but no MODULE-LABEL for it.
; NOTE(review): presumably the module pass removes it as a dead internal
; function, so only the CGSCC run still prints it; confirm against opt output.
define internal i32 @nblist() {
|
|
; %1 = pointer returned by the external @alloc.
%1 = call ptr @alloc()
|
|
; Enter the recursion at index 0.
call fastcc void @rec(ptr %1, i64 0)
|
|
ret i32 0
|
|
}
|
|
|
|
|
|
; Self-recursive function of the reproducer.
;   %0 : base pointer, indexed below as an array of i32
;   %1 : element index
; Stores i32 0 at element %1 of %0, then calls itself with the same pointer
; and index 0. There is no exit condition ahead of the recursive call, so the
; trailing `ret` is reached only if that call returns.
;
; Expected output for this function is pinned by the MODULE-* and CGSCC-*
; assertions at the end of the file, which are the only prefixes the RUN
; lines enable.
define fastcc void @rec(ptr %0, i64 %1) {
|
|
; %3 = address of the i32 element at index %1 from %0.
%3 = getelementptr i32, ptr %0, i64 %1
|
|
store i32 0, ptr %3, align 4
|
|
; Recurse with the same base pointer; the index is reset to 0.
call fastcc void @rec(ptr %0, i64 0)
|
|
ret void
|
|
}
|
|
|
|
; Module flags: two entries, "openmp" and "openmp-device", each with
; behavior 7 and value 50.
; NOTE(review): per the LLVM LangRef, behavior 7 is "Max". The value 50
; presumably encodes OpenMP 5.0, and "openmp-device" presumably marks the
; module as device-side code. Confirm both.
!llvm.module.flags = !{!0, !1}
|
|
|
|
!0 = !{i32 7, !"openmp", i32 50}
|
|
!1 = !{i32 7, !"openmp-device", i32 50}
|
|
; MODULE-LABEL: define {{[^@]+}}@rec
|
|
; MODULE-SAME: (ptr [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
|
|
; MODULE-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[TMP1]]
|
|
; MODULE-NEXT: store i32 0, ptr [[TMP3]], align 4
|
|
; MODULE-NEXT: call fastcc void @rec(ptr [[TMP0]], i64 0)
|
|
; MODULE-NEXT: ret void
|
|
;
|
|
;
|
|
; CGSCC-LABEL: define {{[^@]+}}@nblist() {
|
|
; CGSCC-NEXT: [[TMP1:%.*]] = call ptr @alloc()
|
|
; CGSCC-NEXT: call fastcc void @rec(ptr [[TMP1]], i64 0)
|
|
; CGSCC-NEXT: ret i32 0
|
|
;
|
|
;
|
|
; CGSCC-LABEL: define {{[^@]+}}@rec
|
|
; CGSCC-SAME: (ptr [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
|
|
; CGSCC-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[TMP1]]
|
|
; CGSCC-NEXT: store i32 0, ptr [[TMP3]], align 4
|
|
; CGSCC-NEXT: call fastcc void @rec(ptr [[TMP0]], i64 0)
|
|
; CGSCC-NEXT: ret void
|
|
;
|