
We call tail-call-elim near the beginning of the pipeline, but that is too early to annotate calls that get added later. In the motivating case from issue #47852, the missing 'tail' on memset leads to sub-optimal codegen. I experimented with removing the early instance of tail-call-elim instead of just adding another pass, but that appears to be slightly worse for compile-time: +0.15% vs. +0.08% time. "tailcall" shows adding the pass; "tailcall2" shows moving the pass to later, then adding the original early pass back (so 1596886802 is functionally equivalent to 180b0439dc): https://llvm-compile-time-tracker.com/index.php?config=NewPM-O3&stat=instructions&remote=rotateright Note that there was an effort to split the tail call functionality into 2 passes (D60031); that could help reduce compile-time if we find that this change costs more in compile-time than expected based on the preliminary testing. Differential Revision: https://reviews.llvm.org/D130374
34 lines
1.1 KiB
LLVM
34 lines
1.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s

; Regression test for issue #47852.
;
; The input is a byte-at-a-time zeroing loop over %d that runs %c times.
; The assertions below expect the O2 pipeline to replace the whole loop with
; one memset intrinsic call guarded by a "count == 0" test, and expect that
; call to carry the 'tail' marker.

define void @PR47852(ptr noundef %d, i32 noundef %c) {
; CHECK-LABEL: @PR47852(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP_NOT1:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP_NOT1]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; CHECK:       while.body.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[C]] to i64
; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 1 [[D:%.*]], i8 0, i64 [[TMP0]], i1 false)
; CHECK-NEXT:    br label [[WHILE_END]]
; CHECK:       while.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %while.cond

; Loop header: %c.addr.0 is the remaining count and %d.addr.0 the current
; destination byte. The decrement is computed before the comparison, but the
; comparison tests the pre-decrement value, which corresponds to the source
; shape `while (c--) *d++ = 0;`.
while.cond:
  %c.addr.0 = phi i32 [ %c, %entry ], [ %dec, %while.body ]
  %d.addr.0 = phi ptr [ %d, %entry ], [ %incdec.ptr, %while.body ]
  %dec = add i32 %c.addr.0, -1
  %cmp = icmp ugt i32 %c.addr.0, 0
  br i1 %cmp, label %while.body, label %while.end

; Loop body: store a zero byte at the current pointer; the pointer advanced by
; one byte feeds the next iteration through the phi above.
while.body:
  %incdec.ptr = getelementptr inbounds i8, ptr %d.addr.0, i32 1
  store i8 0, ptr %d.addr.0, align 1
  br label %while.cond

while.end:
  ret void
}