
We call tail-call-elim near the beginning of the pipeline, but that is too early to annotate calls that get added later. In the motivating case from issue #47852, the missing 'tail' on memset leads to sub-optimal codegen. I experimented with removing the early instance of tail-call-elim instead of just adding another pass, but that appears to be slightly worse for compile-time: +0.15% vs. +0.08% time. "tailcall" shows adding the pass; "tailcall2" shows moving the pass to later, then adding the original early pass back (so 1596886802 is functionally equivalent to 180b0439dc ): https://llvm-compile-time-tracker.com/index.php?config=NewPM-O3&stat=instructions&remote=rotateright Note that there was an effort to split the tail call functionality into 2 passes - that could help reduce compile-time if we find that this change costs more in compile-time than expected based on the preliminary testing: D60031 Differential Revision: https://reviews.llvm.org/D130374
64 lines
2.1 KiB
LLVM
64 lines
2.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -O1 -S < %s | FileCheck %s
|
|
; RUN: opt -passes='default<O1>' -S < %s | FileCheck %s
|
|
|
|
; This is an important benchmark for color-space-conversion.
|
|
; It should reduce to contain only 1 'not' op.
|
|
|
|
declare void @use(i8, i8, i8, i8)
|
|
|
|
define void @cmyk(i8 %r, i8 %g, i8 %b) {
|
|
; CHECK-LABEL: @cmyk(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]])
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP0]])
|
|
; CHECK-NEXT: [[K_0:%.*]] = xor i8 [[TMP1]], -1
|
|
; CHECK-NEXT: [[SUB31:%.*]] = sub i8 [[TMP1]], [[R]]
|
|
; CHECK-NEXT: [[SUB35:%.*]] = sub i8 [[TMP1]], [[G]]
|
|
; CHECK-NEXT: [[SUB39:%.*]] = sub i8 [[TMP1]], [[B]]
|
|
; CHECK-NEXT: tail call void @use(i8 [[SUB31]], i8 [[SUB35]], i8 [[SUB39]], i8 [[K_0]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%conv = sext i8 %r to i32
|
|
%sub = sub nsw i32 255, %conv
|
|
%conv1 = trunc i32 %sub to i8
|
|
%conv2 = sext i8 %g to i32
|
|
%sub3 = sub nsw i32 255, %conv2
|
|
%conv4 = trunc i32 %sub3 to i8
|
|
%conv5 = sext i8 %b to i32
|
|
%sub6 = sub nsw i32 255, %conv5
|
|
%conv7 = trunc i32 %sub6 to i8
|
|
%conv8 = sext i8 %conv1 to i32
|
|
%conv9 = sext i8 %conv4 to i32
|
|
%cmp = icmp slt i32 %conv8, %conv9
|
|
br i1 %cmp, label %if.then, label %if.else
|
|
|
|
if.then:
|
|
%conv12 = sext i8 %conv7 to i32
|
|
%cmp13 = icmp slt i32 %conv8, %conv12
|
|
%cond = select i1 %cmp13, i32 %conv8, i32 %conv12
|
|
%conv17 = trunc i32 %cond to i8
|
|
br label %if.end
|
|
|
|
if.else:
|
|
%conv19 = sext i8 %conv7 to i32
|
|
%cmp20 = icmp slt i32 %conv9, %conv19
|
|
%cond27 = select i1 %cmp20, i32 %conv9, i32 %conv19
|
|
%conv28 = trunc i32 %cond27 to i8
|
|
br label %if.end
|
|
|
|
if.end:
|
|
%k.0 = phi i8 [ %conv17, %if.then ], [ %conv28, %if.else ]
|
|
%conv30 = sext i8 %k.0 to i32
|
|
%sub31 = sub nsw i32 %conv8, %conv30
|
|
%conv32 = trunc i32 %sub31 to i8
|
|
%sub35 = sub nsw i32 %conv9, %conv30
|
|
%conv36 = trunc i32 %sub35 to i8
|
|
%conv37 = sext i8 %conv7 to i32
|
|
%sub39 = sub nsw i32 %conv37, %conv30
|
|
%conv40 = trunc i32 %sub39 to i8
|
|
call void @use(i8 %conv32, i8 %conv36, i8 %conv40, i8 %k.0)
|
|
ret void
|
|
}
|