Sanjay Patel bfb9b8e075 [Passes] add a tail-call-elim pass near the end of the opt pipeline
We call tail-call-elim near the beginning of the pipeline,
but that is too early to annotate calls that get added later.

In the motivating case from issue #47852, the missing 'tail'
on memset leads to sub-optimal codegen.

I experimented with removing the early instance of
tail-call-elim instead of just adding another pass, but that
appears to be slightly worse for compile-time:
+0.15% vs. +0.08% time.
"tailcall" shows adding the pass; "tailcall2" shows moving
the pass to later, then adding the original early pass back
(so 1596886802 is functionally equivalent to 180b0439dc):
https://llvm-compile-time-tracker.com/index.php?config=NewPM-O3&stat=instructions&remote=rotateright

Note that there was an effort to split the tail call functionality
into 2 passes - that could help reduce compile-time if we find
that this change costs more in compile-time than expected based
on the preliminary testing:
D60031

Differential Revision: https://reviews.llvm.org/D130374
2022-07-25 15:25:47 -04:00

92 lines
2.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -O1 | FileCheck %s
; RUN: opt -passes='default<O1>' -S < %s | FileCheck %s
; In all tests, expect instcombine to canonicalize the select patterns
; for min/max/abs to allow CSE and subsequent simplification.
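; TODO: sub (smax (a - b), 0), (smax (a - b), 0) --> 0
; Under nsw, both selects below compute smax(a - b, 0), so this should be
; reduced to 0, but we are missing some fold(s) in instcombine.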
define i8 @smax_nsw(i8 %a, i8 %b) {
; CHECK-LABEL: @smax_nsw(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i8 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8 [[A]], [[B]]
; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP1]], i8 0, i8 [[SUB]]
; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.smax.i8(i8 [[SUB]], i8 0)
; CHECK-NEXT: [[R:%.*]] = sub i8 [[TMP1]], [[M1]]
; CHECK-NEXT: ret i8 [[R]]
;
; Input pattern: two differently spelled clamps of (a - b) at zero, subtracted
; from each other. The expected output above still keeps one select and one
; smax intrinsic call, i.e. the two spellings are not yet unified.
;
; d = a - b, with signed overflow declared impossible (nsw).
%sub = sub nsw i8 %a, %b
; First spelling compares the original operands: a < b (signed).
%cmp1 = icmp slt i8 %a, %b
; Second spelling compares the difference against zero: d > 0 (signed).
%cmp2 = icmp sgt i8 %sub, 0
; m1 = (a < b) ? 0 : d. When the nsw subtraction does not overflow,
; a < b holds exactly when d < 0, so this is max(d, 0).
%m1 = select i1 %cmp1, i8 0, i8 %sub
; m2 = (d > 0) ? d : 0, which is max(d, 0) written directly.
%m2 = select i1 %cmp2, i8 %sub, i8 0
; Difference of the two clamps; 0 whenever m1 and m2 agree.
%r = sub i8 %m2, %m1
ret i8 %r
}
; or (abs a), (abs a) --> abs a
define i8 @abs_swapped(i8 %a) {
; CHECK-LABEL: @abs_swapped(
; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.abs.i8(i8 [[A:%.*]], i1 false)
; CHECK-NEXT: ret i8 [[TMP1]]
;
; Input pattern: two absolute-value selects whose compares use opposite
; predicates (sgt vs. slt against 0) and whose select arms are swapped to
; match. The expected output above is a single abs intrinsic call.
;
; neg = -a (plain sub, so the minimum signed value wraps to itself).
%neg = sub i8 0, %a
%cmp1 = icmp sgt i8 %a, 0
%cmp2 = icmp slt i8 %a, 0
; m1 = (a > 0) ? a : -a
%m1 = select i1 %cmp1, i8 %a, i8 %neg
; m2 = (a < 0) ? -a : a -- same value as m1 for every input, including 0.
%m2 = select i1 %cmp2, i8 %neg, i8 %a
; or x, x == x, so the result is just the absolute value.
%r = or i8 %m2, %m1
ret i8 %r
}
; xor (nabs a), (nabs a) --> 0
define i8 @nabs_swapped(i8 %a) {
; CHECK-LABEL: @nabs_swapped(
; CHECK-NEXT: ret i8 0
;
; Input pattern: two negated-absolute-value selects whose compares use
; opposite predicates (slt vs. sgt against 0) with the select arms swapped to
; match. The expected output above is the constant 0.
;
; neg = -a
%neg = sub i8 0, %a
%cmp1 = icmp slt i8 %a, 0
%cmp2 = icmp sgt i8 %a, 0
; m1 = (a < 0) ? a : -a
%m1 = select i1 %cmp1, i8 %a, i8 %neg
; m2 = (a > 0) ? -a : a -- same value as m1 for every input, including 0.
%m2 = select i1 %cmp2, i8 %neg, i8 %a
; xor x, x == 0
%r = xor i8 %m2, %m1
ret i8 %r
}
; xor (abs a), (abs a) --> 0
define i8 @abs_different_constants(i8 %a) {
; CHECK-LABEL: @abs_different_constants(
; CHECK-NEXT: ret i8 0
;
; Input pattern: two absolute-value selects whose compares use different
; constants (sgt -1 vs. slt 0). The expected output above is the constant 0.
;
; neg = -a
%neg = sub i8 0, %a
; a > -1 is the same condition as a >= 0.
%cmp1 = icmp sgt i8 %a, -1
%cmp2 = icmp slt i8 %a, 0
; m1 = (a >= 0) ? a : -a
%m1 = select i1 %cmp1, i8 %a, i8 %neg
; m2 = (a < 0) ? -a : a -- same value as m1 for every input.
%m2 = select i1 %cmp2, i8 %neg, i8 %a
; xor x, x == 0
%r = xor i8 %m2, %m1
ret i8 %r
}
; or (nabs a), (nabs a) --> nabs a
define i8 @nabs_different_constants(i8 %a) {
; CHECK-LABEL: @nabs_different_constants(
; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.abs.i8(i8 [[A:%.*]], i1 false)
; CHECK-NEXT: [[M1:%.*]] = sub i8 0, [[TMP1]]
; CHECK-NEXT: ret i8 [[M1]]
;
; Input pattern: two negated-absolute-value selects whose compares use
; different constants (slt 0 vs. sgt -1). The expected output above is one
; abs intrinsic call followed by a negation.
;
; neg = -a
%neg = sub i8 0, %a
%cmp1 = icmp slt i8 %a, 0
; a > -1 is the same condition as a >= 0.
%cmp2 = icmp sgt i8 %a, -1
; m1 = (a < 0) ? a : -a
%m1 = select i1 %cmp1, i8 %a, i8 %neg
; m2 = (a >= 0) ? -a : a -- same value as m1 for every input.
%m2 = select i1 %cmp2, i8 %neg, i8 %a
; or x, x == x, so the result is the negated absolute value.
%r = or i8 %m2, %m1
ret i8 %r
}