XChy fc86d031fe
[SimplifyCFG] Transform for redirecting phis between unmergeable BB and SuccBB (#67275)
This patch extends the function TryToSimplifyUncondBranchFromEmptyBlock to
handle cases similar to the one below.

```llvm
; Input pattern. %start reaches %end both directly (case 3) and through
; %case012, and %case012 itself only holds a phi that feeds the phi in %end.
define i8 @src(i8 noundef %arg) {
start:
  ; Values other than 0..3 take the default edge to %unreachable.
  switch i8 %arg, label %unreachable [
    i8 0, label %case012
    i8 1, label %case1
    i8 2, label %case2
    i8 3, label %end
  ]

unreachable:
  unreachable

; %case1 and %case2 are empty forwarding blocks into %case012.
case1:
  br label %case012

case2:
  br label %case012

case012:
  ; Selects 1, 2 or 3 for %arg == 0, 1 or 2 respectively.
  %phi1 = phi i8 [ 3, %case2 ], [ 2, %case1 ], [ 1, %start ]
  br label %end

end:
  ; Forwards %phi1, or yields 4 on the direct edge from %start (%arg == 3).
  %phi2 = phi i8 [ %phi1, %case012 ], [ 4, %start ]
  ret i8 %phi2
}
```
The phis here should be merged into one phi, so that we can better
optimize it:

```llvm
; Desired result. Every switch case reaches %end on its own edge, so a single
; phi in %end selects the returned value.
define i8 @tgt(i8 noundef %arg) {
start:
  ; Values other than 0..3 take the default edge to %unreachable.
  switch i8 %arg, label %unreachable [
    i8 0, label %end
    i8 1, label %case1
    i8 2, label %case2
    i8 3, label %case3
  ]

unreachable:
  unreachable

; %case1, %case2 and %case3 are empty forwarding blocks into %end.
case1:
  br label %end

case2:
  br label %end

case3:
  br label %end

end:
  ; Yields 1, 2, 3 or 4 for %arg == 0, 1, 2 or 3, i.e. %arg + 1.
  %phi = phi i8 [ 4, %case3 ], [ 3, %case2 ], [ 2, %case1 ], [ 1, %start ]
  ret i8 %phi
}
```
Proof:
[normal](https://alive2.llvm.org/ce/z/vAWi88)
[multiple stages](https://alive2.llvm.org/ce/z/DDBQqp)
[multiple stages 2](https://alive2.llvm.org/ce/z/nGkeqN)
[multiple phi combinations](https://alive2.llvm.org/ce/z/VQeEdp)

The lookup table optimization should then convert it into `add %arg, 1`.
This patch just matches similar CFG structures and merges the phis in
different cases.

Maybe such a transform can be applied to other situations besides switch,
but I'm not sure whether it's better than not merging. Therefore, I only
try it for switch.

Related issue:
#63876

[Migrated](https://reviews.llvm.org/D155940)
2023-09-25 10:13:45 +08:00

151 lines
7.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Both invocations below run the jump-threading pass over this file. The first
; uses the pass defaults and is verified with the DEFAULT prefix. The second
; adds -jump-threading-threshold=6 and is verified with the OVERIDE prefix.
; RUN: opt -passes=jump-threading -S %s -o - | FileCheck %s --check-prefix=DEFAULT
; RUN: opt -passes=jump-threading -S -jump-threading-threshold=6 %s -o - | FileCheck %s --check-prefix=OVERIDE
; NUL-terminated "%d\n" format string handed to printf by both test functions.
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
; @test_minsize uses attribute group #0 (minsize optsize).
; Under the DEFAULT prefix the expected output keeps cond.true and cond.end as
; separate blocks, and the select becomes a freeze plus a conditional branch
; feeding a phi.
; Under the OVERIDE prefix (threshold raised to 6) the expected output contains
; a cond.end.thread block whose phis receive the constants 205962976 and 46 on
; the edge from entry. 205962976 is 46 to the 5th power.
define i32 @test_minsize(i32 %argc, ptr nocapture readonly %argv) local_unnamed_addr #0 {
; DEFAULT-LABEL: @test_minsize(
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[CMP:%.*]] = icmp eq i32 [[ARGC:%.*]], 2
; DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
; DEFAULT: cond.true:
; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARGV:%.*]], i32 1
; DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 4
; DEFAULT-NEXT: [[CALL:%.*]] = tail call i32 @atoi(ptr [[TMP0]])
; DEFAULT-NEXT: br label [[COND_END]]
; DEFAULT: cond.end:
; DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ [[CALL]], [[COND_TRUE]] ], [ 46, [[ENTRY:%.*]] ]
; DEFAULT-NEXT: [[TMP1:%.*]] = mul i32 [[COND]], [[COND]]
; DEFAULT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
; DEFAULT-NEXT: [[TMP3:%.*]] = mul i32 [[COND]], [[TMP2]]
; DEFAULT-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[COND]], 0
; DEFAULT-NEXT: [[COND_FR:%.*]] = freeze i1 [[TMP4]]
; DEFAULT-NEXT: br i1 [[COND_FR]], label [[TMP5:%.*]], label [[TMP6:%.*]]
; DEFAULT: 5:
; DEFAULT-NEXT: br label [[TMP6]]
; DEFAULT: 6:
; DEFAULT-NEXT: [[TMP7:%.*]] = phi i32 [ [[COND]], [[TMP5]] ], [ 0, [[COND_END]] ]
; DEFAULT-NEXT: [[TMP8:%.*]] = mul i32 [[TMP3]], [[TMP7]]
; DEFAULT-NEXT: [[CALL33:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str, i32 [[TMP8]])
; DEFAULT-NEXT: ret i32 0
;
; OVERIDE-LABEL: @test_minsize(
; OVERIDE-NEXT: entry:
; OVERIDE-NEXT: [[CMP:%.*]] = icmp eq i32 [[ARGC:%.*]], 2
; OVERIDE-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_END_THREAD:%.*]]
; OVERIDE: cond.end:
; OVERIDE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARGV:%.*]], i32 1
; OVERIDE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 4
; OVERIDE-NEXT: [[CALL:%.*]] = tail call i32 @atoi(ptr [[TMP0]])
; OVERIDE-NEXT: [[TMP1:%.*]] = mul i32 [[CALL]], [[CALL]]
; OVERIDE-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
; OVERIDE-NEXT: [[TMP3:%.*]] = mul i32 [[CALL]], [[TMP2]]
; OVERIDE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[CALL]], 0
; OVERIDE-NEXT: [[COND_FR:%.*]] = freeze i1 [[TMP4]]
; OVERIDE-NEXT: br i1 [[COND_FR]], label [[TMP5:%.*]], label [[COND_END_THREAD]]
; OVERIDE: 5:
; OVERIDE-NEXT: br label [[COND_END_THREAD]]
; OVERIDE: cond.end.thread:
; OVERIDE-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP3]], [[COND_END]] ], [ [[TMP3]], [[TMP5]] ], [ 205962976, [[ENTRY:%.*]] ]
; OVERIDE-NEXT: [[TMP7:%.*]] = phi i32 [ 0, [[COND_END]] ], [ [[CALL]], [[TMP5]] ], [ 46, [[ENTRY]] ]
; OVERIDE-NEXT: [[TMP8:%.*]] = mul i32 [[TMP6]], [[TMP7]]
; OVERIDE-NEXT: [[CALL33:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str, i32 [[TMP8]])
; OVERIDE-NEXT: ret i32 0
;
entry:
; Only when %argc equals 2 is the value read from the argument vector.
%cmp = icmp eq i32 %argc, 2
br i1 %cmp, label %cond.true, label %cond.end
cond.true: ; preds = %entry
; Load the pointer stored at index 1 of %argv and convert it with atoi.
%arrayidx = getelementptr inbounds ptr, ptr %argv, i32 1
%0 = load ptr, ptr %arrayidx, align 4
%call = tail call i32 @atoi(ptr %0)
br label %cond.end
cond.end: ; preds = %entry, %cond.true
; %cond is the atoi result, or the constant 46 when arriving from entry.
%cond = phi i32 [ %call, %cond.true ], [ 46, %entry ]
; %3 is %cond raised to the 5th power, built as %cond * (%cond * %cond)^2.
%1 = mul i32 %cond, %cond
%2 = mul i32 %1, %1
%3 = mul i32 %cond, %2
; %spec.select is %cond when it is positive and 0 otherwise.
%4 = icmp sgt i32 %cond, 0
%spec.select = select i1 %4, i32 %cond, i32 0
%5 = mul i32 %3, %spec.select
; NOTE(review) attribute group #4 is referenced on this call but is not defined
; in the visible part of the file. Confirm this is intentional.
%call33 = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str, i32 %5) #4
ret i32 0
}
; @test_optsize has the same body as @test_minsize but uses attribute group #1
; (optsize only). Both check prefixes expect the same output here, namely a
; cond.end.thread block whose phis receive the constants 205962976 and 46 on
; the edge from entry. 205962976 is 46 to the 5th power.
define i32 @test_optsize(i32 %argc, ptr nocapture readonly %argv) local_unnamed_addr #1 {
; DEFAULT-LABEL: @test_optsize(
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[CMP:%.*]] = icmp eq i32 [[ARGC:%.*]], 2
; DEFAULT-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_END_THREAD:%.*]]
; DEFAULT: cond.end:
; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARGV:%.*]], i32 1
; DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 4
; DEFAULT-NEXT: [[CALL:%.*]] = tail call i32 @atoi(ptr [[TMP0]])
; DEFAULT-NEXT: [[TMP1:%.*]] = mul i32 [[CALL]], [[CALL]]
; DEFAULT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
; DEFAULT-NEXT: [[TMP3:%.*]] = mul i32 [[CALL]], [[TMP2]]
; DEFAULT-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[CALL]], 0
; DEFAULT-NEXT: [[COND_FR:%.*]] = freeze i1 [[TMP4]]
; DEFAULT-NEXT: br i1 [[COND_FR]], label [[TMP5:%.*]], label [[COND_END_THREAD]]
; DEFAULT: 5:
; DEFAULT-NEXT: br label [[COND_END_THREAD]]
; DEFAULT: cond.end.thread:
; DEFAULT-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP3]], [[COND_END]] ], [ [[TMP3]], [[TMP5]] ], [ 205962976, [[ENTRY:%.*]] ]
; DEFAULT-NEXT: [[TMP7:%.*]] = phi i32 [ 0, [[COND_END]] ], [ [[CALL]], [[TMP5]] ], [ 46, [[ENTRY]] ]
; DEFAULT-NEXT: [[TMP8:%.*]] = mul i32 [[TMP6]], [[TMP7]]
; DEFAULT-NEXT: [[CALL33:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str, i32 [[TMP8]])
; DEFAULT-NEXT: ret i32 0
;
; OVERIDE-LABEL: @test_optsize(
; OVERIDE-NEXT: entry:
; OVERIDE-NEXT: [[CMP:%.*]] = icmp eq i32 [[ARGC:%.*]], 2
; OVERIDE-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_END_THREAD:%.*]]
; OVERIDE: cond.end:
; OVERIDE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARGV:%.*]], i32 1
; OVERIDE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 4
; OVERIDE-NEXT: [[CALL:%.*]] = tail call i32 @atoi(ptr [[TMP0]])
; OVERIDE-NEXT: [[TMP1:%.*]] = mul i32 [[CALL]], [[CALL]]
; OVERIDE-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
; OVERIDE-NEXT: [[TMP3:%.*]] = mul i32 [[CALL]], [[TMP2]]
; OVERIDE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[CALL]], 0
; OVERIDE-NEXT: [[COND_FR:%.*]] = freeze i1 [[TMP4]]
; OVERIDE-NEXT: br i1 [[COND_FR]], label [[TMP5:%.*]], label [[COND_END_THREAD]]
; OVERIDE: 5:
; OVERIDE-NEXT: br label [[COND_END_THREAD]]
; OVERIDE: cond.end.thread:
; OVERIDE-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP3]], [[COND_END]] ], [ [[TMP3]], [[TMP5]] ], [ 205962976, [[ENTRY:%.*]] ]
; OVERIDE-NEXT: [[TMP7:%.*]] = phi i32 [ 0, [[COND_END]] ], [ [[CALL]], [[TMP5]] ], [ 46, [[ENTRY]] ]
; OVERIDE-NEXT: [[TMP8:%.*]] = mul i32 [[TMP6]], [[TMP7]]
; OVERIDE-NEXT: [[CALL33:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str, i32 [[TMP8]])
; OVERIDE-NEXT: ret i32 0
;
entry:
; Only when %argc equals 2 is the value read from the argument vector.
%cmp = icmp eq i32 %argc, 2
br i1 %cmp, label %cond.true, label %cond.end
cond.true: ; preds = %entry
; Load the pointer stored at index 1 of %argv and convert it with atoi.
%arrayidx = getelementptr inbounds ptr, ptr %argv, i32 1
%0 = load ptr, ptr %arrayidx, align 4
%call = tail call i32 @atoi(ptr %0)
br label %cond.end
cond.end: ; preds = %entry, %cond.true
; %cond is the atoi result, or the constant 46 when arriving from entry.
%cond = phi i32 [ %call, %cond.true ], [ 46, %entry ]
; %3 is %cond raised to the 5th power, built as %cond * (%cond * %cond)^2.
%1 = mul i32 %cond, %cond
%2 = mul i32 %1, %1
%3 = mul i32 %cond, %2
; %spec.select is %cond when it is positive and 0 otherwise.
%4 = icmp sgt i32 %cond, 0
%spec.select = select i1 %4, i32 %cond, i32 0
%5 = mul i32 %3, %spec.select
; NOTE(review) attribute group #4 is referenced on this call but is not defined
; in the visible part of the file. Confirm this is intentional.
%call33 = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str, i32 %5) #4
ret i32 0
}
; External functions called from the two test functions.
declare i32 @atoi(ptr nocapture) local_unnamed_addr
declare i32 @printf(ptr nocapture readonly, ...) local_unnamed_addr
; Group #0 is attached to @test_minsize and group #1 to @test_optsize. The two
; groups differ only in the presence of minsize.
attributes #0 = { minsize optsize }
attributes #1 = { optsize }