Michael Maitland 6b9952759f
[SimplifyCFG] Simplify switch instruction that has duplicate arms (#114262)
I noticed that the two C functions emitted different IR:

```
int switch_duplicate_arms(int switch_val, int v, int w) {
  switch (switch_val) {
  default:
    break;
  case 0:
    w = v;
    break;
  case 1:
    w = v;
    break;
  }
  return w;
}

int if_duplicate_arms(int switch_val, int v, int w) {
  if (switch_val == 0)
    w = v;
  else if (switch_val == 1)
    w = v;
  return v0;
}
```

We generate IR that looks like this:

```
define i32 @switch_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
  switch i32 %1, label %7 [
    i32 0, label %5
    i32 1, label %6
  ]

5:
  br label %7

6:
  br label %7

7:
  %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ]
  ret i32 %8
}

define i32 @if_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
  %5 = icmp ult i32 %1, 2
  %6 = select i1 %5, i32 %2, i32 %3
  ret i32 %6
}
```

For `switch_duplicate_arms`, taking case 0 and 1 are the same since %5
and %6
branch to the same location and the incoming values for %8 are the same
from
those blocks. We could remove one on the duplicate switch targets and
update
the switch with the single target.

On RISC-V, prior to this patch, we generate the following code:
```
switch_duplicate_arms:
        li      a4, 1
        beq     a1, a4, .LBB0_2
        mv      a0, a3
        bnez    a1, .LBB0_3
.LBB0_2:
        mv      a0, a2
.LBB0_3:
        ret

if_duplicate_arms:
        li      a4, 2
        mv      a0, a2
        bltu    a1, a4, .LBB1_2
        mv      a0, a3
.LBB1_2:
        ret
```

After this patch, the O3 code is optimized to the icmp + select pair,
which
gives us the same code gen as `if_duplicate_arms`, as desired. This
results
is one less branch instruction in the final assembly.

This may help with both code size and further switch simplification. I
found
that this patch causes no significant impact to spec2006/int/ref and
spec2017/intrate/ref.

---------

Co-authored-by: Min Hsu <min@myhsu.dev>
2024-11-15 15:38:34 +01:00

306 lines
7.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=simplifycfg -hoist-common-insts=true -S | FileCheck %s
; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -hoist-common-insts=true -S | FileCheck %s
define void @foo(i1 %C, ptr %P) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: store i32 7, ptr [[P:%.*]], align 4
; CHECK-NEXT: ret void
;
br i1 %C, label %T, label %F
T: ; preds = %0
store i32 7, ptr %P
ret void
F: ; preds = %0
store i32 7, ptr %P
ret void
}
define void @foo_switch(i64 %C, ptr %P) {
; CHECK-LABEL: @foo_switch(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: store i32 7, ptr [[P:%.*]], align 4
; CHECK-NEXT: ret void
;
switch i64 %C, label %bb0 [
i64 1, label %bb1
i64 2, label %bb2
]
bb0: ; preds = %0
store i32 7, ptr %P
ret void
bb1: ; preds = %0
store i32 7, ptr %P
ret void
bb2: ; preds = %0
store i32 7, ptr %P
ret void
}
define float @PR39535min(float %x) {
; CHECK-LABEL: @PR39535min(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
; CHECK-NEXT: [[DOTX:%.*]] = select fast i1 [[TOBOOL]], float 0.000000e+00, float [[X]]
; CHECK-NEXT: ret float [[DOTX]]
;
entry:
%tobool = fcmp une float %x, 0.0
br i1 %tobool, label %cond.true, label %cond.false
cond.true:
br label %cond.end
cond.false:
br label %cond.end
cond.end:
%cond = phi fast float [ 0.0, %cond.true ], [ %x, %cond.false ]
ret float %cond
}
define float @PR39535min_switch(i64 %i, float %x) {
; CHECK-LABEL: @PR39535min_switch(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i64 [[I:%.*]], label [[END:%.*]] [
; CHECK-NEXT: i64 1, label [[BB1:%.*]]
; CHECK-NEXT: i64 2, label [[BB1]]
; CHECK-NEXT: ]
; CHECK: bb1:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret float [[COND]]
;
entry:
switch i64 %i, label %bb0 [
i64 1, label %bb1
i64 2, label %bb2
]
bb0:
br label %end
bb1:
br label %end
bb2:
br label %end
end:
%cond = phi fast float [ 0.0, %bb0 ], [ %x, %bb1 ], [ %x, %bb2 ]
ret float %cond
}
define i32 @hoist_zext_flags_preserve(i1 %C, i8 %x) {
; CHECK-LABEL: @hoist_zext_flags_preserve(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[Z1:%.*]] = zext nneg i8 [[X:%.*]] to i32
; CHECK-NEXT: ret i32 [[Z1]]
;
br i1 %C, label %T, label %F
T:
%z1 = zext nneg i8 %x to i32
ret i32 %z1
F:
%z2 = zext nneg i8 %x to i32
ret i32 %z2
}
define i32 @hoist_zext_flags_drop(i1 %C, i8 %x) {
; CHECK-LABEL: @hoist_zext_flags_drop(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[X:%.*]] to i32
; CHECK-NEXT: ret i32 [[Z1]]
;
br i1 %C, label %T, label %F
T:
%z1 = zext nneg i8 %x to i32
ret i32 %z1
F:
%z2 = zext i8 %x to i32
ret i32 %z2
}
define float @hoist_uitofp_flags_preserve(i1 %C, i8 %x) {
; CHECK-LABEL: @hoist_uitofp_flags_preserve(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[Z1:%.*]] = uitofp nneg i8 [[X:%.*]] to float
; CHECK-NEXT: ret float [[Z1]]
;
br i1 %C, label %T, label %F
T:
%z1 = uitofp nneg i8 %x to float
ret float %z1
F:
%z2 = uitofp nneg i8 %x to float
ret float %z2
}
define float @hoist_uitofp_flags_drop(i1 %C, i8 %x) {
; CHECK-LABEL: @hoist_uitofp_flags_drop(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[Z1:%.*]] = uitofp i8 [[X:%.*]] to float
; CHECK-NEXT: ret float [[Z1]]
;
br i1 %C, label %T, label %F
T:
%z1 = uitofp nneg i8 %x to float
ret float %z1
F:
%z2 = uitofp i8 %x to float
ret float %z2
}
define i32 @hoist_or_flags_preserve(i1 %C, i32 %x, i32 %y) {
; CHECK-LABEL: @hoist_or_flags_preserve(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[Z1:%.*]] = or disjoint i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i32 [[Z1]]
;
br i1 %C, label %T, label %F
T:
%z1 = or disjoint i32 %x, %y
ret i32 %z1
F:
%z2 = or disjoint i32 %x, %y
ret i32 %z2
}
define i32 @hoist_or_flags_drop(i1 %C, i32 %x, i32 %y) {
; CHECK-LABEL: @hoist_or_flags_drop(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[Z1:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i32 [[Z1]]
;
br i1 %C, label %T, label %F
T:
%z1 = or i32 %x, %y
ret i32 %z1
F:
%z2 = or disjoint i32 %x, %y
ret i32 %z2
}
define i16 @hoist_trunc_flags_preserve(i1 %C, i32 %x) {
; CHECK-LABEL: @hoist_trunc_flags_preserve(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[Z1:%.*]] = trunc nuw nsw i32 [[X:%.*]] to i16
; CHECK-NEXT: ret i16 [[Z1]]
;
br i1 %C, label %T, label %F
T:
%z1 = trunc nsw nuw i32 %x to i16
ret i16 %z1
F:
%z2 = trunc nsw nuw i32 %x to i16
ret i16 %z2
}
define i16 @hoist_trunc_flags_drop(i1 %C, i32 %x) {
; CHECK-LABEL: @hoist_trunc_flags_drop(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[Z1:%.*]] = trunc i32 [[X:%.*]] to i16
; CHECK-NEXT: ret i16 [[Z1]]
;
br i1 %C, label %T, label %F
T:
%z1 = trunc i32 %x to i16
ret i16 %z1
F:
%z2 = trunc nsw nuw i32 %x to i16
ret i16 %z2
}
define ptr @hoist_gep_flags_both_nuw(i1 %C, ptr %p) {
; CHECK-LABEL: @hoist_gep_flags_both_nuw(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nuw i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT: ret ptr [[GEP1]]
;
br i1 %C, label %T, label %F
T:
%gep1 = getelementptr nuw i8, ptr %p, i64 1
ret ptr %gep1
F:
%gep2 = getelementptr nuw i8, ptr %p, i64 1
ret ptr %gep2
}
define ptr @hoist_gep_flags_both_nusw(i1 %C, ptr %p) {
; CHECK-LABEL: @hoist_gep_flags_both_nusw(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nusw i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT: ret ptr [[GEP1]]
;
br i1 %C, label %T, label %F
T:
%gep1 = getelementptr nusw i8, ptr %p, i64 1
ret ptr %gep1
F:
%gep2 = getelementptr nusw i8, ptr %p, i64 1
ret ptr %gep2
}
define ptr @hoist_gep_flags_intersect1(i1 %C, ptr %p) {
; CHECK-LABEL: @hoist_gep_flags_intersect1(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nusw i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT: ret ptr [[GEP1]]
;
br i1 %C, label %T, label %F
T:
%gep1 = getelementptr inbounds nuw i8, ptr %p, i64 1
ret ptr %gep1
F:
%gep2 = getelementptr nusw i8, ptr %p, i64 1
ret ptr %gep2
}
define ptr @hoist_gep_flags_intersect2(i1 %C, ptr %p) {
; CHECK-LABEL: @hoist_gep_flags_intersect2(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT: ret ptr [[GEP1]]
;
br i1 %C, label %T, label %F
T:
%gep1 = getelementptr inbounds i8, ptr %p, i64 1
ret ptr %gep1
F:
%gep2 = getelementptr nuw i8, ptr %p, i64 1
ret ptr %gep2
}
define i1 @hoist_icmp_flags_preserve(i1 %C, i32 %x, i32 %y) {
; CHECK-LABEL: @hoist_icmp_flags_preserve(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[Z1:%.*]] = icmp samesign ult i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[Z1]]
;
br i1 %C, label %T, label %F
T:
%z1 = icmp samesign ult i32 %x, %y
ret i1 %z1
F:
%z2 = icmp samesign ult i32 %x, %y
ret i1 %z2
}
define i1 @hoist_icmp_flags_drop(i1 %C, i32 %x, i32 %y) {
; CHECK-LABEL: @hoist_icmp_flags_drop(
; CHECK-NEXT: common.ret:
; CHECK-NEXT: [[Z1:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[Z1]]
;
br i1 %C, label %T, label %F
T:
%z1 = icmp ult i32 %x, %y
ret i1 %z1
F:
%z2 = icmp samesign ult i32 %x, %y
ret i1 %z2
}