
I noticed that the two C functions emitted different IR: ``` int switch_duplicate_arms(int switch_val, int v, int w) { switch (switch_val) { default: break; case 0: w = v; break; case 1: w = v; break; } return w; } int if_duplicate_arms(int switch_val, int v, int w) { if (switch_val == 0) w = v; else if (switch_val == 1) w = v; return v0; } ``` We generate IR that looks like this: ``` define i32 @switch_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { switch i32 %1, label %7 [ i32 0, label %5 i32 1, label %6 ] 5: br label %7 6: br label %7 7: %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ] ret i32 %8 } define i32 @if_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { %5 = icmp ult i32 %1, 2 %6 = select i1 %5, i32 %2, i32 %3 ret i32 %6 } ``` For `switch_duplicate_arms`, taking case 0 and 1 are the same since %5 and %6 branch to the same location and the incoming values for %8 are the same from those blocks. We could remove one on the duplicate switch targets and update the switch with the single target. On RISC-V, prior to this patch, we generate the following code: ``` switch_duplicate_arms: li a4, 1 beq a1, a4, .LBB0_2 mv a0, a3 bnez a1, .LBB0_3 .LBB0_2: mv a0, a2 .LBB0_3: ret if_duplicate_arms: li a4, 2 mv a0, a2 bltu a1, a4, .LBB1_2 mv a0, a3 .LBB1_2: ret ``` After this patch, the O3 code is optimized to the icmp + select pair, which gives us the same code gen as `if_duplicate_arms`, as desired. This results is one less branch instruction in the final assembly. This may help with both code size and further switch simplification. I found that this patch causes no significant impact to spec2006/int/ref and spec2017/intrate/ref. --------- Co-authored-by: Min Hsu <min@myhsu.dev>
306 lines
7.3 KiB
LLVM
306 lines
7.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -passes=simplifycfg -hoist-common-insts=true -S | FileCheck %s
|
|
; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -hoist-common-insts=true -S | FileCheck %s
|
|
|
|
define void @foo(i1 %C, ptr %P) {
|
|
; CHECK-LABEL: @foo(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: store i32 7, ptr [[P:%.*]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T: ; preds = %0
|
|
store i32 7, ptr %P
|
|
ret void
|
|
F: ; preds = %0
|
|
store i32 7, ptr %P
|
|
ret void
|
|
}
|
|
|
|
define void @foo_switch(i64 %C, ptr %P) {
|
|
; CHECK-LABEL: @foo_switch(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: store i32 7, ptr [[P:%.*]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
switch i64 %C, label %bb0 [
|
|
i64 1, label %bb1
|
|
i64 2, label %bb2
|
|
]
|
|
bb0: ; preds = %0
|
|
store i32 7, ptr %P
|
|
ret void
|
|
bb1: ; preds = %0
|
|
store i32 7, ptr %P
|
|
ret void
|
|
bb2: ; preds = %0
|
|
store i32 7, ptr %P
|
|
ret void
|
|
}
|
|
|
|
define float @PR39535min(float %x) {
|
|
; CHECK-LABEL: @PR39535min(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
|
|
; CHECK-NEXT: [[DOTX:%.*]] = select fast i1 [[TOBOOL]], float 0.000000e+00, float [[X]]
|
|
; CHECK-NEXT: ret float [[DOTX]]
|
|
;
|
|
entry:
|
|
%tobool = fcmp une float %x, 0.0
|
|
br i1 %tobool, label %cond.true, label %cond.false
|
|
|
|
cond.true:
|
|
br label %cond.end
|
|
|
|
cond.false:
|
|
br label %cond.end
|
|
|
|
cond.end:
|
|
%cond = phi fast float [ 0.0, %cond.true ], [ %x, %cond.false ]
|
|
ret float %cond
|
|
}
|
|
|
|
define float @PR39535min_switch(i64 %i, float %x) {
|
|
; CHECK-LABEL: @PR39535min_switch(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: switch i64 [[I:%.*]], label [[END:%.*]] [
|
|
; CHECK-NEXT: i64 1, label [[BB1:%.*]]
|
|
; CHECK-NEXT: i64 2, label [[BB1]]
|
|
; CHECK-NEXT: ]
|
|
; CHECK: bb1:
|
|
; CHECK-NEXT: br label [[END]]
|
|
; CHECK: end:
|
|
; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: ret float [[COND]]
|
|
;
|
|
entry:
|
|
switch i64 %i, label %bb0 [
|
|
i64 1, label %bb1
|
|
i64 2, label %bb2
|
|
]
|
|
|
|
bb0:
|
|
br label %end
|
|
|
|
bb1:
|
|
br label %end
|
|
|
|
bb2:
|
|
br label %end
|
|
|
|
end:
|
|
%cond = phi fast float [ 0.0, %bb0 ], [ %x, %bb1 ], [ %x, %bb2 ]
|
|
ret float %cond
|
|
}
|
|
|
|
define i32 @hoist_zext_flags_preserve(i1 %C, i8 %x) {
|
|
; CHECK-LABEL: @hoist_zext_flags_preserve(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[Z1:%.*]] = zext nneg i8 [[X:%.*]] to i32
|
|
; CHECK-NEXT: ret i32 [[Z1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%z1 = zext nneg i8 %x to i32
|
|
ret i32 %z1
|
|
F:
|
|
%z2 = zext nneg i8 %x to i32
|
|
ret i32 %z2
|
|
}
|
|
|
|
define i32 @hoist_zext_flags_drop(i1 %C, i8 %x) {
|
|
; CHECK-LABEL: @hoist_zext_flags_drop(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[X:%.*]] to i32
|
|
; CHECK-NEXT: ret i32 [[Z1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%z1 = zext nneg i8 %x to i32
|
|
ret i32 %z1
|
|
F:
|
|
%z2 = zext i8 %x to i32
|
|
ret i32 %z2
|
|
}
|
|
|
|
|
|
define float @hoist_uitofp_flags_preserve(i1 %C, i8 %x) {
|
|
; CHECK-LABEL: @hoist_uitofp_flags_preserve(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[Z1:%.*]] = uitofp nneg i8 [[X:%.*]] to float
|
|
; CHECK-NEXT: ret float [[Z1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%z1 = uitofp nneg i8 %x to float
|
|
ret float %z1
|
|
F:
|
|
%z2 = uitofp nneg i8 %x to float
|
|
ret float %z2
|
|
}
|
|
|
|
define float @hoist_uitofp_flags_drop(i1 %C, i8 %x) {
|
|
; CHECK-LABEL: @hoist_uitofp_flags_drop(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[Z1:%.*]] = uitofp i8 [[X:%.*]] to float
|
|
; CHECK-NEXT: ret float [[Z1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%z1 = uitofp nneg i8 %x to float
|
|
ret float %z1
|
|
F:
|
|
%z2 = uitofp i8 %x to float
|
|
ret float %z2
|
|
}
|
|
|
|
define i32 @hoist_or_flags_preserve(i1 %C, i32 %x, i32 %y) {
|
|
; CHECK-LABEL: @hoist_or_flags_preserve(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[Z1:%.*]] = or disjoint i32 [[X:%.*]], [[Y:%.*]]
|
|
; CHECK-NEXT: ret i32 [[Z1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%z1 = or disjoint i32 %x, %y
|
|
ret i32 %z1
|
|
F:
|
|
%z2 = or disjoint i32 %x, %y
|
|
ret i32 %z2
|
|
}
|
|
|
|
define i32 @hoist_or_flags_drop(i1 %C, i32 %x, i32 %y) {
|
|
; CHECK-LABEL: @hoist_or_flags_drop(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[Z1:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
|
|
; CHECK-NEXT: ret i32 [[Z1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%z1 = or i32 %x, %y
|
|
ret i32 %z1
|
|
F:
|
|
%z2 = or disjoint i32 %x, %y
|
|
ret i32 %z2
|
|
}
|
|
|
|
define i16 @hoist_trunc_flags_preserve(i1 %C, i32 %x) {
|
|
; CHECK-LABEL: @hoist_trunc_flags_preserve(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[Z1:%.*]] = trunc nuw nsw i32 [[X:%.*]] to i16
|
|
; CHECK-NEXT: ret i16 [[Z1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%z1 = trunc nsw nuw i32 %x to i16
|
|
ret i16 %z1
|
|
F:
|
|
%z2 = trunc nsw nuw i32 %x to i16
|
|
ret i16 %z2
|
|
}
|
|
|
|
define i16 @hoist_trunc_flags_drop(i1 %C, i32 %x) {
|
|
; CHECK-LABEL: @hoist_trunc_flags_drop(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[Z1:%.*]] = trunc i32 [[X:%.*]] to i16
|
|
; CHECK-NEXT: ret i16 [[Z1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%z1 = trunc i32 %x to i16
|
|
ret i16 %z1
|
|
F:
|
|
%z2 = trunc nsw nuw i32 %x to i16
|
|
ret i16 %z2
|
|
}
|
|
|
|
define ptr @hoist_gep_flags_both_nuw(i1 %C, ptr %p) {
|
|
; CHECK-LABEL: @hoist_gep_flags_both_nuw(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nuw i8, ptr [[P:%.*]], i64 1
|
|
; CHECK-NEXT: ret ptr [[GEP1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%gep1 = getelementptr nuw i8, ptr %p, i64 1
|
|
ret ptr %gep1
|
|
F:
|
|
%gep2 = getelementptr nuw i8, ptr %p, i64 1
|
|
ret ptr %gep2
|
|
}
|
|
|
|
define ptr @hoist_gep_flags_both_nusw(i1 %C, ptr %p) {
|
|
; CHECK-LABEL: @hoist_gep_flags_both_nusw(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nusw i8, ptr [[P:%.*]], i64 1
|
|
; CHECK-NEXT: ret ptr [[GEP1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%gep1 = getelementptr nusw i8, ptr %p, i64 1
|
|
ret ptr %gep1
|
|
F:
|
|
%gep2 = getelementptr nusw i8, ptr %p, i64 1
|
|
ret ptr %gep2
|
|
}
|
|
|
|
define ptr @hoist_gep_flags_intersect1(i1 %C, ptr %p) {
|
|
; CHECK-LABEL: @hoist_gep_flags_intersect1(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nusw i8, ptr [[P:%.*]], i64 1
|
|
; CHECK-NEXT: ret ptr [[GEP1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%gep1 = getelementptr inbounds nuw i8, ptr %p, i64 1
|
|
ret ptr %gep1
|
|
F:
|
|
%gep2 = getelementptr nusw i8, ptr %p, i64 1
|
|
ret ptr %gep2
|
|
}
|
|
|
|
define ptr @hoist_gep_flags_intersect2(i1 %C, ptr %p) {
|
|
; CHECK-LABEL: @hoist_gep_flags_intersect2(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
|
|
; CHECK-NEXT: ret ptr [[GEP1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%gep1 = getelementptr inbounds i8, ptr %p, i64 1
|
|
ret ptr %gep1
|
|
F:
|
|
%gep2 = getelementptr nuw i8, ptr %p, i64 1
|
|
ret ptr %gep2
|
|
}
|
|
|
|
define i1 @hoist_icmp_flags_preserve(i1 %C, i32 %x, i32 %y) {
|
|
; CHECK-LABEL: @hoist_icmp_flags_preserve(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[Z1:%.*]] = icmp samesign ult i32 [[X:%.*]], [[Y:%.*]]
|
|
; CHECK-NEXT: ret i1 [[Z1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%z1 = icmp samesign ult i32 %x, %y
|
|
ret i1 %z1
|
|
F:
|
|
%z2 = icmp samesign ult i32 %x, %y
|
|
ret i1 %z2
|
|
}
|
|
|
|
define i1 @hoist_icmp_flags_drop(i1 %C, i32 %x, i32 %y) {
|
|
; CHECK-LABEL: @hoist_icmp_flags_drop(
|
|
; CHECK-NEXT: common.ret:
|
|
; CHECK-NEXT: [[Z1:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
|
|
; CHECK-NEXT: ret i1 [[Z1]]
|
|
;
|
|
br i1 %C, label %T, label %F
|
|
T:
|
|
%z1 = icmp ult i32 %x, %y
|
|
ret i1 %z1
|
|
F:
|
|
%z2 = icmp samesign ult i32 %x, %y
|
|
ret i1 %z2
|
|
}
|