
I tried to add a limit to number of blocks visited in the paths() function but even with a very high limit the transformation coverage was being reduced. After looking at the code it seemed that the function was trying to create paths of the form `SwitchBB...DeterminatorBB...SwitchPredecessor`. This is inefficient because a lot of nodes in those paths (nodes before DeterminatorBB) would be irrelevant to the optimization. We only care about paths of the form `DeterminatorBB_Pred DeterminatorBB...SwitchBB`. This weeds out a lot of visited nodes. In this patch I have added a hard limit to the number of nodes visited and changed the algorithm for path calculation. Primarily I am traversing the use-def chain for the PHI nodes that define the state. If we have a hole in the use-def chain (no immediate predecessors) then I call the paths() function. I also had to the change the select instruction unfolding code to insert redundant one input PHIs to allow the use of the use-def chain in calculating the paths. The test suite coverage with this patch (including a limit on nodes visited) is as follows: Geomean diff: dfa-jump-threading.NumTransforms: +13.4% dfa-jump-threading.NumCloned: +34.1% dfa-jump-threading.NumPaths: -80.7% Compile time effect vs baseline (pass enabled by default) is mostly positive: https://llvm-compile-time-tracker.com/compare.php?from=ad8705fda25f64dcfeb6264ac4d6bac36bee91ab&to=5a3af6ce7e852f0736f706b4a8663efad5bce6ea&stat=instructions:u Change-Id: I0fba9e0f8aa079706f633089a8ccd4ecf57547ed
57 lines
1.5 KiB
LLVM
57 lines
1.5 KiB
LLVM
; REQUIRES: asserts
|
|
; RUN: opt -S -passes=dfa-jump-threading -dfa-max-path-length=3 %s \
|
|
; RUN: 2>&1 -disable-output -debug-only=dfa-jump-threading | FileCheck %s
|
|
; RUN: opt -S -passes=dfa-jump-threading -dfa-max-num-visited-paths=3 %s \
|
|
; RUN: 2>&1 -disable-output -debug-only=dfa-jump-threading | FileCheck %s
|
|
|
|
; Make the path
|
|
; < case1 case1.1 case1.2 case1.3 case1.4 for.inc for.body > [ 3, case1 ]
|
|
; too long so that it is not jump-threaded.
|
|
define i32 @max_path_length(i32 %num) {
|
|
; CHECK-NOT: 3, case1
|
|
; CHECK: < case2 for.inc for.body > [ 1, for.inc ]
|
|
; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ]
|
|
; CHECK-NEXT: < case2 sel.si.unfold.false for.inc for.body > [ 2, sel.si.unfold.false ]
|
|
; CHECK-NEXT: DFA-JT: Renaming non-local uses of:
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
|
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
|
|
switch i32 %state, label %for.inc [
|
|
i32 1, label %case1
|
|
i32 2, label %case2
|
|
]
|
|
|
|
case1:
|
|
%case1.state.next = phi i32 [ 3, %for.body ]
|
|
br label %case1.1
|
|
|
|
case1.1:
|
|
br label %case1.2
|
|
|
|
case1.2:
|
|
br label %case1.3
|
|
|
|
case1.3:
|
|
br label %case1.4
|
|
|
|
case1.4:
|
|
br label %for.inc
|
|
|
|
case2:
|
|
%cmp = icmp eq i32 %count, 50
|
|
%sel = select i1 %cmp, i32 1, i32 2
|
|
br label %for.inc
|
|
|
|
for.inc:
|
|
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ %case1.state.next, %case1.4 ]
|
|
%inc = add nsw i32 %count, 1
|
|
%cmp.exit = icmp slt i32 %inc, %num
|
|
br i1 %cmp.exit, label %for.body, label %for.end
|
|
|
|
for.end:
|
|
ret i32 0
|
|
}
|