
LoopInterchange currently stop and exit its process when the number of loads/stores in the loop is greater than `MaxMemInstrCount`. This prevents excessive querying to DependenceAnalysis. However, computing `CacheCost` also involves DependenceAnalysis queries, and their number can grow to `O(N^2)` in the worst case, where `N` is the number of loads/stores in the loop. Therefore, we should also avoid calculating it if the loads/stores count exceeds `MaxMemInstrCount`. This patch defers the calculation of `CacheCost` until it is actually needed to reduce compile time. This avoids computing `CacheCost` when the number of loads/stores is large. Additionally, since this patch delays its calculation as much as possible, it is also effective in other scenarios, e.g., when there are no legal loop pairs to exchange.
78 lines
2.0 KiB
LLVM
78 lines
2.0 KiB
LLVM
; REQUIRES: asserts
|
|
|
|
; RUN: opt -passes=loop-interchange -debug -disable-output %s 2>&1 | FileCheck %s
|
|
|
|
@A = global [16 x [16 x i32]] zeroinitializer
|
|
|
|
; Check that the CacheCost is calculated only when required. In this case, it
|
|
; is computed after passing the legality check.
|
|
;
|
|
; for (i = 0; i < 16; i++)
|
|
; for (j = 0; j < 16; j++)
|
|
; A[j][i] += 1;
|
|
|
|
; CHECK: Loops are legal to interchange
|
|
; CHECK: Compute CacheCost
|
|
define void @legal_to_interchange() {
|
|
entry:
|
|
br label %for.i.header
|
|
|
|
for.i.header:
|
|
%i = phi i32 [ 0, %entry ], [ %i.next, %for.i.latch ]
|
|
br label %for.j
|
|
|
|
for.j:
|
|
%j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ]
|
|
%idx = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 0, i32 %j, i32 %i
|
|
%val = load i32, ptr %idx
|
|
%inc = add i32 %val, 1
|
|
store i32 %inc, ptr %idx
|
|
%j.next = add i32 %j, 1
|
|
%j.exit = icmp eq i32 %j.next, 16
|
|
br i1 %j.exit, label %for.i.latch, label %for.j
|
|
|
|
for.i.latch:
|
|
%i.next = add i32 %i, 1
|
|
%i.exit = icmp eq i32 %i.next, 16
|
|
br i1 %i.exit, label %exit, label %for.i.header
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Check that the CacheCost is not calculated when not required. In this case,
|
|
; the legality check always fails so that we do not need to compute the
|
|
; CacheCost.
|
|
;
|
|
; for (i = 0; i < 16; i++)
|
|
; for (j = 0; j < 16; j++)
|
|
; A[j][i] = A[i][j];
|
|
|
|
; CHECK-NOT: Compute CacheCost
|
|
define void @illegal_to_interchange() {
|
|
entry:
|
|
br label %for.i.header
|
|
|
|
for.i.header:
|
|
%i = phi i32 [ 0, %entry ], [ %i.next, %for.i.latch ]
|
|
br label %for.j
|
|
|
|
for.j:
|
|
%j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ]
|
|
%idx.load = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 0, i32 %i, i32 %j
|
|
%idx.store = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 0, i32 %j, i32 %i
|
|
%val = load i32, ptr %idx.load
|
|
store i32 %val, ptr %idx.store
|
|
%j.next = add i32 %j, 1
|
|
%j.exit = icmp eq i32 %j.next, 16
|
|
br i1 %j.exit, label %for.i.latch, label %for.j
|
|
|
|
for.i.latch:
|
|
%i.next = add i32 %i, 1
|
|
%i.exit = icmp eq i32 %i.next, 16
|
|
br i1 %i.exit, label %exit, label %for.i.header
|
|
|
|
exit:
|
|
ret void
|
|
}
|