
This patch is an extension to #115128. After profiling the LLVM test-suite, I see many loop nests whose depth exceeds `MaxLoopNestDepth`, which is 10. Exiting early for them saves compile time because it avoids computing DependenceInfo and CacheCost. Please see the 'bound-max-depth' branch on compile-time-tracker.
66 lines
2.1 KiB
LLVM
66 lines
2.1 KiB
LLVM
; REQUIRES: asserts

; Run only the loop-interchange pass with debug output enabled, fold stderr
; into stdout, and let FileCheck verify the combined stream. No IR is written
; (-disable-output); only the debug trace is inspected.
; RUN: opt < %s -passes=loop-interchange -debug -disable-output 2>&1 | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"

; @N is the loop bound read by @foo. @a, @b and @c hold the base pointers that
; @foo loads and indexes inside its loop body.
@N = dso_local global i32 0, align 4
@a = dso_local global ptr null, align 8
@b = dso_local global ptr null, align 8
@c = dso_local global ptr null, align 8

; Loop interchange should not run delinearization for the single-loop case;
; it should bail out early instead.
;
; The three negative patterns below must not occur in the output ahead of the
; bail-out diagnostic matched by the final positive pattern.

; CHECK-NOT: Delinearizing
; CHECK-NOT: Strides:
; CHECK-NOT: Terms:
; CHECK: Unsupported depth of loop nest 1, the supported range is [2, 10].

; @foo: a single, non-nested loop kept in unoptimized, alloca-based form
; (the induction variable lives in the stack slot %i and is reloaded on each
; use; there are no PHI nodes). In C-like pseudo-code:
;
;   for (i = 0; i <u N; ++i)
;     a[i] = b[i] + c[i];
;
; The loop nest depth is 1, which is the property this test relies on.
define void @foo() {
entry:
  ; %retval is initialised below but never read anywhere in this function.
  %retval = alloca i32, align 4
  ; Stack slot for the induction variable i.
  %i = alloca i32, align 4
  store i32 0, ptr %retval, align 4
  store i32 0, ptr %i, align 4
  br label %for.cond

; Loop header: continue while i <u N (unsigned comparison).
for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, ptr %i, align 4
  %1 = load i32, ptr @N, align 4
  %cmp = icmp ult i32 %0, %1
  br i1 %cmp, label %for.body, label %for.cond.cleanup

; Loop exit: forwards straight to the return block.
for.cond.cleanup:                                 ; preds = %for.cond
  br label %for.end

; Loop body: load b[i] and c[i], add them, store the sum to a[i].
; The base pointers are reloaded from @b, @c and @a on every iteration.
for.body:                                         ; preds = %for.cond
  %2 = load ptr, ptr @b, align 8
  %3 = load i32, ptr %i, align 4
  %idxprom = zext i32 %3 to i64
  %arrayidx = getelementptr inbounds nuw i32, ptr %2, i64 %idxprom
  %4 = load i32, ptr %arrayidx, align 4
  %5 = load ptr, ptr @c, align 8
  %6 = load i32, ptr %i, align 4
  %idxprom1 = zext i32 %6 to i64
  %arrayidx2 = getelementptr inbounds nuw i32, ptr %5, i64 %idxprom1
  %7 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %4, %7
  %8 = load ptr, ptr @a, align 8
  %9 = load i32, ptr %i, align 4
  %idxprom3 = zext i32 %9 to i64
  %arrayidx4 = getelementptr inbounds nuw i32, ptr %8, i64 %idxprom3
  store i32 %add, ptr %arrayidx4, align 4
  br label %for.inc

; Latch: i = i + 1 (no nsw/nuw flags on the increment), then back to the header.
for.inc:                                          ; preds = %for.body
  %10 = load i32, ptr %i, align 4
  %inc = add i32 %10, 1
  store i32 %inc, ptr %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond.cleanup
  ret void
}