Reapply of a22d1c2225543aa9ae7882f6b1a97ee7b2c95574. Using this PR for pre-merge CI. Instead of relying on any pass manager to schedule Polly's passes, add Polly's own pipeline manager which is seen as a monolithic pass in LLVM's pass manager. Polly's former passes are now phases of the new PhaseManager component. Relying on LLVM's pass manager (the legacy as well as the New Pass Manager) to manage Polly's phases was never a good fit, for the following reasons, all of which the PhaseManager resolves: * Polly passes were modifying analysis results, in particular RegionInfo and ScopInfo. This means that there was not just one unique and "definite" analysis result, the actual result depended on which analyses ran prior, and the pass manager was not allowed to throw away cached analyses or prior SCoP optimizations would have been forgotten. The LLVM pass manager's persistence of analysis results is not contractual but designed for caching. * Polly depends on a particular execution order of passes and regions (e.g. regression tests, invalidation of consecutive SCoPs). LLVM's pass manager does not guarantee any execution order. * Polly does not completely preserve DominatorTree, RegionInfo, LoopInfo, or ScalarEvolution, but only as-needed for Polly's own uses. Because the ScopDetection object stores references to those analyses, it still had to lie to the pass manager that they would be preserved, or the pass manager would have released and recomputed the invalidated analysis objects that ScopDetection/ScopInfo was still referencing. To ensure that no non-Polly pass would see these not-completely-preserved analyses, all analyses still had to be thrown away after the ScopPassManager, respectively with a BarrierNoopPass in case of the LPM. * The NPM's PassInstrumentation wraps the IR unit into an `llvm::Any` object, but implementations such as PrintIRInstrumentation call llvm_unreachable on encountering an unknown IR unit, such as SCoPs, with no extension points to add support.
Hence LLVM crashes when dumping IR between SCoP passes (such as `-print-before-changed` with Polly being active). The new PhaseManager uses some command line options that previously belonged to Polly's legacy passes, such as `-polly-print-detect` (so the option will continue to work). Hence the LPM support is incompatible with the new approach and support for it is removed.
87 lines
3.6 KiB
LLVM
87 lines
3.6 KiB
LLVM
; RUN: opt %loadNPMPolly '-passes=polly<no-default-opts>' -polly-codegen-perf-monitoring -S < %s | FileCheck %s
|
|
|
|
; void f(long A[], long N) {
|
|
; long i;
|
|
; if (true)
|
|
; for (i = 0; i < N; ++i)
|
|
; A[i] = i;
|
|
; }
|
|
|
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; Test input function: writes the loop counter into consecutive i64 slots of
; %A, i.e. A[i] = i, stopping once the incremented counter equals %N.
;   %A - base pointer of the i64 array being written.
;   %N - value the incremented counter is compared against to leave the loop.
; Note that the store executes before the exit test, so the loop body runs at
; least once regardless of %N.
define void @f(ptr %A, i64 %N) nounwind {
|
|
entry:
|
|
; NOTE(review): the fences here and in %return presumably keep these blocks
; outside the region Polly detects as a SCoP -- confirm.
fence seq_cst
|
|
br label %next
|
|
|
|
next:
|
|
; Constant-true condition: control always continues to %for.i (the `if (true)`
; of the C sketch above).
br i1 true, label %for.i, label %return
|
|
|
|
for.i:
|
|
; Loop counter: 0 when entered from %next, otherwise the incremented value
; from the previous iteration.
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
|
|
; Address of element %indvar of %A (i64 elements).
%scevgep = getelementptr i64, ptr %A, i64 %indvar
|
|
store i64 %indvar, ptr %scevgep
|
|
%indvar.next = add nsw i64 %indvar, 1
|
|
; Leave the loop when the incremented counter equals %N.
%exitcond = icmp eq i64 %indvar.next, %N
|
|
br i1 %exitcond, label %return, label %for.i
|
|
|
|
return:
|
|
fence seq_cst
|
|
ret void
|
|
}
|
|
|
|
; CHECK: @__polly_perf_cycles_total_start = weak thread_local(initialexec) constant i64 0
|
|
; CHECK-NEXT: @__polly_perf_initialized = weak thread_local(initialexec) constant i1 false
|
|
; CHECK-NEXT: @__polly_perf_cycles_in_scops = weak thread_local(initialexec) constant i64 0
|
|
; CHECK-NEXT: @__polly_perf_cycles_in_scop_start = weak thread_local(initialexec) constant i64 0
|
|
|
|
; CHECK: polly.split_new_and_old: ; preds = %entry
|
|
; CHECK-NEXT: %0 = call { i64, i32 } @llvm.x86.rdtscp()
|
|
; CHECK-NEXT: %1 = extractvalue { i64, i32 } %0, 0
|
|
; CHECK-NEXT: store volatile i64 %1, ptr @__polly_perf_cycles_in_scop_start
|
|
|
|
; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
|
|
; CHECK-NEXT: %6 = load volatile i64, ptr @__polly_perf_cycles_in_scop_start
|
|
; CHECK-NEXT: %7 = call { i64, i32 } @llvm.x86.rdtscp()
|
|
; CHECK-NEXT: %8 = extractvalue { i64, i32 } %7, 0
|
|
; CHECK-NEXT: %9 = sub i64 %8, %6
|
|
; CHECK-NEXT: %10 = load volatile i64, ptr @__polly_perf_cycles_in_scops
|
|
; CHECK-NEXT: %11 = add i64 %10, %9
|
|
; CHECK-NEXT: store volatile i64 %11, ptr @__polly_perf_cycles_in_scops
|
|
|
|
|
|
; CHECK: define weak_odr void @__polly_perf_final() {
|
|
; CHECK-NEXT: start:
|
|
; CHECK-NEXT: %0 = call { i64, i32 } @llvm.x86.rdtscp()
|
|
; CHECK-NEXT: %1 = extractvalue { i64, i32 } %0, 0
|
|
; CHECK-NEXT: %2 = load volatile i64, ptr @__polly_perf_cycles_total_start
|
|
; CHECK-NEXT: %3 = sub i64 %1, %2
|
|
; CHECK-NEXT: %4 = load volatile i64, ptr @__polly_perf_cycles_in_scops
|
|
; CHECK-NEXT: %5 = call i32 (...) @printf(ptr @1, ptr addrspace(4) @0)
|
|
; CHECK-NEXT: %6 = call i32 @fflush(ptr null)
|
|
; CHECK-NEXT: %7 = call i32 (...) @printf(ptr @3, ptr addrspace(4) @2)
|
|
; CHECK-NEXT: %8 = call i32 @fflush(ptr null)
|
|
; CHECK-NEXT: %9 = call i32 (...) @printf(ptr @6, ptr addrspace(4) @4, i64 %3, ptr addrspace(4) @5)
|
|
; CHECK-NEXT: %10 = call i32 @fflush(ptr null)
|
|
; CHECK-NEXT: %11 = call i32 (...) @printf(ptr @9, ptr addrspace(4) @7, i64 %4, ptr addrspace(4) @8)
|
|
; CHECK-NEXT: %12 = call i32 @fflush(ptr null)
|
|
|
|
|
|
; CHECK: define weak_odr void @__polly_perf_init() {
|
|
; CHECK-NEXT: start:
|
|
; CHECK-NEXT: %0 = load i1, ptr @__polly_perf_initialized
|
|
; CHECK-NEXT: br i1 %0, label %earlyreturn, label %initbb
|
|
|
|
; CHECK: earlyreturn: ; preds = %start
|
|
; CHECK-NEXT: ret void
|
|
|
|
; CHECK: initbb: ; preds = %start
|
|
; CHECK-NEXT: store i1 true, ptr @__polly_perf_initialized
|
|
; CHECK-NEXT: %1 = call i32 @atexit(ptr @__polly_perf_final)
|
|
; CHECK-NEXT: %2 = call { i64, i32 } @llvm.x86.rdtscp()
|
|
; CHECK-NEXT: %3 = extractvalue { i64, i32 } %2, 0
|
|
; CHECK-NEXT: store volatile i64 %3, ptr @__polly_perf_cycles_total_start
|
|
; CHECK-NEXT: ret void
|
|
; CHECK-NEXT: }
|