llvm-project/polly/test/ScopInfo/reduction_if.ll
Karthika Devi C 1e5334bcda
[Polly] Data flow reduction detection to cover more cases (#84901)
The base concept is same as existing reduction algorithm where we get
the list of candidate pairs <store,load>. But the existing algorithm
works only if there is single binary operation between the load and
store.
Example sum += a[i];

This algorithm extends to work with more than single binary operation as
well. It is implemented using data flow reduction detection on basic
block level. We propagate the loads, the number of times the load is
used(flows into instruction) and binary operation performed until we
reach a store.

Example sum += a[i] + b[i];
```
sum(Ld)     a[i](Ld)
      \  +  /
        tmp    b[i](Ld)
           \ + /
            sum(St)
```

In the above case the candidate pairs are formed by associating sum with
all of its load inputs which are sum, a[i] and b[i]. Then check
functions are used to filter a valid reduction pair ie {sum,sum}.

---------

Co-authored-by: Michael Kruse <github@meinersbur.de>
2024-07-30 09:43:24 -07:00

53 lines
1.9 KiB
LLVM

; RUN: opt %loadPolly -basic-aa -polly-print-scops -disable-output -polly-allow-nonaffine < %s | FileCheck %s
;
; Verify if reduction spread across multiple blocks in a single scop statement are detected
;
; CHECK: Stmt_for_body
; CHECK: Reduction Type: +
; CHECK-NEXT: MemRef_sum[0]
; CHECK: Reduction Type: +
; CHECK-NEXT: MemRef_sum[0]
;
; void f(int*__restrict A, int*__restrict B, int *sum) {
; for (int i = 0; i < 4444; ++i) {
; if (B[i])
; *sum += A[i];
; }
; }
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
define dso_local void @f(ptr noalias nocapture noundef readonly %A, ptr noalias nocapture noundef readonly %B, ptr nocapture noundef %sum) local_unnamed_addr #0 {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body
for.cond.cleanup: ; preds = %for.inc
ret void
for.body: ; preds = %entry.split, %for.inc
%indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.inc ]
%arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%0 = load i32, ptr %arrayidx
%tobool.not = icmp eq i32 %0, 0
br i1 %tobool.not, label %for.inc, label %if.then
if.then: ; preds = %for.body
%arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%1 = load i32, ptr %arrayidx2
%2 = load i32, ptr %sum
%add = add nsw i32 %2, %1
store i32 %add, ptr %sum
br label %for.inc
for.inc: ; preds = %for.body, %if.then
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, 4444
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}