
When flattening the loop, if the GEP was `inbounds`, it should stay `inbounds`, because the only thing that changes is how the pointers are calculated, not which elements are accessed. Proof: https://alive2.llvm.org/ce/z/dApMpQ
138 lines
4.0 KiB
LLVM
138 lines
4.0 KiB
LLVM
; RUN: opt < %s -S -passes='loop(loop-flatten),verify' -verify-loop-info -verify-dom-info -verify-scev | FileCheck %s

; Pointers are 32 bits wide here (p:32:32). The i32 GEP indices used by @test1
; and @test2 are therefore pointer-width, while the i16 indices used by @test3
; are narrower than a pointer.
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; We should be able to flatten the loops and turn the two geps into one.
;
; The nest stores 0 to A[i * N + j] for i, j in [0, N). After flattening, a
; single induction variable runs over N * N iterations and one inbounds GEP
; indexes A directly; the inbounds flag of the original GEPs must be kept.
; CHECK-LABEL: test1
define void @test1(i32 %N, ptr %A) {
entry:
  ; Guard: skip the whole nest when N == 0.
  %cmp3 = icmp ult i32 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

; CHECK-LABEL: for.outer.preheader:
; CHECK: %flatten.tripcount = mul i32 %N, %N
for.outer.preheader:
  br label %for.inner.preheader

; CHECK-LABEL: for.inner.preheader:
; CHECK: %flatten.arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
for.inner.preheader:
  ; %i is the outer induction variable.
  %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; CHECK-LABEL: for.inner:
; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
; CHECK: br label %for.outer
for.inner:
  ; %j is the inner induction variable.
  %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Two-step address computation: (A + i * N) + j.
  %mul = mul i32 %i, %N
  %gep = getelementptr inbounds i32, ptr %A, i32 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
  store i32 0, ptr %arrayidx, align 4
  %inc1 = add nuw i32 %j, 1
  %cmp2 = icmp ult i32 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

; CHECK-LABEL: for.outer:
; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
for.outer:
  %inc2 = add i32 %i, 1
  %cmp1 = icmp ult i32 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; We can flatten, but the flattened gep has to be inserted after the load it
; depends on.
;
; Same nest as @test1, except the base pointer is reloaded from %A (volatile)
; on every inner iteration. The flattened GEP uses that loaded pointer, so it
; must appear in for.inner after the load and not in for.inner.preheader.
; CHECK-LABEL: test2
define void @test2(i32 %N, ptr %A) {
entry:
  ; Guard: skip the whole nest when N == 0.
  %cmp3 = icmp ult i32 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

; CHECK-LABEL: for.outer.preheader:
; CHECK: %flatten.tripcount = mul i32 %N, %N
for.outer.preheader:
  br label %for.inner.preheader

; CHECK-LABEL: for.inner.preheader:
; CHECK-NOT: getelementptr inbounds i32, ptr %ptr, i32 %i
for.inner.preheader:
  ; %i is the outer induction variable.
  %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; CHECK-LABEL: for.inner:
; CHECK: %flatten.arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i
; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
; CHECK: br label %for.outer
for.inner:
  ; %j is the inner induction variable.
  %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Base pointer defined inside the inner loop body; every GEP below depends on it.
  %ptr = load volatile ptr, ptr %A, align 4
  ; Two-step address computation: (ptr + i * N) + j.
  %mul = mul i32 %i, %N
  %gep = getelementptr inbounds i32, ptr %ptr, i32 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
  store i32 0, ptr %arrayidx, align 4
  %inc1 = add nuw i32 %j, 1
  %cmp2 = icmp ult i32 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

; CHECK-LABEL: for.outer:
; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
for.outer:
  %inc2 = add i32 %i, 1
  %cmp1 = icmp ult i32 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; We can't flatten if the gep offset is smaller than the pointer size.
;
; Same nest shape as @test1, but the induction variables and GEP indices are
; i16 while the module datalayout declares 32-bit pointers, so the nest must be
; left unflattened: no GEP indexed directly by %i may appear and the inner loop
; must keep its conditional back-edge.
;
; The source GEPs are inbounds, so a wrongly flattened GEP would carry that flag
; as well. The negative patterns below use a regex between the opcode and the
; element type so they match the GEP with or without flags.
; CHECK-LABEL: test3
define void @test3(i16 %N, ptr %A) {
entry:
  ; Guard: skip the whole nest when N == 0.
  %cmp3 = icmp ult i16 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.inner.preheader

; CHECK-LABEL: for.inner.preheader:
; CHECK-NOT: getelementptr{{.*}} i32, ptr %A, i16 %i
for.inner.preheader:
  ; %i is the outer induction variable.
  %i = phi i16 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; CHECK-LABEL: for.inner:
; CHECK-NOT: getelementptr{{.*}} i32, ptr %A, i16 %i
; CHECK: br i1 %cmp2, label %for.inner, label %for.outer
for.inner:
  ; %j is the inner induction variable.
  %j = phi i16 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Two-step address computation with i16 indices: (A + i * N) + j.
  %mul = mul i16 %i, %N
  %gep = getelementptr inbounds i32, ptr %A, i16 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i16 %j
  store i32 0, ptr %arrayidx, align 4
  %inc1 = add nuw i16 %j, 1
  %cmp2 = icmp ult i16 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

for.outer:
  %inc2 = add i16 %i, 1
  %cmp1 = icmp ult i16 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}