AtariDreams 4d1ecf1923
[Transforms] Preserve inbounds attribute of transformed GEPs when flattening loops (#86961)
When flattening the loop, if the GEP was `inbounds`, it should stay
`inbounds`, because the only thing that changes is how the pointer is
calculated, not the elements being accessed.

Proof: https://alive2.llvm.org/ce/z/dApMpQ
2024-05-15 10:26:23 +09:00

138 lines
4.0 KiB
LLVM

; RUN: opt < %s -S -passes='loop(loop-flatten),verify' -verify-loop-info -verify-dom-info -verify-scev | FileCheck %s
; The data layout below declares 32-bit pointers (p:32:32). The i32 GEP
; indices used by test1/test2 are therefore pointer-width, while the i16
; indices used by test3 are narrower than a pointer.
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
; We should be able to flatten the loops and turn the two geps into one.
;
; Input shape: a guarded two-level loop nest in which both the outer counter
; %i and the inner counter %j start at 0 and run while their incremented value
; is unsigned-less-than %N. Each inner iteration stores 0 to the i32 element
; at index (%i * %N) + %j of %A, addressed through two chained inbounds GEPs.
;
; Expected output: a single trip count %N * %N, one inbounds GEP indexed
; directly by %i, and an inner block that no longer loops back on itself.
; CHECK-LABEL: test1
define void @test1(i32 %N, ptr %A) {
entry:
; Guard: skip the whole nest when %N is 0 (unsigned 0 < %N is false).
%cmp3 = icmp ult i32 0, %N
br i1 %cmp3, label %for.outer.preheader, label %for.end
; The combined trip count is expected to be materialised in the outer preheader.
; CHECK-LABEL: for.outer.preheader:
; CHECK: %flatten.tripcount = mul i32 %N, %N
for.outer.preheader:
br label %for.inner.preheader
; The replacement GEP is expected here, keeping the inbounds flag of the
; original GEPs and using the outer induction variable %i as its only index.
; CHECK-LABEL: for.inner.preheader:
; CHECK: %flatten.arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
for.inner.preheader:
; %i: outer induction variable, 0 on entry and %inc2 on the back edge.
%i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
br label %for.inner
; After flattening, the store must go through the new GEP and the inner
; conditional back edge must become an unconditional branch to for.outer.
; CHECK-LABEL: for.inner:
; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
; CHECK: br label %for.outer
for.inner:
; %j: inner induction variable, 0 on entry and %inc1 on the back edge.
%j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
; Row offset %i * %N, then the two chained GEPs: %A + %mul, then + %j.
%mul = mul i32 %i, %N
%gep = getelementptr inbounds i32, ptr %A, i32 %mul
%arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
store i32 0, ptr %arrayidx, align 4
%inc1 = add nuw i32 %j, 1
%cmp2 = icmp ult i32 %inc1, %N
br i1 %cmp2, label %for.inner, label %for.outer
; The outer exit compare is expected to test against the combined trip count
; instead of %N.
; CHECK-LABEL: for.outer:
; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
for.outer:
%inc2 = add i32 %i, 1
%cmp1 = icmp ult i32 %inc2, %N
br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit
for.end.loopexit:
br label %for.end
for.end:
ret void
}
; We can flatten, but the flattened gep has to be inserted after the load it
; depends on.
;
; Same loop nest as test1, except that the base pointer %ptr is produced by a
; volatile load from %A inside the inner block. The replacement GEP uses %ptr,
; so it is expected in for.inner (after the load) and must not appear in
; for.inner.preheader.
; CHECK-LABEL: test2
define void @test2(i32 %N, ptr %A) {
entry:
; Guard: skip the whole nest when %N is 0 (unsigned 0 < %N is false).
%cmp3 = icmp ult i32 0, %N
br i1 %cmp3, label %for.outer.preheader, label %for.end
; The combined trip count is expected to be materialised in the outer preheader.
; CHECK-LABEL: for.outer.preheader:
; CHECK: %flatten.tripcount = mul i32 %N, %N
for.outer.preheader:
br label %for.inner.preheader
; Negative expectation: the GEP on %ptr must not be placed in this block,
; because %ptr is only defined later, in for.inner.
; CHECK-LABEL: for.inner.preheader:
; CHECK-NOT: getelementptr inbounds i32, ptr %ptr, i32 %i
for.inner.preheader:
; %i: outer induction variable, 0 on entry and %inc2 on the back edge.
%i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
br label %for.inner
; The replacement GEP (inbounds preserved, indexed by %i) is expected in the
; inner block, followed by the store through it and an unconditional branch
; to for.outer in place of the inner back edge.
; CHECK-LABEL: for.inner:
; CHECK: %flatten.arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i
; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
; CHECK: br label %for.outer
for.inner:
; %j: inner induction variable, 0 on entry and %inc1 on the back edge.
%j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
; Base pointer is reloaded (volatile) on every inner iteration.
%ptr = load volatile ptr, ptr %A, align 4
; Row offset %i * %N, then the two chained GEPs: %ptr + %mul, then + %j.
%mul = mul i32 %i, %N
%gep = getelementptr inbounds i32, ptr %ptr, i32 %mul
%arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
store i32 0, ptr %arrayidx, align 4
%inc1 = add nuw i32 %j, 1
%cmp2 = icmp ult i32 %inc1, %N
br i1 %cmp2, label %for.inner, label %for.outer
; The outer exit compare is expected to test against the combined trip count
; instead of %N.
; CHECK-LABEL: for.outer:
; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
for.outer:
%inc2 = add i32 %i, 1
%cmp1 = icmp ult i32 %inc2, %N
br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit
for.end.loopexit:
br label %for.end
for.end:
ret void
}
; We can't flatten if the gep offset is smaller than the pointer size.
;
; Same loop nest as test1, but the counters and GEP indices are i16 while the
; file's data layout declares 32-bit pointers (p:32:32).
; NOTE(review): presumably the concern is that the i16 multiply can wrap
; before the GEP widens the index to pointer width - confirm against the pass.
;
; Negative expectations: no GEP on %A indexed directly by %i may appear in
; either block, and the inner conditional back edge must survive unchanged.
; The negative patterns use a regex for anything between the opcode and the
; element type, so they reject a flattened GEP whether or not it carries flags
; such as inbounds (the form the pass emits in the positive tests in this file).
; CHECK-LABEL: test3
define void @test3(i16 %N, ptr %A) {
entry:
; Guard: skip the whole nest when %N is 0 (unsigned 0 < %N is false).
%cmp3 = icmp ult i16 0, %N
br i1 %cmp3, label %for.outer.preheader, label %for.end
for.outer.preheader:
br label %for.inner.preheader
; CHECK-LABEL: for.inner.preheader:
; CHECK-NOT: getelementptr {{.*}}i32, ptr %A, i16 %i
for.inner.preheader:
; %i: outer induction variable, 0 on entry and %inc2 on the back edge.
%i = phi i16 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
br label %for.inner
; CHECK-LABEL: for.inner:
; CHECK-NOT: getelementptr {{.*}}i32, ptr %A, i16 %i
; CHECK: br i1 %cmp2, label %for.inner, label %for.outer
for.inner:
; %j: inner induction variable, 0 on entry and %inc1 on the back edge.
%j = phi i16 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
; Row offset %i * %N computed in i16, then the two chained GEPs with i16 indices.
%mul = mul i16 %i, %N
%gep = getelementptr inbounds i32, ptr %A, i16 %mul
%arrayidx = getelementptr inbounds i32, ptr %gep, i16 %j
store i32 0, ptr %arrayidx, align 4
%inc1 = add nuw i16 %j, 1
%cmp2 = icmp ult i16 %inc1, %N
br i1 %cmp2, label %for.inner, label %for.outer
for.outer:
%inc2 = add i16 %i, 1
%cmp1 = icmp ult i16 %inc2, %N
br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit
for.end.loopexit:
br label %for.end
for.end:
ret void
}