
LICM tries to reassociate GEPs in order to hoist an invariant GEP. Currently, it also does this in the case where the GEP has a constant offset. This is usually undesirable. From a back-end perspective, constant GEPs are usually free because they can be folded into addressing modes, so this just increases register pressure. From a middle-end perspective, keeping constant offsets last in the chain makes it easier to analyze the relationship between multiple GEPs on the same base, especially after CSE. The worst that can happen here is if we start with something like ``` loop { p + 4*x p + 4*x + 1 p + 4*x + 2 p + 4*x + 3 } ``` And LICM converts it into: ``` p.1 = p + 1 p.2 = p + 2 p.3 = p + 3 loop { p + 4*x p.1 + 4*x p.2 + 4*x p.3 + 4*x } ``` Which is much worse than leaving it for CSE to convert to: ``` loop { p2 = p + 4*x p2 + 1 p2 + 2 p2 + 3 } ```
109 lines
3.8 KiB
LLVM
109 lines
3.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
|
|
; RUN: -verify-machineinstrs < %s | FileCheck %s
|
|
; @test: a two-level loop nest over the i32 arrays %PtrA and %PtrB.
;   - The outer loop (header block3) advances %OuterInd.0 by one per iteration.
;   - The inner loop (header block4) advances %InnerInd.1 by one per iteration.
;   - The function returns PtrA[%inc - 1] as soon as %inc >= %LenA (block if.then).
;   - %LenB is not referenced anywhere in the body.
define signext i32 @test(ptr noalias %PtrA, ptr noalias %PtrB, i32 signext %LenA, i32 signext %LenB) #0 {
|
|
; Expected assembly for this function follows as FileCheck assertions. They are
; tied to the exact IR below, so any change to the IR requires regenerating them.
; CHECK-LABEL: test:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: addi 6, 3, 4
|
|
; CHECK-NEXT: addi 4, 4, -4
|
|
; CHECK-NEXT: li 8, 0
|
|
; CHECK-NEXT: li 7, 0
|
|
; CHECK-NEXT: .LBB0_1: # %block3
|
|
; CHECK-NEXT: # =>This Loop Header: Depth=1
|
|
; CHECK-NEXT: # Child Loop BB0_2 Depth 2
|
|
; CHECK-NEXT: extsw 9, 8
|
|
; CHECK-NEXT: addi 8, 8, 1
|
|
; CHECK-NEXT: extsw 7, 7
|
|
; CHECK-NEXT: cmpw 8, 5
|
|
; CHECK-NEXT: sldi 10, 7, 2
|
|
; CHECK-NEXT: sldi 9, 9, 2
|
|
; CHECK-NEXT: addi 7, 7, 1
|
|
; CHECK-NEXT: add 10, 4, 10
|
|
; CHECK-NEXT: crnot 20, 0
|
|
; CHECK-NEXT: bc 12, 20, .LBB0_5
|
|
; CHECK-NEXT: .p2align 5
|
|
; CHECK-NEXT: .LBB0_2: # %if.end
|
|
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
|
|
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
|
|
; CHECK-NEXT: lwz 12, 4(10)
|
|
; CHECK-NEXT: addi 11, 10, 4
|
|
; CHECK-NEXT: cmplwi 12, 0
|
|
; CHECK-NEXT: beq 0, .LBB0_4
|
|
; CHECK-NEXT: # %bb.3: # %if.then4
|
|
; CHECK-NEXT: #
|
|
; CHECK-NEXT: lwzx 12, 6, 9
|
|
; CHECK-NEXT: addi 7, 7, 1
|
|
; CHECK-NEXT: stw 12, 8(10)
|
|
; CHECK-NEXT: mr 10, 11
|
|
; CHECK-NEXT: bc 4, 20, .LBB0_2
|
|
; CHECK-NEXT: b .LBB0_5
|
|
; CHECK-NEXT: .p2align 4
|
|
; CHECK-NEXT: .LBB0_4: # %if.end9
|
|
; CHECK-NEXT: #
|
|
; CHECK-NEXT: add 9, 3, 9
|
|
; CHECK-NEXT: lwz 10, 4(9)
|
|
; CHECK-NEXT: addi 10, 10, 1
|
|
; CHECK-NEXT: stw 10, 4(9)
|
|
; CHECK-NEXT: b .LBB0_1
|
|
; CHECK-NEXT: .LBB0_5: # %if.then
|
|
; CHECK-NEXT: lwax 3, 9, 3
|
|
; CHECK-NEXT: blr
|
|
; entry and block2 do no work; they only forward control to the outer-loop header.
entry:
|
|
br label %block2
|
|
|
|
block2: ; preds = %entry
|
|
br label %block3
|
|
|
|
; Outer-loop header. Both phis start at 0 on entry from block2. On the back edge
; from block8, %OuterInd.0 takes %inc and %InnerInd.0 takes %inc1 (defined in block5).
block3: ; preds = %block8, %block2
|
|
%OuterInd.0 = phi i32 [ 0, %block2 ], [ %inc, %block8 ]
|
|
%InnerInd.0 = phi i32 [ 0, %block2 ], [ %inc1, %block8 ]
|
|
%inc = add nsw i32 %OuterInd.0, 1
|
|
br label %block4
|
|
|
|
; Inner-loop header, re-entered from if.then4. The exit test uses %inc, which is
; computed in block3 and therefore keeps the same value across inner iterations.
block4: ; preds = %if.then4, %block3
|
|
%InnerInd.1 = phi i32 [ %InnerInd.0, %block3 ], [ %inc1, %if.then4 ]
|
|
%cmp = icmp sge i32 %inc, %LenA
|
|
br i1 %cmp, label %if.then, label %if.end
|
|
|
|
; The only return in the function: load and return PtrA[%inc - 1].
if.then: ; preds = %block4
|
|
%sub = sub nsw i32 %inc, 1
|
|
%idxprom = sext i32 %sub to i64
|
|
%arrayidx = getelementptr inbounds i32, ptr %PtrA, i64 %idxprom
|
|
%0 = load i32, ptr %arrayidx, align 4
|
|
ret i32 %0
|
|
|
|
if.end: ; preds = %block4
|
|
br label %block5
|
|
|
|
; Advance the inner index (%inc1), then branch on whether PtrB[%InnerInd.1]
; (the pre-increment index) is non-zero.
block5: ; preds = %if.end
|
|
%inc1 = add nsw i32 %InnerInd.1, 1
|
|
%idxprom2 = sext i32 %InnerInd.1 to i64
|
|
%arrayidx3 = getelementptr inbounds i32, ptr %PtrB, i64 %idxprom2
|
|
%1 = load i32, ptr %arrayidx3, align 4
|
|
%tobool = icmp ne i32 %1, 0
|
|
br i1 %tobool, label %if.then4, label %if.end9
|
|
|
|
; Non-zero case: copy PtrA[%inc] into PtrB[%inc1], then continue the inner loop
; at block4.
if.then4: ; preds = %block5
|
|
%idxprom5 = sext i32 %inc to i64
|
|
%arrayidx6 = getelementptr inbounds i32, ptr %PtrA, i64 %idxprom5
|
|
%2 = load i32, ptr %arrayidx6, align 4
|
|
%idxprom7 = sext i32 %inc1 to i64
|
|
%arrayidx8 = getelementptr inbounds i32, ptr %PtrB, i64 %idxprom7
|
|
store i32 %2, ptr %arrayidx8, align 4
|
|
br label %block4
|
|
|
|
if.end9: ; preds = %block5
|
|
br label %block6
|
|
|
|
; Zero case: increment PtrA[%inc] in place, then take the outer back edge
; through block8 to block3.
block6: ; preds = %if.end9
|
|
%idxprom10 = sext i32 %inc to i64
|
|
%arrayidx11 = getelementptr inbounds i32, ptr %PtrA, i64 %idxprom10
|
|
%3 = load i32, ptr %arrayidx11, align 4
|
|
%inc12 = add nsw i32 %3, 1
|
|
store i32 %inc12, ptr %arrayidx11, align 4
|
|
br label %block8
|
|
|
|
block8: ; preds = %block6
|
|
br label %block3
|
|
}
|