; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 < %s | FileCheck %s
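; The IR below is roughly equivalent to the following C (a sketch for
; orientation only; the parameter names are invented, and the 4x4
; accumulation is fully unrolled in the IR):
;
;   void foo(const double *a, double *b, long i0, long j0, long in,
;            long lda, long istep, long jstep) {
;     long j = j0;                        /* j is NOT reset per outer trip */
;     for (long i = i0; i < in; i += istep)
;       for (; j < i - jstep; j += jstep) {
;         double s = b[j];
;         for (long r = 0; r < 4; ++r)    /* rows i .. i+3 */
;           for (long c = 0; c < 4; ++c)  /* cols j .. j+3 */
;             s += a[(i + r) * lda + j + c];
;         b[j] = s;
;       }
;   }
;
; The four row bases (i+r)*lda form a chain of related induction
; expressions, which is what exercises LSR's IV-chain ("profitable chain")
; heuristic; the CHECK lines below pin down the PPC code emitted for it.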
define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpd 5, 7
; CHECK-NEXT: bgelr 0
; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill
; CHECK-NEXT: addi 27, 5, 2
; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
; CHECK-NEXT: addi 28, 5, 3
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT: addi 30, 5, 1
; CHECK-NEXT: mulld 12, 8, 5
; CHECK-NEXT: mulld 0, 9, 8
; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
; CHECK-NEXT: addi 29, 3, 16
; CHECK-NEXT: sldi 11, 10, 3
; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill
; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill
; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill
; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill
; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill
; CHECK-NEXT: mulld 30, 8, 30
; CHECK-NEXT: mulld 28, 8, 28
; CHECK-NEXT: mulld 8, 8, 27
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: add 5, 5, 9
; CHECK-NEXT: add 12, 12, 0
; CHECK-NEXT: add 30, 30, 0
; CHECK-NEXT: add 28, 28, 0
; CHECK-NEXT: add 8, 8, 0
; CHECK-NEXT: cmpd 5, 7
; CHECK-NEXT: bge 0, .LBB0_6
; CHECK-NEXT: .LBB0_3: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_5 Depth 2
; CHECK-NEXT: sub 27, 5, 10
; CHECK-NEXT: cmpd 6, 27
; CHECK-NEXT: bge 0, .LBB0_2
; CHECK-NEXT: # %bb.4:
; CHECK-NEXT: add 25, 6, 12
; CHECK-NEXT: add 24, 6, 8
; CHECK-NEXT: sldi 26, 6, 3
; CHECK-NEXT: sldi 23, 25, 3
; CHECK-NEXT: add 25, 6, 30
; CHECK-NEXT: sldi 24, 24, 3
; CHECK-NEXT: add 26, 4, 26
; CHECK-NEXT: sldi 22, 25, 3
; CHECK-NEXT: add 25, 6, 28
; CHECK-NEXT: add 24, 29, 24
; CHECK-NEXT: add 23, 3, 23
; CHECK-NEXT: sldi 25, 25, 3
; CHECK-NEXT: add 22, 3, 22
; CHECK-NEXT: add 25, 29, 25
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lfd 0, 0(26)
; CHECK-NEXT: lfd 1, 0(23)
; CHECK-NEXT: add 6, 6, 10
; CHECK-NEXT: cmpd 6, 27
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 8(23)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 16(23)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 24(23)
; CHECK-NEXT: add 23, 23, 11
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 0(22)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 8(22)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 16(22)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 24(22)
; CHECK-NEXT: add 22, 22, 11
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, -16(24)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, -8(24)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 0(24)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 8(24)
; CHECK-NEXT: add 24, 24, 11
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, -16(25)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, -8(25)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 0(25)
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: lfd 1, 8(25)
; CHECK-NEXT: add 25, 25, 11
; CHECK-NEXT: xsadddp 0, 0, 1
; CHECK-NEXT: stfd 0, 0(26)
; CHECK-NEXT: add 26, 26, 11
; CHECK-NEXT: blt 0, .LBB0_5
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_6:
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 26, -48(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 25, -56(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 22, -80(1) # 8-byte Folded Reload
; CHECK-NEXT: blr
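
; Entry block (%8): fall through to the loop nest only if %2 < %4.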
%9 = icmp slt i64 %2, %4
br i1 %9, label %10, label %97
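
; Outer loop header: %11 is the row index i (stepped by %6 in block %93);
; %12 carries the column index j over from the previous outer iteration.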
10: ; preds = %8, %93
%11 = phi i64 [ %95, %93 ], [ %2, %8 ]
%12 = phi i64 [ %94, %93 ], [ %3, %8 ]
%13 = sub nsw i64 %11, %7
%14 = icmp slt i64 %12, %13
br i1 %14, label %15, label %93
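
; Inner-loop preheader: materialize the four related row bases
; %11*%5, (%11+1)*%5, (%11+2)*%5 and (%11+3)*%5 that LSR can chain.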
15: ; preds = %10
%16 = mul nsw i64 %11, %5
%17 = add nsw i64 %11, 1
%18 = mul nsw i64 %17, %5
%19 = add nsw i64 %11, 2
%20 = mul nsw i64 %19, %5
%21 = add nsw i64 %11, 3
%22 = mul nsw i64 %21, %5
br label %23
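
; Inner loop: starting from the value at %1[%24], accumulate sixteen
; loads from %0 (four rows by four columns) and store the sum back.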
23: ; preds = %15, %23
%24 = phi i64 [ %12, %15 ], [ %91, %23 ]
%25 = getelementptr inbounds double, ptr %1, i64 %24
%26 = load double, ptr %25, align 8
%27 = add nsw i64 %24, %16
%28 = getelementptr inbounds double, ptr %0, i64 %27
%29 = load double, ptr %28, align 8
%30 = fadd double %26, %29
%31 = add nsw i64 %27, 1
%32 = getelementptr inbounds double, ptr %0, i64 %31
%33 = load double, ptr %32, align 8
%34 = fadd double %30, %33
%35 = add nsw i64 %27, 2
%36 = getelementptr inbounds double, ptr %0, i64 %35
%37 = load double, ptr %36, align 8
%38 = fadd double %34, %37
%39 = add nsw i64 %27, 3
%40 = getelementptr inbounds double, ptr %0, i64 %39
%41 = load double, ptr %40, align 8
%42 = fadd double %38, %41
%43 = add nsw i64 %24, %18
%44 = getelementptr inbounds double, ptr %0, i64 %43
%45 = load double, ptr %44, align 8
%46 = fadd double %42, %45
%47 = add nsw i64 %43, 1
%48 = getelementptr inbounds double, ptr %0, i64 %47
%49 = load double, ptr %48, align 8
%50 = fadd double %46, %49
%51 = add nsw i64 %43, 2
%52 = getelementptr inbounds double, ptr %0, i64 %51
%53 = load double, ptr %52, align 8
%54 = fadd double %50, %53
%55 = add nsw i64 %43, 3
%56 = getelementptr inbounds double, ptr %0, i64 %55
%57 = load double, ptr %56, align 8
%58 = fadd double %54, %57
%59 = add nsw i64 %24, %20
%60 = getelementptr inbounds double, ptr %0, i64 %59
%61 = load double, ptr %60, align 8
%62 = fadd double %58, %61
%63 = add nsw i64 %59, 1
%64 = getelementptr inbounds double, ptr %0, i64 %63
%65 = load double, ptr %64, align 8
%66 = fadd double %62, %65
%67 = add nsw i64 %59, 2
%68 = getelementptr inbounds double, ptr %0, i64 %67
%69 = load double, ptr %68, align 8
%70 = fadd double %66, %69
%71 = add nsw i64 %59, 3
%72 = getelementptr inbounds double, ptr %0, i64 %71
%73 = load double, ptr %72, align 8
%74 = fadd double %70, %73
%75 = add nsw i64 %24, %22
%76 = getelementptr inbounds double, ptr %0, i64 %75
%77 = load double, ptr %76, align 8
%78 = fadd double %74, %77
%79 = add nsw i64 %75, 1
%80 = getelementptr inbounds double, ptr %0, i64 %79
%81 = load double, ptr %80, align 8
%82 = fadd double %78, %81
%83 = add nsw i64 %75, 2
%84 = getelementptr inbounds double, ptr %0, i64 %83
%85 = load double, ptr %84, align 8
%86 = fadd double %82, %85
%87 = add nsw i64 %75, 3
%88 = getelementptr inbounds double, ptr %0, i64 %87
%89 = load double, ptr %88, align 8
%90 = fadd double %86, %89
store double %90, ptr %25, align 8
%91 = add nsw i64 %24, %7
%92 = icmp slt i64 %91, %13
br i1 %92, label %23, label %93
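
; Outer loop latch: advance %11 by %6 and iterate while it stays below %4.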
93: ; preds = %23, %10
%94 = phi i64 [ %12, %10 ], [ %91, %23 ]
%95 = add nsw i64 %11, %6
%96 = icmp slt i64 %95, %4
br i1 %96, label %10, label %97
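
; Exit block.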
97: ; preds = %93, %8
ret void
}