Florian Hahn 175d297102
[LoopUnroll] Add CSE to remove redundant loads after unrolling. (#83860)
This patch adds loadCSE support to simplifyLoopAfterUnroll. It is based
on EarlyCSE's implementation using ScopeHashTable and is using SCEV for
accessed pointers to check to find redundant loads after unrolling.

This applies to the late unroll pass only, for full unrolling those
redundant loads will be cleaned up by the regular pipeline.

The current approach constructs MSSA on-demand per-loop, but there is
still small but notable compile-time impact:

stage1-O3  +0.04%
stage1-ReleaseThinLTO +0.06%
stage1-ReleaseLTO-g +0.05%
stage1-O0-g +0.02%
stage2-O3 +0.09%
stage2-O0-g +0.04%
stage2-clang +0.02%


https://llvm-compile-time-tracker.com/compare.php?from=c089fa5a729e217d0c0d4647656386dac1a1b135&to=ec7c0f27cb5c12b600d9adfc8543d131765ec7be&stat=instructions:u

This benefits some workloads with runtime-unrolling disabled,
where users use pragmas to force unrolling, as well as with
runtime unrolling enabled.

On SPEC/MultiSource, this removes a number of loads after unrolling
on AArch64 with runtime unrolling enabled.

```
External/S...te/526.blender_r/526.blender_r    96
MultiSourc...rks/mediabench/gsm/toast/toast    39
SingleSource/Benchmarks/Misc/ffbench            4
External/SPEC/CINT2006/403.gcc/403.gcc         18
MultiSourc.../Applications/JM/ldecod/ldecod     4
MultiSourc.../mediabench/jpeg/jpeg-6a/cjpeg     6
MultiSourc...OE-ProxyApps-C/miniGMG/miniGMG     9
MultiSourc...e/Applications/ClamAV/clamscan     4
MultiSourc.../MallocBench/espresso/espresso     3
MultiSourc...dence-flt/LinearDependence-flt     2
MultiSourc...ch/office-ispell/office-ispell     4
MultiSourc...ch/consumer-jpeg/consumer-jpeg     6
MultiSourc...ench/security-sha/security-sha    11
MultiSourc...chmarks/McCat/04-bisect/bisect     3
SingleSour...tTests/2020-01-06-coverage-009    12
MultiSourc...ench/telecomm-gsm/telecomm-gsm    39
MultiSourc...lds-flt/CrossingThresholds-flt    24
MultiSourc...dence-dbl/LinearDependence-dbl     2
External/S...C/CINT2006/445.gobmk/445.gobmk     6
MultiSourc...enchmarks/mafft/pairlocalalign    53
External/S...31.deepsjeng_r/531.deepsjeng_r     3
External/S...rate/510.parest_r/510.parest_r    58
External/S...NT2006/464.h264ref/464.h264ref    29
External/S...NT2017rate/502.gcc_r/502.gcc_r    45
External/S...C/CINT2006/456.hmmer/456.hmmer     6
External/S...te/538.imagick_r/538.imagick_r    18
External/S.../CFP2006/447.dealII/447.dealII     4
MultiSourc...OE-ProxyApps-C++/miniFE/miniFE    12
External/S...2017rate/525.x264_r/525.x264_r    36
MultiSourc...Benchmarks/7zip/7zip-benchmark    33
MultiSourc...hmarks/ASC_Sequoia/AMGmk/AMGmk     2
MultiSourc...chmarks/VersaBench/8b10b/8b10b     1
MultiSourc.../Applications/JM/lencod/lencod   116
MultiSourc...lds-dbl/CrossingThresholds-dbl    24
MultiSource/Benchmarks/McCat/05-eks/eks        15
```

PR: https://github.com/llvm/llvm-project/pull/83860
2024-05-02 11:01:24 +01:00

478 lines
24 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -p loop-unroll -S %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define void @cse_matching_load_from_previous_unrolled_iteration(ptr %src, ptr noalias %dst, i64 %N) {
; CHECK-LABEL: define void @cse_matching_load_from_previous_unrolled_iteration(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
; CHECK-NEXT: [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK: entry.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
; CHECK-NEXT: [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
; CHECK-NEXT: [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
; CHECK-NEXT: [[L_4:%.*]] = load i64, ptr [[GEP_SRC_4]], align 8
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i64 [[MUL]], ptr [[GEP_DST]], align 8
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
; CHECK-NEXT: [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_12]]
; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT: store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: exit.unr-lcssa.loopexit:
; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
; CHECK-NEXT: br label [[EXIT_UNR_LCSSA]]
; CHECK: exit.unr-lcssa:
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.epil.preheader:
; CHECK-NEXT: br label [[LOOP_EPIL:%.*]]
; CHECK: loop.epil:
; CHECK-NEXT: [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
; CHECK-NEXT: [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
; CHECK-NEXT: [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
; CHECK-NEXT: [[L_4_EPIL:%.*]] = load i64, ptr [[GEP_SRC_4_EPIL]], align 8
; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
; CHECK-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
; CHECK-NEXT: store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%src.4 = getelementptr i8, ptr %src, i64 4
%src.12 = getelementptr i8, ptr %src, i64 12
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
%l.12 = load i64, ptr %gep.src.12, align 8
%gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
%l.4 = load i64, ptr %gep.src.4, align 8
%mul = mul i64 %l.12, %l.4
%gep.dst = getelementptr i64, ptr %dst, i64 %iv
store i64 %mul, ptr %gep.dst
%iv.next = add nuw nsw i64 %iv, 1
%c = icmp eq i64 %iv.next, %N
br i1 %c, label %exit, label %loop, !llvm.loop !1
exit:
ret void
}
define void @cse_different_load_types(ptr %src, ptr noalias %dst, i64 %N) {
; CHECK-LABEL: define void @cse_different_load_types(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
; CHECK-NEXT: [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK: entry.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
; CHECK-NEXT: [[L_12:%.*]] = load i32, ptr [[GEP_SRC_12]], align 8
; CHECK-NEXT: [[L_12_EXT:%.*]] = zext i32 [[L_12]] to i64
; CHECK-NEXT: [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
; CHECK-NEXT: [[L_4:%.*]] = load i64, ptr [[GEP_SRC_4]], align 8
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[L_12_EXT]], [[L_4]]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i64 [[MUL]], ptr [[GEP_DST]], align 8
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[L_12_1:%.*]] = load i32, ptr [[GEP_SRC_12_1]], align 8
; CHECK-NEXT: [[L_12_EXT_1:%.*]] = zext i32 [[L_12_1]] to i64
; CHECK-NEXT: [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[L_4_1:%.*]] = load i64, ptr [[GEP_SRC_4_1]], align 8
; CHECK-NEXT: [[MUL_1:%.*]] = mul i64 [[L_12_EXT_1]], [[L_4_1]]
; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT: store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit.unr-lcssa.loopexit:
; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
; CHECK-NEXT: br label [[EXIT_UNR_LCSSA]]
; CHECK: exit.unr-lcssa:
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.epil.preheader:
; CHECK-NEXT: br label [[LOOP_EPIL:%.*]]
; CHECK: loop.epil:
; CHECK-NEXT: [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
; CHECK-NEXT: [[L_12_EPIL:%.*]] = load i32, ptr [[GEP_SRC_12_EPIL]], align 8
; CHECK-NEXT: [[L_12_EXT_EPIL:%.*]] = zext i32 [[L_12_EPIL]] to i64
; CHECK-NEXT: [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
; CHECK-NEXT: [[L_4_EPIL:%.*]] = load i64, ptr [[GEP_SRC_4_EPIL]], align 8
; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul i64 [[L_12_EXT_EPIL]], [[L_4_EPIL]]
; CHECK-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
; CHECK-NEXT: store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%src.4 = getelementptr i8, ptr %src, i64 4
%src.12 = getelementptr i8, ptr %src, i64 12
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
%l.12 = load i32, ptr %gep.src.12, align 8
%l.12.ext = zext i32 %l.12 to i64
%gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
%l.4 = load i64, ptr %gep.src.4, align 8
%mul = mul i64 %l.12.ext, %l.4
%gep.dst = getelementptr i64, ptr %dst, i64 %iv
store i64 %mul, ptr %gep.dst
%iv.next = add nuw nsw i64 %iv, 1
%c = icmp eq i64 %iv.next, %N
br i1 %c, label %exit, label %loop, !llvm.loop !1
exit:
ret void
}
define void @cse_volatile_loads(ptr %src, ptr noalias %dst, i64 %N) {
; CHECK-LABEL: define void @cse_volatile_loads(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
; CHECK-NEXT: [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK: entry.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
; CHECK-NEXT: [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
; CHECK-NEXT: [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
; CHECK-NEXT: [[L_4:%.*]] = load volatile i64, ptr [[GEP_SRC_4]], align 8
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i64 [[MUL]], ptr [[GEP_DST]], align 8
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
; CHECK-NEXT: [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[L_4_1:%.*]] = load volatile i64, ptr [[GEP_SRC_4_1]], align 8
; CHECK-NEXT: [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_4_1]]
; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT: store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: exit.unr-lcssa.loopexit:
; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
; CHECK-NEXT: br label [[EXIT_UNR_LCSSA]]
; CHECK: exit.unr-lcssa:
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.epil.preheader:
; CHECK-NEXT: br label [[LOOP_EPIL:%.*]]
; CHECK: loop.epil:
; CHECK-NEXT: [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
; CHECK-NEXT: [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
; CHECK-NEXT: [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
; CHECK-NEXT: [[L_4_EPIL:%.*]] = load volatile i64, ptr [[GEP_SRC_4_EPIL]], align 8
; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
; CHECK-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
; CHECK-NEXT: store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%src.4 = getelementptr i8, ptr %src, i64 4
%src.12 = getelementptr i8, ptr %src, i64 12
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
%l.12 = load i64, ptr %gep.src.12, align 8
%gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
%l.4 = load volatile i64, ptr %gep.src.4, align 8
%mul = mul i64 %l.12, %l.4
%gep.dst = getelementptr i64, ptr %dst, i64 %iv
store i64 %mul, ptr %gep.dst
%iv.next = add nuw nsw i64 %iv, 1
%c = icmp eq i64 %iv.next, %N
br i1 %c, label %exit, label %loop, !llvm.loop !1
exit:
ret void
}
define void @cse_atomic_loads(ptr %src, ptr noalias %dst, i64 %N) {
; CHECK-LABEL: define void @cse_atomic_loads(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
; CHECK-NEXT: [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK: entry.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
; CHECK-NEXT: [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
; CHECK-NEXT: [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
; CHECK-NEXT: [[L_4:%.*]] = load atomic i64, ptr [[GEP_SRC_4]] unordered, align 8
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i64 [[MUL]], ptr [[GEP_DST]], align 8
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
; CHECK-NEXT: [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[L_4_1:%.*]] = load atomic i64, ptr [[GEP_SRC_4_1]] unordered, align 8
; CHECK-NEXT: [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_4_1]]
; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT: store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit.unr-lcssa.loopexit:
; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
; CHECK-NEXT: br label [[EXIT_UNR_LCSSA]]
; CHECK: exit.unr-lcssa:
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.epil.preheader:
; CHECK-NEXT: br label [[LOOP_EPIL:%.*]]
; CHECK: loop.epil:
; CHECK-NEXT: [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
; CHECK-NEXT: [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
; CHECK-NEXT: [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
; CHECK-NEXT: [[L_4_EPIL:%.*]] = load atomic i64, ptr [[GEP_SRC_4_EPIL]] unordered, align 8
; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
; CHECK-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
; CHECK-NEXT: store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%src.4 = getelementptr i8, ptr %src, i64 4
%src.12 = getelementptr i8, ptr %src, i64 12
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
%l.12 = load i64, ptr %gep.src.12, align 8
%gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
%l.4 = load atomic i64, ptr %gep.src.4 unordered, align 8
%mul = mul i64 %l.12, %l.4
%gep.dst = getelementptr i64, ptr %dst, i64 %iv
store i64 %mul, ptr %gep.dst
%iv.next = add nuw nsw i64 %iv, 1
%c = icmp eq i64 %iv.next, %N
br i1 %c, label %exit, label %loop, !llvm.loop !1
exit:
ret void
}
define void @cse_load_may_be_clobbered(ptr %src, ptr %dst, i64 %N) {
; CHECK-LABEL: define void @cse_load_may_be_clobbered(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
; CHECK-NEXT: [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK: entry.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
; CHECK-NEXT: [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
; CHECK-NEXT: [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
; CHECK-NEXT: [[L_4:%.*]] = load i64, ptr [[GEP_SRC_4]], align 8
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i64 [[MUL]], ptr [[GEP_DST]], align 8
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
; CHECK-NEXT: [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[L_4_1:%.*]] = load i64, ptr [[GEP_SRC_4_1]], align 8
; CHECK-NEXT: [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_4_1]]
; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT: store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: exit.unr-lcssa.loopexit:
; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
; CHECK-NEXT: br label [[EXIT_UNR_LCSSA]]
; CHECK: exit.unr-lcssa:
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.epil.preheader:
; CHECK-NEXT: br label [[LOOP_EPIL:%.*]]
; CHECK: loop.epil:
; CHECK-NEXT: [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
; CHECK-NEXT: [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
; CHECK-NEXT: [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
; CHECK-NEXT: [[L_4_EPIL:%.*]] = load i64, ptr [[GEP_SRC_4_EPIL]], align 8
; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
; CHECK-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
; CHECK-NEXT: store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%src.4 = getelementptr i8, ptr %src, i64 4
%src.12 = getelementptr i8, ptr %src, i64 12
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
%l.12 = load i64, ptr %gep.src.12, align 8
%gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
%l.4 = load i64, ptr %gep.src.4, align 8
%mul = mul i64 %l.12, %l.4
%gep.dst = getelementptr i64, ptr %dst, i64 %iv
store i64 %mul, ptr %gep.dst
%iv.next = add nuw nsw i64 %iv, 1
%c = icmp eq i64 %iv.next, %N
br i1 %c, label %exit, label %loop, !llvm.loop !1
exit:
ret void
}
declare void @foo()
define void @loop_body_with_dead_blocks(ptr %src) {
; CHECK-LABEL: define void @loop_body_with_dead_blocks(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header.loopexit:
; CHECK-NEXT: br label [[OUTER_HEADER]]
; CHECK: outer.header:
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: br label [[LOOP_BB:%.*]]
; CHECK: loop.bb.dead:
; CHECK-NEXT: unreachable
; CHECK: loop.bb:
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[SRC]], align 8
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i32 [[L_1]], 0
; CHECK-NEXT: br i1 [[C_1]], label [[OUTER_HEADER_LOOPEXIT:%.*]], label [[LOOP_LATCH:%.*]]
; CHECK: loop.latch:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[SRC]], align 8
; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[L_2]], 1
; CHECK-NEXT: br i1 [[C_2]], label [[EXIT:%.*]], label [[LOOP_HEADER_1:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: loop.header.1:
; CHECK-NEXT: br label [[LOOP_BB_1:%.*]]
; CHECK: loop.bb.1:
; CHECK-NEXT: [[C_1_1:%.*]] = icmp eq i32 [[L_2]], 0
; CHECK-NEXT: br i1 [[C_1_1]], label [[OUTER_HEADER_LOOPEXIT]], label [[LOOP_LATCH_1:%.*]]
; CHECK: loop.latch.1:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[L_2_1:%.*]] = load i32, ptr [[SRC]], align 8
; CHECK-NEXT: [[C_2_1:%.*]] = icmp eq i32 [[L_2_1]], 1
; CHECK-NEXT: br i1 [[C_2_1]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %outer.header
outer.header:
br label %loop.header
loop.header:
br label %loop.bb
loop.bb.dead:
br label %loop.bb
loop.bb:
%l.1 = load i32, ptr %src, align 8
%c.1 = icmp eq i32 %l.1, 0
br i1 %c.1, label %outer.header, label %loop.latch
loop.latch:
call void @foo()
%l.2 = load i32, ptr %src, align 8
%c.2 = icmp eq i32 %l.2, 1
br i1 %c.2, label %exit, label %loop.header, !llvm.loop !1
exit:
ret void
}
!0 = !{!"llvm.loop.mustprogress"}
!1 = distinct !{!1, !0, !2}
!2 = !{!"llvm.loop.unroll.count", i32 2}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.mustprogress"}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META8:![0-9]+]]}
; CHECK: [[META8]] = !{!"llvm.loop.unroll.count", i32 2}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
;.