
This patch adds load CSE support to simplifyLoopAfterUnroll. It is based on EarlyCSE's implementation using ScopedHashTable and uses SCEV expressions for the accessed pointers to find redundant loads after unrolling. This applies to the late unroll pass only; for full unrolling, those redundant loads will be cleaned up by the regular pipeline. The current approach constructs MSSA on-demand per-loop, but there is still a small but notable compile-time impact: stage1-O3 +0.04%, stage1-ReleaseThinLTO +0.06%, stage1-ReleaseLTO-g +0.05%, stage1-O0-g +0.02%, stage2-O3 +0.09%, stage2-O0-g +0.04%, stage2-clang +0.02%. https://llvm-compile-time-tracker.com/compare.php?from=c089fa5a729e217d0c0d4647656386dac1a1b135&to=ec7c0f27cb5c12b600d9adfc8543d131765ec7be&stat=instructions:u This benefits some workloads with runtime-unrolling disabled, where users use pragmas to force unrolling, as well as with runtime unrolling enabled. On SPEC/MultiSource, this removes a number of loads after unrolling on AArch64 with runtime unrolling enabled.
``` External/S...te/526.blender_r/526.blender_r 96 MultiSourc...rks/mediabench/gsm/toast/toast 39 SingleSource/Benchmarks/Misc/ffbench 4 External/SPEC/CINT2006/403.gcc/403.gcc 18 MultiSourc.../Applications/JM/ldecod/ldecod 4 MultiSourc.../mediabench/jpeg/jpeg-6a/cjpeg 6 MultiSourc...OE-ProxyApps-C/miniGMG/miniGMG 9 MultiSourc...e/Applications/ClamAV/clamscan 4 MultiSourc.../MallocBench/espresso/espresso 3 MultiSourc...dence-flt/LinearDependence-flt 2 MultiSourc...ch/office-ispell/office-ispell 4 MultiSourc...ch/consumer-jpeg/consumer-jpeg 6 MultiSourc...ench/security-sha/security-sha 11 MultiSourc...chmarks/McCat/04-bisect/bisect 3 SingleSour...tTests/2020-01-06-coverage-009 12 MultiSourc...ench/telecomm-gsm/telecomm-gsm 39 MultiSourc...lds-flt/CrossingThresholds-flt 24 MultiSourc...dence-dbl/LinearDependence-dbl 2 External/S...C/CINT2006/445.gobmk/445.gobmk 6 MultiSourc...enchmarks/mafft/pairlocalalign 53 External/S...31.deepsjeng_r/531.deepsjeng_r 3 External/S...rate/510.parest_r/510.parest_r 58 External/S...NT2006/464.h264ref/464.h264ref 29 External/S...NT2017rate/502.gcc_r/502.gcc_r 45 External/S...C/CINT2006/456.hmmer/456.hmmer 6 External/S...te/538.imagick_r/538.imagick_r 18 External/S.../CFP2006/447.dealII/447.dealII 4 MultiSourc...OE-ProxyApps-C++/miniFE/miniFE 12 External/S...2017rate/525.x264_r/525.x264_r 36 MultiSourc...Benchmarks/7zip/7zip-benchmark 33 MultiSourc...hmarks/ASC_Sequoia/AMGmk/AMGmk 2 MultiSourc...chmarks/VersaBench/8b10b/8b10b 1 MultiSourc.../Applications/JM/lencod/lencod 116 MultiSourc...lds-dbl/CrossingThresholds-dbl 24 MultiSource/Benchmarks/McCat/05-eks/eks 15 ``` PR: https://github.com/llvm/llvm-project/pull/83860
478 lines
24 KiB
LLVM
478 lines
24 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -p loop-unroll -S %s | FileCheck %s

; Exercises reuse of loads across unrolled iterations in the loop-unroll pass.
; Every loop below carries loop metadata requesting an unroll count of 2.

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
|
|
|
; Positive case. With i64 elements, the address read through %src.4 at index
; iv + 1 (src + 4 + 8 * (iv + 1)) equals the address read through %src.12 at
; index iv (src + 12 + 8 * iv), and %dst is noalias. The expected output
; therefore has no second load from %src.4 in the unrolled body; the second
; multiply uses the value of %l.12 from the first unrolled iteration instead.
define void @cse_matching_load_from_previous_unrolled_iteration(ptr %src, ptr noalias %dst, i64 %N) {
; CHECK-LABEL: define void @cse_matching_load_from_previous_unrolled_iteration(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
; CHECK-NEXT:    [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK:       entry.new:
; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
; CHECK-NEXT:    [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
; CHECK-NEXT:    [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
; CHECK-NEXT:    [[L_4:%.*]] = load i64, ptr [[GEP_SRC_4]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[MUL]], ptr [[GEP_DST]], align 8
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_12]]
; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT:    store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       exit.unr-lcssa.loopexit:
; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
; CHECK:       exit.unr-lcssa:
; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       loop.epil.preheader:
; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
; CHECK:       loop.epil:
; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
; CHECK-NEXT:    [[L_4_EPIL:%.*]] = load i64, ptr [[GEP_SRC_4_EPIL]], align 8
; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
; CHECK-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
; CHECK-NEXT:    store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %src.4 = getelementptr i8, ptr %src, i64 4
  %src.12 = getelementptr i8, ptr %src, i64 12
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
  %l.12 = load i64, ptr %gep.src.12, align 8
  %gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
  %l.4 = load i64, ptr %gep.src.4, align 8
  %mul = mul i64 %l.12, %l.4
  %gep.dst = getelementptr i64, ptr %dst, i64 %iv
  store i64 %mul, ptr %gep.dst
  %iv.next = add nuw nsw i64 %iv, 1
  %c = icmp eq i64 %iv.next, %N
  br i1 %c, label %exit, label %loop, !llvm.loop !1

exit:
  ret void
}
|
|
|
|
; Negative case. Same address pattern as the matching-load test, but %l.12 is
; loaded as i32 while %l.4 is loaded as i64. The loaded types differ, so the
; expected output keeps the load from %src.4 in the second unrolled iteration.
define void @cse_different_load_types(ptr %src, ptr noalias %dst, i64 %N) {
; CHECK-LABEL: define void @cse_different_load_types(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
; CHECK-NEXT:    [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK:       entry.new:
; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
; CHECK-NEXT:    [[L_12:%.*]] = load i32, ptr [[GEP_SRC_12]], align 8
; CHECK-NEXT:    [[L_12_EXT:%.*]] = zext i32 [[L_12]] to i64
; CHECK-NEXT:    [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
; CHECK-NEXT:    [[L_4:%.*]] = load i64, ptr [[GEP_SRC_4]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[L_12_EXT]], [[L_4]]
; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[MUL]], ptr [[GEP_DST]], align 8
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_12_1:%.*]] = load i32, ptr [[GEP_SRC_12_1]], align 8
; CHECK-NEXT:    [[L_12_EXT_1:%.*]] = zext i32 [[L_12_1]] to i64
; CHECK-NEXT:    [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_4_1:%.*]] = load i64, ptr [[GEP_SRC_4_1]], align 8
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[L_12_EXT_1]], [[L_4_1]]
; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT:    store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       exit.unr-lcssa.loopexit:
; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
; CHECK:       exit.unr-lcssa:
; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       loop.epil.preheader:
; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
; CHECK:       loop.epil:
; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load i32, ptr [[GEP_SRC_12_EPIL]], align 8
; CHECK-NEXT:    [[L_12_EXT_EPIL:%.*]] = zext i32 [[L_12_EPIL]] to i64
; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
; CHECK-NEXT:    [[L_4_EPIL:%.*]] = load i64, ptr [[GEP_SRC_4_EPIL]], align 8
; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul i64 [[L_12_EXT_EPIL]], [[L_4_EPIL]]
; CHECK-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
; CHECK-NEXT:    store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %src.4 = getelementptr i8, ptr %src, i64 4
  %src.12 = getelementptr i8, ptr %src, i64 12
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
  %l.12 = load i32, ptr %gep.src.12, align 8
  %l.12.ext = zext i32 %l.12 to i64
  %gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
  %l.4 = load i64, ptr %gep.src.4, align 8
  %mul = mul i64 %l.12.ext, %l.4
  %gep.dst = getelementptr i64, ptr %dst, i64 %iv
  store i64 %mul, ptr %gep.dst
  %iv.next = add nuw nsw i64 %iv, 1
  %c = icmp eq i64 %iv.next, %N
  br i1 %c, label %exit, label %loop, !llvm.loop !1

exit:
  ret void
}
|
|
|
|
; Negative case. Same address pattern as the matching-load test, but the load
; through %src.4 is volatile. The expected output keeps the volatile load in
; both unrolled iterations and in the epilogue.
define void @cse_volatile_loads(ptr %src, ptr noalias %dst, i64 %N) {
; CHECK-LABEL: define void @cse_volatile_loads(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
; CHECK-NEXT:    [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK:       entry.new:
; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
; CHECK-NEXT:    [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
; CHECK-NEXT:    [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
; CHECK-NEXT:    [[L_4:%.*]] = load volatile i64, ptr [[GEP_SRC_4]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[MUL]], ptr [[GEP_DST]], align 8
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
; CHECK-NEXT:    [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_4_1:%.*]] = load volatile i64, ptr [[GEP_SRC_4_1]], align 8
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_4_1]]
; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT:    store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       exit.unr-lcssa.loopexit:
; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
; CHECK:       exit.unr-lcssa:
; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       loop.epil.preheader:
; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
; CHECK:       loop.epil:
; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
; CHECK-NEXT:    [[L_4_EPIL:%.*]] = load volatile i64, ptr [[GEP_SRC_4_EPIL]], align 8
; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
; CHECK-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
; CHECK-NEXT:    store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %src.4 = getelementptr i8, ptr %src, i64 4
  %src.12 = getelementptr i8, ptr %src, i64 12
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
  %l.12 = load i64, ptr %gep.src.12, align 8
  %gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
  %l.4 = load volatile i64, ptr %gep.src.4, align 8
  %mul = mul i64 %l.12, %l.4
  %gep.dst = getelementptr i64, ptr %dst, i64 %iv
  store i64 %mul, ptr %gep.dst
  %iv.next = add nuw nsw i64 %iv, 1
  %c = icmp eq i64 %iv.next, %N
  br i1 %c, label %exit, label %loop, !llvm.loop !1

exit:
  ret void
}
|
|
|
|
; Negative case. Same address pattern as the matching-load test, but the load
; through %src.4 is an unordered atomic load. The expected output keeps the
; atomic load in both unrolled iterations and in the epilogue.
define void @cse_atomic_loads(ptr %src, ptr noalias %dst, i64 %N) {
; CHECK-LABEL: define void @cse_atomic_loads(
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
; CHECK-NEXT:    [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK:       entry.new:
; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
; CHECK-NEXT:    [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
; CHECK-NEXT:    [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
; CHECK-NEXT:    [[L_4:%.*]] = load atomic i64, ptr [[GEP_SRC_4]] unordered, align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[MUL]], ptr [[GEP_DST]], align 8
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
; CHECK-NEXT:    [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_4_1:%.*]] = load atomic i64, ptr [[GEP_SRC_4_1]] unordered, align 8
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_4_1]]
; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT:    store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       exit.unr-lcssa.loopexit:
; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
; CHECK:       exit.unr-lcssa:
; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       loop.epil.preheader:
; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
; CHECK:       loop.epil:
; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
; CHECK-NEXT:    [[L_4_EPIL:%.*]] = load atomic i64, ptr [[GEP_SRC_4_EPIL]] unordered, align 8
; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
; CHECK-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
; CHECK-NEXT:    store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %src.4 = getelementptr i8, ptr %src, i64 4
  %src.12 = getelementptr i8, ptr %src, i64 12
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
  %l.12 = load i64, ptr %gep.src.12, align 8
  %gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
  %l.4 = load atomic i64, ptr %gep.src.4 unordered, align 8
  %mul = mul i64 %l.12, %l.4
  %gep.dst = getelementptr i64, ptr %dst, i64 %iv
  store i64 %mul, ptr %gep.dst
  %iv.next = add nuw nsw i64 %iv, 1
  %c = icmp eq i64 %iv.next, %N
  br i1 %c, label %exit, label %loop, !llvm.loop !1

exit:
  ret void
}
|
|
|
|
; Negative case. Same loop body as the matching-load test, but %dst is not
; marked noalias, so the store through %gep.dst may overwrite memory that is
; read through %src. The expected output keeps the load from %src.4 in the
; second unrolled iteration.
define void @cse_load_may_be_clobbered(ptr %src, ptr %dst, i64 %N) {
; CHECK-LABEL: define void @cse_load_may_be_clobbered(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SRC_4:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
; CHECK-NEXT:    [[SRC_12:%.*]] = getelementptr i8, ptr [[SRC]], i64 12
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK:       entry.new:
; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV]]
; CHECK-NEXT:    [[L_12:%.*]] = load i64, ptr [[GEP_SRC_12]], align 8
; CHECK-NEXT:    [[GEP_SRC_4:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV]]
; CHECK-NEXT:    [[L_4:%.*]] = load i64, ptr [[GEP_SRC_4]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[L_12]], [[L_4]]
; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[MUL]], ptr [[GEP_DST]], align 8
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC_12_1:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_12_1:%.*]] = load i64, ptr [[GEP_SRC_12_1]], align 8
; CHECK-NEXT:    [[GEP_SRC_4_1:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_4_1:%.*]] = load i64, ptr [[GEP_SRC_4_1]], align 8
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[L_12_1]], [[L_4_1]]
; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT:    store i64 [[MUL_1]], ptr [[GEP_DST_1]], align 8
; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       exit.unr-lcssa.loopexit:
; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
; CHECK:       exit.unr-lcssa:
; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       loop.epil.preheader:
; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
; CHECK:       loop.epil:
; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load i64, ptr [[GEP_SRC_12_EPIL]], align 8
; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr i64, ptr [[SRC_4]], i64 [[IV_UNR]]
; CHECK-NEXT:    [[L_4_EPIL:%.*]] = load i64, ptr [[GEP_SRC_4_EPIL]], align 8
; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul i64 [[L_12_EPIL]], [[L_4_EPIL]]
; CHECK-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_UNR]]
; CHECK-NEXT:    store i64 [[MUL_EPIL]], ptr [[GEP_DST_EPIL]], align 8
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %src.4 = getelementptr i8, ptr %src, i64 4
  %src.12 = getelementptr i8, ptr %src, i64 12
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.src.12 = getelementptr i64, ptr %src.12, i64 %iv
  %l.12 = load i64, ptr %gep.src.12, align 8
  %gep.src.4 = getelementptr i64, ptr %src.4, i64 %iv
  %l.4 = load i64, ptr %gep.src.4, align 8
  %mul = mul i64 %l.12, %l.4
  %gep.dst = getelementptr i64, ptr %dst, i64 %iv
  store i64 %mul, ptr %gep.dst
  %iv.next = add nuw nsw i64 %iv, 1
  %c = icmp eq i64 %iv.next, %N
  br i1 %c, label %exit, label %loop, !llvm.loop !1

exit:
  ret void
}
|
|
|
|
|
|
; External function with no body in this file; it is called from the latch
; block of @loop_body_with_dead_blocks.
declare void @foo()
|
|
|
|
; The inner loop contains %loop.bb.dead, a block that has no predecessors.
; In the expected output the loop is unrolled by 2 and the copy of %l.1 in the
; second unrolled iteration is gone; its compare uses %l.2 from the preceding
; latch, with no call in between. The loads that follow each call to @foo are
; kept.
define void @loop_body_with_dead_blocks(ptr %src) {
; CHECK-LABEL: define void @loop_body_with_dead_blocks(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
; CHECK:       outer.header.loopexit:
; CHECK-NEXT:    br label [[OUTER_HEADER]]
; CHECK:       outer.header:
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    br label [[LOOP_BB:%.*]]
; CHECK:       loop.bb.dead:
; CHECK-NEXT:    unreachable
; CHECK:       loop.bb:
; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[SRC]], align 8
; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i32 [[L_1]], 0
; CHECK-NEXT:    br i1 [[C_1]], label [[OUTER_HEADER_LOOPEXIT:%.*]], label [[LOOP_LATCH:%.*]]
; CHECK:       loop.latch:
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[SRC]], align 8
; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i32 [[L_2]], 1
; CHECK-NEXT:    br i1 [[C_2]], label [[EXIT:%.*]], label [[LOOP_HEADER_1:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       loop.header.1:
; CHECK-NEXT:    br label [[LOOP_BB_1:%.*]]
; CHECK:       loop.bb.1:
; CHECK-NEXT:    [[C_1_1:%.*]] = icmp eq i32 [[L_2]], 0
; CHECK-NEXT:    br i1 [[C_1_1]], label [[OUTER_HEADER_LOOPEXIT]], label [[LOOP_LATCH_1:%.*]]
; CHECK:       loop.latch.1:
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    [[L_2_1:%.*]] = load i32, ptr [[SRC]], align 8
; CHECK-NEXT:    [[C_2_1:%.*]] = icmp eq i32 [[L_2_1]], 1
; CHECK-NEXT:    br i1 [[C_2_1]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %outer.header

outer.header:
  br label %loop.header

loop.header:
  br label %loop.bb

loop.bb.dead:
  br label %loop.bb

loop.bb:
  %l.1 = load i32, ptr %src, align 8
  %c.1 = icmp eq i32 %l.1, 0
  br i1 %c.1, label %outer.header, label %loop.latch

loop.latch:
  call void @foo()
  %l.2 = load i32, ptr %src, align 8
  %c.2 = icmp eq i32 %l.2, 1
  br i1 %c.2, label %exit, label %loop.header, !llvm.loop !1

exit:
  ret void
}
|
|
|
|
; Loop metadata attached to the latch branch of every loop in this file:
; mustprogress (!0) plus a requested unroll count of 2 (!2).
!0 = !{!"llvm.loop.mustprogress"}
!1 = distinct !{!1, !0, !2}
!2 = !{!"llvm.loop.unroll.count", i32 2}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.mustprogress"}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META8:![0-9]+]]}
; CHECK: [[META8]] = !{!"llvm.loop.unroll.count", i32 2}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
;.
|