LICM will speculatively hoist code outside of loops. This requires removing information, like alias analysis (https://github.com/llvm/llvm-project/issues/53794), range information (https://bugs.llvm.org/show_bug.cgi?id=50550), among others. Prior to https://reviews.llvm.org/D99249 , LICM would only be run after LoopRotate. Running Loop Rotate prior to LICM prevents a instruction hoist from being speculative, if it was conditionally executed by the iteration (as is commonly emitted by clang and other frontends). Adding the additional LICM pass first, however, forces all of these instructions to be considered speculative, even if they are not speculative after LoopRotate. This destroys information, resulting in performance losses for discarding this additional information. This PR modifies LICM to accept a ``speculative'' parameter which allows LICM to be set to perform information-loss speculative hoists or not. Phase ordering is then modified to not perform the information-losing speculative hoists until after loop rotate is performed, preserving this additional information. Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D119965
157 lines
9.4 KiB
LLVM
157 lines
9.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -O1 -S -enable-new-pm=0 < %s | FileCheck --check-prefixes=OLDPM_O1 %s
|
|
; RUN: opt -O2 -S -enable-new-pm=0 < %s | FileCheck --check-prefixes=OLDPM_O23 %s
|
|
; RUN: opt -O3 -S -enable-new-pm=0 < %s | FileCheck --check-prefixes=OLDPM_O23 %s
|
|
; RUN: opt -passes='default<O1>' -S < %s | FileCheck --check-prefixes=NEWPM_O1 %s
|
|
; RUN: opt -passes='default<O2>' -S < %s | FileCheck --check-prefixes=NEWPM_O23 %s
|
|
; RUN: opt -passes='default<O3>' -S < %s | FileCheck --check-prefixes=NEWPM_O23 %s
|
|
|
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; We should retain the TBAA on the load here, not lose it.
|
|
|
|
define void @licm(double** align 8 dereferenceable(8) %_M_start.i, i64 %numElem) {
|
|
; OLDPM_O1-LABEL: @licm(
|
|
; OLDPM_O1-NEXT: entry:
|
|
; OLDPM_O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0
|
|
; OLDPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
|
|
; OLDPM_O1: for.body.lr.ph:
|
|
; OLDPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]]
|
|
; OLDPM_O1-NEXT: br label [[FOR_BODY:%.*]]
|
|
; OLDPM_O1: for.body:
|
|
; OLDPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
|
; OLDPM_O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]]
|
|
; OLDPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
|
|
; OLDPM_O1-NEXT: [[INC]] = add nuw i64 [[K_02]], 1
|
|
; OLDPM_O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]]
|
|
; OLDPM_O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
|
|
; OLDPM_O1: for.cond.cleanup:
|
|
; OLDPM_O1-NEXT: ret void
|
|
;
|
|
; OLDPM_O23-LABEL: @licm(
|
|
; OLDPM_O23-NEXT: entry:
|
|
; OLDPM_O23-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0
|
|
; OLDPM_O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
|
|
; OLDPM_O23: for.body.lr.ph:
|
|
; OLDPM_O23-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]]
|
|
; OLDPM_O23-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4
|
|
; OLDPM_O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
|
|
; OLDPM_O23: vector.ph:
|
|
; OLDPM_O23-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4
|
|
; OLDPM_O23-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; OLDPM_O23: vector.body:
|
|
; OLDPM_O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; OLDPM_O23-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[INDEX]]
|
|
; OLDPM_O23-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
|
|
; OLDPM_O23-NEXT: store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA8:![0-9]+]]
|
|
; OLDPM_O23-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2
|
|
; OLDPM_O23-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
|
|
; OLDPM_O23-NEXT: store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA8]]
|
|
; OLDPM_O23-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; OLDPM_O23-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; OLDPM_O23-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; OLDPM_O23: middle.block:
|
|
; OLDPM_O23-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEM]]
|
|
; OLDPM_O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]]
|
|
; OLDPM_O23: for.body.preheader:
|
|
; OLDPM_O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
|
|
; OLDPM_O23-NEXT: br label [[FOR_BODY:%.*]]
|
|
; OLDPM_O23: for.body:
|
|
; OLDPM_O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ]
|
|
; OLDPM_O23-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]]
|
|
; OLDPM_O23-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]]
|
|
; OLDPM_O23-NEXT: [[INC]] = add nuw i64 [[K_02]], 1
|
|
; OLDPM_O23-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]]
|
|
; OLDPM_O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; OLDPM_O23: for.cond.cleanup:
|
|
; OLDPM_O23-NEXT: ret void
|
|
;
|
|
; NEWPM_O1-LABEL: @licm(
|
|
; NEWPM_O1-NEXT: entry:
|
|
; NEWPM_O1-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0
|
|
; NEWPM_O1-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
|
|
; NEWPM_O1: for.body.lr.ph:
|
|
; NEWPM_O1-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]]
|
|
; NEWPM_O1-NEXT: br label [[FOR_BODY:%.*]]
|
|
; NEWPM_O1: for.body:
|
|
; NEWPM_O1-NEXT: [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
|
; NEWPM_O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]]
|
|
; NEWPM_O1-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
|
|
; NEWPM_O1-NEXT: [[INC]] = add nuw i64 [[K_02]], 1
|
|
; NEWPM_O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]]
|
|
; NEWPM_O1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
|
|
; NEWPM_O1: for.cond.cleanup:
|
|
; NEWPM_O1-NEXT: ret void
|
|
;
|
|
; NEWPM_O23-LABEL: @licm(
|
|
; NEWPM_O23-NEXT: entry:
|
|
; NEWPM_O23-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0
|
|
; NEWPM_O23-NEXT: br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
|
|
; NEWPM_O23: for.body.lr.ph:
|
|
; NEWPM_O23-NEXT: [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]]
|
|
; NEWPM_O23-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4
|
|
; NEWPM_O23-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
|
|
; NEWPM_O23: vector.ph:
|
|
; NEWPM_O23-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4
|
|
; NEWPM_O23-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; NEWPM_O23: vector.body:
|
|
; NEWPM_O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; NEWPM_O23-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[INDEX]]
|
|
; NEWPM_O23-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
|
|
; NEWPM_O23-NEXT: store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA8:![0-9]+]]
|
|
; NEWPM_O23-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2
|
|
; NEWPM_O23-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
|
|
; NEWPM_O23-NEXT: store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA8]]
|
|
; NEWPM_O23-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; NEWPM_O23-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; NEWPM_O23-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; NEWPM_O23: middle.block:
|
|
; NEWPM_O23-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEM]]
|
|
; NEWPM_O23-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]]
|
|
; NEWPM_O23: for.body.preheader:
|
|
; NEWPM_O23-NEXT: [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
|
|
; NEWPM_O23-NEXT: br label [[FOR_BODY:%.*]]
|
|
; NEWPM_O23: for.body:
|
|
; NEWPM_O23-NEXT: [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ]
|
|
; NEWPM_O23-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]]
|
|
; NEWPM_O23-NEXT: store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]]
|
|
; NEWPM_O23-NEXT: [[INC]] = add nuw i64 [[K_02]], 1
|
|
; NEWPM_O23-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]]
|
|
; NEWPM_O23-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; NEWPM_O23: for.cond.cleanup:
|
|
; NEWPM_O23-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.cond
|
|
|
|
for.cond: ; preds = %for.body, %entry
|
|
%k.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
|
|
%cmp = icmp ult i64 %k.0, %numElem
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
|
|
for.body: ; preds = %for.cond
|
|
%0 = load double*, double** %_M_start.i, align 8, !tbaa !3
|
|
%add.ptr.i = getelementptr inbounds double, double* %0, i64 %k.0
|
|
store double 2.000000e+00, double* %add.ptr.i, align 8, !tbaa !8
|
|
%inc = add nuw i64 %k.0, 1
|
|
br label %for.cond
|
|
|
|
for.cond.cleanup: ; preds = %for.cond
|
|
ret void
|
|
}
|
|
|
|
!llvm.module.flags = !{!0, !1}
|
|
!llvm.ident = !{!2}
|
|
|
|
!0 = !{i32 1, !"wchar_size", i32 4}
|
|
!1 = !{i32 7, !"uwtable", i32 1}
|
|
!2 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git fc510998f7c287df2bc1304673e0cd8452d50b31)"}
|
|
!3 = !{!4, !5, i64 0}
|
|
!4 = !{!"_ZTSNSt12_Vector_baseIdSaIdEE17_Vector_impl_dataE", !5, i64 0, !5, i64 8, !5, i64 16}
|
|
!5 = !{!"any pointer", !6, i64 0}
|
|
!6 = !{!"omnipotent char", !7, i64 0}
|
|
!7 = !{!"Simple C++ TBAA"}
|
|
!8 = !{!9, !9, i64 0}
|
|
!9 = !{!"double", !6, i64 0}
|