The original loop (OL) that serves as input to LoopUnroll has basic
blocks that are arranged as follows:
```
OLPreHeader
OLHeader <-.
...        |
OLLatch ---'
OLExit
```
In this depiction, every block has an implicit edge to the next block
below, so any explicit edge indicates a conditional branch.
Given OL and unroll count N, LoopUnroll sometimes creates an unrolled
loop (UL) with a remainder loop (RL) epilogue arranged like this:
```
,-- ULGuard
|   ULPreHeader
|   ULHeader <-.
|   ...        |
|   ULLatch ---'
|   ULExit
`-> RLGuard -----.
    RLPreHeader  |
,-> RLHeader     |
|   ...          |
`-- RLLatch      |
    RLExit       |
    OLExit <-----'
```
Each UL iteration executes N OL iterations, but each RL iteration
executes 1 OL iteration. ULGuard or RLGuard checks whether the first
iteration of UL or RL should execute, respectively. If so, ULLatch or
RLLatch checks whether to execute each subsequent iteration.
Once reached, OL always executes its first iteration but not necessarily
the next N-1 iterations. Thus, ULGuard is always required before the
first UL iteration. However, when control flows from ULGuard directly to
RLGuard, the first OL iteration has yet to execute, so RLGuard is then
redundant before the first RL iteration.
Thus, this patch makes the following changes:
- Adjust ULGuard to branch to RLPreHeader instead of RLGuard, thus
eliminating RLGuard's unnecessary branch instruction for that path.
- Eliminate the creation of RLGuard phi node poison values. Without this
patch, RLGuard has such a phi node for each value that is defined by any
OL iteration and used in OLExit. The poison value is required where
ULGuard is the predecessor. The poison value indicates that control flow
from ULGuard to RLGuard to OLExit has no counterpart in OL because the
first OL iteration must execute either in UL or RL.
- Simplify the CFG by not splitting ULExit and RLGuard because, without
the ULGuard predecessor, the single block can now be a dedicated UL
exit.
- To RLPreHeader, add an `llvm.assume` call that asserts the RL trip
count is non-zero. Without this patch, RLPreHeader is reachable only
when RLGuard guarantees that assertion is true. With this patch, RLGuard
guarantees it only when RLGuard is the predecessor, and the OL structure
guarantees it when ULGuard is the predecessor. If RL itself is unrolled
later, this guarantee somehow prevents ScalarEvolution from giving up
when trying to compute a maximum trip count for RL. That maximum trip
count enables the branch instruction in the final unrolled instance of
RLLatch to be eliminated. Without the `llvm.assume` call, some existing
unroll tests start to fail because that instruction is not eliminated.
The original motivation for this patch is to facilitate later patches
that fix LoopUnroll's computation of branch weights so that they
maintain the block frequency of OL's body (see #135812). Specifically,
this patch ensures RLGuard's branch weights do not affect RL's
contribution to the block frequency of OL's body in the case that
ULGuard skips UL.
420 lines
26 KiB
LLVM
420 lines
26 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -passes=loop-vectorize -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S %s | FileCheck --check-prefix AUTO_VEC %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

; This test checks auto-vectorization with FP induction variable.
; FMF is required on the IR instructions.
; (FMF = fast-math flags such as `fast` or `reassoc` on the floating-point
; add that updates the induction variable; the tests below whose update
; carries no such flag are expected to stay scalar.)

; Floating-point induction variable whose update carries the `fast` flag.
; The assertions below expect a main vector loop that handles 32 elements per
; iteration (four <8 x float> stores), a <4 x float> epilogue vector loop, and
; the original scalar loop as the final remainder.
;
; NOTE(review): the `#0` attribute group referenced on the define line is not
; defined in the visible part of this file; confirm its definition is present.
;
;void fp_iv_loop1(float * __restrict__ A, int N) {
;  float x = 1.0;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x += 0.5;
;  }
;}

define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
; AUTO_VEC-LABEL: define void @fp_iv_loop1(
; AUTO_VEC-SAME: ptr noalias captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; AUTO_VEC-NEXT: [[ENTRY:.*:]]
; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N]], 0
; AUTO_VEC-NEXT: br i1 [[CMP4]], label %[[ITER_CHECK:.*]], label %[[FOR_END:.*]]
; AUTO_VEC: [[ITER_CHECK]]:
; AUTO_VEC-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; AUTO_VEC: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32
; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; AUTO_VEC: [[VECTOR_PH]]:
; AUTO_VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
; AUTO_VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
; AUTO_VEC-NEXT: [[TMP6:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST]]
; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float 1.000000e+00, [[TMP6]]
; AUTO_VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; AUTO_VEC: [[VECTOR_BODY]]:
; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00, float 3.000000e+00, float 3.500000e+00, float 4.000000e+00, float 4.500000e+00>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 4.000000e+00)
; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[STEP_ADD]], splat (float 4.000000e+00)
; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[STEP_ADD2]], splat (float 4.000000e+00)
; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8
; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 16
; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 24
; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND]], ptr [[TMP1]], align 4
; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD]], ptr [[TMP2]], align 4
; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD2]], ptr [[TMP3]], align 4
; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD3]], ptr [[TMP4]], align 4
; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <8 x float> [[STEP_ADD3]], splat (float 4.000000e+00)
; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AUTO_VEC-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; AUTO_VEC: [[MIDDLE_BLOCK]]:
; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; AUTO_VEC: [[VEC_EPILOG_ITER_CHECK]]:
; AUTO_VEC-NEXT: [[DOTCAST12:%.*]] = sitofp i64 [[N_VEC]] to float
; AUTO_VEC-NEXT: [[TMP11:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST12]]
; AUTO_VEC-NEXT: [[IND_END1:%.*]] = fadd fast float 1.000000e+00, [[TMP11]]
; AUTO_VEC-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; AUTO_VEC: [[VEC_EPILOG_PH]]:
; AUTO_VEC-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
; AUTO_VEC-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
; AUTO_VEC-NEXT: [[DOTCAST4:%.*]] = sitofp i64 [[N_VEC3]] to float
; AUTO_VEC-NEXT: [[TMP12:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST4]]
; AUTO_VEC-NEXT: [[TMP10:%.*]] = fadd fast float 1.000000e+00, [[TMP12]]
; AUTO_VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_RESUME_VAL]], i64 0
; AUTO_VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; AUTO_VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
; AUTO_VEC-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; AUTO_VEC: [[VEC_EPILOG_VECTOR_BODY]]:
; AUTO_VEC-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[VEC_IND11:%.*]] = phi <4 x float> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX10]]
; AUTO_VEC-NEXT: store <4 x float> [[VEC_IND11]], ptr [[TMP8]], align 4
; AUTO_VEC-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX10]], 4
; AUTO_VEC-NEXT: [[VEC_IND_NEXT12]] = fadd fast <4 x float> [[VEC_IND11]], splat (float 2.000000e+00)
; AUTO_VEC-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC3]]
; AUTO_VEC-NEXT: br i1 [[TMP9]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; AUTO_VEC: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; AUTO_VEC-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
; AUTO_VEC-NEXT: br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]:
; AUTO_VEC-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL11:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ]
; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ]
; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
; AUTO_VEC-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4
; AUTO_VEC-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AUTO_VEC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; AUTO_VEC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; AUTO_VEC-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; AUTO_VEC: [[FOR_END_LOOPEXIT]]:
; AUTO_VEC-NEXT: br label %[[FOR_END]]
; AUTO_VEC: [[FOR_END]]:
; AUTO_VEC-NEXT: ret void
;
entry:
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %loop.preheader, label %for.end

loop.preheader: ; preds = %entry
  br label %loop

loop: ; preds = %loop.preheader, %loop
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ]
  %x.06 = phi float [ %conv1, %loop ], [ 1.000000e+00, %loop.preheader ]
  %arrayidx = getelementptr inbounds float, ptr %A, i64 %iv
  store float %x.06, ptr %arrayidx, align 4
  %conv1 = fadd fast float %x.06, 5.000000e-01
  %iv.next = add nuw nsw i64 %iv, 1
  %lftr.wideiv = trunc i64 %iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %loop

for.end.loopexit: ; preds = %loop
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
}

; The same as the previous, but FP operation has no FMF.
; Vectorization should be rejected.
; The assertions below therefore expect the scalar loop to be left in place,
; with no vector body generated.
;void fp_iv_loop2(float * __restrict__ A, int N) {
;  float x = 1.0;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x += 0.5;
;  }
;}

define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
; AUTO_VEC-LABEL: define void @fp_iv_loop2(
; AUTO_VEC-SAME: ptr noalias captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; AUTO_VEC-NEXT: [[ENTRY:.*:]]
; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N]], 0
; AUTO_VEC-NEXT: br i1 [[CMP4]], label %[[LOOP_PREHEADER:.*]], label %[[FOR_END:.*]]
; AUTO_VEC: [[LOOP_PREHEADER]]:
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
; AUTO_VEC-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; AUTO_VEC-NEXT: [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], %[[LOOP]] ], [ 1.000000e+00, %[[LOOP_PREHEADER]] ]
; AUTO_VEC-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
; AUTO_VEC-NEXT: store float [[X_06_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4
; AUTO_VEC-NEXT: [[CONV1_EPIL]] = fadd float [[X_06_EPIL]], 5.000000e-01
; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
; AUTO_VEC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT_EPIL]] to i32
; AUTO_VEC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; AUTO_VEC-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[LOOP]]
; AUTO_VEC: [[FOR_END_LOOPEXIT]]:
; AUTO_VEC-NEXT: br label %[[FOR_END]]
; AUTO_VEC: [[FOR_END]]:
; AUTO_VEC-NEXT: ret void
;
entry:
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %loop.preheader, label %for.end

loop.preheader: ; preds = %entry
  br label %loop

loop: ; preds = %loop.preheader, %loop
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %loop.preheader ]
  %x.06 = phi float [ %conv1, %loop ], [ 1.000000e+00, %loop.preheader ]
  %arrayidx = getelementptr inbounds float, ptr %A, i64 %iv
  store float %x.06, ptr %arrayidx, align 4
  %conv1 = fadd float %x.06, 5.000000e-01
  %iv.next = add nuw nsw i64 %iv, 1
  %lftr.wideiv = trunc i64 %iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %loop

for.end.loopexit: ; preds = %loop
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
}

; FP induction variable %j (start 0.0, step 3.0, updated with `fast`) whose
; value from the last iteration is returned through the LCSSA phi %t1.
; The assertions below expect the loop to be vectorized with four
; <4 x double> stores per iteration, and the value for the external user to be
; recomputed in the middle block as the induction end value minus one step.
define double @external_use_with_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-LABEL: define double @external_use_with_fast_math(
; AUTO_VEC-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; AUTO_VEC-NEXT: [[ENTRY:.*]]:
; AUTO_VEC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16
; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; AUTO_VEC: [[VECTOR_PH]]:
; AUTO_VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 16
; AUTO_VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to double
; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast double 3.000000e+00, [[DOTCAST]]
; AUTO_VEC-NEXT: [[TMP6:%.*]] = fadd fast double 0.000000e+00, [[TMP0]]
; AUTO_VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; AUTO_VEC: [[VECTOR_BODY]]:
; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ <double 0.000000e+00, double 3.000000e+00, double 6.000000e+00, double 9.000000e+00>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], splat (double 1.200000e+01)
; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[STEP_ADD]], splat (double 1.200000e+01)
; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[STEP_ADD_2]], splat (double 1.200000e+01)
; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]]
; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP1]], i32 4
; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[TMP1]], i32 8
; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP1]], i32 12
; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], ptr [[TMP1]], align 8
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], ptr [[TMP2]], align 8
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], ptr [[TMP4]], align 8
; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[STEP_ADD_3]], splat (double 1.200000e+01)
; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AUTO_VEC-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; AUTO_VEC: [[MIDDLE_BLOCK]]:
; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; AUTO_VEC-NEXT: [[TMP7:%.*]] = fsub fast double [[TMP6]], 3.000000e+00
; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; AUTO_VEC: [[SCALAR_PH]]:
; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi double [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
; AUTO_VEC-NEXT: store double [[J]], ptr [[T0]], align 8
; AUTO_VEC-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; AUTO_VEC-NEXT: [[J_NEXT]] = fadd fast double [[J]], 3.000000e+00
; AUTO_VEC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; AUTO_VEC-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]]
; AUTO_VEC: [[FOR_END]]:
; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[J]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; AUTO_VEC-NEXT: ret double [[J_LCSSA]]
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [%i.next, %loop]
  %j = phi double [ 0.0, %entry ], [ %j.next, %loop ]
  %t0 = getelementptr double, ptr %a, i64 %i
  store double %j, ptr %t0
  %i.next = add i64 %i, 1
  %j.next = fadd fast double %j, 3.0
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %loop, label %for.end

for.end:
  %t1 = phi double [ %j, %loop ]
  ret double %t1
}

; Same loop shape as @external_use_with_fast_math, but the fadd that updates
; %j carries no fast-math flags. The assertions below expect the loop to be
; left scalar, with the returned value still coming from the loop's %j phi.
define double @external_use_without_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-LABEL: define double @external_use_without_fast_math(
; AUTO_VEC-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; AUTO_VEC-NEXT: [[ENTRY:.*]]:
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT_7:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[J_NEXT_7:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
; AUTO_VEC-NEXT: store double [[J]], ptr [[TMP7]], align 8
; AUTO_VEC-NEXT: [[I_NEXT_7]] = add i64 [[I]], 1
; AUTO_VEC-NEXT: [[J_NEXT_7]] = fadd double [[J]], 3.000000e+00
; AUTO_VEC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT_7]], [[N]]
; AUTO_VEC-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[FOR_END:.*]]
; AUTO_VEC: [[FOR_END]]:
; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[J]], %[[LOOP]] ]
; AUTO_VEC-NEXT: ret double [[J_LCSSA]]
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [%i.next, %loop]
  %j = phi double [ 0.0, %entry ], [ %j.next, %loop ]
  %t0 = getelementptr double, ptr %a, i64 %i
  store double %j, ptr %t0
  %i.next = add i64 %i, 1
  %j.next = fadd double %j, 3.0
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %loop, label %for.end

for.end:
  %t1 = phi double [ %j, %loop ]
  ret double %t1
}

;; FP induction variable whose update carries only the `reassoc` flag.
;; The assertions below expect the loop to be vectorized (main loop with four
;; <8 x float> parts, <4 x float> epilogue loop) and the generated FP
;; operations to carry `reassoc`.
;;
;; void fadd_reassoc_FMF(float *p, unsigned N) {
;;   float x = 1.0f;
;;   for (unsigned i=0; i!=N; ++i) {
;;     p[i] = p[i] + x;
;;     x += 42.0f;
;;   }
;; }

define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
; AUTO_VEC-LABEL: define void @fadd_reassoc_FMF(
; AUTO_VEC-SAME: ptr captures(none) [[P:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; AUTO_VEC-NEXT: [[ITER_CHECK:.*]]:
; AUTO_VEC-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; AUTO_VEC: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32
; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; AUTO_VEC: [[VECTOR_PH]]:
; AUTO_VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
; AUTO_VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
; AUTO_VEC-NEXT: [[TMP1:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST]]
; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd reassoc float 1.000000e+00, [[TMP1]]
; AUTO_VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; AUTO_VEC: [[VECTOR_BODY]]:
; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ <float 1.000000e+00, float 4.300000e+01, float 8.500000e+01, float 1.270000e+02, float 1.690000e+02, float 2.110000e+02, float 2.530000e+02, float 2.950000e+02>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], splat (float 3.360000e+02)
; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], splat (float 3.360000e+02)
; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], splat (float 3.360000e+02)
; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDEX]]
; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8
; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 16
; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 24
; AUTO_VEC-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
; AUTO_VEC-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4
; AUTO_VEC-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP4]], align 4
; AUTO_VEC-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
; AUTO_VEC-NEXT: [[TMP6:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], [[WIDE_LOAD]]
; AUTO_VEC-NEXT: [[TMP7:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], [[WIDE_LOAD2]]
; AUTO_VEC-NEXT: [[TMP8:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], [[WIDE_LOAD3]]
; AUTO_VEC-NEXT: [[TMP9:%.*]] = fadd reassoc <8 x float> [[STEP_ADD3]], [[WIDE_LOAD4]]
; AUTO_VEC-NEXT: store <8 x float> [[TMP6]], ptr [[TMP2]], align 4
; AUTO_VEC-NEXT: store <8 x float> [[TMP7]], ptr [[TMP3]], align 4
; AUTO_VEC-NEXT: store <8 x float> [[TMP8]], ptr [[TMP4]], align 4
; AUTO_VEC-NEXT: store <8 x float> [[TMP9]], ptr [[TMP5]], align 4
; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[STEP_ADD3]], splat (float 3.360000e+02)
; AUTO_VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AUTO_VEC-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; AUTO_VEC: [[MIDDLE_BLOCK]]:
; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; AUTO_VEC: [[VEC_EPILOG_ITER_CHECK]]:
; AUTO_VEC-NEXT: [[DOTCAST16:%.*]] = sitofp i64 [[N_VEC]] to float
; AUTO_VEC-NEXT: [[TMP12:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST16]]
; AUTO_VEC-NEXT: [[IND_END1:%.*]] = fadd reassoc float 1.000000e+00, [[TMP12]]
; AUTO_VEC-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
; AUTO_VEC: [[VEC_EPILOG_PH]]:
; AUTO_VEC-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[TMP0]], 4
; AUTO_VEC-NEXT: [[N_VEC6:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF5]]
; AUTO_VEC-NEXT: [[DOTCAST7:%.*]] = sitofp i64 [[N_VEC6]] to float
; AUTO_VEC-NEXT: [[TMP17:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST7]]
; AUTO_VEC-NEXT: [[TMP18:%.*]] = fadd reassoc float 1.000000e+00, [[TMP17]]
; AUTO_VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_RESUME_VAL]], i64 0
; AUTO_VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; AUTO_VEC-NEXT: [[INDUCTION:%.*]] = fadd reassoc <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 4.200000e+01, float 8.400000e+01, float 1.260000e+02>
; AUTO_VEC-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; AUTO_VEC: [[VEC_EPILOG_VECTOR_BODY]]:
; AUTO_VEC-NEXT: [[INDEX13:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[VEC_IND14:%.*]] = phi <4 x float> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT15:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDEX13]]
; AUTO_VEC-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, ptr [[TMP13]], align 4
; AUTO_VEC-NEXT: [[TMP14:%.*]] = fadd reassoc <4 x float> [[VEC_IND14]], [[WIDE_LOAD16]]
; AUTO_VEC-NEXT: store <4 x float> [[TMP14]], ptr [[TMP13]], align 4
; AUTO_VEC-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX13]], 4
; AUTO_VEC-NEXT: [[VEC_IND_NEXT15]] = fadd reassoc <4 x float> [[VEC_IND14]], splat (float 1.680000e+02)
; AUTO_VEC-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC6]]
; AUTO_VEC-NEXT: br i1 [[TMP15]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; AUTO_VEC: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; AUTO_VEC-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]]
; AUTO_VEC-NEXT: br i1 [[CMP_N18]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]:
; AUTO_VEC-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i64 [ [[N_VEC6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL15:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDVARS_IV]]
; AUTO_VEC-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; AUTO_VEC-NEXT: [[ADD:%.*]] = fadd reassoc float [[X_012]], [[TMP16]]
; AUTO_VEC-NEXT: store float [[ADD]], ptr [[ARRAYIDX]], align 4
; AUTO_VEC-NEXT: [[ADD3]] = fadd reassoc float [[X_012]], 4.200000e+01
; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AUTO_VEC-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
; AUTO_VEC-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
; AUTO_VEC: [[EXIT]]:
; AUTO_VEC-NEXT: ret void
;
entry:
  %0 = zext i32 %N to i64
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %x.012 = phi float [ 1.000000e+00, %entry ], [ %add3, %loop ]
  %arrayidx = getelementptr inbounds float, ptr %p, i64 %iv
  %1 = load float, ptr %arrayidx, align 4
  %add = fadd reassoc float %x.012, %1
  store float %add, ptr %arrayidx, align 4
  %add3 = fadd reassoc float %x.012, 4.200000e+01
  %iv.next = add nuw nsw i64 %iv, 1
  %cmp.not = icmp eq i64 %iv.next, %0
  br i1 %cmp.not, label %exit, label %loop

exit:
  ret void
}