llvm-project/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
Nikita Popov a105877646
[InstCombine] Remove some of the complexity-based canonicalization (#91185)
The idea behind this canonicalization is that it allows us to handle less
patterns, because we know that some will be canonicalized away. This is
indeed very useful to e.g. know that constants are always on the right.

However, this is only useful if the canonicalization is actually
reliable. This is the case for constants, but not for arguments: Moving
these to the right makes it look like the "more complex" expression is
guaranteed to be on the left, but this is not actually the case in
practice. It fails as soon as you replace the argument with another
instruction.

The end result is that it looks like things correctly work in tests,
while they actually don't. We use the "thwart complexity-based
canonicalization" trick to handle this in tests, but it's often a
challenge for new contributors to get this right, and based on the
regressions this PR originally exposed, we clearly don't get this right
in many cases.

For this reason, I think that it's better to remove this complexity
canonicalization. It will make it much easier to write tests for
commuted cases and make sure that they are handled.
2024-08-21 12:02:54 +02:00

284 lines
16 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop-vectorize,dce,instcombine,loop-mssa(licm)' -force-vector-width=4 -S | FileCheck %s
; First licm pass is to hoist/sink invariant stores if possible. Today LICM does
; not hoist/sink the invariant stores. Even if that changes, we should still
; vectorize this loop in case licm is not run.
; The next licm pass after vectorization is to hoist/sink loop invariant
; instructions.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; This file separates tests with auto-generated check lines from
; invariant-store-vectorization.ll for maintenance.
; all tests check that it is legal to vectorize the stores to invariant
; address.
; Instcombine'd version of @inv_val_store_to_inv_address_conditional_diff_values.
; Now the store is no longer of invariant value.
; scalar store the value extracted from the last element of the vector value.
define void @inv_val_store_to_inv_address_conditional_diff_values_ic(ptr %a, i64 %n, ptr %b, i32 %k) {
; CHECK-LABEL: @inv_val_store_to_inv_address_conditional_diff_values_ic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[N]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT4]], ptr [[TMP1]], align 4, !alias.scope [[META0]], !noalias [[META3]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> [[BROADCAST_SPLAT4]], <4 x i32> [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[PREDPHI]], i64 3
; CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4, !alias.scope [[META3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[I1]], align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I2]], [[K]]
; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[I1]], align 4
; CHECK-NEXT: br i1 [[CMP]], label [[COND_STORE:%.*]], label [[COND_STORE_K:%.*]]
; CHECK: cond_store:
; CHECK-NEXT: br label [[LATCH]]
; CHECK: cond_store_k:
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[STOREVAL:%.*]] = phi i32 [ [[NTRUNC]], [[COND_STORE]] ], [ [[K]], [[COND_STORE_K]] ]
; CHECK-NEXT: store i32 [[STOREVAL]], ptr [[A]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
%ntrunc = trunc i64 %n to i32
br label %for.body
for.body: ; preds = %for.body, %entry
%i = phi i64 [ %i.next, %latch ], [ 0, %entry ]
%i1 = getelementptr inbounds i32, ptr %b, i64 %i
%i2 = load i32, ptr %i1, align 8
%cmp = icmp eq i32 %i2, %k
store i32 %ntrunc, ptr %i1
br i1 %cmp, label %cond_store, label %cond_store_k
cond_store:
br label %latch
cond_store_k:
br label %latch
latch:
%storeval = phi i32 [ %ntrunc, %cond_store ], [ %k, %cond_store_k ]
store i32 %storeval, ptr %a
%i.next = add nuw nsw i64 %i, 1
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
; invariant val stored to invariant address predicated on invariant condition
; This is not treated as a predicated store since the block the store belongs to
; is the latch block (which doesn't need to be predicated).
; variant/invariant values being stored to invariant address.
; test checks that the last element of the phi is extracted and scalar stored
; into the uniform address within the loop.
; Since the condition and the phi is loop invariant, they are LICM'ed after
; vectorization.
define void @inv_val_store_to_inv_address_conditional_inv(ptr %a, i64 %n, ptr %b, i32 %k) {
; CHECK-LABEL: @inv_val_store_to_inv_address_conditional_inv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[K:%.*]], [[NTRUNC]]
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[N]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[CMP]], i64 3
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i64 3
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT6]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[PREDPHI]], i64 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]]
; CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4, !alias.scope [[META12]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[I1]], align 4
; CHECK-NEXT: br i1 [[CMP]], label [[COND_STORE:%.*]], label [[COND_STORE_K:%.*]]
; CHECK: cond_store:
; CHECK-NEXT: br label [[LATCH]]
; CHECK: cond_store_k:
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[STOREVAL:%.*]] = phi i32 [ [[NTRUNC]], [[COND_STORE]] ], [ [[K]], [[COND_STORE_K]] ]
; CHECK-NEXT: store i32 [[STOREVAL]], ptr [[A]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
%ntrunc = trunc i64 %n to i32
%cmp = icmp eq i32 %ntrunc, %k
br label %for.body
for.body: ; preds = %for.body, %entry
%i = phi i64 [ %i.next, %latch ], [ 0, %entry ]
%i1 = getelementptr inbounds i32, ptr %b, i64 %i
%i2 = load i32, ptr %i1, align 8
store i32 %ntrunc, ptr %i1
br i1 %cmp, label %cond_store, label %cond_store_k
cond_store:
br label %latch
cond_store_k:
br label %latch
latch:
%storeval = phi i32 [ %ntrunc, %cond_store ], [ %k, %cond_store_k ]
store i32 %storeval, ptr %a
%i.next = add nuw nsw i64 %i, 1
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
; variant value stored to uniform address tests that the code gen extracts the
; last element from the variant vector and scalar stores it into the uniform
; address.
define i32 @variant_val_store_to_inv_address(ptr %a, i64 %n, ptr %b, i32 %k) {
; CHECK-LABEL: @variant_val_store_to_inv_address(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[N]], 2
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META16:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
; CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META16]]
; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP3]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[I3:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[I1]], align 8
; CHECK-NEXT: store i32 [[I2]], ptr [[A]], align 4
; CHECK-NEXT: [[I3]] = add i32 [[I0]], [[I2]]
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: [[I3_LCSSA:%.*]] = phi i32 [ [[I3]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[I3_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[RDX_LCSSA]]
;
entry:
%ntrunc = trunc i64 %n to i32
%cmp = icmp eq i32 %ntrunc, %k
br label %for.body
for.body: ; preds = %for.body, %entry
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
%i0 = phi i32 [ %i3, %for.body ], [ 0, %entry ]
%i1 = getelementptr inbounds i32, ptr %b, i64 %i
%i2 = load i32, ptr %i1, align 8
store i32 %i2, ptr %a
%i3 = add i32 %i0, %i2
%i.next = add nuw nsw i64 %i, 1
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end
for.end: ; preds = %for.body
%rdx.lcssa = phi i32 [ %i3, %for.body ]
ret i32 %rdx.lcssa
}