; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -S %s | FileCheck %s

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"

; Scalar input: a single-block loop whose induction variable starts at 0 and
; which exits once %iv.next compares equal to 0. Every iteration re-loads a
; float from the loop-invariant pointer %p.invar and writes four consecutive
; floats at element index 4 * %iv into each of %dst.1 and %dst.2:
;   %dst.1[4*iv + 0..3] = { %l.0, %l.1, 0.0, 0.0 }
;   %dst.2[4*iv + 0..3] = { %l.2, 0.0, 0.0, 0.0 }
; The +1/+2/+3 element indices are formed with `or disjoint` on the shifted
; induction variable and are shared by the GEPs of both destinations.
;
; The assertions expect a vector loop that steps its index by 2 and emits one
; <8 x float> store per destination, preceded by a check block built from
; llvm.umul.with.overflow calls and pointer comparisons. The expected
; entry-block branch has the constant condition `true` selecting the scalar
; preheader, and the original scalar loop is kept.
;
; NOTE(review): the shufflevector assertions that assemble the wide store
; value end at the mask type (for example `<8 x i32>`) with no mask constant
; after it. That looks like text lost before this file reached review;
; regenerate with utils/update_test_checks.py to confirm.
define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noalias %p.invar, ptr noalias %dst.1, ptr noalias %dst.2) {
; CHECK-LABEL: define void @test_free_instructions_feeding_geps_for_interleave_groups(
; CHECK-SAME: ptr noalias [[P_INVAR:%.*]], ptr noalias [[DST_1:%.*]], ptr noalias [[DST_2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST_1]], i64 8
; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult ptr [[TMP1]], [[SCEVGEP]]
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP2]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST_1]], i64 12
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 0, [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult ptr [[TMP5]], [[SCEVGEP1]]
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[MUL_OVERFLOW4]]
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[DST_1]], i64 4
; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 0, [[MUL_RESULT7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult ptr [[TMP9]], [[SCEVGEP5]]
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW8]]
; CHECK-NEXT: [[MUL9:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT10:%.*]] = extractvalue { i64, i1 } [[MUL9]], 0
; CHECK-NEXT: [[MUL_OVERFLOW11:%.*]] = extractvalue { i64, i1 } [[MUL9]], 1
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 0, [[MUL_RESULT10]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[MUL_RESULT10]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ult ptr [[TMP13]], [[DST_1]]
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW11]]
; CHECK-NEXT: [[SCEVGEP12:%.*]] = getelementptr i8, ptr [[DST_2]], i64 8
; CHECK-NEXT: [[MUL13:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT14:%.*]] = extractvalue { i64, i1 } [[MUL13]], 0
; CHECK-NEXT: [[MUL_OVERFLOW15:%.*]] = extractvalue { i64, i1 } [[MUL13]], 1
; CHECK-NEXT: [[TMP16:%.*]] = sub i64 0, [[MUL_RESULT14]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[SCEVGEP12]], i64 [[MUL_RESULT14]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp ult ptr [[TMP17]], [[SCEVGEP12]]
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW15]]
; CHECK-NEXT: [[SCEVGEP16:%.*]] = getelementptr i8, ptr [[DST_2]], i64 12
; CHECK-NEXT: [[MUL17:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT18:%.*]] = extractvalue { i64, i1 } [[MUL17]], 0
; CHECK-NEXT: [[MUL_OVERFLOW19:%.*]] = extractvalue { i64, i1 } [[MUL17]], 1
; CHECK-NEXT: [[TMP20:%.*]] = sub i64 0, [[MUL_RESULT18]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[SCEVGEP16]], i64 [[MUL_RESULT18]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp ult ptr [[TMP21]], [[SCEVGEP16]]
; CHECK-NEXT: [[TMP23:%.*]] = or i1 [[TMP22]], [[MUL_OVERFLOW19]]
; CHECK-NEXT: [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[DST_2]], i64 4
; CHECK-NEXT: [[MUL21:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT22:%.*]] = extractvalue { i64, i1 } [[MUL21]], 0
; CHECK-NEXT: [[MUL_OVERFLOW23:%.*]] = extractvalue { i64, i1 } [[MUL21]], 1
; CHECK-NEXT: [[TMP24:%.*]] = sub i64 0, [[MUL_RESULT22]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[SCEVGEP20]], i64 [[MUL_RESULT22]]
; CHECK-NEXT: [[TMP26:%.*]] = icmp ult ptr [[TMP25]], [[SCEVGEP20]]
; CHECK-NEXT: [[TMP27:%.*]] = or i1 [[TMP26]], [[MUL_OVERFLOW23]]
; CHECK-NEXT: [[MUL24:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT25:%.*]] = extractvalue { i64, i1 } [[MUL24]], 0
; CHECK-NEXT: [[MUL_OVERFLOW26:%.*]] = extractvalue { i64, i1 } [[MUL24]], 1
; CHECK-NEXT: [[TMP28:%.*]] = sub i64 0, [[MUL_RESULT25]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[DST_2]], i64 [[MUL_RESULT25]]
; CHECK-NEXT: [[TMP30:%.*]] = icmp ult ptr [[TMP29]], [[DST_2]]
; CHECK-NEXT: [[TMP31:%.*]] = or i1 [[TMP30]], [[MUL_OVERFLOW26]]
; CHECK-NEXT: [[TMP32:%.*]] = or i1 [[TMP3]], [[TMP7]]
; CHECK-NEXT: [[TMP33:%.*]] = or i1 [[TMP32]], [[TMP11]]
; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP15]]
; CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP19]]
; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP23]]
; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP27]]
; CHECK-NEXT: [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP31]]
; CHECK-NEXT: br i1 [[TMP38]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP39:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP40:%.*]] = load float, ptr [[P_INVAR]], align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP40]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP41:%.*]] = shl i64 [[TMP39]], 2
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[TMP41]]
; CHECK-NEXT: [[TMP42:%.*]] = load float, ptr [[P_INVAR]], align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <2 x float> poison, float [[TMP42]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT27]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT28]], <4 x i32>
; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <4 x float> [[TMP46]], <4 x float> zeroinitializer, <8 x i32>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP47]], <8 x float> poison, <8 x i32>
; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP44]], align 4
; CHECK-NEXT: [[TMP48:%.*]] = load float, ptr [[P_INVAR]], align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT29:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT30:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT29]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[TMP41]]
; CHECK-NEXT: [[BROADCAST_SPLAT36:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLAT30]], <2 x float> zeroinitializer, <4 x i32>
; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLAT36]], <4 x float> zeroinitializer, <8 x i32>
; CHECK-NEXT: [[INTERLEAVED_VEC31:%.*]] = shufflevector <8 x float> [[TMP51]], <8 x float> poison, <8 x i32>
; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC31]], ptr [[TMP49]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP53:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP53]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[P_INVAR]], align 4
; CHECK-NEXT: [[IV_MUL:%.*]] = shl i64 [[IV]], 2
; CHECK-NEXT: [[GEP_DST_19:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[IV_MUL]]
; CHECK-NEXT: store float [[L_0]], ptr [[GEP_DST_19]], align 4
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[P_INVAR]], align 4
; CHECK-NEXT: [[ADD_1:%.*]] = or disjoint i64 [[IV_MUL]], 1
; CHECK-NEXT: [[GEP_DST_119:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_1]]
; CHECK-NEXT: store float [[L_1]], ptr [[GEP_DST_119]], align 4
; CHECK-NEXT: [[ADD_2:%.*]] = or disjoint i64 [[IV_MUL]], 2
; CHECK-NEXT: [[GEP_DST_129:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_2]]
; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_129]], align 4
; CHECK-NEXT: [[ADD_3:%.*]] = or disjoint i64 [[IV_MUL]], 3
; CHECK-NEXT: [[GEP_DST_140:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_3]]
; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_140]], align 4
; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[P_INVAR]], align 4
; CHECK-NEXT: [[GEP_DST_247:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[IV_MUL]]
; CHECK-NEXT: store float [[L_2]], ptr [[GEP_DST_247]], align 4
; CHECK-NEXT: [[GEP_DST_255:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_1]]
; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_255]], align 4
; CHECK-NEXT: [[GEP_DST_265:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_2]]
; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_265]], align 4
; CHECK-NEXT: [[GEP_DST_276:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_3]]
; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_276]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Group 1: four consecutive floats at %dst.1[4 * %iv]; the first two hold
  ; values loaded from %p.invar, the last two are constant 0.0.
  %l.0 = load float, ptr %p.invar, align 4
  %iv.mul = shl i64 %iv, 2
  %gep.dst.19 = getelementptr float, ptr %dst.1, i64 %iv.mul
  store float %l.0, ptr %gep.dst.19, align 4
  %l.1 = load float, ptr %p.invar, align 4
  %add.1 = or disjoint i64 %iv.mul, 1
  %gep.dst.119 = getelementptr float, ptr %dst.1, i64 %add.1
  store float %l.1, ptr %gep.dst.119, align 4
  %add.2 = or disjoint i64 %iv.mul, 2
  %gep.dst.129 = getelementptr float, ptr %dst.1, i64 %add.2
  store float 0.000000e+00, ptr %gep.dst.129, align 4
  %add.3 = or disjoint i64 %iv.mul, 3
  %gep.dst.140 = getelementptr float, ptr %dst.1, i64 %add.3
  store float 0.000000e+00, ptr %gep.dst.140, align 4
  ; Group 2: four consecutive floats at %dst.2[4 * %iv], reusing %iv.mul and
  ; %add.1..%add.3 as indices; only the first element is a loaded value.
  %l.2 = load float, ptr %p.invar, align 4
  %gep.dst.247 = getelementptr float, ptr %dst.2, i64 %iv.mul
  store float %l.2, ptr %gep.dst.247, align 4
  %gep.dst.255 = getelementptr float, ptr %dst.2, i64 %add.1
  store float 0.000000e+00, ptr %gep.dst.255, align 4
  %gep.dst.265 = getelementptr float, ptr %dst.2, i64 %add.2
  store float 0.000000e+00, ptr %gep.dst.265, align 4
  %gep.dst.276 = getelementptr float, ptr %dst.2, i64 %add.3
  store float 0.000000e+00, ptr %gep.dst.276, align 4
  %iv.next = add i64 %iv, 1
  ; Exit test is on the incremented value against 0.
  %ec = icmp eq i64 %iv.next, 0
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; Scalar input: a single-block loop over %iv = 0, 1, ... that exits when %iv
; (not %iv.next) equals %arg1. Addresses are byte offsets on i8 GEPs:
;   %gep.1 = %arg  + 32 * %iv        first group of four floats
;   %gep.2 = %arg  + 32 * %iv + 16   second group of four floats
;   %gep.3 = %arg2 + 16 * %iv        four result floats
; For k in {0, 4, 8, 12} the loop stores
;   (load(%gep.1 + k) + load(%gep.2 + k)) * 0.0
; to %gep.3 + k, so each of the three base GEPs is reused by three further
; constant-offset GEPs.
;
; The assertions expect a vector loop that steps its index by 2 and performs
; one <16 x float> load from %arg and one <8 x float> store to %arg2 per
; iteration. It is reached only after a minimum-iteration compare on
; %arg1 + 1, an overflow check block, and a pointer-overlap check block
; (%arg and %arg2 carry no noalias attribute here).
;
; NOTE(review): the shufflevector assertions for the strided loads and the
; wide store value end at the mask type with no mask constant after it;
; regenerate with utils/update_test_checks.py to confirm the expected masks.
define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr %arg2) #0 {
; CHECK-LABEL: define void @geps_feeding_interleave_groups_with_reuse(
; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 54
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG2]], i64 8
; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[ARG2]], i64 12
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[ARG2]], i64 4
; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT7]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP5]]
; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW8]]
; CHECK-NEXT: [[MUL9:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
; CHECK-NEXT: [[MUL_RESULT10:%.*]] = extractvalue { i64, i1 } [[MUL9]], 0
; CHECK-NEXT: [[MUL_OVERFLOW11:%.*]] = extractvalue { i64, i1 } [[MUL9]], 1
; CHECK-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT10]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[MUL_RESULT10]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[ARG2]]
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW11]]
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP4]], [[TMP8]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[TMP12]]
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[TMP16]]
; CHECK-NEXT: br i1 [[TMP19]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[ARG1]], 4
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 16
; CHECK-NEXT: [[SCEVGEP12:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP22:%.*]] = shl i64 [[ARG1]], 5
; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], 32
; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP23]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP13]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[ARG]], [[SCEVGEP12]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP25:%.*]] = shl i64 [[TMP24]], 5
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP24]], 4
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP27]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP26]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32>
; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32>
; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32>
; CHECK-NEXT: [[STRIDED_VEC16:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32>
; CHECK-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32>
; CHECK-NEXT: [[STRIDED_VEC18:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32>
; CHECK-NEXT: [[STRIDED_VEC19:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32>
; CHECK-NEXT: [[STRIDED_VEC20:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32>
; CHECK-NEXT: [[TMP30:%.*]] = fadd <2 x float> [[STRIDED_VEC]], [[STRIDED_VEC17]]
; CHECK-NEXT: [[TMP31:%.*]] = fmul <2 x float> [[TMP30]], zeroinitializer
; CHECK-NEXT: [[TMP32:%.*]] = fadd <2 x float> [[STRIDED_VEC14]], [[STRIDED_VEC18]]
; CHECK-NEXT: [[TMP33:%.*]] = fmul <2 x float> [[TMP32]], zeroinitializer
; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x float> [[STRIDED_VEC15]], [[STRIDED_VEC19]]
; CHECK-NEXT: [[TMP35:%.*]] = fmul <2 x float> [[TMP34]], zeroinitializer
; CHECK-NEXT: [[TMP36:%.*]] = fadd <2 x float> [[STRIDED_VEC16]], [[STRIDED_VEC20]]
; CHECK-NEXT: [[TMP37:%.*]] = fmul <2 x float> [[TMP36]], zeroinitializer
; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <2 x float> [[TMP31]], <2 x float> [[TMP33]], <4 x i32>
; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <2 x float> [[TMP35]], <2 x float> [[TMP37]], <4 x i32>
; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x float> [[TMP40]], <4 x float> [[TMP41]], <8 x i32>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP42]], <8 x float> poison, <8 x i32>
; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP28]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[SHL_IV_5:%.*]] = shl i64 [[IV]], 5
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[SHL_IV_5]]
; CHECK-NEXT: [[ADD_5:%.*]] = or disjoint i64 [[SHL_IV_5]], 16
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[ADD_5]]
; CHECK-NEXT: [[SHL_IV_4:%.*]] = shl i64 [[IV]], 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[SHL_IV_4]]
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_1]], align 4
; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_2]], align 4
; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[L_1]], [[L_2]]
; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[ADD_1]], 0.000000e+00
; CHECK-NEXT: store float [[MUL_1]], ptr [[GEP_3]], align 4
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 4
; CHECK-NEXT: [[L_3:%.*]] = load float, ptr [[GEP_4]], align 4
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 4
; CHECK-NEXT: [[L_4:%.*]] = load float, ptr [[GEP_5]], align 4
; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[L_3]], [[L_4]]
; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[ADD_2]], 0.000000e+00
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 4
; CHECK-NEXT: store float [[MUL_2]], ptr [[GEP_6]], align 4
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 8
; CHECK-NEXT: [[L_5:%.*]] = load float, ptr [[GEP_7]], align 4
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 8
; CHECK-NEXT: [[L_6:%.*]] = load float, ptr [[GEP_8]], align 4
; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[L_5]], [[L_6]]
; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[ADD_3]], 0.000000e+00
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 8
; CHECK-NEXT: store float [[MUL_3]], ptr [[GEP_9]], align 4
; CHECK-NEXT: [[I27:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 12
; CHECK-NEXT: [[L_7:%.*]] = load float, ptr [[I27]], align 4
; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 12
; CHECK-NEXT: [[L_8:%.*]] = load float, ptr [[GEP_10]], align 4
; CHECK-NEXT: [[ADD_4:%.*]] = fadd float [[L_7]], [[L_8]]
; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[ADD_4]], 0.000000e+00
; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 12
; CHECK-NEXT: store float [[MUL_4]], ptr [[GEP_11]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[ARG1]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Base addresses for this iteration: two 16-byte source groups in %arg
  ; (32 bytes per iteration) and one 16-byte destination group in %arg2.
  %shl.iv.5 = shl i64 %iv, 5
  %gep.1 = getelementptr i8, ptr %arg, i64 %shl.iv.5
  %add.5 = or disjoint i64 %shl.iv.5, 16
  %gep.2 = getelementptr i8, ptr %arg, i64 %add.5
  %shl.iv.4 = shl i64 %iv, 4
  %gep.3 = getelementptr i8, ptr %arg2, i64 %shl.iv.4
  ; Element 0: byte offset 0 from each base.
  %l.1 = load float, ptr %gep.1, align 4
  %l.2 = load float, ptr %gep.2, align 4
  %add.1 = fadd float %l.1, %l.2
  %mul.1 = fmul float %add.1, 0.000000e+00
  store float %mul.1, ptr %gep.3, align 4
  ; Element 1: byte offset 4 from each base.
  %gep.4 = getelementptr i8, ptr %gep.1, i64 4
  %l.3 = load float, ptr %gep.4, align 4
  %gep.5 = getelementptr i8, ptr %gep.2, i64 4
  %l.4 = load float, ptr %gep.5, align 4
  %add.2 = fadd float %l.3, %l.4
  %mul.2 = fmul float %add.2, 0.000000e+00
  %gep.6 = getelementptr i8, ptr %gep.3, i64 4
  store float %mul.2, ptr %gep.6, align 4
  ; Element 2: byte offset 8 from each base.
  %gep.7 = getelementptr i8, ptr %gep.1, i64 8
  %l.5 = load float, ptr %gep.7, align 4
  %gep.8 = getelementptr i8, ptr %gep.2, i64 8
  %l.6 = load float, ptr %gep.8, align 4
  %add.3 = fadd float %l.5, %l.6
  %mul.3 = fmul float %add.3, 0.000000e+00
  %gep.9 = getelementptr i8, ptr %gep.3, i64 8
  store float %mul.3, ptr %gep.9, align 4
  ; Element 3: byte offset 12 from each base.
  %i27 = getelementptr i8, ptr %gep.1, i64 12
  %l.7 = load float, ptr %i27, align 4
  %gep.10 = getelementptr i8, ptr %gep.2, i64 12
  %l.8 = load float, ptr %gep.10, align 4
  %add.4 = fadd float %l.7, %l.8
  %mul.4 = fmul float %add.4, 0.000000e+00
  %gep.11 = getelementptr i8, ptr %gep.3, i64 12
  store float %mul.4, ptr %gep.11, align 4
  %iv.next = add i64 %iv, 1
  ; Exit test compares the pre-increment %iv, so the body runs for
  ; %iv = 0 .. %arg1 inclusive.
  %ec = icmp eq i64 %iv, %arg1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) #1 {
; CHECK-LABEL: define void @geps_feeding_interleave_groups_with_reuse2(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N]], 3
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 52
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[N]], 3
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 24
; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]]
= extractvalue { i64, i1 } [[MUL]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = sub i64 0, [[MUL_RESULT]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]] ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 28 ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 0, [[MUL_RESULT3]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]] ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]] ; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 20 ; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT7]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp ult ptr [[TMP12]], [[SCEVGEP5]] ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW8]] ; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[A]], i64 16 ; CHECK-NEXT: [[MUL10:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT11:%.*]] = extractvalue { i64, i1 } [[MUL10]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW12:%.*]] = extractvalue { i64, i1 } [[MUL10]], 1 ; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT11]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], 
[[SCEVGEP9]] ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW12]] ; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 12 ; CHECK-NEXT: [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1 ; CHECK-NEXT: [[TMP19:%.*]] = sub i64 0, [[MUL_RESULT15]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]] ; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]] ; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 8 ; CHECK-NEXT: [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1 ; CHECK-NEXT: [[TMP23:%.*]] = sub i64 0, [[MUL_RESULT19]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]] ; CHECK-NEXT: [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]] ; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]] ; CHECK-NEXT: [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 4 ; CHECK-NEXT: [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1 ; CHECK-NEXT: [[TMP27:%.*]] = sub i64 0, [[MUL_RESULT23]] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]] ; CHECK-NEXT: [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]] ; CHECK-NEXT: [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: 
[[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1 ; CHECK-NEXT: [[TMP31:%.*]] = sub i64 0, [[MUL_RESULT26]] ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT26]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp ult ptr [[TMP32]], [[A]] ; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]] ; CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP6]], [[TMP10]] ; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP14]] ; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP18]] ; CHECK-NEXT: [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP22]] ; CHECK-NEXT: [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP26]] ; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP30]] ; CHECK-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP34]] ; CHECK-NEXT: br i1 [[TMP41]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP42:%.*]] = lshr i64 [[N]], 3 ; CHECK-NEXT: [[TMP43:%.*]] = shl i64 [[TMP42]], 5 ; CHECK-NEXT: [[TMP44:%.*]] = add i64 [[TMP43]], 32 ; CHECK-NEXT: [[SCEVGEP28:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP44]] ; CHECK-NEXT: [[TMP45:%.*]] = add nuw nsw i64 [[TMP43]], 4 ; CHECK-NEXT: [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP45]] ; CHECK-NEXT: [[TMP46:%.*]] = shl i64 [[TMP42]], 4 ; CHECK-NEXT: [[TMP47:%.*]] = add nuw nsw i64 [[TMP46]], 8 ; CHECK-NEXT: [[SCEVGEP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP47]] ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP29]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: [[BOUND031:%.*]] = icmp ult ptr [[A]], [[SCEVGEP30]] ; CHECK-NEXT: [[BOUND132:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]] ; CHECK-NEXT: [[FOUND_CONFLICT33:%.*]] = and i1 [[BOUND031]], [[BOUND132]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT33]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label 
%[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP48]], i64 4, i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP49]] ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP51:%.*]] = lshr exact i64 [[TMP50]], 1 ; CHECK-NEXT: [[TMP52:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP51]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP52]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP50]] ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]] ; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <8 x i32> [[TMP58]], <8 x i32> [[TMP59]], <16 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <8 x 
i32> [[TMP60]], <8 x i32> zeroinitializer, <16 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <32 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> ; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[SHR_1:%.*]] = lshr exact i64 [[IV]], 1 ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr nusw i32, ptr [[B]], i64 [[SHR_1]] ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store i32 [[L]], ptr [[GEP_A]], align 4 ; CHECK-NEXT: [[IV_NEXT:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT]] ; CHECK-NEXT: store i32 0, ptr [[GEP_A_1]], align 4 ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = or disjoint i64 [[IV]], 2 ; CHECK-NEXT: [[SHR_2:%.*]] = lshr exact i64 [[IV_NEXT_1]], 1 ; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr i32, ptr [[B]], i64 [[SHR_2]] ; CHECK-NEXT: [[TMP65:%.*]] = load i32, ptr [[GEP_B_2]], align 4 ; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_1]] ; CHECK-NEXT: store i32 [[TMP65]], ptr [[GEP_A_2]], align 4 ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = 
or disjoint i64 [[IV]], 3 ; CHECK-NEXT: [[GEP_A_3:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_2]] ; CHECK-NEXT: store i32 0, ptr [[GEP_A_3]], align 4 ; CHECK-NEXT: [[IV_NEXT_3:%.*]] = or disjoint i64 [[IV]], 4 ; CHECK-NEXT: [[GEP_B_4:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[GEP_B_4]], align 4 ; CHECK-NEXT: [[GEP_A_4:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_3]] ; CHECK-NEXT: store i32 [[TMP66]], ptr [[GEP_A_4]], align 4 ; CHECK-NEXT: [[IV_NEXT_4:%.*]] = or disjoint i64 [[IV]], 5 ; CHECK-NEXT: [[GEP_A_5:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_4]] ; CHECK-NEXT: store i32 0, ptr [[GEP_A_5]], align 4 ; CHECK-NEXT: [[IV_NEXT_5:%.*]] = or disjoint i64 [[IV]], 6 ; CHECK-NEXT: [[GEP_A_6:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_5]] ; CHECK-NEXT: store i32 0, ptr [[GEP_A_6]], align 4 ; CHECK-NEXT: [[IV_NEXT_6:%.*]] = or disjoint i64 [[IV]], 7 ; CHECK-NEXT: [[GEP_A_7:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT_6]] ; CHECK-NEXT: store i32 0, ptr [[GEP_A_7]], align 4 ; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next.7, %loop ] %shr.1 = lshr exact i64 %iv, 1 %gep.B = getelementptr nusw i32, ptr %B, i64 %shr.1 %l = load i32, ptr %gep.B, align 4 %gep.A = getelementptr i32, ptr %A, i64 %iv store i32 %l, ptr %gep.A, align 4 %iv.next = or disjoint i64 %iv, 1 %gep.A.1 = getelementptr i32, ptr %A, i64 %iv.next store i32 0, ptr %gep.A.1, align 4 %iv.next.1 = or disjoint i64 %iv, 2 %shr.2 = lshr exact i64 %iv.next.1, 1 %gep.B.2 = getelementptr i32, ptr %B, i64 %shr.2 %1 = load i32, ptr %gep.B.2, align 4 %gep.A.2 = getelementptr i32, ptr %A, i64 %iv.next.1 store i32 %1, ptr %gep.A.2, align 4 %iv.next.2 = or disjoint i64 %iv, 3 
%gep.A.3 = getelementptr i32, ptr %A, i64 %iv.next.2 store i32 0, ptr %gep.A.3, align 4 %iv.next.3 = or disjoint i64 %iv, 4 %gep.B.4 = getelementptr i32, ptr %B, i64 %iv %2 = load i32, ptr %gep.B.4, align 4 %gep.A.4 = getelementptr i32, ptr %A, i64 %iv.next.3 store i32 %2, ptr %gep.A.4, align 4 %iv.next.4 = or disjoint i64 %iv, 5 %gep.A.5 = getelementptr i32, ptr %A, i64 %iv.next.4 store i32 0, ptr %gep.A.5, align 4 %iv.next.5 = or disjoint i64 %iv, 6 %gep.A.6 = getelementptr i32, ptr %A, i64 %iv.next.5 store i32 0, ptr %gep.A.6, align 4 %iv.next.6 = or disjoint i64 %iv, 7 %gep.A.7 = getelementptr i32, ptr %A, i64 %iv.next.6 store i32 0, ptr %gep.A.7, align 4 %iv.next.7 = add nuw nsw i64 %iv, 8 %ec = icmp eq i64 %iv, %N br i1 %ec, label %exit, label %loop exit: ret void }

; Test case for https://github.com/llvm/llvm-project/issues/112922.
; Two-iteration loop (%iv = 0, 1) over an array of { double, i64 } that writes
; field 1 (i64 %iv) BEFORE field 0 (double 0.0). The expected output is a vector
; loop stepping by 2 that bitcasts the i64 induction vector to double and emits
; a single interleaved <4 x double> store.
define void @interleave_store_double_i64(ptr %dst) {
; CHECK-LABEL: define void @interleave_store_double_i64(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1
; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1
  store i64 %iv, ptr %gep.1, align 8
  %gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv
  store double 0.000000e+00, ptr %gep.0, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; Same { double, i64 } layout and trip count as @interleave_store_double_i64,
; but the stores are issued in field order (double field 0 first, then i64
; field 1). The expected output keeps the loop scalar: the CHECK lines contain
; no vector preheader/body.
define void @interleave_store_i64_double(ptr %dst) {
; CHECK-LABEL: define void @interleave_store_i64_double(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1
; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv
  store double 0.000000e+00, ptr %gep.0, align 8
  %gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1
  store i64 %iv, ptr %gep.1, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; TODO: The interleave group should likely have the same cost as @interleave_store_double_i64.
; Struct layout is swapped to { i64, double }; field 1 (double 0.0) is stored
; before field 0 (i64 %iv). The expected output currently keeps the loop scalar.
define void @interleave_store_double_i64_2(ptr %dst) {
; CHECK-LABEL: define void @interleave_store_double_i64_2(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
  store double 0.000000e+00, ptr %gep.1, align 8
  %gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
  store i64 %iv, ptr %gep.0, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; { i64, double } layout with the stores issued in field order (i64 %iv to
; field 0, then double 0.0 to field 1). The expected output is a vector loop
; stepping by 2 with a single interleaved <4 x double> store; the i64 lanes are
; bitcast to double and placed first in the concatenation.
define void @interleave_store_i64_double_2(ptr %dst) {
; CHECK-LABEL: define void @interleave_store_i64_double_2(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[IV]], ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
  store i64 %iv, ptr %gep.0, align 8
  %gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
  store double 0.000000e+00, ptr %gep.1, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

attributes #0 = { "target-features"="+sse4.2" }
attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
;.