Florian Hahn 68469a80cb
[LV] Disable runtime unrolling for vectorized loops.
This patch adds metadata to disable runtime unrolling to the vectorized
loop. If runtime unrolling/interleaving is considered profitable, LV
will interleave the loop directly. There should be no need to perform
runtime unrolling at a later stage.

Note that we already add metadata to disable runtime unrolling to the
scalar loop after vectorization.

The additional unrolling unnecessarily increases code size and compile
time. In addition to that we have several bug reports of unncessary
runtime unrolling for vectorized loops, e.g. PR40961

Compile-time improvements:

  NewPM-O3: -1.04%
  NewPM-ReleaseThinLTO: -0.59%
  NewPM-ReleaseLTO-g: -0.97%

https://llvm-compile-time-tracker.com/compare.php?from=ce1be13a868d0f8afa367975558c1a6175cce33a&to=78bc2e67f22e9e10e61cdb6cdac4bb857d95eb1b&stat=instructions:u

Fixes #40306.

Reviewed By: lebedev.ri, nikic

Differential Revision: https://reviews.llvm.org/D115261
2023-01-06 10:56:17 +00:00

245 lines
20 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='default<O3>' -unroll-runtime -S %s | FileCheck %s
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx"
@b = global [58 x double] zeroinitializer, align 16
@c = global [58 x double] zeroinitializer, align 16
@a = global [58 x double] zeroinitializer, align 16
; Test case for #42332, showing excessive unrolling of vector loop.
define void @test_known_trip_count() {
; CHECK-LABEL: @test_known_trip_count(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr @b, align 16
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x double>, ptr @c, align 16
; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: store <2 x double> [[TMP0]], ptr @a, align 16
; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 2), align 16
; CHECK-NEXT: [[WIDE_LOAD3_1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 2), align 16
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[WIDE_LOAD_1]], [[WIDE_LOAD3_1]]
; CHECK-NEXT: store <2 x double> [[TMP1]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 2), align 16
; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 4), align 16
; CHECK-NEXT: [[WIDE_LOAD3_2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 4), align 16
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[WIDE_LOAD_2]], [[WIDE_LOAD3_2]]
; CHECK-NEXT: store <2 x double> [[TMP2]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 4), align 16
; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 6), align 16
; CHECK-NEXT: [[WIDE_LOAD3_3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 6), align 16
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[WIDE_LOAD_3]], [[WIDE_LOAD3_3]]
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 6), align 16
; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 8), align 16
; CHECK-NEXT: [[WIDE_LOAD3_4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 8), align 16
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD_4]], [[WIDE_LOAD3_4]]
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 8), align 16
; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 10), align 16
; CHECK-NEXT: [[WIDE_LOAD3_5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 10), align 16
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD_5]], [[WIDE_LOAD3_5]]
; CHECK-NEXT: store <2 x double> [[TMP5]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 10), align 16
; CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 12), align 16
; CHECK-NEXT: [[WIDE_LOAD3_6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 12), align 16
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[WIDE_LOAD_6]], [[WIDE_LOAD3_6]]
; CHECK-NEXT: store <2 x double> [[TMP6]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 12), align 16
; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 14), align 16
; CHECK-NEXT: [[WIDE_LOAD3_7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 14), align 16
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[WIDE_LOAD_7]], [[WIDE_LOAD3_7]]
; CHECK-NEXT: store <2 x double> [[TMP7]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 14), align 16
; CHECK-NEXT: [[WIDE_LOAD_8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 16), align 16
; CHECK-NEXT: [[WIDE_LOAD3_8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 16), align 16
; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[WIDE_LOAD_8]], [[WIDE_LOAD3_8]]
; CHECK-NEXT: store <2 x double> [[TMP8]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 16), align 16
; CHECK-NEXT: [[WIDE_LOAD_9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 18), align 16
; CHECK-NEXT: [[WIDE_LOAD3_9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 18), align 16
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[WIDE_LOAD_9]], [[WIDE_LOAD3_9]]
; CHECK-NEXT: store <2 x double> [[TMP9]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 18), align 16
; CHECK-NEXT: [[WIDE_LOAD_10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 20), align 16
; CHECK-NEXT: [[WIDE_LOAD3_10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 20), align 16
; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[WIDE_LOAD_10]], [[WIDE_LOAD3_10]]
; CHECK-NEXT: store <2 x double> [[TMP10]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 20), align 16
; CHECK-NEXT: [[WIDE_LOAD_11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 22), align 16
; CHECK-NEXT: [[WIDE_LOAD3_11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 22), align 16
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[WIDE_LOAD_11]], [[WIDE_LOAD3_11]]
; CHECK-NEXT: store <2 x double> [[TMP11]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 22), align 16
; CHECK-NEXT: [[WIDE_LOAD_12:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 24), align 16
; CHECK-NEXT: [[WIDE_LOAD3_12:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 24), align 16
; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[WIDE_LOAD_12]], [[WIDE_LOAD3_12]]
; CHECK-NEXT: store <2 x double> [[TMP12]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 24), align 16
; CHECK-NEXT: [[WIDE_LOAD_13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 26), align 16
; CHECK-NEXT: [[WIDE_LOAD3_13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 26), align 16
; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[WIDE_LOAD_13]], [[WIDE_LOAD3_13]]
; CHECK-NEXT: store <2 x double> [[TMP13]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 26), align 16
; CHECK-NEXT: [[WIDE_LOAD_14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 28), align 16
; CHECK-NEXT: [[WIDE_LOAD3_14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 28), align 16
; CHECK-NEXT: [[TMP14:%.*]] = fadd <2 x double> [[WIDE_LOAD_14]], [[WIDE_LOAD3_14]]
; CHECK-NEXT: store <2 x double> [[TMP14]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 28), align 16
; CHECK-NEXT: [[WIDE_LOAD_15:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 30), align 16
; CHECK-NEXT: [[WIDE_LOAD3_15:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 30), align 16
; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[WIDE_LOAD_15]], [[WIDE_LOAD3_15]]
; CHECK-NEXT: store <2 x double> [[TMP15]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 30), align 16
; CHECK-NEXT: [[WIDE_LOAD_16:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 32), align 16
; CHECK-NEXT: [[WIDE_LOAD3_16:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 32), align 16
; CHECK-NEXT: [[TMP16:%.*]] = fadd <2 x double> [[WIDE_LOAD_16]], [[WIDE_LOAD3_16]]
; CHECK-NEXT: store <2 x double> [[TMP16]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 32), align 16
; CHECK-NEXT: [[WIDE_LOAD_17:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 34), align 16
; CHECK-NEXT: [[WIDE_LOAD3_17:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 34), align 16
; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[WIDE_LOAD_17]], [[WIDE_LOAD3_17]]
; CHECK-NEXT: store <2 x double> [[TMP17]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 34), align 16
; CHECK-NEXT: [[WIDE_LOAD_18:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 36), align 16
; CHECK-NEXT: [[WIDE_LOAD3_18:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 36), align 16
; CHECK-NEXT: [[TMP18:%.*]] = fadd <2 x double> [[WIDE_LOAD_18]], [[WIDE_LOAD3_18]]
; CHECK-NEXT: store <2 x double> [[TMP18]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 36), align 16
; CHECK-NEXT: [[WIDE_LOAD_19:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 38), align 16
; CHECK-NEXT: [[WIDE_LOAD3_19:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 38), align 16
; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x double> [[WIDE_LOAD_19]], [[WIDE_LOAD3_19]]
; CHECK-NEXT: store <2 x double> [[TMP19]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 38), align 16
; CHECK-NEXT: [[WIDE_LOAD_20:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 40), align 16
; CHECK-NEXT: [[WIDE_LOAD3_20:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 40), align 16
; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[WIDE_LOAD_20]], [[WIDE_LOAD3_20]]
; CHECK-NEXT: store <2 x double> [[TMP20]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 40), align 16
; CHECK-NEXT: [[WIDE_LOAD_21:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 42), align 16
; CHECK-NEXT: [[WIDE_LOAD3_21:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 42), align 16
; CHECK-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[WIDE_LOAD_21]], [[WIDE_LOAD3_21]]
; CHECK-NEXT: store <2 x double> [[TMP21]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 42), align 16
; CHECK-NEXT: [[WIDE_LOAD_22:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 44), align 16
; CHECK-NEXT: [[WIDE_LOAD3_22:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 44), align 16
; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[WIDE_LOAD_22]], [[WIDE_LOAD3_22]]
; CHECK-NEXT: store <2 x double> [[TMP22]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 44), align 16
; CHECK-NEXT: [[WIDE_LOAD_23:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 46), align 16
; CHECK-NEXT: [[WIDE_LOAD3_23:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 46), align 16
; CHECK-NEXT: [[TMP23:%.*]] = fadd <2 x double> [[WIDE_LOAD_23]], [[WIDE_LOAD3_23]]
; CHECK-NEXT: store <2 x double> [[TMP23]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 46), align 16
; CHECK-NEXT: [[WIDE_LOAD_24:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 48), align 16
; CHECK-NEXT: [[WIDE_LOAD3_24:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 48), align 16
; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[WIDE_LOAD_24]], [[WIDE_LOAD3_24]]
; CHECK-NEXT: store <2 x double> [[TMP24]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 48), align 16
; CHECK-NEXT: [[WIDE_LOAD_25:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 50), align 16
; CHECK-NEXT: [[WIDE_LOAD3_25:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 50), align 16
; CHECK-NEXT: [[TMP25:%.*]] = fadd <2 x double> [[WIDE_LOAD_25]], [[WIDE_LOAD3_25]]
; CHECK-NEXT: store <2 x double> [[TMP25]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 50), align 16
; CHECK-NEXT: [[WIDE_LOAD_26:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 52), align 16
; CHECK-NEXT: [[WIDE_LOAD3_26:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 52), align 16
; CHECK-NEXT: [[TMP26:%.*]] = fadd <2 x double> [[WIDE_LOAD_26]], [[WIDE_LOAD3_26]]
; CHECK-NEXT: store <2 x double> [[TMP26]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 52), align 16
; CHECK-NEXT: [[WIDE_LOAD_27:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 54), align 16
; CHECK-NEXT: [[WIDE_LOAD3_27:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 54), align 16
; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[WIDE_LOAD_27]], [[WIDE_LOAD3_27]]
; CHECK-NEXT: store <2 x double> [[TMP27]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 54), align 16
; CHECK-NEXT: [[WIDE_LOAD_28:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 56), align 16
; CHECK-NEXT: [[WIDE_LOAD3_28:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 56), align 16
; CHECK-NEXT: [[TMP28:%.*]] = fadd <2 x double> [[WIDE_LOAD_28]], [[WIDE_LOAD3_28]]
; CHECK-NEXT: store <2 x double> [[TMP28]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 56), align 16
; CHECK-NEXT: [[WIDE_LOAD_29:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 1, i64 0), align 16
; CHECK-NEXT: [[WIDE_LOAD3_29:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 1, i64 0), align 16
; CHECK-NEXT: [[TMP29:%.*]] = fadd <2 x double> [[WIDE_LOAD_29]], [[WIDE_LOAD3_29]]
; CHECK-NEXT: store <2 x double> [[TMP29]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 1, i64 0), align 16
; CHECK-NEXT: [[TMP30:%.*]] = load double, ptr getelementptr inbounds ([58 x double], ptr @b, i64 1, i64 2), align 16
; CHECK-NEXT: [[TMP31:%.*]] = load double, ptr getelementptr inbounds ([58 x double], ptr @c, i64 1, i64 2), align 16
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP30]], [[TMP31]]
; CHECK-NEXT: store double [[ADD]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 1, i64 2), align 16
; CHECK-NEXT: ret void
;
entry:
br label %for.cond
for.cond:
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i32 %i.0, 61
br i1 %cmp, label %for.body, label %exit
for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 %idxprom
%0 = load double, ptr %arrayidx, align 8
%idxprom1 = sext i32 %i.0 to i64
%arrayidx2 = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 %idxprom1
%1 = load double, ptr %arrayidx2, align 8
%add = fadd double %0, %1
%idxprom3 = sext i32 %i.0 to i64
%arrayidx4 = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 %idxprom3
store double %add, ptr %arrayidx4, align 8
%inc = add nsw i32 %i.0, 1
br label %for.cond
exit:
ret void
}
define void @test_runtime_trip_count(i32 %N) {
; CHECK-LABEL: @test_runtime_trip_count(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER7:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 2
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x double>, ptr [[TMP1]], align 16
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP2]], align 16
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 2
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP3]], align 16
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD5]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD4]], [[WIDE_LOAD6]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX]]
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[TMP6]], align 16
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 2
; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[TMP7]], align 16
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT]], label [[FOR_BODY_PREHEADER7]]
; CHECK: for.body.preheader7:
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER7]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX4]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %for.cond
for.cond:
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i32 %i.0, %N
br i1 %cmp, label %for.body, label %exit
for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 %idxprom
%0 = load double, ptr %arrayidx, align 8
%idxprom1 = sext i32 %i.0 to i64
%arrayidx2 = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 %idxprom1
%1 = load double, ptr %arrayidx2, align 8
%add = fadd double %0, %1
%idxprom3 = sext i32 %i.0 to i64
%arrayidx4 = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 %idxprom3
store double %add, ptr %arrayidx4, align 8
%inc = add nsw i32 %i.0, 1
br label %for.cond
exit:
ret void
}