This patch adds metadata to disable runtime unrolling to the vectorized loop. If runtime unrolling/interleaving is considered profitable, LV will interleave the loop directly. There should be no need to perform runtime unrolling at a later stage. Note that we already add metadata to disable runtime unrolling to the scalar loop after vectorization. The additional unrolling unnecessarily increases code size and compile time. In addition to that, we have several bug reports of unnecessary runtime unrolling for vectorized loops, e.g. PR40961. Compile-time improvements: NewPM-O3: -1.04%, NewPM-ReleaseThinLTO: -0.59%, NewPM-ReleaseLTO-g: -0.97% (https://llvm-compile-time-tracker.com/compare.php?from=ce1be13a868d0f8afa367975558c1a6175cce33a&to=78bc2e67f22e9e10e61cdb6cdac4bb857d95eb1b&stat=instructions:u). Fixes #40306. Reviewed By: lebedev.ri, nikic. Differential Revision: https://reviews.llvm.org/D115261
245 lines
20 KiB
LLVM
245 lines
20 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -passes='default<O3>' -unroll-runtime -S %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-apple-macosx"
|
|
|
|
@b = global [58 x double] zeroinitializer, align 16
|
|
@c = global [58 x double] zeroinitializer, align 16
|
|
@a = global [58 x double] zeroinitializer, align 16
|
|
|
|
; Test case for #42332, showing excessive unrolling of vector loop.
|
|
; @test_known_trip_count: stores @b[i] + @c[i] into @a[i] for i in [0, 61),
; written as a scalar loop (for.cond / for.body) with a constant bound.
; The expected output below is straight-line code in the entry block:
; 30 <2 x double> load/load/fadd/store groups (elements 0..59), followed by
; one scalar load/load/fadd/store group (element 60), then the return.
; NOTE(review): @a, @b and @c are declared as [58 x double], but the loop bound
; is 61, so elements 58..60 lie past the end of each array; the expected output
; addresses them as (i64 1, i64 0) and (i64 1, i64 2). Confirm whether the
; out-of-bounds trip count is intentional; changing the bound or the array
; size requires regenerating the assertions with utils/update_test_checks.py.
define void @test_known_trip_count() {
|
|
; CHECK-LABEL: @test_known_trip_count(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr @b, align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x double>, ptr @c, align 16
|
|
; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD3]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP0]], ptr @a, align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 2), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 2), align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[WIDE_LOAD_1]], [[WIDE_LOAD3_1]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP1]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 2), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 4), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 4), align 16
|
|
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[WIDE_LOAD_2]], [[WIDE_LOAD3_2]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP2]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 4), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 6), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 6), align 16
|
|
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[WIDE_LOAD_3]], [[WIDE_LOAD3_3]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 6), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 8), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 8), align 16
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD_4]], [[WIDE_LOAD3_4]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 8), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 10), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 10), align 16
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD_5]], [[WIDE_LOAD3_5]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP5]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 10), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 12), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 12), align 16
|
|
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[WIDE_LOAD_6]], [[WIDE_LOAD3_6]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP6]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 12), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 14), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 14), align 16
|
|
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[WIDE_LOAD_7]], [[WIDE_LOAD3_7]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP7]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 14), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 16), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 16), align 16
|
|
; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[WIDE_LOAD_8]], [[WIDE_LOAD3_8]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP8]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 16), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 18), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 18), align 16
|
|
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[WIDE_LOAD_9]], [[WIDE_LOAD3_9]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP9]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 18), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 20), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 20), align 16
|
|
; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[WIDE_LOAD_10]], [[WIDE_LOAD3_10]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP10]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 20), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 22), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 22), align 16
|
|
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[WIDE_LOAD_11]], [[WIDE_LOAD3_11]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP11]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 22), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_12:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 24), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_12:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 24), align 16
|
|
; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[WIDE_LOAD_12]], [[WIDE_LOAD3_12]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP12]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 24), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 26), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 26), align 16
|
|
; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[WIDE_LOAD_13]], [[WIDE_LOAD3_13]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP13]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 26), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 28), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 28), align 16
|
|
; CHECK-NEXT: [[TMP14:%.*]] = fadd <2 x double> [[WIDE_LOAD_14]], [[WIDE_LOAD3_14]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP14]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 28), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_15:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 30), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_15:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 30), align 16
|
|
; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[WIDE_LOAD_15]], [[WIDE_LOAD3_15]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP15]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 30), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_16:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 32), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_16:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 32), align 16
|
|
; CHECK-NEXT: [[TMP16:%.*]] = fadd <2 x double> [[WIDE_LOAD_16]], [[WIDE_LOAD3_16]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP16]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 32), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_17:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 34), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_17:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 34), align 16
|
|
; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[WIDE_LOAD_17]], [[WIDE_LOAD3_17]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP17]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 34), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_18:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 36), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_18:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 36), align 16
|
|
; CHECK-NEXT: [[TMP18:%.*]] = fadd <2 x double> [[WIDE_LOAD_18]], [[WIDE_LOAD3_18]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP18]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 36), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_19:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 38), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_19:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 38), align 16
|
|
; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x double> [[WIDE_LOAD_19]], [[WIDE_LOAD3_19]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP19]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 38), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_20:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 40), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_20:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 40), align 16
|
|
; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[WIDE_LOAD_20]], [[WIDE_LOAD3_20]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP20]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 40), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_21:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 42), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_21:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 42), align 16
|
|
; CHECK-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[WIDE_LOAD_21]], [[WIDE_LOAD3_21]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP21]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 42), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_22:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 44), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_22:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 44), align 16
|
|
; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[WIDE_LOAD_22]], [[WIDE_LOAD3_22]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP22]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 44), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_23:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 46), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_23:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 46), align 16
|
|
; CHECK-NEXT: [[TMP23:%.*]] = fadd <2 x double> [[WIDE_LOAD_23]], [[WIDE_LOAD3_23]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP23]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 46), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_24:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 48), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_24:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 48), align 16
|
|
; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[WIDE_LOAD_24]], [[WIDE_LOAD3_24]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP24]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 48), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_25:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 50), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_25:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 50), align 16
|
|
; CHECK-NEXT: [[TMP25:%.*]] = fadd <2 x double> [[WIDE_LOAD_25]], [[WIDE_LOAD3_25]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP25]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 50), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_26:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 52), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_26:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 52), align 16
|
|
; CHECK-NEXT: [[TMP26:%.*]] = fadd <2 x double> [[WIDE_LOAD_26]], [[WIDE_LOAD3_26]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP26]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 52), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_27:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 54), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_27:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 54), align 16
|
|
; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[WIDE_LOAD_27]], [[WIDE_LOAD3_27]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP27]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 54), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_28:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 56), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_28:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 56), align 16
|
|
; CHECK-NEXT: [[TMP28:%.*]] = fadd <2 x double> [[WIDE_LOAD_28]], [[WIDE_LOAD3_28]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP28]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 56), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_29:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 1, i64 0), align 16
|
|
; CHECK-NEXT: [[WIDE_LOAD3_29:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 1, i64 0), align 16
|
|
; CHECK-NEXT: [[TMP29:%.*]] = fadd <2 x double> [[WIDE_LOAD_29]], [[WIDE_LOAD3_29]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP29]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 1, i64 0), align 16
|
|
; CHECK-NEXT: [[TMP30:%.*]] = load double, ptr getelementptr inbounds ([58 x double], ptr @b, i64 1, i64 2), align 16
|
|
; CHECK-NEXT: [[TMP31:%.*]] = load double, ptr getelementptr inbounds ([58 x double], ptr @c, i64 1, i64 2), align 16
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP30]], [[TMP31]]
|
|
; CHECK-NEXT: store double [[ADD]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 1, i64 2), align 16
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.cond
|
|
|
|
for.cond:
|
|
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
; Constant loop bound: 61 iterations, while the arrays hold 58 elements each.
%cmp = icmp slt i32 %i.0, 61
|
|
br i1 %cmp, label %for.body, label %exit
|
|
|
|
for.body:
|
|
; The i32 induction variable is sign-extended separately for each of the
; three array accesses (%idxprom, %idxprom1, %idxprom3).
%idxprom = sext i32 %i.0 to i64
|
|
%arrayidx = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 %idxprom
|
|
%0 = load double, ptr %arrayidx, align 8
|
|
%idxprom1 = sext i32 %i.0 to i64
|
|
%arrayidx2 = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 %idxprom1
|
|
%1 = load double, ptr %arrayidx2, align 8
|
|
%add = fadd double %0, %1
|
|
%idxprom3 = sext i32 %i.0 to i64
|
|
%arrayidx4 = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 %idxprom3
|
|
store double %add, ptr %arrayidx4, align 8
|
|
%inc = add nsw i32 %i.0, 1
|
|
br label %for.cond
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
|
|
; @test_runtime_trip_count: same element-wise loop as above (@a[i] = @b[i] +
; @c[i]), but the bound is the i32 argument %N instead of a constant.
; Expected output shape:
;   - entry skips everything unless %N > 0;
;   - for.body.preheader sends %N < 4 straight to the scalar loop;
;   - vector.body handles 4 elements per iteration as two <2 x double>
;     load/load/fadd/store groups and advances the index by 4;
;   - middle.block exits if the vector loop covered the whole trip count,
;     otherwise for.body finishes the remaining elements one at a time.
; Both loop back-edges are expected to carry !llvm.loop metadata; neither loop
; body appears in further unrolled form in the expected output.
; NOTE(review): nothing in this function bounds %N by the 58-element size of
; @a/@b/@c; presumably callers are assumed to pass N <= 58 - confirm.
define void @test_runtime_trip_count(i32 %N) {
|
|
; CHECK-LABEL: @test_runtime_trip_count(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER7:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 2
|
|
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x double>, ptr [[TMP1]], align 16
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP2]], align 16
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 2
|
|
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP3]], align 16
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD5]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD4]], [[WIDE_LOAD6]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[TMP6]], align 16
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 2
|
|
; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[TMP7]], align 16
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT]], label [[FOR_BODY_PREHEADER7]]
|
|
; CHECK: for.body.preheader7:
|
|
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER7]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[ARRAYIDX]], align 8
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP9]], [[TMP10]]
|
|
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX4]], align 8
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.cond
|
|
|
|
for.cond:
|
|
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
; Signed comparison against the runtime bound %N.
%cmp = icmp slt i32 %i.0, %N
|
|
br i1 %cmp, label %for.body, label %exit
|
|
|
|
for.body:
|
|
; The i32 induction variable is sign-extended separately for each of the
; three array accesses (%idxprom, %idxprom1, %idxprom3).
%idxprom = sext i32 %i.0 to i64
|
|
%arrayidx = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 %idxprom
|
|
%0 = load double, ptr %arrayidx, align 8
|
|
%idxprom1 = sext i32 %i.0 to i64
|
|
%arrayidx2 = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 %idxprom1
|
|
%1 = load double, ptr %arrayidx2, align 8
|
|
%add = fadd double %0, %1
|
|
%idxprom3 = sext i32 %i.0 to i64
|
|
%arrayidx4 = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 %idxprom3
|
|
store double %add, ptr %arrayidx4, align 8
|
|
%inc = add nsw i32 %i.0, 1
|
|
br label %for.cond
|
|
|
|
exit:
|
|
ret void
|
|
}
|