From e93829e8079f04b0d1cfe7c5a3272004dadb6bfb Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 12 Feb 2026 10:43:53 -0800 Subject: [PATCH] [SLP]Fix crash with deleted non-copyable node in scheduling copyables If the copyables are parts of deleted nodes, the actual tree needs to be checked to correctly handle the scheduling of copyables. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 20 +++-- .../AArch64/externally-used-copyables.ll | 38 +++++---- .../copyables-with-parent-scalars-in-phis.ll | 7 +- .../deleted-node-with-copyable-operands.ll | 83 +++++++++++++++++++ .../SLPVectorizer/X86/external-bin-op-user.ll | 82 ++++++++++++++++++ 5 files changed, 200 insertions(+), 30 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/deleted-node-with-copyable-operands.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/external-bin-op-user.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 324c5729c3f5..4197c8f64d8e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5851,8 +5851,8 @@ private: SmallVector> PseudoBundles; SmallVector Bundles; Instruction *In = SD->getInst(); - if (R.isVectorized(In)) { - ArrayRef Entries = R.getTreeEntries(In); + ArrayRef Entries = R.getTreeEntries(In); + if (!Entries.empty()) { for (TreeEntry *TE : Entries) { if (!isa(In) && In->getNumOperands() != TE->getNumOperands()) @@ -22351,12 +22351,17 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, // analysis, leading to a crash. // Non-scheduled nodes may not have related ScheduleData model, which may lead // to a skipped dep analysis.
- if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() && - EI.UserTE->doesNotNeedToSchedule() && + bool HasCopyables = S.areInstructionsWithCopyableElements(); + bool DoesNotRequireScheduling = + (!HasCopyables && doesNotNeedToSchedule(VL)) || + all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }); + if (!DoesNotRequireScheduling && S.areInstructionsWithCopyableElements() && + EI && EI.UserTE->hasState() && EI.UserTE->doesNotNeedToSchedule() && EI.UserTE->getOpcode() != Instruction::PHI && + EI.UserTE->getOpcode() != Instruction::InsertElement && any_of(EI.UserTE->Scalars, [](Value *V) { auto *I = dyn_cast(V); - if (!I || I->hasOneUser()) + if (!I) return false; for (User *U : I->users()) { auto *UI = cast(U); @@ -22428,13 +22433,10 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, return std::nullopt; } } - bool HasCopyables = S.areInstructionsWithCopyableElements(); - if (((!HasCopyables && doesNotNeedToSchedule(VL)) || - all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) { + if (DoesNotRequireScheduling) { // If all operands were replaced by copyables, the operands of this node // might be not, so need to recalculate dependencies for schedule data, // replaced by copyable schedule data. 
- SmallVector ControlDependentMembers; for (Value *V : VL) { auto *I = dyn_cast(V); if (!I || (HasCopyables && S.isCopyableElement(V))) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll index fd8659a2d34f..38705032ce1c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll @@ -10,32 +10,36 @@ define void @test(i64 %0, i64 %1, i64 %2, i64 %3, i64 %.sroa.3341.0.copyload, i6 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> , <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = mul <4 x i64> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP0]], 11 +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> , <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP15]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i64> [[TMP21]], <4 x i64> [[TMP16]], <4 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP22]], <4 x i32> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i64> [[TMP32]], <4 x i64> , <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = sub <4 x i64> [[TMP22]], [[TMP33]] ; CHECK-NEXT: [[TMP18:%.*]] = shl i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[TMP0]] -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> , i64 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP14]], i32 2 -; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> , <4 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i64> [[TMP22]], <4 x i64> , <4 x i32> +; CHECK-NEXT: [[TMP81:%.*]] = insertelement <4 x i64> poison, i64 [[TMP19]], i32 0 +; CHECK-NEXT: [[TMP82:%.*]] = shufflevector <4 x i64> 
[[TMP81]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP82]], <4 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> , <4 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = sub <4 x i64> [[TMP17]], [[TMP37]] -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> [[TMP20]], i64 [[TMP19]], i32 1 -; CHECK-NEXT: [[TMP22:%.*]] = or <2 x i64> [[TMP21]], splat (i64 1) -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = or <4 x i64> [[TMP17]], [[TMP37]] +; CHECK-NEXT: [[TMP83:%.*]] = shufflevector <4 x i64> [[TMP38]], <4 x i64> [[TMP29]], <4 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x i64> [[TMP24]], <8 x i64> , <8 x i32> -; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <2 x i32> ; CHECK-NEXT: [[TMP80:%.*]] = insertelement <64 x i64> , i64 [[TMP1]], i32 11 ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> poison, <28 x i32> -; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <14 x i32> -; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <14 x i32> -; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i64> [[TMP38]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <2 x i64> [[TMP22]], <2 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i64> [[TMP31]], <4 x i64> [[TMP32]], <4 x 
i32> +; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <14 x i32> ; CHECK-NEXT: br label %[[DOTLR_PH1977_US:.*]] ; CHECK: [[_LR_PH1977_US:.*:]] ; CHECK-NEXT: [[INDVAR37888:%.*]] = phi i64 [ 0, [[DOTLR_PH_PREHEADER:%.*]] ], [ 1, %[[DOTLR_PH1977_US]] ] -; CHECK-NEXT: [[TMP34:%.*]] = mul <4 x i64> [[TMP33]], [[TMP10]] +; CHECK-NEXT: [[TMP34:%.*]] = mul <4 x i64> [[TMP83]], [[TMP31]] ; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i64> [[TMP34]], <4 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP36:%.*]] = mul <4 x i64> [[TMP38]], [[TMP10]] +; CHECK-NEXT: [[TMP36:%.*]] = mul <4 x i64> [[TMP20]], [[TMP31]] ; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP0]], [[TMP0]] ; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP0]], 1 @@ -63,7 +67,7 @@ define void @test(i64 %0, i64 %1, i64 %2, i64 %3, i64 %.sroa.3341.0.copyload, i6 ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <64 x i64> [[TMP60]], <64 x i64> [[TMP50]], <28 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <28 x i64> [[TMP61]], <28 x i64> poison, <64 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <28 x i64> [[TMP61]], <28 x i64> poison, <14 x i32> -; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <14 x i64> [[TMP81]], <14 x i64> [[TMP63]], <14 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <14 x i64> [[TMP84]], <14 x i64> [[TMP63]], <14 x i32> ; CHECK-NEXT: [[TMP65:%.*]] = insertelement <14 x i64> [[TMP64]], i64 [[DOTNEG1]], i32 3 ; CHECK-NEXT: [[TMP66:%.*]] = insertelement <14 x i64> [[TMP65]], i64 [[TMP2]], i32 4 ; CHECK-NEXT: [[TMP67:%.*]] = insertelement <14 x i64> [[TMP66]], i64 [[TMP3]], i32 5 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll index 88f4520aa736..347dff468bdb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll @@ -12,11 +12,10 @@ define i32 @test() { ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP9:%.*]], %[[BB3]] ] ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr null, align 8 ; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[LOAD]] to i32 -; CHECK-NEXT: [[TRUNC6:%.*]] = trunc i64 0 to i32 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[TRUNC6]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> , i32 [[TRUNC]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP10]], +; CHECK-NEXT: [[AND:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 ; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 0, [[AND]] -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[AND]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TRUNC]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = ashr <2 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP6]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/deleted-node-with-copyable-operands.ll b/llvm/test/Transforms/SLPVectorizer/X86/deleted-node-with-copyable-operands.ll new file mode 100644 index 000000000000..958d618a662e --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/deleted-node-with-copyable-operands.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define void @test(ptr %0, ptr %1, i1 %cond, double %2) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], i1 [[COND:%.*]], double [[TMP2:%.*]]) { +; CHECK-NEXT: [[ITER_CHECK:.*]]: +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x 
i32> zeroinitializer +; CHECK-NEXT: br [[DOTLR_PH383_US_US_US_US:label %.*]] +; CHECK: [[_LR_PH383_US_US_US_US:.*:]] +; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x double> [ [[TMP22:%.*]], %[[DOT_CRIT_EDGE384_US_US_US_US:.*]] ], [ zeroinitializer, %[[ITER_CHECK]] ] +; CHECK-NEXT: br i1 false, label %[[DOTLR_PH383_US_US_US_US___CRIT_EDGE384_US_US_US_US_CRIT_EDGE:.*]], label %[[BB6:.*]] +; CHECK: [[_LR_PH383_US_US_US_US___CRIT_EDGE384_US_US_US_US_CRIT_EDGE:.*:]] +; CHECK-NEXT: br label %[[DOT_CRIT_EDGE384_US_US_US_US]] +; CHECK: [[BB6]]: +; CHECK-NEXT: br i1 [[COND]], label %[[BB16:.*]], label %[[BB7:.*]] +; CHECK: [[BB7]]: +; CHECK-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], +; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = fadd double [[TMP2]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP13]], i32 0 +; CHECK-NEXT: br label %[[BB16]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[DOT0304_US_US_US_US:%.*]] = phi double [ [[TMP14]], %[[BB7]] ], [ 0.000000e+00, %[[BB6]] ] +; CHECK-NEXT: [[DOT0301_US_US_US_US:%.*]] = phi double [ [[TMP15]], %[[BB7]] ], [ 0.000000e+00, %[[BB6]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x double> [ [[TMP13]], %[[BB7]] ], [ [[TMP4]], %[[BB6]] ] +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x double> [[TMP18]], double [[DOT0301_US_US_US_US]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x double> [[TMP19]], double [[DOT0304_US_US_US_US]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = fadd <4 x double> [[TMP20]], [[TMP5]] +; CHECK-NEXT: br 
label %[[DOT_CRIT_EDGE384_US_US_US_US]] +; CHECK: [[__CRIT_EDGE384_US_US_US_US:.*:]] +; CHECK-NEXT: [[TMP22]] = phi <4 x double> [ [[TMP21]], %[[BB16]] ], [ zeroinitializer, %[[DOTLR_PH383_US_US_US_US___CRIT_EDGE384_US_US_US_US_CRIT_EDGE]] ] +; CHECK-NEXT: br [[DOTLR_PH383_US_US_US_US]] +; +iter.check: + br label %.lr.ph383.us.us.us.us + +.lr.ph383.us.us.us.us: ; preds = %._crit_edge384.us.us.us.us, %iter.check + %.2277404.us.us.us.us = phi double [ %.4279.us.us.us.us, %._crit_edge384.us.us.us.us ], [ 0.000000e+00, %iter.check ] + %.2287402.us.us.us.us = phi double [ %.4289.us.us.us.us, %._crit_edge384.us.us.us.us ], [ 0.000000e+00, %iter.check ] + %.2292401.us.us.us.us = phi double [ %.4294.us.us.us.us, %._crit_edge384.us.us.us.us ], [ 0.000000e+00, %iter.check ] + %.2297400.us.us.us.us = phi double [ %.4299.us.us.us.us, %._crit_edge384.us.us.us.us ], [ 0.000000e+00, %iter.check ] + br i1 false, label %.lr.ph383.us.us.us.us.._crit_edge384.us.us.us.us_crit_edge, label %3 + +.lr.ph383.us.us.us.us.._crit_edge384.us.us.us.us_crit_edge: ; preds = %.lr.ph383.us.us.us.us + br label %._crit_edge384.us.us.us.us + +3: ; preds = %.lr.ph383.us.us.us.us + br i1 %cond, label %11, label %4 + +4: ; preds = %3 + %5 = load double, ptr %0, align 8 + %6 = fmul double %5, 0.000000e+00 + %7 = fadd double %6, 0.000000e+00 + %8 = fadd double %5, 0.000000e+00 + %9 = load double, ptr %1, align 8 + %10 = fadd double %2, %9 + br label %11 + +11: ; preds = %4, %3 + %.0311.us.us.us.us = phi double [ %7, %4 ], [ %2, %3 ] + %.0304.us.us.us.us = phi double [ %10, %4 ], [ 0.000000e+00, %3 ] + %.0301.us.us.us.us = phi double [ %8, %4 ], [ 0.000000e+00, %3 ] + %.0257.us.us.us.us = phi double [ %8, %4 ], [ %2, %3 ] + %12 = fadd double %.0311.us.us.us.us, %.2277404.us.us.us.us + %13 = fadd double %.0257.us.us.us.us, %.2297400.us.us.us.us + %14 = fadd double %.0304.us.us.us.us, %.2287402.us.us.us.us + %15 = fadd double %.0301.us.us.us.us, %.2292401.us.us.us.us + br label %._crit_edge384.us.us.us.us + 
+._crit_edge384.us.us.us.us: ; preds = %11, %.lr.ph383.us.us.us.us.._crit_edge384.us.us.us.us_crit_edge + %.4299.us.us.us.us = phi double [ %13, %11 ], [ 0.000000e+00, %.lr.ph383.us.us.us.us.._crit_edge384.us.us.us.us_crit_edge ] + %.4294.us.us.us.us = phi double [ %15, %11 ], [ 0.000000e+00, %.lr.ph383.us.us.us.us.._crit_edge384.us.us.us.us_crit_edge ] + %.4289.us.us.us.us = phi double [ %14, %11 ], [ 0.000000e+00, %.lr.ph383.us.us.us.us.._crit_edge384.us.us.us.us_crit_edge ] + %.4279.us.us.us.us = phi double [ %12, %11 ], [ 0.000000e+00, %.lr.ph383.us.us.us.us.._crit_edge384.us.us.us.us_crit_edge ] + br label %.lr.ph383.us.us.us.us +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-bin-op-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-bin-op-user.ll new file mode 100644 index 000000000000..b79c07daa72f --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-bin-op-user.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver4 < %s | FileCheck %s + +define void @test(ptr %0, ptr %1, double %2) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], double [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ITER_CHECK:.*]]: +; CHECK-NEXT: br label %[[DOTLR_PH383_US_US_US_US:.*]] +; CHECK: [[_LR_PH383_US_US_US_US:.*:]] +; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x double> [ [[TMP26:%.*]], %[[DOT_CRIT_EDGE384_US_US_US_US:.*]] ], [ zeroinitializer, %[[ITER_CHECK]] ] +; CHECK-NEXT: br i1 false, label %[[DOT_CRIT_EDGE384_US_US_US_US]], label %[[BB4:.*]] +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fmul double [[TMP5]], 0.000000e+00 +; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 +; CHECK-NEXT: 
[[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> , double [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd double [[TMP2]], 0.000000e+00 +; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> [[TMP14]], double [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = fadd <2 x double> [[TMP15]], [[TMP17]] +; CHECK-NEXT: br label %[[BB19:.*]] +; CHECK: [[BB19]]: +; CHECK-NEXT: br label %[[BB20:.*]] +; CHECK: [[BB20]]: +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x double> [[TMP18]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x double> [[TMP21]], <4 x double> [[TMP22]], <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = fmul <4 x double> [[TMP23]], +; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], [[TMP3]] +; CHECK-NEXT: br label %[[DOT_CRIT_EDGE384_US_US_US_US]] +; CHECK: [[__CRIT_EDGE384_US_US_US_US:.*:]] +; CHECK-NEXT: [[TMP26]] = phi <4 x double> [ [[TMP25]], %[[BB20]] ], [ zeroinitializer, %[[DOTLR_PH383_US_US_US_US]] ] +; CHECK-NEXT: br label %[[DOTLR_PH383_US_US_US_US]] +; +iter.check: + br label %.lr.ph383.us.us.us.us + +.lr.ph383.us.us.us.us: ; preds = %._crit_edge384.us.us.us.us, %iter.check + %.2277404.us.us.us.us = phi double [ %.4279.us.us.us.us, %._crit_edge384.us.us.us.us ], [ 0.000000e+00, %iter.check ] + %.2287402.us.us.us.us = phi double [ %.4289.us.us.us.us, %._crit_edge384.us.us.us.us ], [ 0.000000e+00, %iter.check ] + 
%.2292401.us.us.us.us = phi double [ %.4294.us.us.us.us, %._crit_edge384.us.us.us.us ], [ 0.000000e+00, %iter.check ] + %.2297400.us.us.us.us = phi double [ %.4299.us.us.us.us, %._crit_edge384.us.us.us.us ], [ 0.000000e+00, %iter.check ] + br i1 false, label %._crit_edge384.us.us.us.us, label %3 + +3: ; preds = %.lr.ph383.us.us.us.us + %4 = load double, ptr %0, align 8 + %5 = fmul double %4, 0.000000e+00 + %6 = load double, ptr %1, align 8 + %7 = fadd double %6, %5 + %8 = fadd double %6, %5 + %9 = fadd double %4, 0.000000e+00 + %10 = fadd double %2, 0.000000e+00 + %11 = load double, ptr %0, align 8 + %12 = fadd double %10, %11 + br label %13 + +13: ; preds = %3 + br label %14 + +14: ; preds = %13 + %15 = fmul double %7, 0.000000e+00 + %16 = fadd double %15, %.2277404.us.us.us.us + %17 = fmul double %8, 0.000000e+00 + %18 = fadd double %17, %.2297400.us.us.us.us + %19 = fadd double %9, %.2287402.us.us.us.us + %20 = fadd double %12, %.2292401.us.us.us.us + br label %._crit_edge384.us.us.us.us + +._crit_edge384.us.us.us.us: ; preds = %14, %.lr.ph383.us.us.us.us + %.4299.us.us.us.us = phi double [ %18, %14 ], [ 0.000000e+00, %.lr.ph383.us.us.us.us ] + %.4294.us.us.us.us = phi double [ %20, %14 ], [ 0.000000e+00, %.lr.ph383.us.us.us.us ] + %.4289.us.us.us.us = phi double [ %19, %14 ], [ 0.000000e+00, %.lr.ph383.us.us.us.us ] + %.4279.us.us.us.us = phi double [ %16, %14 ], [ 0.000000e+00, %.lr.ph383.us.us.us.us ] + br label %.lr.ph383.us.us.us.us +} +