From 6408703de5a523e331ee47bbb6bea5a13b1a2758 Mon Sep 17 00:00:00 2001 From: Karthika Devi C Date: Mon, 10 Nov 2025 13:48:15 +0530 Subject: [PATCH] [Polly] Retain vectorization for fallback loop when RTC is unsatisfiable (#165525) When Polly generates a false runtime condition (RTC), the associated Polly generated loop is never executed and is eventually eliminated. As a result, the fallback loop becomes the default execution path. Disabling vectorization for this fallback loop will be counterproductive. This patch ensures that vectorization is only disabled when the RTC is not false (no Codegen failure). --- polly/lib/CodeGen/CodeGeneration.cpp | 24 ++++++++++------ .../CodeGen/Metadata/fallback_vec_annotate.ll | 28 +++++++++++++++++++ 2 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 polly/test/CodeGen/Metadata/fallback_vec_annotate.ll diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp index 2d8b393cc039..062cdfbcfe3b 100644 --- a/polly/lib/CodeGen/CodeGeneration.cpp +++ b/polly/lib/CodeGen/CodeGeneration.cpp @@ -235,15 +235,6 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI, NodeBuilder.allocateNewArrays(StartExitBlocks); Annotator.buildAliasScopes(S); - // The code below annotates the "llvm.loop.vectorize.enable" to false - // for the code flow taken when RTCs fail. Because we don't want the - // Loop Vectorizer to come in later and vectorize the original fall back - // loop when Polly is enabled. 
- for (Loop *L : LI.getLoopsInPreorder()) { - if (S.contains(L)) - addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0); - } - if (PerfMonitoring) { PerfMonitor P(S, EnteringBB->getParent()->getParent()); P.initialize(); @@ -285,6 +276,21 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI, Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); + auto *CI = dyn_cast<ConstantInt>(RTC); + // The code below annotates the "llvm.loop.vectorize.enable" to false + // for the code flow taken when RTCs fail. Because we don't want the + // Loop Vectorizer to come in later and vectorize the original fall back + // loop when Polly is enabled. This avoids loop versioning on fallback + // loop by Loop Vectorizer. Don't do this when Polly's RTC value is + // false (due to code generation failure), as we are left with only one + // version of Loop. + if (!(CI && CI->isZero())) { + for (Loop *L : LI.getLoopsInPreorder()) { + if (S.contains(L)) + addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0); + } + } + // Explicitly set the insert point to the end of the block to avoid that a // split at the builder's current // insert position would move the malloc calls to the wrong BasicBlock. 
diff --git a/polly/test/CodeGen/Metadata/fallback_vec_annotate.ll b/polly/test/CodeGen/Metadata/fallback_vec_annotate.ll new file mode 100644 index 000000000000..317d30649ab1 --- /dev/null +++ b/polly/test/CodeGen/Metadata/fallback_vec_annotate.ll @@ -0,0 +1,28 @@ +; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s +; RUN: opt %loadNPMPolly -S -passes=polly-codegen < %s | FileCheck %s + +; Verify vectorization is not disabled when RTC of Polly is false + +; CHECK: attributes {{.*}} = { "polly-optimized" } +; CHECK-NOT: {{.*}} = !{!"llvm.loop.vectorize.enable", i32 0} + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "aarch64-unknown-linux-android10000" + +define void @ham(i64 %arg) { +bb: + br label %bb1 + +bb1: ; preds = %bb3, %bb + %phi = phi ptr [ %getelementptr4, %bb3 ], [ null, %bb ] + br label %bb2 + +bb2: ; preds = %bb2, %bb1 + %getelementptr = getelementptr i8, ptr %phi, i64 1 + store i8 0, ptr %getelementptr, align 1 + br i1 false, label %bb2, label %bb3 + +bb3: ; preds = %bb2 + %getelementptr4 = getelementptr i8, ptr %phi, i64 %arg + br label %bb1 +}