diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 396c40d360ee..bd5844f715de 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -553,9 +553,6 @@ LLVM_ABI FunctionPass *createReplaceWithVeclibLegacyPass(); // Expands large div/rem and floating-point instructions. LLVM_ABI FunctionPass *createExpandIRInstsPass(CodeGenOptLevel); -// This pass expands memcmp() to load/stores. -LLVM_ABI FunctionPass *createExpandMemCmpLegacyPass(); - /// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp LLVM_ABI FunctionPass *createBreakFalseDeps(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 48e4ecd8ee2a..604118cee9e2 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -114,7 +114,6 @@ LLVM_ABI void initializeEarlyTailDuplicateLegacyPass(PassRegistry &); LLVM_ABI void initializeEdgeBundlesWrapperLegacyPass(PassRegistry &); LLVM_ABI void initializeEHContGuardTargetsPass(PassRegistry &); LLVM_ABI void initializeExpandIRInstsLegacyPassPass(PassRegistry &); -LLVM_ABI void initializeExpandMemCmpLegacyPassPass(PassRegistry &); LLVM_ABI void initializeExpandPostRALegacyPass(PassRegistry &); LLVM_ABI void initializeExpandReductionsPass(PassRegistry &); LLVM_ABI void initializeExpandVariadicsPass(PassRegistry &); @@ -234,7 +233,6 @@ LLVM_ABI void initializeMachineUniformityAnalysisPassPass(PassRegistry &); LLVM_ABI void initializeMachineVerifierLegacyPassPass(PassRegistry &); LLVM_ABI void initializeMemoryDependenceWrapperPassPass(PassRegistry &); LLVM_ABI void initializeMemorySSAWrapperPassPass(PassRegistry &); -LLVM_ABI void initializeMergeICmpsLegacyPassPass(PassRegistry &); LLVM_ABI void initializeModuleSummaryIndexWrapperPassPass(PassRegistry &); LLVM_ABI void initializeModuloScheduleTestPass(PassRegistry &); LLVM_ABI void initializeNaryReassociateLegacyPassPass(PassRegistry &); diff --git 
a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 612e57b52644..a01a67f13671 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -127,8 +127,6 @@ struct ForcePassLinking { (void)llvm::createEarlyCSEPass(); (void)llvm::createGVNPass(); (void)llvm::createPostDomTree(); - (void)llvm::createMergeICmpsLegacyPass(); - (void)llvm::createExpandMemCmpLegacyPass(); std::string buf; llvm::raw_string_ostream os(buf); (void)llvm::createPrintModulePass(os); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 6108284615be..e25f5a6e24e4 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -29,7 +29,6 @@ #include "llvm/CodeGen/DetectDeadLanes.h" #include "llvm/CodeGen/DwarfEHPrepare.h" #include "llvm/CodeGen/ExpandIRInsts.h" -#include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandPostRAPseudos.h" #include "llvm/CodeGen/ExpandReductions.h" #include "llvm/CodeGen/FEntryInserter.h" @@ -736,16 +735,6 @@ void CodeGenPassBuilder::addIRPasses( PMW); } - if (getOptLevel() != CodeGenOptLevel::None) { - // The MergeICmpsPass tries to create memcmp calls by grouping sequences of - // loads and compares. ExpandMemCmpPass then tries to expand those calls - // into optimally-sized loads and compares. The transforms are enabled by a - // target lowering hook. 
- if (!Opt.DisableMergeICmps) - addFunctionPass(MergeICmpsPass(), PMW); - addFunctionPass(ExpandMemCmpPass(), PMW); - } - // Run GC lowering passes for builtin collectors // TODO: add a pass insertion point here addFunctionPass(GCLoweringPass(), PMW); diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h index 8f9d55226e68..22b62e841dbc 100644 --- a/llvm/include/llvm/Target/CGPassBuilderOption.h +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -66,7 +66,6 @@ struct CGPassBuilderOption { bool DisableLSR = false; bool DisableCGP = false; - bool DisableMergeICmps = false; bool DisablePartialLibcallInlining = false; bool DisableConstantHoisting = false; bool DisableSelectOptimize = true; diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 8e68b6a57e51..e2a236458dd7 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -145,11 +145,6 @@ LLVM_ABI FunctionPass *createSinkingPass(); LLVM_ABI Pass *createLowerAtomicPass(); //===----------------------------------------------------------------------===// -// -// MergeICmps - Merge integer comparison chains into a memcmp -// -LLVM_ABI Pass *createMergeICmpsLegacyPass(); - //===----------------------------------------------------------------------===// // // InferAddressSpaces - Modify users of addrspacecast instructions with values diff --git a/llvm/include/llvm/CodeGen/ExpandMemCmp.h b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h similarity index 80% rename from llvm/include/llvm/CodeGen/ExpandMemCmp.h rename to llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h index e65903d31b1a..856978df03b7 100644 --- a/llvm/include/llvm/CodeGen/ExpandMemCmp.h +++ b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h @@ -6,15 +6,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_EXPANDMEMCMP_H -#define 
LLVM_CODEGEN_EXPANDMEMCMP_H +#ifndef LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H +#define LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H #include "llvm/IR/PassManager.h" namespace llvm { -class TargetMachine; - class ExpandMemCmpPass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); @@ -22,4 +20,4 @@ public: } // namespace llvm -#endif // LLVM_CODEGEN_EXPANDMEMCMP_H +#endif // LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 7620d546091c..1cdf81791dc7 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -57,7 +57,6 @@ add_llvm_component_library(LLVMCodeGen EHContGuardTargets.cpp ExecutionDomainFix.cpp ExpandIRInsts.cpp - ExpandMemCmp.cpp ExpandPostRAPseudos.cpp ExpandReductions.cpp ExpandVectorPredication.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index a325c31faf41..bb8bbc4f44ee 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -44,7 +44,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeEarlyMachineLICMPass(Registry); initializeEarlyTailDuplicateLegacyPass(Registry); initializeExpandIRInstsLegacyPassPass(Registry); - initializeExpandMemCmpLegacyPassPass(Registry); initializeExpandPostRALegacyPass(Registry); initializeExpandReductionsPass(Registry); initializeFEntryInserterLegacyPass(Registry); diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index f84295225efa..4aa99340ef1d 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -110,9 +110,6 @@ static cl::opt EnableImplicitNullChecks( "enable-implicit-null-checks", cl::desc("Fold null checks into faulting memory operations"), cl::init(false), cl::Hidden); -static cl::opt DisableMergeICmps("disable-mergeicmps", - cl::desc("Disable MergeICmps Pass"), - cl::init(false), cl::Hidden); static cl::opt 
PrintISelInput("print-isel-input", cl::Hidden, cl::desc("Print LLVM IR input to isel pass")); @@ -520,7 +517,6 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() { SET_BOOLEAN_OPTION(EnableImplicitNullChecks) SET_BOOLEAN_OPTION(EnableMachineOutliner) SET_BOOLEAN_OPTION(MISchedPostRA) - SET_BOOLEAN_OPTION(DisableMergeICmps) SET_BOOLEAN_OPTION(DisableLSR) SET_BOOLEAN_OPTION(DisableConstantHoisting) SET_BOOLEAN_OPTION(DisableCGP) @@ -863,14 +859,6 @@ void TargetPassConfig::addIRPasses() { if (EnableLoopTermFold) addPass(createLoopTermFoldPass()); } - - // The MergeICmpsPass tries to create memcmp calls by grouping sequences of - // loads and compares. ExpandMemCmpPass then tries to expand those calls - // into optimally-sized loads and compares. The transforms are enabled by a - // target lowering hook. - if (!DisableMergeICmps) - addPass(createMergeICmpsLegacyPass()); - addPass(createExpandMemCmpLegacyPass()); } // Run GC lowering passes for builtin collectors diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index a23d64b491a7..55a4e99e7402 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -91,7 +91,6 @@ #include "llvm/CodeGen/EarlyIfConversion.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/ExpandIRInsts.h" -#include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandPostRAPseudos.h" #include "llvm/CodeGen/ExpandReductions.h" #include "llvm/CodeGen/FEntryInserter.h" @@ -281,6 +280,7 @@ #include "llvm/Transforms/Scalar/DivRemPairs.h" #include "llvm/Transforms/Scalar/DropUnnecessaryAssumes.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/Transforms/Scalar/ExpandMemCmp.h" #include "llvm/Transforms/Scalar/FlattenCFG.h" #include "llvm/Transforms/Scalar/Float2Int.h" #include "llvm/Transforms/Scalar/GVN.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index cae85c43bd20..255dfa8e4e9c 100644 --- 
a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -97,6 +97,7 @@ #include "llvm/Transforms/Scalar/DivRemPairs.h" #include "llvm/Transforms/Scalar/DropUnnecessaryAssumes.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/Transforms/Scalar/ExpandMemCmp.h" #include "llvm/Transforms/Scalar/Float2Int.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/IndVarSimplify.h" @@ -124,6 +125,7 @@ #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" +#include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/Reassociate.h" @@ -268,6 +270,10 @@ static cl::opt<bool> EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics")); +static cl::opt<bool> EnableMergeICmps( + "enable-mergeicmps", cl::init(true), cl::Hidden, + cl::desc("Enable MergeICmps pass in the optimization pipeline")); + static cl::opt<bool> EnableConstraintElimination( "enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc( @@ -1605,6 +1611,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // flattening of blocks. OptimizePM.addPass(DivRemPairsPass()); + // Merge adjacent icmps into memcmp, then expand memcmp to loads/compares. + // TODO: move this further up so that it can be optimized by GVN, etc. + if (EnableMergeICmps) + OptimizePM.addPass(MergeICmpsPass()); + OptimizePM.addPass(ExpandMemCmpPass()); + // Try to annotate calls that were created during optimization. 
OptimizePM.addPass( TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse())); diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index 37dbb3460564..4f6b208c2e72 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_component_library(LLVMScalarOpts DivRemPairs.cpp DropUnnecessaryAssumes.cpp EarlyCSE.cpp + ExpandMemCmp.cpp FlattenCFGPass.cpp Float2Int.cpp GuardWidening.cpp diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp similarity index 94% rename from llvm/lib/CodeGen/ExpandMemCmp.cpp rename to llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp index b471717f65dc..1e1976c7e5f0 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ExpandMemCmp.h" +#include "llvm/Transforms/Scalar/ExpandMemCmp.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DomTreeUpdater.h" @@ -20,13 +20,10 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ProfDataUtils.h" -#include "llvm/InitializePasses.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SizeOpts.h" @@ -892,46 +889,6 @@ static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, DominatorTree *DT); -class ExpandMemCmpLegacyPass : public FunctionPass { -public: - static char ID; - - ExpandMemCmpLegacyPass() : 
FunctionPass(ID) {} - - bool runOnFunction(Function &F) override { - if (skipFunction(F)) return false; - - auto *TPC = getAnalysisIfAvailable(); - if (!TPC) { - return false; - } - - const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(F); - const TargetTransformInfo *TTI = - &getAnalysis().getTTI(F); - auto *PSI = &getAnalysis().getPSI(); - auto *BFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis().getBFI() : - nullptr; - DominatorTree *DT = nullptr; - if (auto *DTWP = getAnalysisIfAvailable()) - DT = &DTWP->getDomTree(); - auto PA = runImpl(F, TLI, TTI, PSI, BFI, DT); - return !PA.areAllPreserved(); - } - -private: - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); - FunctionPass::getAnalysisUsage(AU); - } -}; - bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const DataLayout &DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, @@ -985,6 +942,14 @@ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, PreservedAnalyses ExpandMemCmpPass::run(Function &F, FunctionAnalysisManager &FAM) { + // Don't expand memcmp in sanitized functions — sanitizers intercept memcmp + // calls to check for memory errors, and expanding would bypass that. 
+ if (F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeMemory) || + F.hasFnAttribute(Attribute::SanitizeThread) || + F.hasFnAttribute(Attribute::SanitizeHWAddress)) + return PreservedAnalyses::all(); + const auto &TLI = FAM.getResult(F); const auto &TTI = FAM.getResult(F); auto *PSI = FAM.getResult(F) @@ -996,18 +961,3 @@ PreservedAnalyses ExpandMemCmpPass::run(Function &F, return runImpl(F, &TLI, &TTI, PSI, BFI, DT); } - -char ExpandMemCmpLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(ExpandMemCmpLegacyPass, DEBUG_TYPE, - "Expand memcmp() to load/stores", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ExpandMemCmpLegacyPass, DEBUG_TYPE, - "Expand memcmp() to load/stores", false, false) - -FunctionPass *llvm::createExpandMemCmpLegacyPass() { - return new ExpandMemCmpLegacyPass(); -} diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp index 531d370b6d67..d4286492011c 100644 --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -43,8 +43,8 @@ #include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -53,9 +53,6 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/ProfDataUtils.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include 
"llvm/Transforms/Utils/BuildLibCalls.h" #include @@ -888,7 +885,7 @@ static bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, static bool runImpl(Function &F, const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, AliasAnalysis &AA, DominatorTree *DT) { - LLVM_DEBUG(dbgs() << "MergeICmpsLegacyPass: " << F.getName() << "\n"); + LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n"); // We only try merging comparisons if the target wants to expand memcmp later. // The rationale is to avoid turning small chains into memcmp calls. @@ -913,49 +910,6 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI, return MadeChange; } -namespace { -class MergeICmpsLegacyPass : public FunctionPass { -public: - static char ID; - - MergeICmpsLegacyPass() : FunctionPass(ID) { - initializeMergeICmpsLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - if (skipFunction(F)) return false; - const auto &TLI = getAnalysis().getTLI(F); - const auto &TTI = getAnalysis().getTTI(F); - // MergeICmps does not need the DominatorTree, but we update it if it's - // already available. - auto *DTWP = getAnalysisIfAvailable(); - auto &AA = getAnalysis().getAAResults(); - return runImpl(F, TLI, TTI, AA, DTWP ? 
&DTWP->getDomTree() : nullptr); - } - - private: - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - } -}; - -} // namespace - -char MergeICmpsLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(MergeICmpsLegacyPass, "mergeicmps", - "Merge contiguous icmps into a memcmp", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MergeICmpsLegacyPass, "mergeicmps", - "Merge contiguous icmps into a memcmp", false, false) - -Pass *llvm::createMergeICmpsLegacyPass() { return new MergeICmpsLegacyPass(); } - PreservedAnalyses MergeICmpsPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult(F); diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 032a3a779282..7a954047c05f 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -34,7 +34,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopTermFoldPass(Registry); initializeLoopUnrollPass(Registry); initializeLowerAtomicLegacyPassPass(Registry); - initializeMergeICmpsLegacyPassPass(Registry); initializeNaryReassociateLegacyPassPass(Registry); initializePartiallyInlineLibCallsLegacyPassPass(Registry); initializeDSELegacyPassPass(Registry); diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 16b33376fa84..ff4f28a3a50c 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -49,13 +49,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous 
icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Remove unreachable blocks from the CFG diff --git a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll index 4846c46e6481..d196304cefc3 100644 --- a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll +++ b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O2 < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefix=CHECKN -; RUN: llc -O2 < %s -mtriple=aarch64-linux-gnu -mattr=strict-align | FileCheck %s --check-prefix=CHECKS +; RUN: opt -passes=expand-memcmp -mtriple=aarch64-linux-gnu -S < %s | llc -O2 -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefix=CHECKN +; RUN: opt -passes=expand-memcmp -mtriple=aarch64-linux-gnu -mattr=strict-align -S < %s | llc -O2 -mtriple=aarch64-linux-gnu -mattr=strict-align | FileCheck %s --check-prefix=CHECKS declare i32 @bcmp(ptr, ptr, i64) nounwind readonly declare i32 @memcmp(ptr, ptr, i64) nounwind readonly diff --git a/llvm/test/CodeGen/AArch64/bcmp.ll b/llvm/test/CodeGen/AArch64/bcmp.ll index e70ddc3415ca..436a3c2375b5 100644 --- a/llvm/test/CodeGen/AArch64/bcmp.ll +++ b/llvm/test/CodeGen/AArch64/bcmp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O2 < %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=aarch64-linux-gnu -S < %s | llc -O2 -mtriple=aarch64-linux-gnu | FileCheck %s declare i32 @bcmp(ptr, ptr, i64) diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll index 16ccd61f925e..7423f6fcf1d1 100644 --- 
a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=aarch64-none-linux-gnu -mattr=+neon -S < %s | llc -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v8i1: diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll index a32c53a5a574..06a3c07ed357 100644 --- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=aarch64-linux-gnu -S < %s | llc -mtriple=aarch64-linux-gnu | FileCheck %s ; One dimensional loop with load that can be hoisted outside of loop ; for (int i = 0; i < N; ++i) @@ -29,8 +29,8 @@ for.body: ; preds = %entry, %for.body %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06 - %0 = load ptr, ptr %arrayidx, align 8 - %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4) + %ptr = load ptr, ptr %arrayidx, align 8 + %bcmp = tail call i32 @bcmp(ptr %ptr, ptr %b, i64 4) %tobool = icmp eq i32 %bcmp, 0 %add = zext i1 %tobool to i64 %spec.select = add i64 %sum.05, %add @@ -387,8 +387,8 @@ for.body: ; preds = %for.body.preheader, %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] %sum.05 = phi i32 [ 0, %for.body.preheader ], [ %spec.select, %for.body ] %arrayidx = getelementptr inbounds ptr, ptr %a, i64 
%indvars.iv - %0 = load ptr, ptr %arrayidx, align 8 - %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4) + %ptr = load ptr, ptr %arrayidx, align 8 + %bcmp = tail call i32 @bcmp(ptr %ptr, ptr %b, i64 4) %tobool.not = icmp eq i32 %bcmp, 0 %add = zext i1 %tobool.not to i32 %spec.select = add nuw nsw i32 %sum.05, %add diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll index 98ea86b06d6c..f7ffdad81180 100644 --- a/llvm/test/CodeGen/AArch64/memcmp.ll +++ b/llvm/test/CodeGen/AArch64/memcmp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=aarch64-unknown-unknown -S < %s | llc -mtriple=aarch64-unknown-unknown | FileCheck %s @.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 83d307e1627b..c49b2b927bd3 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -127,8 +127,6 @@ ; GCN-O2-NEXT: verify ; GCN-O2-NEXT: loop(canon-freeze ; GCN-O2-NEXT: loop-reduce) -; GCN-O2-NEXT: mergeicmps -; GCN-O2-NEXT: expand-memcmp ; GCN-O2-NEXT: unreachableblockelim ; GCN-O2-NEXT: consthoist ; GCN-O2-NEXT: replace-with-veclib @@ -298,8 +296,6 @@ ; GCN-O3-NEXT: verify ; GCN-O3-NEXT: loop(canon-freeze ; GCN-O3-NEXT: 
loop-reduce) -; GCN-O3-NEXT: mergeicmps -; GCN-O3-NEXT: expand-memcmp ; GCN-O3-NEXT: unreachableblockelim ; GCN-O3-NEXT: consthoist ; GCN-O3-NEXT: replace-with-veclib diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index c41a43bf9cf4..cf127b0bc0d3 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -225,13 +225,6 @@ ; GCN-O1-NEXT: Canonicalize Freeze Instructions in Loops ; GCN-O1-NEXT: Induction Variable Users ; GCN-O1-NEXT: Loop Strength Reduction -; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O1-NEXT: Function Alias Analysis Results -; GCN-O1-NEXT: Merge contiguous icmps into a memcmp -; GCN-O1-NEXT: Natural Loop Information -; GCN-O1-NEXT: Lazy Branch Probability Analysis -; GCN-O1-NEXT: Lazy Block Frequency Analysis -; GCN-O1-NEXT: Expand memcmp() to load/stores ; GCN-O1-NEXT: Remove unreachable blocks from the CFG ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: Post-Dominator Tree Construction @@ -535,13 +528,6 @@ ; GCN-O1-OPTS-NEXT: Canonicalize Freeze Instructions in Loops ; GCN-O1-OPTS-NEXT: Induction Variable Users ; GCN-O1-OPTS-NEXT: Loop Strength Reduction -; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O1-OPTS-NEXT: Function Alias Analysis Results -; GCN-O1-OPTS-NEXT: Merge contiguous icmps into a memcmp -; GCN-O1-OPTS-NEXT: Natural Loop Information -; GCN-O1-OPTS-NEXT: Lazy Branch Probability Analysis -; GCN-O1-OPTS-NEXT: Lazy Block Frequency Analysis -; GCN-O1-OPTS-NEXT: Expand memcmp() to load/stores ; GCN-O1-OPTS-NEXT: Remove unreachable blocks from the CFG ; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction @@ -862,13 +848,6 @@ ; GCN-O2-NEXT: Canonicalize Freeze Instructions in Loops ; GCN-O2-NEXT: Induction Variable Users ; GCN-O2-NEXT: Loop Strength Reduction -; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O2-NEXT: Function Alias Analysis 
Results -; GCN-O2-NEXT: Merge contiguous icmps into a memcmp -; GCN-O2-NEXT: Natural Loop Information -; GCN-O2-NEXT: Lazy Branch Probability Analysis -; GCN-O2-NEXT: Lazy Block Frequency Analysis -; GCN-O2-NEXT: Expand memcmp() to load/stores ; GCN-O2-NEXT: Remove unreachable blocks from the CFG ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Post-Dominator Tree Construction @@ -1197,13 +1176,6 @@ ; GCN-O3-NEXT: Canonicalize Freeze Instructions in Loops ; GCN-O3-NEXT: Induction Variable Users ; GCN-O3-NEXT: Loop Strength Reduction -; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) -; GCN-O3-NEXT: Function Alias Analysis Results -; GCN-O3-NEXT: Merge contiguous icmps into a memcmp -; GCN-O3-NEXT: Natural Loop Information -; GCN-O3-NEXT: Lazy Branch Probability Analysis -; GCN-O3-NEXT: Lazy Block Frequency Analysis -; GCN-O3-NEXT: Expand memcmp() to load/stores ; GCN-O3-NEXT: Remove unreachable blocks from the CFG ; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Post-Dominator Tree Construction diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index c88e96d81ae6..9f4d70531a3f 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -25,13 +25,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Remove unreachable blocks from the CFG diff --git a/llvm/test/CodeGen/BPF/memcmp.ll b/llvm/test/CodeGen/BPF/memcmp.ll index 4c326feda559..694d1fb2057b 100644 --- 
a/llvm/test/CodeGen/BPF/memcmp.ll +++ b/llvm/test/CodeGen/BPF/memcmp.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=bpfel < %s | FileCheck %s -; RUN: llc -mtriple=bpfel -mcpu=v3 < %s | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=bpfel -S < %s | llc -mtriple=bpfel | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=bpfel -mcpu=v3 -S < %s | llc -mtriple=bpfel -mcpu=v3 | FileCheck %s ; ; Source code: ; /* set aligned 4 to minimize the number of loads */ diff --git a/llvm/test/CodeGen/Generic/llc-start-stop.ll b/llvm/test/CodeGen/Generic/llc-start-stop.ll index 3339bac9ce40..bca7b410de06 100644 --- a/llvm/test/CodeGen/Generic/llc-start-stop.ll +++ b/llvm/test/CodeGen/Generic/llc-start-stop.ll @@ -19,15 +19,15 @@ ; STOP-BEFORE-NOT: Loop Strength Reduction ; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-AFTER -; START-AFTER: -aa -mergeicmps +; START-AFTER: -gc-lowering ; START-AFTER: FunctionPass Manager -; START-AFTER-NEXT: Dominator Tree Construction +; START-AFTER-NEXT: Lower Garbage Collection Instructions ; RUN: llc < %s -debug-pass=Structure -start-before=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-BEFORE ; START-BEFORE: -machine-branch-prob -regalloc-evict -regalloc-priority -domtree ; START-BEFORE: FunctionPass Manager ; START-BEFORE: Loop Strength Reduction -; START-BEFORE-NEXT: {{Loop Terminator Folding|Basic Alias Analysis \(stateless AA impl\)}} +; START-BEFORE-NEXT: {{Loop Terminator Folding|Lower Garbage Collection Instructions}} ; RUN: not llc < %s -start-before=nonexistent -o /dev/null 2>&1 | FileCheck %s -check-prefix=NONEXISTENT-START-BEFORE ; RUN: not llc < %s -stop-before=nonexistent -o /dev/null 2>&1 | FileCheck %s -check-prefix=NONEXISTENT-STOP-BEFORE diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll index 36670fa801b3..fb02b5e24504 100644 --- 
a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll +++ b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \ +; RUN: sed 's/iGRLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=loongarch32 -mattr=+ual -S | llc --mtriple=loongarch32 --mattr=+ual \ ; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL -; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \ +; RUN: sed 's/iGRLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=loongarch64 -mattr=+ual -S | llc --mtriple=loongarch64 --mattr=+ual \ ; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL -; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \ +; RUN: sed 's/iGRLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=loongarch32 -mattr=-ual -S | llc --mtriple=loongarch32 --mattr=-ual \ ; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL -; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \ +; RUN: sed 's/iGRLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=loongarch64 -mattr=-ual -S | llc --mtriple=loongarch64 --mattr=-ual \ ; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll index c1bf850baa8c..00ff3438b8b5 100644 --- a/llvm/test/CodeGen/LoongArch/expandmemcmp.ll +++ b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \ +; RUN: sed 's/iGRLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=loongarch32 -mattr=+ual -S | llc --mtriple=loongarch32 --mattr=+ual \ ; RUN: | FileCheck %s 
--check-prefixes=CHECK,LA32,LA32-UAL -; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \ +; RUN: sed 's/iGRLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=loongarch64 -mattr=+ual -S | llc --mtriple=loongarch64 --mattr=+ual \ ; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL -; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \ +; RUN: sed 's/iGRLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=loongarch32 -mattr=-ual -S | llc --mtriple=loongarch32 --mattr=-ual \ ; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL -; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \ +; RUN: sed 's/iGRLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=loongarch64 -mattr=-ual -S | llc --mtriple=loongarch64 --mattr=-ual \ ; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly diff --git a/llvm/test/CodeGen/LoongArch/memcmp.ll b/llvm/test/CodeGen/LoongArch/memcmp.ll index c3811c035779..d95454da9d6f 100644 --- a/llvm/test/CodeGen/LoongArch/memcmp.ll +++ b/llvm/test/CodeGen/LoongArch/memcmp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 -mattr=+d < %s | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=loongarch64 -mattr=+d -S < %s | llc --mtriple=loongarch64 -mattr=+d | FileCheck %s ;; Before getSelectionDAGInfo() interface hooks were defined DAGBuilder ;; would crash. 
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll index 29a035f2949d..2657a575aa8a 100644 --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -25,8 +25,8 @@ ; LAXX-NEXT: Library Function Lowering Analysis ; LAXX-NEXT: Type-Based Alias Analysis ; LAXX-NEXT: Scoped NoAlias Alias Analysis -; LAXX-NEXT: Profile summary info ; LAXX-NEXT: Create Garbage Collector Module Metadata +; LAXX-NEXT: Profile summary info ; LAXX-NEXT: Machine Branch Probability Analysis ; LAXX-NEXT: Default Regalloc Eviction Advisor ; LAXX-NEXT: Default Regalloc Priority Advisor @@ -50,13 +50,6 @@ ; LAXX-NEXT: Canonicalize Freeze Instructions in Loops ; LAXX-NEXT: Induction Variable Users ; LAXX-NEXT: Loop Strength Reduction -; LAXX-NEXT: Basic Alias Analysis (stateless AA impl) -; LAXX-NEXT: Function Alias Analysis Results -; LAXX-NEXT: Merge contiguous icmps into a memcmp -; LAXX-NEXT: Natural Loop Information -; LAXX-NEXT: Lazy Branch Probability Analysis -; LAXX-NEXT: Lazy Block Frequency Analysis -; LAXX-NEXT: Expand memcmp() to load/stores ; LAXX-NEXT: Lower Garbage Collection Instructions ; LAXX-NEXT: Shadow Stack GC Lowering ; LAXX-NEXT: Remove unreachable blocks from the CFG diff --git a/llvm/test/CodeGen/M68k/pipeline.ll b/llvm/test/CodeGen/M68k/pipeline.ll index f068694c08ca..24329b2749fc 100644 --- a/llvm/test/CodeGen/M68k/pipeline.ll +++ b/llvm/test/CodeGen/M68k/pipeline.ll @@ -19,13 +19,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage 
Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Remove unreachable blocks from the CFG diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index 7118ae453245..3901d122f449 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -13,8 +13,8 @@ ; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis -; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: Default Regalloc Priority Advisor @@ -51,13 +51,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Remove unreachable blocks from the CFG diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll index 8d4dce122a43..5182fe095b56 100644 --- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -S < %s | llc 
-ppc-gpr-icmps=all -verify-machineinstrs -mcpu=pwr8 | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll index 29910646c893..d38bab32fb08 100644 --- a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll +++ b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=PPC64LE +; RUN: opt -passes=mergeicmps,expand-memcmp -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr8 -S < %s | llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux | FileCheck %s -check-prefix=PPC64LE ; This tests interaction between MergeICmp and expand-memcmp. diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll index 4998d87cf397..0cf896d6bbef 100644 --- a/llvm/test/CodeGen/PowerPC/memcmp.ll +++ b/llvm/test/CodeGen/PowerPC/memcmp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK +; RUN: opt -passes=expand-memcmp -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr8 -S < %s | llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux | FileCheck %s -check-prefix=CHECK define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture readonly %buffer2) { ; CHECK-LABEL: memcmp8: diff --git a/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll b/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll index 7dfaac1a8ae3..daa48d5fc0e2 100644 --- a/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll +++ b/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ +; RUN: opt -passes=expand-memcmp -mtriple=powerpc-ibm-aix -mcpu=pwr8 -S < %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix | \ ; RUN: FileCheck %s --check-prefix=CHECK-AIX32-P8 -; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ +; RUN: opt -passes=expand-memcmp -mtriple=powerpc-ibm-aix -mcpu=pwr10 -S < %s | llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix | \ ; RUN: FileCheck %s --check-prefix=CHECK-AIX32-P10 -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \ +; RUN: opt -passes=expand-memcmp -mtriple=powerpcle-unknown-linux-gnu -mcpu=pwr8 -S < %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu | \ ; RUN: FileCheck %s --check-prefix=CHECK-LINUX32-P8 -; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \ +; RUN: opt -passes=expand-memcmp -mtriple=powerpcle-unknown-linux-gnu -mcpu=pwr10 -S < %s | llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu | \ ; RUN: FileCheck %s --check-prefix=CHECK-LINUX32-P10 define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) { diff --git a/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll b/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll index bd703b9d35cf..ea367daec3de 100644 --- a/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll +++ b/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: opt -passes=expand-memcmp -mtriple=powerpc64-ibm-aix -mcpu=pwr8 -S < %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix | \ ; 
RUN: FileCheck %s --check-prefix=CHECK-AIX64-32-P8 -; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: opt -passes=expand-memcmp -mtriple=powerpc64-ibm-aix -mcpu=pwr10 -S < %s | llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix | \ ; RUN: FileCheck %s --check-prefix=CHECK-AIX64-32-P10 -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: opt -passes=expand-memcmp -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -S < %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu | \ ; RUN: FileCheck %s --check-prefix=CHECK-LINUX64-P8 -; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: opt -passes=expand-memcmp -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 -S < %s | llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu | \ ; RUN: FileCheck %s --check-prefix=CHECK-LINUX64-P10 define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) { diff --git a/llvm/test/CodeGen/PowerPC/memcmpIR.ll b/llvm/test/CodeGen/PowerPC/memcmpIR.ll index 974b8bda3486..064ded2ccbec 100644 --- a/llvm/test/CodeGen/PowerPC/memcmpIR.ll +++ b/llvm/test/CodeGen/PowerPC/memcmpIR.ll @@ -1,5 +1,5 @@ -; RUN: llc -o - -mtriple=powerpc64le-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s -; RUN: llc -o - -mtriple=powerpc64-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: opt -passes=expand-memcmp -mtriple=powerpc64le-unknown-gnu-linux -S < %s | llc -o - -mtriple=powerpc64le-unknown-gnu-linux -stop-after codegenprepare | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=powerpc64-unknown-gnu-linux -S < %s | llc -o - -mtriple=powerpc64-unknown-gnu-linux -stop-after codegenprepare | FileCheck %s --check-prefix=CHECK-BE define signext i32 @test1(ptr nocapture 
readonly %buffer1, ptr nocapture readonly %buffer2) { entry: diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 682437b1e0db..149764ffedf9 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -53,13 +53,6 @@ ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction ; CHECK-NEXT: Loop Terminator Folding -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Remove unreachable blocks from the CFG diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll index afc8e3553f8b..b7e62d31a399 100644 --- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll +++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll @@ -1,35 +1,35 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -S | llc -mtriple=riscv32 -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-ALIGNED,CHECK-ALIGNED-RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -S | llc -mtriple=riscv64 -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-ALIGNED,CHECK-ALIGNED-RV64 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zbb -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+zbb -S | llc -mtriple=riscv32 -mattr=+zbb -O2 \ ; RUN: | FileCheck %s 
--check-prefixes=CHECK,CHECK-RV32,CHECK-ALIGNED,CHECK-ALIGNED-RV32-ZBB -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zbb -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+zbb -S | llc -mtriple=riscv64 -mattr=+zbb -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-ALIGNED,CHECK-ALIGNED-RV64-ZBB -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zbkb -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+zbkb -S | llc -mtriple=riscv32 -mattr=+zbkb -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-ALIGNED,CHECK-ALIGNED-RV32-ZBKB -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zbkb -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+zbkb -S | llc -mtriple=riscv64 -mattr=+zbkb -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-ALIGNED,CHECK-ALIGNED-RV64-ZBKB -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+v -S | llc -mtriple=riscv32 -mattr=+v -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-ALIGNED,CHECK-ALIGNED-RV32-V -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+v -S | llc -mtriple=riscv64 -mattr=+v -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-ALIGNED,CHECK-ALIGNED-RV64-V -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+unaligned-scalar-mem -S | llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-UNALIGNED,CHECK-UNALIGNED-RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -O2 \ +; RUN: sed 
's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+unaligned-scalar-mem -S | llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-UNALIGNED,CHECK-UNALIGNED-RV64 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zbb,+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+zbb,+unaligned-scalar-mem -S | llc -mtriple=riscv32 -mattr=+zbb,+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-UNALIGNED,CHECK-UNALIGNED-RV32-ZBB -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zbb,+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+zbb,+unaligned-scalar-mem -S | llc -mtriple=riscv64 -mattr=+zbb,+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-UNALIGNED,CHECK-UNALIGNED-RV64-ZBB -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zbkb,+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+zbkb,+unaligned-scalar-mem -S | llc -mtriple=riscv32 -mattr=+zbkb,+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-UNALIGNED,CHECK-UNALIGNED-RV32-ZBKB -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zbkb,+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+zbkb,+unaligned-scalar-mem -S | llc -mtriple=riscv64 -mattr=+zbkb,+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-UNALIGNED,CHECK-UNALIGNED-RV64-ZBKB -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -S | llc -mtriple=riscv32 
-mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-UNALIGNED,CHECK-UNALIGNED-RV32-V -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -S | llc -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-UNALIGNED,CHECK-UNALIGNED-RV64-V declare i32 @bcmp(ptr, ptr, iXLen) nounwind readonly diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll index c737edb9acce..6678f30a5f73 100644 --- a/llvm/test/CodeGen/RISCV/memcmp.ll +++ b/llvm/test/CodeGen/RISCV/memcmp.ll @@ -1,35 +1,35 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -S | llc -mtriple=riscv32 -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-ALIGNED,CHECK-ALIGNED-RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -S | llc -mtriple=riscv64 -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-ALIGNED,CHECK-ALIGNED-RV64 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zbb -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+zbb -S | llc -mtriple=riscv32 -mattr=+zbb -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-ALIGNED,CHECK-ALIGNED-RV32-ZBB -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zbb -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+zbb -S | llc -mtriple=riscv64 -mattr=+zbb -O2 \ ; RUN: | FileCheck %s 
--check-prefixes=CHECK,CHECK-RV64,CHECK-ALIGNED,CHECK-ALIGNED-RV64-ZBB -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zbkb -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+zbkb -S | llc -mtriple=riscv32 -mattr=+zbkb -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-ALIGNED,CHECK-ALIGNED-RV32-ZBKB -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zbkb -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+zbkb -S | llc -mtriple=riscv64 -mattr=+zbkb -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-ALIGNED,CHECK-ALIGNED-RV64-ZBKB -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+v -S | llc -mtriple=riscv32 -mattr=+v -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-ALIGNED,CHECK-ALIGNED-RV32-V -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+v -S | llc -mtriple=riscv64 -mattr=+v -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-ALIGNED,CHECK-ALIGNED-RV64-V -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+unaligned-scalar-mem -S | llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-UNALIGNED,CHECK-UNALIGNED-RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+unaligned-scalar-mem -S | llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-UNALIGNED,CHECK-UNALIGNED-RV64 -; RUN: sed 's/iXLen/i32/g' %s | llc 
-mtriple=riscv32 -mattr=+zbb,+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+zbb,+unaligned-scalar-mem -S | llc -mtriple=riscv32 -mattr=+zbb,+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-UNALIGNED,CHECK-UNALIGNED-RV32-ZBB -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zbb,+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+zbb,+unaligned-scalar-mem -S | llc -mtriple=riscv64 -mattr=+zbb,+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-UNALIGNED,CHECK-UNALIGNED-RV64-ZBB -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zbkb,+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+zbkb,+unaligned-scalar-mem -S | llc -mtriple=riscv32 -mattr=+zbkb,+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-UNALIGNED,CHECK-UNALIGNED-RV32-ZBKB -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zbkb,+unaligned-scalar-mem -O2 \ +; RUN: sed 's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+zbkb,+unaligned-scalar-mem -S | llc -mtriple=riscv64 -mattr=+zbkb,+unaligned-scalar-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-UNALIGNED,CHECK-UNALIGNED-RV64-ZBKB -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -O2 \ +; RUN: sed 's/iXLen/i32/g' %s | opt -passes=expand-memcmp -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -S | llc -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-UNALIGNED,CHECK-UNALIGNED-RV32-V -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -O2 \ +; RUN: sed 
's/iXLen/i64/g' %s | opt -passes=expand-memcmp -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -S | llc -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem -O2 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-UNALIGNED,CHECK-UNALIGNED-RV64-V declare i32 @bcmp(ptr, ptr, iXLen) nounwind readonly diff --git a/llvm/test/CodeGen/RISCV/pr186969.ll b/llvm/test/CodeGen/RISCV/pr186969.ll index 03551052dee3..92705bb3555d 100644 --- a/llvm/test/CodeGen/RISCV/pr186969.ll +++ b/llvm/test/CodeGen/RISCV/pr186969.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc < %s -mtriple=riscv64-unknown-elf -mattr=+unaligned-scalar-mem -O1 | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=riscv64-unknown-elf -mattr=+unaligned-scalar-mem -S < %s | llc -mtriple=riscv64-unknown-elf -mattr=+unaligned-scalar-mem -O1 | FileCheck %s @.str.45 = constant [4 x i8] c" 16\00" @.str.47 = constant [4 x i8] c"-0E\00" diff --git a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll index 2877fb871e68..81d7c1b985eb 100644 --- a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll +++ b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll @@ -102,8 +102,8 @@ ; SPIRV-Opt-NEXT:Library Function Lowering Analysis ; SPIRV-Opt-NEXT:Type-Based Alias Analysis ; SPIRV-Opt-NEXT:Scoped NoAlias Alias Analysis -; SPIRV-Opt-NEXT:Profile summary info ; SPIRV-Opt-NEXT:Create Garbage Collector Module Metadata +; SPIRV-Opt-NEXT:Profile summary info ; SPIRV-Opt-NEXT:Machine Branch Probability Analysis ; SPIRV-Opt-NEXT: ModulePass Manager ; SPIRV-Opt-NEXT: FunctionPass Manager @@ -124,13 +124,6 @@ ; SPIRV-Opt-NEXT: Canonicalize Freeze Instructions in Loops ; SPIRV-Opt-NEXT: Induction Variable Users ; SPIRV-Opt-NEXT: Loop Strength Reduction -; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl) -; SPIRV-Opt-NEXT: Function Alias Analysis Results -; SPIRV-Opt-NEXT: Merge 
contiguous icmps into a memcmp -; SPIRV-Opt-NEXT: Natural Loop Information -; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis -; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis -; SPIRV-Opt-NEXT: Expand memcmp() to load/stores ; SPIRV-Opt-NEXT: Lower Garbage Collection Instructions ; SPIRV-Opt-NEXT: Shadow Stack GC Lowering ; SPIRV-Opt-NEXT: Remove unreachable blocks from the CFG diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index 4357dc5631eb..eb2bb984e3cb 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=wasm32-unknown-unknown -mattr=+simd128 -S < %s | llc -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/WebAssembly/simd-memcmp.ll b/llvm/test/CodeGen/WebAssembly/simd-memcmp.ll index 7da1bd5ed31b..23df82ea0223 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-memcmp.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-memcmp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=wasm32 -mattr=+simd128 -S < %s | llc -verify-machineinstrs -mattr=+simd128 | FileCheck %s target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20" target triple = "wasm32" diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll index 127fd4e96303..4aa96be582f5 100644 --- 
a/llvm/test/CodeGen/WebAssembly/simd-setcc.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=wasm32-unknown-unknown -mattr=+simd128 -S < %s | llc -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll index c4b886d7ff17..e392cf740830 100644 --- a/llvm/test/CodeGen/X86/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/X86/llc-pipeline-npm.ll @@ -88,8 +88,6 @@ ; O2-NEXT: verify ; O2-NEXT: loop(canon-freeze ; O2-NEXT: loop-reduce) -; O2-NEXT: mergeicmps -; O2-NEXT: expand-memcmp ; O2-NEXT: gc-lowering) ; O2-NEXT: shadow-stack-gc-lowering ; O2-NEXT: function(unreachableblockelim @@ -276,8 +274,6 @@ ; O3-WINDOWS-NEXT: verify ; O3-WINDOWS-NEXT: loop(canon-freeze ; O3-WINDOWS-NEXT: loop-reduce) -; O3-WINDOWS-NEXT: mergeicmps -; O3-WINDOWS-NEXT: expand-memcmp ; O3-WINDOWS-NEXT: gc-lowering) ; O3-WINDOWS-NEXT: shadow-stack-gc-lowering ; O3-WINDOWS-NEXT: function(unreachableblockelim diff --git a/llvm/test/CodeGen/X86/memcmp-constant.ll b/llvm/test/CodeGen/X86/memcmp-constant.ll index 2059b8f80408..26dd6a9dbc6a 100644 --- a/llvm/test/CodeGen/X86/memcmp-constant.ll +++ b/llvm/test/CodeGen/X86/memcmp-constant.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -S < %s | llc -mtriple=x86_64-unknown-unknown | FileCheck %s @.str1 = private constant [4 x i8] c"\00\00\00\00", align 1 @.str2 = private constant [4 x i8] 
c"\ff\ff\ff\ff", align 1 diff --git a/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll b/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll index c16e2adb7a07..d7827c7e0fcc 100644 --- a/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll +++ b/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 +; RUN: opt -passes=mergeicmps,expand-memcmp -mtriple=i686-unknown-unknown -S < %s | llc -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 +; RUN: opt -passes=mergeicmps,expand-memcmp -mtriple=x86_64-unknown-unknown -S < %s | llc -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 ; This tests interaction between MergeICmp and ExpandMemCmp. diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll index 7d1422d3c961..ef19991f13c1 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; NOTE: This is a copy of llvm/test/CodeGen/X86/memcmp.ll with more load pairs. Please keep it that way. 
-; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE1 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE41 +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE1 +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse4.1 -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE41 ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll index 
3a3824a4ffe8..b487e9421cae 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll @@ -1,15 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; NOTE: This is a copy of llvm/test/CodeGen/X86/memcmp.ll with more load pairs. Please keep it that way. -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers | FileCheck 
%s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX2 -; RUN: llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX512F +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=sse4.1 -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx2 -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 
-mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -S < %s | llc -max-loads-per-memcmp=4 -memcmp-num-loads-per-block=4 -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX512F ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll index 09f02c3f5634..d65043203c2a 100644 --- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll +++ 
b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov -S < %s | llc -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 -S < %s | llc -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll index 4fe67fa0883d..6287d840575f 100644 --- a/llvm/test/CodeGen/X86/memcmp-optsize.ll +++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -S < %s | llc -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx2 -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 ; This tests codegen time 
inlining/optimization of memcmp ; rdar://6480398 diff --git a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll index 1b3fd6d4ddd3..20958b84d8b8 100644 --- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov -S < %s | llc -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 -S < %s | llc -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll index 26ee94afbce8..4c3a76df73b7 100644 --- a/llvm/test/CodeGen/X86/memcmp-pgso.ll +++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -S < %s | llc -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 +; RUN: 
opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx2 -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll index 28e732be9191..9c5d48c6bacb 100644 --- a/llvm/test/CodeGen/X86/memcmp-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-x32.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE1 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE41 +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=cmov -S < %s | llc -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse -S < %s | llc -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86,X86-SSE1 +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse2 -S < %s | llc -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 +; RUN: opt -passes=expand-memcmp -mtriple=i686-unknown-unknown -mattr=+sse4.1 -S < %s | llc -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86,X86-SSE41 ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index 9e713bfa6c39..6b6151ce8781 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -1,14 +1,14 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX512F +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -S < %s | llc -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=sse4.1 -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s 
--check-prefixes=X64,X64-AVX,X64-AVX1 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx2 -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx512bw,+prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx512bw,-prefer-256-bit | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512BW +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,-prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512F +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,+prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX2 +; RUN: opt -passes=expand-memcmp -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers -S < %s | llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,-prefer-256-bit,+prefer-mask-registers | FileCheck %s --check-prefixes=X64,X64-MIC-AVX,X64-MIC-AVX512F ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 
81c2f62b1d3d..24390f2d852d 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -21,8 +21,8 @@ ; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis -; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: Default Regalloc Priority Advisor @@ -48,13 +48,6 @@ ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Merge contiguous icmps into a memcmp -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Expand memcmp() to load/stores ; CHECK-NEXT: Lower Garbage Collection Instructions ; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Remove unreachable blocks from the CFG diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 68092e68361c..6b09ea04c860 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -282,6 +282,8 @@ ; CHECK-O-NEXT: Running pass: LoopSinkPass ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass +; CHECK-O-NEXT: Running pass: MergeICmpsPass +; CHECK-O-NEXT: Running pass: ExpandMemCmpPass ; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-DEFAULT-NEXT: Running pass: AllocToken diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index e092569194b1..60c2f8e92ba0 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ 
b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -200,6 +200,8 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: LoopSinkPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-POSTLINK-O-NEXT: Running pass: DivRemPairsPass +; CHECK-POSTLINK-O-NEXT: Running pass: MergeICmpsPass +; CHECK-POSTLINK-O-NEXT: Running pass: ExpandMemCmpPass ; CHECK-POSTLINK-O-NEXT: Running pass: TailCallElimPass ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-POSTLINK-O-NEXT: Running pass: AllocTokenPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 8a1e1589dcdb..c38964d809c7 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -185,6 +185,8 @@ ; CHECK-O-NEXT: Running pass: LoopSinkPass ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass +; CHECK-O-NEXT: Running pass: MergeICmpsPass +; CHECK-O-NEXT: Running pass: ExpandMemCmpPass ; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: AllocTokenPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 00e93a2872b0..d782e221315a 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -194,6 +194,8 @@ ; CHECK-O-NEXT: Running pass: LoopSinkPass ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass +; CHECK-O-NEXT: Running pass: MergeICmpsPass +; CHECK-O-NEXT: Running pass: ExpandMemCmpPass ; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: AllocTokenPass diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll 
b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll index 0ed3af535ac9..b05c878d1f80 100644 --- a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll +++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3 -; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s ; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s declare i32 @memcmp(ptr nocapture, ptr nocapture, i64) diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll index 41d357728b93..d3cdde660e39 100644 --- a/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll +++ b/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 -; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 +; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 declare i32 @bcmp(ptr nocapture, ptr nocapture, i64) diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll index 30da8b391f55..b7ab20ab6e77 100644 --- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals -; RUN: opt -S -expand-memcmp 
-mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32 -; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32 +; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32 declare i32 @memcmp(ptr nocapture, ptr nocapture, i32) diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll index b5075a2fcff0..f0b9da962273 100644 --- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll +++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals -; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD -; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD -; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD -; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD +; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD +; RUN: opt -S -passes=expand-memcmp 
-memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD declare i32 @memcmp(ptr nocapture, ptr nocapture, i64) diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/sanitizer-skip.ll b/llvm/test/Transforms/ExpandMemCmp/X86/sanitizer-skip.ll new file mode 100644 index 000000000000..d403b60f0a3d --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/X86/sanitizer-skip.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=expand-memcmp -mtriple=x86_64-unknown-unknown < %s | FileCheck %s +; +; Verify that ExpandMemCmp does not expand memcmp calls in functions with +; sanitizer attributes, so sanitizer interceptors can catch memory errors. + +declare i32 @memcmp(ptr nocapture, ptr nocapture, i64) + +define i32 @cmp_asan(ptr nocapture readonly %x, ptr nocapture readonly %y) sanitize_address { +; CHECK-LABEL: define i32 @cmp_asan( +; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) + ret i32 %call +} + +define i32 @cmp_msan(ptr nocapture readonly %x, ptr nocapture readonly %y) sanitize_memory { +; CHECK-LABEL: define i32 @cmp_msan( +; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) + ret i32 %call +} + +define i32 @cmp_tsan(ptr nocapture readonly %x, ptr nocapture readonly %y) sanitize_thread { +; CHECK-LABEL: define i32 @cmp_tsan( +; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) 
[[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) + ret i32 %call +} + +define i32 @cmp_hwasan(ptr nocapture readonly %x, ptr nocapture readonly %y) sanitize_hwaddress { +; CHECK-LABEL: define i32 @cmp_hwasan( +; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(ptr [[X]], ptr [[Y]], i64 4) +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) + ret i32 %call +} + +define i32 @cmp_no_sanitizer(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_no_sanitizer( +; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.ucmp.i32.i32(i32 [[TMP3]], i32 [[TMP4]]) +; CHECK-NEXT: ret i32 [[TMP5]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) + ret i32 %call +} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/expand-memcmp-middle-end.ll b/llvm/test/Transforms/PhaseOrdering/X86/expand-memcmp-middle-end.ll new file mode 100644 index 000000000000..4f6e94431e49 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/expand-memcmp-middle-end.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -O2 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s +; +; Verify that expanding memcmp in the middle-end enables further IR +; optimizations. 
When two memcmp calls share a common pointer operand and +; constant size, the expanded loads can be CSE'd / GVN'd away. + +declare i32 @memcmp(ptr nocapture, ptr nocapture, i64) + +; FIXME: The redundant load of %x is not eliminated yet because ExpandMemCmp +; runs late in the pipeline. Moving it earlier should allow GVN to CSE the loads. +; Two memcmp calls with a shared first argument. After expansion in the +; middle-end, further IR optimizations should be able to optimize the expanded code. +define i1 @redundant_memcmp_loads(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture readonly %z) { +; CHECK-LABEL: define i1 @redundant_memcmp_loads( +; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], ptr readonly captures(none) [[Z:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.ucmp.i32.i32(i32 [[TMP3]], i32 [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[Z]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP6]]) +; CHECK-NEXT: [[TMP9:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP7]]) +; CHECK-NEXT: [[TMP10:%.*]] = tail call i32 @llvm.ucmp.i32.i32(i32 [[TMP8]], i32 [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[RESULT:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %cmp1 = call i32 @memcmp(ptr %x, ptr %y, i64 4) + %eq1 = icmp eq i32 %cmp1, 0 + %cmp2 = call i32 @memcmp(ptr %x, ptr %z, i64 4) + %eq2 = icmp eq i32 %cmp2, 0 + %result = and i1 %eq1, %eq2 + ret i1 %result +} diff --git a/llvm/test/tools/llc/new-pm/option-conflict.ll 
b/llvm/test/tools/llc/new-pm/option-conflict.ll index 5031e950fee2..47bc82a20e5b 100644 --- a/llvm/test/tools/llc/new-pm/option-conflict.ll +++ b/llvm/test/tools/llc/new-pm/option-conflict.ll @@ -1,3 +1,3 @@ -; RUN: not llc -mtriple=x86_64-pc-linux-gnu -passes=foo -start-before=mergeicmps -stop-after=gc-lowering -filetype=null %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple=x86_64-pc-linux-gnu -passes=foo -start-before=gc-lowering -stop-after=gc-lowering -filetype=null %s 2>&1 | FileCheck %s ; CHECK: error: --passes cannot be used with start-before and stop-after. diff --git a/llvm/test/tools/llc/new-pm/start-stop.ll b/llvm/test/tools/llc/new-pm/start-stop.ll index 9f7531264ea8..829432238d62 100644 --- a/llvm/test/tools/llc/new-pm/start-stop.ll +++ b/llvm/test/tools/llc/new-pm/start-stop.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -start-before=mergeicmps -stop-after=gc-lowering -filetype=null %s | FileCheck --match-full-lines %s --check-prefix=NULL -; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -start-before=mergeicmps -stop-after=gc-lowering -o /dev/null %s | FileCheck --match-full-lines %s --check-prefix=OBJ +; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -start-before=gc-lowering -stop-after=gc-lowering -filetype=null %s | FileCheck --match-full-lines %s --check-prefix=NULL +; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -start-before=gc-lowering -stop-after=gc-lowering -o /dev/null %s | FileCheck --match-full-lines %s --check-prefix=OBJ -; NULL: require,require,require,require,require,function(verify,mergeicmps,expand-memcmp,gc-lowering,verify) -; OBJ: require,require,require,require,require,function(verify,mergeicmps,expand-memcmp,gc-lowering,verify),PrintMIRPreparePass,function(machine-function(print),free-machine-function) +; NULL: require,require,require,require,require,function(verify,gc-lowering,verify) +; 
OBJ: require,require,require,require,require,function(verify,gc-lowering,verify),PrintMIRPreparePass,function(machine-function(print),free-machine-function) diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index 2a6f0d4cb238..0d1aec2df803 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -422,7 +422,6 @@ optMain(int argc, char **argv, // For codegen passes, only passes that do IR to IR transformation are // supported. initializeExpandIRInstsLegacyPassPass(Registry); - initializeExpandMemCmpLegacyPassPass(Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSelectOptimizePass(Registry); initializeInlineAsmPreparePass(Registry); diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index 893034311872..45ca2d8ab849 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -57,7 +57,6 @@ static_library("CodeGen") { "EdgeBundles.cpp", "ExecutionDomainFix.cpp", "ExpandIRInsts.cpp", - "ExpandMemCmp.cpp", "ExpandPostRAPseudos.cpp", "ExpandReductions.cpp", "ExpandVectorPredication.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn index 57403e8f5ba4..279fb741ae7b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn @@ -24,6 +24,7 @@ static_library("Scalar") { "DivRemPairs.cpp", "DropUnnecessaryAssumes.cpp", "EarlyCSE.cpp", + "ExpandMemCmp.cpp", "FlattenCFGPass.cpp", "Float2Int.cpp", "GVN.cpp",