diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8c696cb16e77..cf0a6f96fb01 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -29477,6 +29477,42 @@ execution, but is unknown at compile time. If the result value does not fit in the result type, then the result is a :ref:`poison value <poisonvalues>`. +.. _llvm_fake_use: + +'``llvm.fake.use``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.fake.use(...) + +Overview: +""""""""" + +The ``llvm.fake.use`` intrinsic is a no-op. It takes a single +value as an operand and is treated as a use of that operand, to force the +optimizer to preserve that value prior to the fake use. This is used for +extending the lifetimes of variables, where this intrinsic placed at the end of +a variable's scope helps prevent that variable from being optimized out. + +Arguments: +"""""""""" + +The ``llvm.fake.use`` intrinsic takes one argument, which may be any +function-local SSA value. Note that the signature is variadic so that the +intrinsic can take any type of argument, but passing more than one argument will +result in an error. + +Semantics: +"""""""""" + +This intrinsic does nothing, but optimizers must consider it a use of its single +operand and should try to preserve the intrinsic and its position in the +function. + Stack Map Intrinsics -------------------- diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h index b6cc14d2077a..f5c23b1b4e01 100644 --- a/llvm/include/llvm/Analysis/PtrUseVisitor.h +++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h @@ -278,6 +278,12 @@ protected: default: return Base::visitIntrinsicInst(II); + // We escape pointers used by a fake_use to prevent SROA from transforming + // them. + case Intrinsic::fake_use: + PI.setEscaped(&II); + return; + case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: return; // No-op intrinsics. 
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 86ff26289759..187d624f0a73 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1372,6 +1372,11 @@ enum NodeType { LIFETIME_START, LIFETIME_END, + /// FAKE_USE represents a use of the operand but does not do anything. + /// Its purpose is the extension of the operand's lifetime mainly for + /// debugging purposes. + FAKE_USE, + /// GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the /// beginning and end of GC transition sequence, and carry arbitrary /// information that target might need for lowering. The first operand is diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 04c8144f2fe7..62667cc8ef38 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1435,6 +1435,8 @@ public: return getOpcode() == TargetOpcode::EXTRACT_SUBREG; } + bool isFakeUse() const { return getOpcode() == TargetOpcode::FAKE_USE; } + /// Return true if the instruction behaves like a copy. /// This does not include native copy instructions. bool isCopyLike() const { diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index c7c217857121..dbdd110b0600 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -440,6 +440,9 @@ namespace llvm { // metadata after llvm SanitizerBinaryMetadata pass. extern char &MachineSanitizerBinaryMetadataID; + /// RemoveLoadsIntoFakeUses pass. + extern char &RemoveLoadsIntoFakeUsesID; + /// RemoveRedundantDebugValues pass. 
extern char &RemoveRedundantDebugValuesID; diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index fc0590b1a1b6..f6191c6fdb7f 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -463,6 +463,7 @@ private: void Select_READ_REGISTER(SDNode *Op); void Select_WRITE_REGISTER(SDNode *Op); void Select_UNDEF(SDNode *N); + void Select_FAKE_USE(SDNode *N); void CannotYetSelect(SDNode *N); void Select_FREEZE(SDNode *N); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e3bf0446575a..232d6be1073f 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1835,6 +1835,9 @@ def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWillReturn, IntrConvergent], "llvm.is.constant">; +// Introduce a use of the argument without generating any code. +def int_fake_use : Intrinsic<[], [llvm_vararg_ty]>; + // Intrinsic to mask out bits of a pointer. // First argument must be pointer or vector of pointer. This is checked by the // verifier. 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index cc5e93c58f56..47a1ca15fc0d 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -264,6 +264,7 @@ void initializeRegionOnlyViewerPass(PassRegistry &); void initializeRegionPrinterPass(PassRegistry &); void initializeRegionViewerPass(PassRegistry &); void initializeRegisterCoalescerPass(PassRegistry &); +void initializeRemoveLoadsIntoFakeUsesPass(PassRegistry &); void initializeRemoveRedundantDebugValuesPass(PassRegistry &); void initializeRenameIndependentSubregsPass(PassRegistry &); void initializeReplaceWithVeclibLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 05baf514fa72..b710b1c46f64 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -250,6 +250,7 @@ DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass) DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", RegAllocScoringPass) DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass) +DUMMY_MACHINE_FUNCTION_PASS("remove-loads-into-fake-uses", RemoveLoadsIntoFakeUsesPass) DUMMY_MACHINE_FUNCTION_PASS("removeredundantdebugvalues", RemoveRedundantDebugValuesPass) DUMMY_MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass) DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass) diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 9fb6de49fb20..635c265a4336 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -217,6 +217,9 @@ HANDLE_TARGET_OPCODE(PATCHABLE_TYPED_EVENT_CALL) HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL) +/// Represents a use of the operand but generates no 
code. +HANDLE_TARGET_OPCODE(FAKE_USE) + // This is a fence with the singlethread scope. It represents a compiler memory // barrier, but does not correspond to any generated instruction. HANDLE_TARGET_OPCODE(MEMBARRIER) diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index 343323860858..b2eb250ae60b 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1418,6 +1418,16 @@ def FAULTING_OP : StandardPseudoInstruction { let isTerminator = true; let isBranch = true; } +def FAKE_USE : StandardPseudoInstruction { + // An instruction that uses its operands but does nothing; this instruction + // will be treated specially by CodeGen passes, distinguishing it from any + // otherwise equivalent instructions. + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); + let AsmString = "FAKE_USE"; + let hasSideEffects = 0; + let isMeta = true; +} def PATCHABLE_OP : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index 128060ec912c..f77b733c6c8f 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -567,7 +567,8 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM, if (const IntrinsicInst *II = dyn_cast(BBI)) if (II->getIntrinsicID() == Intrinsic::lifetime_end || II->getIntrinsicID() == Intrinsic::assume || - II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) + II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl || + II->getIntrinsicID() == Intrinsic::fake_use) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || !isSafeToSpeculativelyExecute(&*BBI)) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 60cb26973ead..19d23c8ba967 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ 
b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1131,6 +1131,21 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer->addBlankLine(); } +static void emitFakeUse(const MachineInstr *MI, AsmPrinter &AP) { + std::string Str; + raw_string_ostream OS(Str); + OS << "fake_use:"; + for (const MachineOperand &Op : MI->operands()) { + // In some circumstances we can end up with fake uses of constants; skip + // these. + if (!Op.isReg()) + continue; + OS << ' ' << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo()); + } + AP.OutStreamer->AddComment(OS.str()); + AP.OutStreamer->addBlankLine(); +} + /// emitDebugValueComment - This method handles the target-independent form /// of DBG_VALUE, returning true if it was able to do so. A false return /// means the target will need to handle MI in EmitInstruction. @@ -1799,6 +1814,10 @@ void AsmPrinter::emitFunctionBody() { case TargetOpcode::KILL: if (isVerbose()) emitKill(&MI, *this); break; + case TargetOpcode::FAKE_USE: + if (isVerbose()) + emitFakeUse(&MI, *this); + break; case TargetOpcode::PSEUDO_PROBE: emitPseudoProbe(MI); break; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index f1607f85c5b3..ae12ce1170f7 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -200,6 +200,7 @@ add_llvm_component_library(LLVMCodeGen RegisterUsageInfo.cpp RegUsageInfoCollector.cpp RegUsageInfoPropagate.cpp + RemoveLoadsIntoFakeUses.cpp ReplaceWithVeclib.cpp ResetMachineFunctionPass.cpp RegisterBank.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 31fa4c105cef..177702054a0e 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -116,6 +116,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRegUsageInfoCollectorPass(Registry); initializeRegUsageInfoPropagationPass(Registry); initializeRegisterCoalescerPass(Registry); + initializeRemoveLoadsIntoFakeUsesPass(Registry); 
initializeRemoveRedundantDebugValuesPass(Registry); initializeRenameIndependentSubregsPass(Registry); initializeSafeStackLegacyPassPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index da6c758d53d4..271a047fc6a7 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2800,12 +2800,34 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, return false; }; + SmallVector FakeUses; + + auto isFakeUse = [&FakeUses](const Instruction *Inst) { + if (auto *II = dyn_cast(Inst); + II && II->getIntrinsicID() == Intrinsic::fake_use) { + // Record the instruction so it can be preserved when the exit block is + // removed. Do not preserve the fake use that uses the result of the + // PHI instruction. + // Do not copy fake uses that use the result of a PHI node. + // FIXME: If we do want to copy the fake use into the return blocks, we + // have to figure out which of the PHI node operands to use for each + // copy. + if (!isa(II->getOperand(0))) { + FakeUses.push_back(II); + } + return true; + } + + return false; + }; + // Make sure there are no instructions between the first instruction // and return. const Instruction *BI = BB->getFirstNonPHI(); // Skip over debug and the bitcast. while (isa(BI) || BI == BCI || BI == EVI || - isa(BI) || isLifetimeEndOrBitCastFor(BI)) + isa(BI) || isLifetimeEndOrBitCastFor(BI) || + isFakeUse(BI)) BI = BI->getNextNode(); if (BI != RetI) return false; @@ -2814,6 +2836,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, /// call. const Function *F = BB->getParent(); SmallVector TailCallBBs; + // Record the call instructions so we can insert any fake uses + // that need to be preserved before them. + SmallVector CallInsts; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { // Look through bitcasts. 
@@ -2825,6 +2850,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, TLI->mayBeEmittedAsTailCall(CI) && attributesPermitTailCall(F, CI, RetI, *TLI)) { TailCallBBs.push_back(PredBB); + CallInsts.push_back(CI); } else { // Consider the cases in which the phi value is indirectly produced by // the tail call, for example when encountering memset(), memmove(), @@ -2844,8 +2870,10 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, isIntrinsicOrLFToBeTailCalled(TLInfo, CI) && IncomingVal == CI->getArgOperand(0) && TLI->mayBeEmittedAsTailCall(CI) && - attributesPermitTailCall(F, CI, RetI, *TLI)) + attributesPermitTailCall(F, CI, RetI, *TLI)) { TailCallBBs.push_back(PredBB); + CallInsts.push_back(CI); + } } } } else { @@ -2863,6 +2891,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) && V == CI->getArgOperand(0))) { TailCallBBs.push_back(Pred); + CallInsts.push_back(CI); } } } @@ -2889,8 +2918,17 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, } // If we eliminated all predecessors of the block, delete the block now. - if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) + if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) { + // Copy the fake uses found in the original return block to all blocks + // that contain tail calls. + for (auto *CI : CallInsts) { + for (auto const *FakeUse : FakeUses) { + auto *ClonedInst = FakeUse->clone(); + ClonedInst->insertBefore(CI); + } + } BB->eraseFromParent(); + } return Changed; } diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 7fc25cd889a0..332ed37bd2b7 100644 --- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -87,7 +87,8 @@ bool DeadMachineInstructionElimImpl::isDead(const MachineInstr *MI) const { return false; // Don't delete frame allocation labels. 
- if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE) + if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE || + MI->getOpcode() == TargetOpcode::FAKE_USE) return false; // Don't delete instructions with side effects. diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index f44af78cded4..968d0a2a5c75 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2193,6 +2193,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, } return true; } + case Intrinsic::fake_use: { + SmallVector VRegs; + for (const auto &Arg : CI.args()) + for (auto VReg : getOrCreateVRegs(*Arg)) + VRegs.push_back(VReg); + MIRBuilder.buildInstr(TargetOpcode::FAKE_USE, std::nullopt, VRegs); + return true; + } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(CI); assert(DI.getVariable() && "Missing variable"); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index cfdd9905c16f..b1270e7aeb87 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -228,6 +228,9 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, // Don't delete frame allocation labels. if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) return false; + // Don't delete fake uses. + if (MI.getOpcode() == TargetOpcode::FAKE_USE) + return false; // LIFETIME markers should be preserved even if they seem dead. 
if (MI.getOpcode() == TargetOpcode::LIFETIME_START || MI.getOpcode() == TargetOpcode::LIFETIME_END) diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index 27bbf5599b60..aadc54b495fe 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -406,7 +406,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, bool MachineCSE::isCSECandidate(MachineInstr *MI) { if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || - MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo()) + MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() || + MI->isFakeUse()) return false; // Ignore copies. diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 7a3cf96ccffe..4e6d34346b1d 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -530,7 +530,8 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII) { - return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF); + return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) || + MI->isFakeUse(); } /// A region of an MBB for scheduling. 
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index fe515ef5be54..609f9af9767f 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -833,7 +833,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
     if (!ProcessedBegin)
       --I;
 
-    if (MI.isDebugOrPseudoInstr()) {
+    if (MI.isDebugOrPseudoInstr() || MI.isFakeUse()) {
       if (MI.isDebugValue())
         ProcessDbgInst(MI);
       continue;
diff --git a/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp b/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp
new file mode 100644
index 000000000000..232181a199b8
--- /dev/null
+++ b/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp
@@ -0,0 +1,183 @@
+//===---- RemoveLoadsIntoFakeUses.cpp - Remove loads with no real uses ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// The FAKE_USE instruction is used to preserve certain values through
+/// optimizations for the sake of debugging. This may result in spilled values
+/// being loaded into registers that are only used by FAKE_USEs; this is not
+/// necessary for debugging purposes, because at that point the value must be on
+/// the stack and hence available for debugging. Therefore, this pass removes
+/// loads that are only used by FAKE_USEs.
+///
+/// This pass should run very late, to ensure that we don't inadvertently
+/// shorten stack lifetimes by removing these loads, since the FAKE_USEs will
+/// also no longer be in effect. Running immediately before LiveDebugValues
+/// ensures that LDV will have accurate information of the machine location of
+/// debug values.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "remove-loads-into-fake-uses"
+
+STATISTIC(NumLoadsDeleted, "Number of dead load instructions deleted");
+STATISTIC(NumFakeUsesDeleted, "Number of FAKE_USE instructions deleted");
+
+/// Machine pass that deletes reloads of spilled registers whose only users
+/// are FAKE_USE instructions, together with those FAKE_USEs.
+class RemoveLoadsIntoFakeUses : public MachineFunctionPass {
+public:
+  static char ID;
+
+  RemoveLoadsIntoFakeUses() : MachineFunctionPass(ID) {
+    initializeRemoveLoadsIntoFakeUsesPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+
+  StringRef getPassName() const override {
+    return "Remove Loads Into Fake Uses";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+char RemoveLoadsIntoFakeUses::ID = 0;
+char &llvm::RemoveLoadsIntoFakeUsesID = RemoveLoadsIntoFakeUses::ID;
+
+INITIALIZE_PASS_BEGIN(RemoveLoadsIntoFakeUses, DEBUG_TYPE,
+                      "Remove Loads Into Fake Uses", false, false)
+INITIALIZE_PASS_END(RemoveLoadsIntoFakeUses, DEBUG_TYPE,
+                    "Remove Loads Into Fake Uses", false, false)
+
+bool RemoveLoadsIntoFakeUses::runOnMachineFunction(MachineFunction &MF) {
+  // Only `optdebug` functions should contain FAKE_USEs, so don't try to run
+  // this for other functions.
+  if (!MF.getFunction().hasFnAttribute(Attribute::OptimizeForDebugging) ||
+      skipFunction(MF.getFunction()))
+    return false;
+
+  bool AnyChanges = false;
+
+  LiveRegUnits LivePhysRegs;
+  const MachineRegisterInfo *MRI = &MF.getRegInfo();
+  const TargetSubtargetInfo &ST = MF.getSubtarget();
+  const TargetInstrInfo *TII = ST.getInstrInfo();
+  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+
+  // Maps a physical register to the FAKE_USEs seen (walking backwards) that
+  // still refer to the value currently held in that register.
+  SmallDenseMap<Register, SmallVector<MachineInstr *>> RegFakeUses;
+  LivePhysRegs.init(*TRI);
+  for (MachineBasicBlock *MBB : post_order(&MF)) {
+    LivePhysRegs.addLiveOuts(*MBB);
+
+    for (MachineInstr &MI : make_early_inc_range(reverse(*MBB))) {
+      if (MI.isFakeUse()) {
+        for (const MachineOperand &MO : MI.operands()) {
+          // Track the Fake Uses that use this register so that we can delete
+          // them if we delete the corresponding load.
+          if (!MO.isReg())
+            continue;
+          // Record each FAKE_USE once per register even if it names the
+          // register in several operands, so it is never erased twice.
+          SmallVector<MachineInstr *> &Uses = RegFakeUses[MO.getReg()];
+          if (Uses.empty() || Uses.back() != &MI)
+            Uses.push_back(&MI);
+        }
+        // Do not record FAKE_USE uses in LivePhysRegs so that we can recognize
+        // otherwise-unused loads.
+        continue;
+      }
+
+      // If the restore size is not std::nullopt then we are dealing with a
+      // reload of a spilled register.
+      if (MI.getRestoreSize(TII)) {
+        Register Reg = MI.getOperand(0).getReg();
+        assert(Reg.isPhysical() && "VReg seen in function with NoVRegs set?");
+        // Don't delete live physreg defs, or any reserved register defs.
+        if (!LivePhysRegs.available(Reg) || MRI->isReserved(Reg))
+          continue;
+        // There should be an exact match between the loaded register and the
+        // FAKE_USE use. If not, this is a load that is unused by anything? It
+        // should probably be deleted, but that's outside of this pass' scope.
+        if (RegFakeUses.contains(Reg)) {
+          LLVM_DEBUG(dbgs() << "RemoveLoadsIntoFakeUses: DELETING: " << MI);
+          // It is possible that some DBG_VALUE instructions refer to this
+          // instruction. They will be deleted in the live debug variable
+          // analysis.
+          MI.eraseFromParent();
+          AnyChanges = true;
+          ++NumLoadsDeleted;
+          // Each FAKE_USE now appears to be a fake use of the previous value
+          // of the loaded register; delete them to avoid incorrectly
+          // interpreting them as such.
+          SmallVector<MachineInstr *> DeadFakeUses;
+          std::swap(DeadFakeUses, RegFakeUses[Reg]);
+          for (MachineInstr *FakeUse : DeadFakeUses) {
+            LLVM_DEBUG(dbgs()
+                       << "RemoveLoadsIntoFakeUses: DELETING: " << *FakeUse);
+            // A FAKE_USE with several register operands is recorded under
+            // each of them; drop those records before erasing it so that no
+            // dangling pointer is left in the map.
+            for (auto &Entry : RegFakeUses)
+              llvm::erase_if(Entry.second, [FakeUse](const MachineInstr *Other) {
+                return Other == FakeUse;
+              });
+            FakeUse->eraseFromParent();
+          }
+          NumFakeUsesDeleted += DeadFakeUses.size();
+        }
+        continue;
+      }
+
+      // In addition to tracking LivePhysRegs, we need to clear RegFakeUses each
+      // time a register is defined, as existing FAKE_USEs no longer apply to
+      // that register. RegFakeUses is keyed by register (not register unit),
+      // so drop the entries of the defined register and of every register
+      // that aliases it.
+      if (!RegFakeUses.empty()) {
+        for (const MachineOperand &MO : MI.operands()) {
+          if (MO.isReg() && MO.isDef()) {
+            Register Reg = MO.getReg();
+            assert(Reg.isPhysical() &&
+                   "VReg seen in function with NoVRegs set?");
+            for (MCRegAliasIterator AI(Reg.asMCReg(), TRI, /*IncludeSelf=*/true);
+                 AI.isValid(); ++AI)
+              RegFakeUses.erase(Register(*AI));
+          }
+        }
+      }
+      LivePhysRegs.stepBackward(MI);
+    }
+  }
+
+  return AnyChanges;
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 067f82c99adc..162af2d9d708 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1464,6 +1464,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
     updateValueMap(II, ResultReg);
     return true;
   }
+  case Intrinsic::fake_use:
+    // At -O0, we don't need fake use, so just ignore it.
+ return true; case Intrinsic::experimental_stackmap: return selectStackmap(II); case Intrinsic::experimental_patchpoint_void: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 221dcfe14559..b5c80005a0ec 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2438,6 +2438,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { report_fatal_error("Do not know how to promote this operator's operand!"); case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break; + case ISD::FAKE_USE: + R = PromoteFloatOp_FAKE_USE(N, OpNo); + break; case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: @@ -2480,6 +2483,13 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) { return DAG.getBitcast(N->getValueType(0), Convert); } +SDValue DAGTypeLegalizer::PromoteFloatOp_FAKE_USE(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Only Operand 1 must need promotion here"); + SDValue Op = GetPromotedFloat(N->getOperand(OpNo)); + return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, N->getOperand(0), + Op); +} + // Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by // PromoteFloatRes_FCOPYSIGN. 
SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) { @@ -3433,6 +3443,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { "operand!"); case ISD::BITCAST: Res = SoftPromoteHalfOp_BITCAST(N); break; + case ISD::FAKE_USE: + Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo); + break; case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break; @@ -3473,6 +3486,13 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); } +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Only Operand 1 must need promotion here"); + SDValue Op = GetSoftPromotedHalf(N->getOperand(OpNo)); + return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, N->getOperand(0), + Op); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "Only Operand 1 must need promotion here"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index c19a5a499562..05971152d535 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1934,6 +1934,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::FAKE_USE: + Res = PromoteIntOp_FAKE_USE(N); + break; case ISD::INSERT_VECTOR_ELT: Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo); break; @@ -5280,6 +5283,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; case 
ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + case ISD::FAKE_USE: + Res = ExpandOp_FAKE_USE(N); + break; case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; case ISD::EXPERIMENTAL_VP_SPLAT: @@ -6115,6 +6121,19 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) { return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); } +// FIXME: We wouldn't need this if clang could promote short integers +// that are arguments to FAKE_USE. +SDValue DAGTypeLegalizer::PromoteIntOp_FAKE_USE(SDNode *N) { + SDLoc dl(N); + SDValue V0 = N->getOperand(0); + SDValue V1 = N->getOperand(1); + EVT InVT1 = V1.getValueType(); + SDValue VPromoted = + DAG.getNode(ISD::ANY_EXTEND, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), InVT1), V1); + return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), V0, VPromoted); +} + SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1088db4bdbe0..4577346a02d6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -391,6 +391,7 @@ private: SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N); + SDValue PromoteIntOp_FAKE_USE(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_ScalarOp(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); @@ -755,6 +756,7 @@ private: bool PromoteFloatOperand(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FAKE_USE(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, 
unsigned OpNo); SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo); @@ -800,6 +802,7 @@ private: bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_BITCAST(SDNode *N); + SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N); SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N); @@ -877,6 +880,7 @@ private: SDValue ScalarizeVecOp_VECREDUCE(SDNode *N); SDValue ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N); SDValue ScalarizeVecOp_CMP(SDNode *N); + SDValue ScalarizeVecOp_FAKE_USE(SDNode *N); //===--------------------------------------------------------------------===// // Vector Splitting Support: LegalizeVectorTypes.cpp @@ -964,6 +968,7 @@ private: SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_ExtVecInRegOp(SDNode *N); + SDValue SplitVecOp_FAKE_USE(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, unsigned OpNo); @@ -1069,6 +1074,7 @@ private: SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N); + SDValue WidenVecOp_FAKE_USE(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo); SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo); @@ -1198,6 +1204,7 @@ private: SDValue ExpandOp_BITCAST (SDNode *N); SDValue ExpandOp_BUILD_VECTOR (SDNode *N); SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); + SDValue ExpandOp_FAKE_USE(SDNode *N); SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N); SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo); diff --git 
a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index a55364ea2c4e..b402e8237627 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -403,6 +403,17 @@ SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { return N->getConstantOperandVal(1) ? Hi : Lo; } +// Split the integer operand in two and create a second FAKE_USE node for +// the other half. The original SDNode is updated in place. +SDValue DAGTypeLegalizer::ExpandOp_FAKE_USE(SDNode *N) { + SDValue Lo, Hi; + SDValue Chain = N->getOperand(0); + GetExpandedOp(N->getOperand(1), Lo, Hi); + SDValue LoUse = DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Lo); + DAG.UpdateNodeOperands(N, LoUse, Hi); + return SDValue(N, 0); +} + SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // The vector type is legal but the element type needs expansion. EVT VecVT = N->getValueType(0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 475d5806467d..4c6da7c5df6b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -746,6 +746,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = ScalarizeVecOp_BITCAST(N); break; + case ISD::FAKE_USE: + Res = ScalarizeVecOp_FAKE_USE(N); + break; case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: @@ -846,6 +849,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { N->getValueType(0), Elt); } +// Need to legalize vector operands of fake uses. Must be <1 x ty>. 
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FAKE_USE(SDNode *N) { + assert(N->getOperand(1).getValueType().getVectorNumElements() == 1 && + "Fake Use: Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Elt); +} + /// If the input is a vector that needs to be scalarized, it must be <1 x ty>. /// Do the operation on the element instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { @@ -3291,6 +3302,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { Res = SplitVecOp_CMP(N); break; + case ISD::FAKE_USE: + Res = SplitVecOp_FAKE_USE(N); + break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: @@ -3505,6 +3519,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } +// Split a FAKE_USE use of a vector into FAKE_USEs of hi and lo part. +SDValue DAGTypeLegalizer::SplitVecOp_FAKE_USE(SDNode *N) { + SDValue Lo, Hi; + GetSplitVector(N->getOperand(1), Lo, Hi); + SDValue Chain = + DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Lo); + return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Hi); +} + SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will // end up being split all the way down to individual components. 
Convert the @@ -6466,6 +6489,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { report_fatal_error("Do not know how to widen this operator's operand!"); case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; + case ISD::FAKE_USE: + Res = WidenVecOp_FAKE_USE(N); + break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; case ISD::INSERT_SUBVECTOR: Res = WidenVecOp_INSERT_SUBVECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; @@ -6851,6 +6877,16 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { return CreateStackStoreLoad(InOp, VT); } +// Vectors with sizes that are not powers of 2 need to be widened to the +// next largest power of 2. For example, we may get a vector of 3 32-bit +// integers or of 6 16-bit integers, both of which have to be widened to a +// 128-bit vector. +SDValue DAGTypeLegalizer::WidenVecOp_FAKE_USE(SDNode *N) { + SDValue WidenedOp = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), + WidenedOp); +} + SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ad24704d940a..521a4fee8aaf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1622,6 +1622,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef Values, SDValue N = NodeMap[V]; if (!N.getNode() && isa(V)) // Check unused arguments map. N = UnusedArgNodeMap[V]; + if (N.getNode()) { // Only emit func arg dbg value for non-variadic dbg.values for now. 
if (!IsVariadic && @@ -7703,6 +7704,38 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } + case Intrinsic::fake_use: { + Value *V = I.getArgOperand(0); + SDValue Ops[2]; + // For Values not declared or previously used in this basic block, the + // NodeMap will not have an entry, and `getValue` will assert if V has no + // valid register value. + auto FakeUseValue = [&]() -> SDValue { + SDValue &N = NodeMap[V]; + if (N.getNode()) + return N; + + // If there's a virtual register allocated and initialized for this + // value, use it. + if (SDValue copyFromReg = getCopyFromRegs(V, V->getType())) + return copyFromReg; + // FIXME: Do we want to preserve constants? It seems pointless. + if (isa(V)) + return getValue(V); + return SDValue(); + }(); + if (!FakeUseValue || FakeUseValue.isUndef()) + return; + Ops[0] = getRoot(); + Ops[1] = FakeUseValue; + // Also, do not translate a fake use with an undef operand, or any other + // empty SDValues. + if (!Ops[1] || Ops[1].isUndef()) + return; + DAG.setRoot(DAG.getNode(ISD::FAKE_USE, sdl, MVT::Other, Ops)); + return; + } + case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { // Get the exception pointer vreg, copy from it, and resize it to fit. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 001f782f209f..a253d1a0e201 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -454,6 +454,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UBSANTRAP: return "ubsantrap"; case ISD::LIFETIME_START: return "lifetime.start"; case ISD::LIFETIME_END: return "lifetime.end"; + case ISD::FAKE_USE: + return "fake_use"; case ISD::PSEUDO_PROBE: return "pseudoprobe"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 09bde54b9aaa..8e268d4f4968 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -804,6 +804,50 @@ static void reportFastISelFailure(MachineFunction &MF, LLVM_DEBUG(dbgs() << R.getMsg() << "\n"); } +// Detect any fake uses that follow a tail call and move them before the tail +// call. Ignore fake uses that use values that are def'd by or after the tail +// call. +static void preserveFakeUses(BasicBlock::iterator Begin, + BasicBlock::iterator End) { + BasicBlock::iterator I = End; + if (--I == Begin || !isa(*I)) + return; + // Detect whether there are any fake uses trailing a (potential) tail call. + bool HaveFakeUse = false; + bool HaveTailCall = false; + do { + if (const CallInst *CI = dyn_cast(--I)) + if (CI->isTailCall()) { + HaveTailCall = true; + break; + } + if (const IntrinsicInst *II = dyn_cast(I)) + if (II->getIntrinsicID() == Intrinsic::fake_use) + HaveFakeUse = true; + } while (I != Begin); + + // If we didn't find any tail calls followed by fake uses, we are done. 
+ if (!HaveTailCall || !HaveFakeUse) + return; + + SmallVector FakeUses; + // Record the fake uses we found so we can move them to the front of the + // tail call. Ignore them if they use a value that is def'd by or after + // the tail call. + for (BasicBlock::iterator Inst = I; Inst != End; Inst++) { + if (IntrinsicInst *FakeUse = dyn_cast(Inst); + FakeUse && FakeUse->getIntrinsicID() == Intrinsic::fake_use) { + if (auto UsedDef = dyn_cast(FakeUse->getOperand(0)); + !UsedDef || UsedDef->getParent() != I->getParent() || + UsedDef->comesBefore(&*I)) + FakeUses.push_back(FakeUse); + } + } + + for (auto *Inst : FakeUses) + Inst->moveBefore(*Inst->getParent(), I); +} + void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, bool &HadTailCall) { @@ -1665,6 +1709,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->VisitedBBs[LLVMBB->getNumber()] = true; } + // Fake uses that follow tail calls are dropped. To avoid this, move + // such fake uses in front of the tail call, provided they don't + // use anything def'd by or after the tail call. + { + BasicBlock::iterator BBStart = + const_cast(LLVMBB)->getFirstNonPHI()->getIterator(); + BasicBlock::iterator BBEnd = const_cast(LLVMBB)->end(); + preserveFakeUses(BBStart, BBEnd); + } + BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI()->getIterator(); BasicBlock::const_iterator const End = LLVMBB->end(); @@ -2448,6 +2502,13 @@ void SelectionDAGISel::Select_UNDEF(SDNode *N) { CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); } +// Use the generic target FAKE_USE target opcode. The chain operand +// must come last, because InstrEmitter::AddOperand() requires it. 
+void SelectionDAGISel::Select_FAKE_USE(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::FAKE_USE, N->getValueType(0), + N->getOperand(1), N->getOperand(0)); +} + void SelectionDAGISel::Select_FREEZE(SDNode *N) { // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now. // If FREEZE instruction is added later, the code below must be changed as @@ -3219,6 +3280,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::UNDEF: Select_UNDEF(NodeToMatch); return; + case ISD::FAKE_USE: + Select_FAKE_USE(NodeToMatch); + return; case ISD::FREEZE: Select_FREEZE(NodeToMatch); return; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 1d52ebe6717f..c0b834650d73 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1206,6 +1206,7 @@ void TargetPassConfig::addMachinePasses() { // addPreEmitPass. Maybe only pass "false" here for those targets? addPass(&FuncletLayoutID); + addPass(&RemoveLoadsIntoFakeUsesID); addPass(&StackMapLivenessID); addPass(&LiveDebugValuesID); addPass(&MachineSanitizerBinaryMetadataID); diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 6f0f3f244c05..62d88ce21657 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -1171,7 +1171,10 @@ Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const { const Instruction * Instruction::getPrevNonDebugInstruction(bool SkipPseudoOp) const { for (const Instruction *I = getPrevNode(); I; I = I->getPrevNode()) - if (!isa(I) && !(SkipPseudoOp && isa(I))) + if (!isa(I) && + !(SkipPseudoOp && isa(I)) && + !(isa(I) && + cast(I)->getIntrinsicID() == Intrinsic::fake_use)) return I; return nullptr; } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 2c0f10a34f91..79b3ca3b6a5a 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5128,6 +5128,7 @@ void Verifier::visitInstruction(Instruction &I) { 
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void || F->getIntrinsicID() == Intrinsic::experimental_patchpoint || + F->getIntrinsicID() == Intrinsic::fake_use || F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint || F->getIntrinsicID() == Intrinsic::wasm_rethrow || IsAttachedCallOperand(F, CBI, i), diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 097e29527eed..e86d3771bd2f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -315,6 +315,7 @@ void NVPTXPassConfig::addIRPasses() { disablePass(&FuncletLayoutID); disablePass(&PatchableFunctionID); disablePass(&ShrinkWrapID); + disablePass(&RemoveLoadsIntoFakeUsesID); addPass(createNVPTXAAWrapperPass()); addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) { diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 48a2ce89bad3..7058b15d53aa 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -140,6 +140,7 @@ void SPIRVPassConfig::addPostRegAlloc() { disablePass(&ShrinkWrapID); disablePass(&LiveDebugValuesID); disablePass(&MachineLateInstrsCleanupID); + disablePass(&RemoveLoadsIntoFakeUsesID); // Do not work with OpPhi. disablePass(&BranchFolderPassID); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 23539a5f4b26..73765f8fa009 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -552,6 +552,7 @@ void WebAssemblyPassConfig::addPostRegAlloc() { disablePass(&StackMapLivenessID); disablePass(&PatchableFunctionID); disablePass(&ShrinkWrapID); + disablePass(&RemoveLoadsIntoFakeUsesID); // This pass hurts code size for wasm because it can generate irreducible // control flow. 
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index 02c3ca9839fc..ea94a4be32b2 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -432,6 +432,24 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { if (MI.isCall()) FPInstClass = X86II::SpecialFP; + // A fake_use with a floating point pseudo register argument that is + // killed must behave like any other floating point operation and pop + // the floating point stack (this is done in handleSpecialFP()). + // Fake_use is, however, unusual, in that sometimes its operand is not + // killed because a later instruction (probably a return) will use it. + // It is this instruction that will pop the stack. + // In this scenario we can safely remove the fake_use's operand + // (it is live anyway). + if (MI.isFakeUse()) { + const MachineOperand &MO = MI.getOperand(0); + if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) { + if (MO.isKill()) + FPInstClass = X86II::SpecialFP; + else + MI.removeOperand(0); + } + } + if (FPInstClass == X86II::NotFP) continue; // Efficiently ignore non-fp insts! @@ -1737,6 +1755,20 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { // Don't delete the inline asm! return; } + + // FAKE_USE must pop its register operand off the stack if it is killed, + // because this constitutes the register's last use. If the operand + // is not killed, it will have its last use later, so we leave it alone. + // In either case we remove the operand so later passes don't see it. 
+ case TargetOpcode::FAKE_USE: { + assert(MI.getNumExplicitOperands() == 1 && + "FAKE_USE must have exactly one operand"); + if (MI.getOperand(0).isKill()) { + freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0))); + } + MI.removeOperand(0); + return; + } } Inst = MBB->erase(Inst); // Remove the pseudo instruction diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 26b62cb79cde..2310cb3a7dec 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -3802,6 +3802,12 @@ private: struct LoadOpSplitter : public OpSplitter { AAMDNodes AATags; + // A vector to hold the split components that we want to emit + // separate fake uses for. + SmallVector Components; + // A vector to hold all the fake uses of the struct that we are splitting. + // Usually there should only be one, but we are handling the general case. + SmallVector FakeUses; LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, AAMDNodes AATags, Align BaseAlign, const DataLayout &DL, @@ -3826,10 +3832,32 @@ private: GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) Load->setAAMetadata( AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL)); + // Record the load so we can generate a fake use for this aggregate + // component. + Components.push_back(Load); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); } + + // Stash the fake uses that use the value generated by this instruction. + void recordFakeUses(LoadInst &LI) { + for (Use &U : LI.uses()) + if (auto *II = dyn_cast(U.getUser())) + if (II->getIntrinsicID() == Intrinsic::fake_use) + FakeUses.push_back(II); + } + + // Replace all fake uses of the aggregate with a series of fake uses, one + // for each split component. 
+ void emitFakeUses() { + for (Instruction *I : FakeUses) { + IRB.SetInsertPoint(I); + for (auto *V : Components) + IRB.CreateIntrinsic(Intrinsic::fake_use, {}, {V}); + I->eraseFromParent(); + } + } }; bool visitLoadInst(LoadInst &LI) { @@ -3841,8 +3869,10 @@ private: LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(), getAdjustedAlignment(&LI, 0), DL, IRB); + Splitter.recordFakeUses(LI); Value *V = PoisonValue::get(LI.getType()); Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); + Splitter.emitFakeUses(); Visited.erase(&LI); LI.replaceAllUsesWith(V); LI.eraseFromParent(); diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 47e3c03288d9..dc9ca1423f3e 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -513,6 +513,12 @@ void PruningFunctionCloner::CloneBlock( for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; ++II) { + // Don't clone fake_use as it may suppress many optimizations + // due to inlining, especially SROA. 
+ if (auto *IntrInst = dyn_cast(II)) + if (IntrInst->getIntrinsicID() == Intrinsic::fake_use) + continue; + Instruction *NewInst = cloneInstruction(II); NewInst->insertInto(NewBB, NewBB->end()); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index e4809cd4bb44..d0669e44f821 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3491,6 +3491,9 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To, unsigned Count = 0; for (Use &U : llvm::make_early_inc_range(From->uses())) { + auto *II = dyn_cast(U.getUser()); + if (II && II->getIntrinsicID() == Intrinsic::fake_use) + continue; if (!ShouldReplace(Root, U)) continue; LLVM_DEBUG(dbgs() << "Replace dominated use of '"; diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 5251eb86bca9..1b7912fdf5e3 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -80,7 +80,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { if (SI->isVolatile()) return false; } else if (const IntrinsicInst *II = dyn_cast(U)) { - if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) + if (!II->isLifetimeStartOrEnd() && !II->isDroppable() && + II->getIntrinsicID() != Intrinsic::fake_use) return false; } else if (const BitCastInst *BCI = dyn_cast(U)) { if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI)) diff --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll index 2370fe1468b4..5d304d1569d3 100644 --- a/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll +++ b/llvm/test/Analysis/ScalarEvolution/flags-from-poison-dbg.ll @@ -16,7 +16,7 @@ for.body.lr.ph: ; preds = %entry for.body: ; preds = %for.inc, %for.body.lr.ph %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] %add = add nsw i32 %i.02, 50, !dbg !16 - call 
void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !18, metadata !19), !dbg !20 + tail call void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !18, metadata !19), !dbg !20 %idxprom = sext i32 %add to i64, !dbg !21 ; CHECK: %idxprom = sext i32 %add to i64 diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index ba611493e1a7..ec6f24a5650f 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -69,6 +69,7 @@ ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: Workaround A53 erratum 835769 pass ; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 3465b717261c..ffbe3dd37710 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -224,6 +224,7 @@ ; CHECK-NEXT: Machine Copy Propagation Pass ; CHECK-NEXT: Workaround A53 erratum 835769 pass ; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 29e8ebdafb58..7bf1b8746fd8 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -143,6 +143,7 @@ ; GCN-O0-NEXT: Post RA hazard recognizer ; GCN-O0-NEXT: Branch relaxation pass ; GCN-O0-NEXT: Register Usage Information Collector Pass +; GCN-O0-NEXT: Remove Loads Into Fake Uses ; GCN-O0-NEXT: Live DEBUG_VALUE analysis ; GCN-O0-NEXT: Machine Sanitizer Binary Metadata ; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis @@ -420,6 +421,7 @@ ; 
GCN-O1-NEXT: AMDGPU Insert Delay ALU ; GCN-O1-NEXT: Branch relaxation pass ; GCN-O1-NEXT: Register Usage Information Collector Pass +; GCN-O1-NEXT: Remove Loads Into Fake Uses ; GCN-O1-NEXT: Live DEBUG_VALUE analysis ; GCN-O1-NEXT: Machine Sanitizer Binary Metadata ; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis @@ -725,6 +727,7 @@ ; GCN-O1-OPTS-NEXT: AMDGPU Insert Delay ALU ; GCN-O1-OPTS-NEXT: Branch relaxation pass ; GCN-O1-OPTS-NEXT: Register Usage Information Collector Pass +; GCN-O1-OPTS-NEXT: Remove Loads Into Fake Uses ; GCN-O1-OPTS-NEXT: Live DEBUG_VALUE analysis ; GCN-O1-OPTS-NEXT: Machine Sanitizer Binary Metadata ; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis @@ -1036,6 +1039,7 @@ ; GCN-O2-NEXT: AMDGPU Insert Delay ALU ; GCN-O2-NEXT: Branch relaxation pass ; GCN-O2-NEXT: Register Usage Information Collector Pass +; GCN-O2-NEXT: Remove Loads Into Fake Uses ; GCN-O2-NEXT: Live DEBUG_VALUE analysis ; GCN-O2-NEXT: Machine Sanitizer Binary Metadata ; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis @@ -1359,6 +1363,7 @@ ; GCN-O3-NEXT: AMDGPU Insert Delay ALU ; GCN-O3-NEXT: Branch relaxation pass ; GCN-O3-NEXT: Register Usage Information Collector Pass +; GCN-O3-NEXT: Remove Loads Into Fake Uses ; GCN-O3-NEXT: Live DEBUG_VALUE analysis ; GCN-O3-NEXT: Machine Sanitizer Binary Metadata ; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 9b983d96f793..819623d3fcc5 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -193,6 +193,7 @@ ; CHECK-NEXT: ARM block placement ; CHECK-NEXT: optimise barriers pass ; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll 
b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll index 38c1dbcb1075..24bd4c75a982 100644 --- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll @@ -62,6 +62,7 @@ ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll index 50f154663177..53cdbd18f9b9 100644 --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -166,6 +166,7 @@ ; LAXX-NEXT: Implement the 'patchable-function' attribute ; LAXX-NEXT: Branch relaxation pass ; LAXX-NEXT: Contiguously Lay Out Funclets +; LAXX-NEXT: Remove Loads Into Fake Uses ; LAXX-NEXT: StackMap Liveness Analysis ; LAXX-NEXT: Live DEBUG_VALUE analysis ; LAXX-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir b/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir new file mode 100644 index 000000000000..7eb8915f26a8 --- /dev/null +++ b/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir @@ -0,0 +1,99 @@ +# In certain cases CodeGenPrepare folds a return instruction into +# the return block's predecessor blocks and subsequently deletes the return block. +# The purpose of this is to enable tail call optimization in the predecessor blocks. +# Removal of the return block also removes fake use instructions that were present +# in the return block, potentially causing debug information to be lost. +# +# The fix is to clone any fake use instructions that are not dominated by definitions +# in the return block itself into the predecessor blocks. This test ensures that we do so. 
+# +# Generated from the following source with +# clang -fextend-lifetimes -S -emit-llvm -O2 -mllvm -stop-before=codegenprepare -o test.mir test.c +# +# extern int f0(); +# extern int f1(); +# +# int foo(int i) { +# int temp = i; +# if (temp == 0) +# temp = f0(); +# else +# temp = f1(); +# return temp; +# } +# +# RUN: llc -run-pass=codegenprepare -o - %s | FileCheck %s +# +# CHECK: define{{.*}}foo +# CHECK: if.then: +# CHECK-NEXT: call{{.*}}fake.use(i32 %i) +# CHECK-NEXT: tail call i32{{.*}}@f0 +# CHECK-NEXT: ret +# CHECK: if.else: +# CHECK-NEXT: call{{.*}}fake.use(i32 %i) +# CHECK-NEXT: tail call i32{{.*}}@f1 +# CHECK-NEXT: ret + +--- | + define hidden i32 @foo(i32 %i) local_unnamed_addr optdebug { + entry: + %cmp = icmp eq i32 %i, 0 + br i1 %cmp, label %if.then, label %if.else + + if.then: + %call = tail call i32 (...) @f0() + br label %if.end + + if.else: + %call1 = tail call i32 (...) @f1() + br label %if.end + + if.end: + %temp.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ] + notail call void (...) @llvm.fake.use(i32 %temp.0) + notail call void (...) @llvm.fake.use(i32 %i) + ret i32 %temp.0 + } + declare i32 @f0(...) local_unnamed_addr + declare i32 @f1(...) local_unnamed_addr + +... +--- +name: foo +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + +... 
diff --git a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll index 4a17384e4999..5853647bf3b9 100644 --- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll @@ -60,6 +60,7 @@ ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: PowerPC Pre-Emit Peephole ; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index 39b23a57513d..21bd4bb8502c 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -214,6 +214,7 @@ ; CHECK-NEXT: PowerPC Pre-Emit Peephole ; CHECK-NEXT: PowerPC Early-Return Creation ; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index 7473809a2c5d..84c7f3f987c0 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -63,6 +63,7 @@ ; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: RISC-V Make Compressible ; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 44c270fdc3c2..5d14d14d2162 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -189,6 +189,7 @@ ; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: RISC-V Make Compressible ; CHECK-NEXT: Contiguously Lay 
Out Funclets +; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index 98b86384b844..4c99dd830b44 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -71,6 +71,7 @@ ; CHECK-NEXT: X86 Insert Cache Prefetches ; CHECK-NEXT: X86 insert wait instruction ; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/CodeGen/X86/fake-use-hpfloat.ll b/llvm/test/CodeGen/X86/fake-use-hpfloat.ll new file mode 100644 index 000000000000..7a95c3880183 --- /dev/null +++ b/llvm/test/CodeGen/X86/fake-use-hpfloat.ll @@ -0,0 +1,15 @@ +; assert in DAGlegalizer with fake use of half precision float. +; Changes to half float promotion. +; RUN: llc -stop-after=finalize-isel -o - %s | FileCheck %s +; +; CHECK: bb.0.entry: +; CHECK-NEXT: %0:fr16 = FsFLD0SH +; CHECK-NEXT: FAKE_USE killed %0 +; +target triple = "x86_64-unknown-unknown" + +define void @_Z6doTestv() local_unnamed_addr optdebug { +entry: + tail call void (...) @llvm.fake.use(half 0xH0000) + ret void +} diff --git a/llvm/test/CodeGen/X86/fake-use-ld.ll b/llvm/test/CodeGen/X86/fake-use-ld.ll new file mode 100644 index 000000000000..86e7235091dd --- /dev/null +++ b/llvm/test/CodeGen/X86/fake-use-ld.ll @@ -0,0 +1,43 @@ +; RUN: llc -O0 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s + +; Checks that fake uses of the FP stack do not cause a crash. 
+; +; /*******************************************************************/ +; extern long double foo(long double, long double, long double); +; +; long double actual(long double p1, long double p2, long double p3) { +; return fmal(p1, p2, p3); +; } +; /*******************************************************************/ + +define x86_fp80 @actual(x86_fp80 %p1, x86_fp80 %p2, x86_fp80 %p3) optdebug { +; +; CHECK: actual +; +entry: + %p1.addr = alloca x86_fp80, align 16 + %p2.addr = alloca x86_fp80, align 16 + %p3.addr = alloca x86_fp80, align 16 + store x86_fp80 %p1, ptr %p1.addr, align 16 + store x86_fp80 %p2, ptr %p2.addr, align 16 + store x86_fp80 %p3, ptr %p3.addr, align 16 + %0 = load x86_fp80, ptr %p1.addr, align 16 + %1 = load x86_fp80, ptr %p2.addr, align 16 + %2 = load x86_fp80, ptr %p3.addr, align 16 +; +; CHECK: callq{{.*}}foo +; + %3 = call x86_fp80 @foo(x86_fp80 %0, x86_fp80 %1, x86_fp80 %2) + %4 = load x86_fp80, ptr %p1.addr, align 16 + call void (...) @llvm.fake.use(x86_fp80 %4) + %5 = load x86_fp80, ptr %p2.addr, align 16 + call void (...) @llvm.fake.use(x86_fp80 %5) + %6 = load x86_fp80, ptr %p3.addr, align 16 + call void (...) @llvm.fake.use(x86_fp80 %6) +; +; CHECK: ret +; + ret x86_fp80 %3 +} + +declare x86_fp80 @foo(x86_fp80, x86_fp80, x86_fp80) diff --git a/llvm/test/CodeGen/X86/fake-use-scheduler.mir b/llvm/test/CodeGen/X86/fake-use-scheduler.mir new file mode 100644 index 000000000000..7e55f1d79aa7 --- /dev/null +++ b/llvm/test/CodeGen/X86/fake-use-scheduler.mir @@ -0,0 +1,123 @@ +# Prevent the machine scheduler from moving instructions past FAKE_USE. +# RUN: llc -run-pass machine-scheduler -debug-only=machine-scheduler 2>&1 -o - %s | FileCheck %s +# REQUIRES: asserts +# +# We make sure that, beginning with the first FAKE_USE instruction, +# no changes to the sequence of instructions are undertaken by the +# scheduler. We don't bother to check that the order of the FAKE_USEs +# remains the same. They should, but it is irrelevant. 
+# +# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: foo:%bb.0 entry +# CHECK-NEXT: From: %0:gr64 = COPY $rdi +# CHECK-NEXT: To: FAKE_USE %5:gr64 +# CHECK-NEXT: RegionInstrs: 7 +# +# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: bar:%bb.0 entry +# CHECK-NEXT: From: %0:gr64 = COPY $rdi +# CHECK-NEXT: To: RET 0, killed $rax +# CHECK-NEXT: RegionInstrs: 7 +# +--- | + ; ModuleID = 'test.ll' + source_filename = "test.ll" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + + @glb = common dso_local local_unnamed_addr global [100 x i32] zeroinitializer, align 16 + + define dso_local i64 @foo(ptr %p) local_unnamed_addr optdebug { + entry: + %0 = load i32, ptr @glb, align 16 + store i32 %0, ptr %p, align 4 + %conv = sext i32 %0 to i64 + %1 = load i32, ptr getelementptr inbounds ([100 x i32], ptr @glb, i64 0, i64 1), align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1 + store i32 %1, ptr %arrayidx1, align 4 + %conv2 = sext i32 %1 to i64 + %add3 = add nsw i64 %conv2, %conv + notail call void (...) @llvm.fake.use(i64 %add3) + notail call void (...) @llvm.fake.use(i32 %1) + notail call void (...) @llvm.fake.use(i32 %0) + notail call void (...) @llvm.fake.use(ptr %p) + ret i64 %add3 + } + + define dso_local i64 @bar(ptr %p) local_unnamed_addr optdebug { + entry: + %0 = load i32, ptr @glb, align 16 + store i32 %0, ptr %p, align 4 + %conv = sext i32 %0 to i64 + %1 = load i32, ptr getelementptr inbounds ([100 x i32], ptr @glb, i64 0, i64 1), align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1 + store i32 %1, ptr %arrayidx1, align 4 + %conv2 = sext i32 %1 to i64 + %add3 = add nsw i64 %conv2, %conv + ret i64 %add3 + } + + ; Function Attrs: nocallback nofree nosync nounwind willreturn + declare void @llvm.stackprotector(ptr, ptr) + +... 
+--- +name: foo +alignment: 16 +tracksRegLiveness: true +debugInstrRef: true +registers: + - { id: 0, class: gr64, preferred-register: '' } + - { id: 1, class: gr64_with_sub_8bit, preferred-register: '' } + - { id: 2, class: gr32, preferred-register: '' } + - { id: 3, class: gr64_with_sub_8bit, preferred-register: '' } + - { id: 4, class: gr32, preferred-register: '' } + - { id: 5, class: gr64, preferred-register: '' } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } +body: | + bb.0.entry: + liveins: $rdi + + %0:gr64 = COPY $rdi + %1:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb, $noreg + MOV32mr %0, 1, $noreg, 0, $noreg, %1.sub_32bit + %3:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb + 4, $noreg + MOV32mr %0, 1, $noreg, 4, $noreg, %3.sub_32bit + %5:gr64 = COPY %3 + %5:gr64 = nsw ADD64rr %5, %1, implicit-def dead $eflags + FAKE_USE %5 + FAKE_USE %3.sub_32bit + FAKE_USE %1.sub_32bit + FAKE_USE %0 + $rax = COPY %5 + RET 0, killed $rax + +... +--- +name: bar +alignment: 16 +tracksRegLiveness: true +debugInstrRef: true +registers: + - { id: 0, class: gr64, preferred-register: '' } + - { id: 1, class: gr64_with_sub_8bit, preferred-register: '' } + - { id: 2, class: gr32, preferred-register: '' } + - { id: 3, class: gr64_with_sub_8bit, preferred-register: '' } + - { id: 4, class: gr32, preferred-register: '' } + - { id: 5, class: gr64_with_sub_8bit, preferred-register: '' } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } +body: | + bb.0.entry: + liveins: $rdi + + %0:gr64 = COPY $rdi + %1:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb, $noreg + MOV32mr %0, 1, $noreg, 0, $noreg, %1.sub_32bit + %5:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb + 4, $noreg + MOV32mr %0, 1, $noreg, 4, $noreg, %5.sub_32bit + %5:gr64_with_sub_8bit = nsw ADD64rr %5, %1, implicit-def dead $eflags + $rax = COPY %5 + RET 0, killed $rax + +... 
diff --git a/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll b/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll new file mode 100644 index 000000000000..45a210ef3910 --- /dev/null +++ b/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O2 -o - \ +; RUN: | FileCheck %s --implicit-check-not=TAILCALL +; Generated with: clang -emit-llvm -O2 -S -fextend-lifetimes test.cpp -o - +; =========== test.cpp =============== +; extern int bar(int); +; int foo1(int i) +; { +; return bar(i); +; } +; =========== test.cpp =============== + +; CHECK: TAILCALL + +; ModuleID = 'test.cpp' +source_filename = "test.cpp" + +define i32 @_Z4foo1i(i32 %i) local_unnamed_addr optdebug { +entry: + %call = tail call i32 @_Z3bari(i32 %i) + tail call void (...) @llvm.fake.use(i32 %i) + ret i32 %call +} + +declare i32 @_Z3bari(i32) local_unnamed_addr diff --git a/llvm/test/CodeGen/X86/fake-use-suppress-load.ll b/llvm/test/CodeGen/X86/fake-use-suppress-load.ll new file mode 100644 index 000000000000..c1b442ebd79f --- /dev/null +++ b/llvm/test/CodeGen/X86/fake-use-suppress-load.ll @@ -0,0 +1,14 @@ +; Suppress redundant loads feeding into fake uses. +; RUN: llc -filetype=asm -o - %s --mtriple=x86_64-unknown-unknown | FileCheck %s +; Windows ABI works differently, there's no offset. +; +; Look for the spill +; CHECK: movq %r{{[a-z]+,}} -{{[0-9]+\(%rsp\)}} +; CHECK-NOT: movq -{{[0-9]+\(%rsp\)}}, %r{{[a-z]+}} + +define dso_local i32 @f(ptr %p) local_unnamed_addr optdebug { +entry: + call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"() #1 + notail call void (...) 
@llvm.fake.use(ptr %p) + ret i32 4 +} diff --git a/llvm/test/CodeGen/X86/fake-use-tailcall.ll b/llvm/test/CodeGen/X86/fake-use-tailcall.ll new file mode 100644 index 000000000000..10bb22e1b564 --- /dev/null +++ b/llvm/test/CodeGen/X86/fake-use-tailcall.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -stop-after=finalize-isel - | FileCheck %s --implicit-check-not FAKE_USE +; Fake uses following tail calls should be pulled in front +; of the TCRETURN instruction. Fake uses using something defined by +; the tail call or after it should be suppressed. + +; CHECK: name:{{ +}}bar +; CHECK: body: +; CHECK: bb.0.{{.*}}: +; CHECK: %0:{{.*}}= COPY +; CHECK: FAKE_USE %0 +; CHECK: TCRETURN + +; CHECK: name:{{ +}}baz +; CHECK: body: +; CHECK: bb.0.{{.*}}: +; CHECK: %0:{{.*}}= COPY +; CHECK: FAKE_USE %0 +; CHECK: TCRETURN + +define void @bar(i32 %v) optdebug { +entry: + %call = tail call i32 @_Z3fooi(i32 %v) + %mul = mul nsw i32 %call, 3 + notail call void (...) @llvm.fake.use(i32 %mul) + notail call void (...) @llvm.fake.use(i32 %call) + notail call void (...) @llvm.fake.use(i32 %v) + ret void +} + +define i32 @baz(i32 %v) optdebug { +entry: + %call = tail call i32 @_Z3fooi(i32 %v) + notail call void (...) @llvm.fake.use(i32 %v) + ret i32 %call +} + +declare i32 @_Z3fooi(i32) local_unnamed_addr diff --git a/llvm/test/CodeGen/X86/fake-use-vector.ll b/llvm/test/CodeGen/X86/fake-use-vector.ll new file mode 100644 index 000000000000..cb46ccc8cac1 --- /dev/null +++ b/llvm/test/CodeGen/X86/fake-use-vector.ll @@ -0,0 +1,39 @@ +; assert in DAGlegalizer with fake use of 1-element vectors. +; RUN: llc -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s +; +; ModuleID = 't2.cpp' +; source_filename = "t2.cpp" +; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +; +; Check that we get past ISel and generate FAKE_USE machine instructions for +; one-element vectors. 
+; +; CHECK: bb.0.entry: +; CHECK-DAG: %1:gr64 = COPY $rdi +; CHECK-DAG: %0:vr128 = COPY $xmm0 +; CHECK: %2:vr64 = +; CHECK-DAG: FAKE_USE %1 +; CHECK-DAG: FAKE_USE %0 +; CHECK: RET + + +target triple = "x86_64-unknown-unknown" + +; Function Attrs: nounwind sspstrong uwtable +define <4 x float> @_Z3runDv4_fDv1_x(<4 x float> %r, i64 %b.coerce) local_unnamed_addr #0 { +entry: + %0 = insertelement <1 x i64> undef, i64 %b.coerce, i32 0 + %1 = bitcast i64 %b.coerce to <1 x i64> + %2 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %r, <1 x i64> %1) + tail call void (...) @llvm.fake.use(<1 x i64> %0) + tail call void (...) @llvm.fake.use(<4 x float> %r) + ret <4 x float> %2 +} + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) + +; Function Attrs: nounwind +declare void @llvm.fake.use(...) + +attributes #0 = { "target-cpu"="btver2" optdebug } diff --git a/llvm/test/CodeGen/X86/fake-use-vector2.ll b/llvm/test/CodeGen/X86/fake-use-vector2.ll new file mode 100644 index 000000000000..6f2d3a5566dc --- /dev/null +++ b/llvm/test/CodeGen/X86/fake-use-vector2.ll @@ -0,0 +1,27 @@ +; RUN: llc -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s +; +; Make sure we can split vectors that are used as operands of FAKE_USE. + +; Generated from: +; +; typedef long __attribute__((ext_vector_type(8))) long8; +; void test0() { long8 id208 {0, 1, 2, 3, 4, 5, 6, 7}; } + +; ModuleID = 't5.cpp' +source_filename = "t5.cpp" + + +; CHECK: %0:vr256 = VMOV +; CHECK: %1:vr256 = VMOV +; CHECK-DAG: FAKE_USE killed %1 +; CHECK-DAG: FAKE_USE killed %0 +; CHECK: RET +define void @_Z5test0v() local_unnamed_addr #0 { +entry: + tail call void (...) @llvm.fake.use(<8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>) #1 + ret void +} + +declare void @llvm.fake.use(...) 
+ +attributes #0 = { "target-cpu"="btver2" optdebug } diff --git a/llvm/test/CodeGen/X86/fake-use-zero-length.ll b/llvm/test/CodeGen/X86/fake-use-zero-length.ll new file mode 100644 index 000000000000..e8c6791b8edf --- /dev/null +++ b/llvm/test/CodeGen/X86/fake-use-zero-length.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -stop-after=finalize-isel | FileCheck %s --implicit-check-not=FAKE_USE +; +; Make sure SelectionDAG does not crash handling fake uses of zero-length arrays +; and structs. Check also that they are not propagated. +; +; Generated from the following source with +; clang -fextend-lifetimes -S -emit-llvm -O2 -mllvm -stop-after=safe-stack -o test.mir test.cpp +; +; int main () +; { int array[0]; } +; +; +; CHECK: liveins: $[[IN_REG:[a-zA-Z0-9]+]] +; CHECK: %[[IN_VREG:[a-zA-Z0-9]+]]:gr32 = COPY $[[IN_REG]] +; CHECK: FAKE_USE %[[IN_VREG]] + +source_filename = "test.ll" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define hidden i32 @main([0 x i32] %zero, [1 x i32] %one) local_unnamed_addr optdebug { +entry: + notail call void (...) @bar([0 x i32] %zero) + notail call void (...) @baz([1 x i32] %one) + notail call void (...) @llvm.fake.use([0 x i32] %zero) + notail call void (...) 
@llvm.fake.use([1 x i32] %one) + ret i32 0 +} + +declare void @bar([0 x i32] %a) +declare void @baz([1 x i32] %a) diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 12c16a03b134..545640b76616 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -211,6 +211,7 @@ ; CHECK-NEXT: X86 Insert Cache Prefetches ; CHECK-NEXT: X86 insert wait instruction ; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Sanitizer Binary Metadata diff --git a/llvm/test/DebugInfo/AArch64/fake-use-global-isel.ll b/llvm/test/DebugInfo/AArch64/fake-use-global-isel.ll new file mode 100644 index 000000000000..65a645830969 --- /dev/null +++ b/llvm/test/DebugInfo/AArch64/fake-use-global-isel.ll @@ -0,0 +1,98 @@ +; REQUIRES: object-emission + +; Make sure the fake use of 'b' at the end of 'foo' causes location information for 'b' +; to extend all the way to the end of the function. +; Duplicates `DebugInfo/X86/fake-use.ll` for global-isel. + +; RUN: %llc_dwarf -O2 --global-isel=1 -mtriple=aarch64--linux-gnu -filetype=obj -dwarf-linkage-names=Abstract < %s | llvm-dwarfdump --debug-info --debug-line -v - -o %t +; RUN: %python %p/../Inputs/check-fake-use.py %t +; RUN: sed -e 's,call void (...) 
@llvm.fake.use,;,' %s \ +; RUN: | %llc_dwarf - -O2 --global-isel=1 -mtriple=aarch64--linux-gnu -filetype=obj -dwarf-linkage-names=Abstract \ +; RUN: | llvm-dwarfdump --debug-info --debug-line -v - -o %t +; RUN: not %python %p/../Inputs/check-fake-use.py %t + +; Generated with: +; clang -O2 -g -S -emit-llvm -fextend-this-ptr fake-use.c +; +; int glob[10]; +; extern void bar(); +; +; int foo(int b, int i) +; { +; int loc = glob[i] * 2; +; if (b) { +; glob[2] = loc; +; bar(); +; } +; return loc; +; } +; +; ModuleID = 't2.c' +source_filename = "t2.c" + +@glob = common local_unnamed_addr global [10 x i32] zeroinitializer, align 16, !dbg !0 + +; Function Attrs: nounwind sspstrong uwtable +define i32 @foo(i32 %b, i32 %i) local_unnamed_addr optdebug !dbg !13 { +entry: + #dbg_value(i32 %b, !17, !20, !21) + %c = add i32 %b, 42 + %tobool = icmp sgt i32 %c, 2, !dbg !27 + tail call void (...) @bar() #2, !dbg !32 + %idxprom = sext i32 %i to i64, !dbg !22 + %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @glob, i64 0, i64 %idxprom, !dbg !22 + %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !23 + %mul = shl nsw i32 %0, 1, !dbg !22 + br i1 %tobool, label %if.end, label %if.then, !dbg !29 + +if.then: ; preds = %entry + store i32 %mul, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @glob, i64 0, i64 2), align 8, !dbg !30, !tbaa !23 + tail call void (...) @bar() #2, !dbg !32 + br label %if.end, !dbg !33 + +if.end: ; preds = %entry, %if.then + call void (...) @llvm.fake.use(i32 %b), !dbg !34 + ret i32 %mul, !dbg !35 +} + +declare void @bar(...) 
local_unnamed_addr + +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!9, !10, !11} +!llvm.ident = !{!12} + +!0 = distinct !DIGlobalVariableExpression(var: !DIGlobalVariable(name: "glob", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true), expr: !DIExpression()) +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 4.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4) +!2 = !DIFile(filename: "t2.c", directory: "/") +!3 = !{} +!4 = !{!0} +!5 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 320, align: 32, elements: !7) +!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!7 = !{!8} +!8 = !DISubrange(count: 10) +!9 = !{i32 2, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{i32 1, !"PIC Level", i32 2} +!12 = !{!"clang version 4.0.0"} +!13 = distinct !DISubprogram(name: "foo", scope: !2, file: !2, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !16) +!14 = !DISubroutineType(types: !15) +!15 = !{!6, !6, !6} +!16 = !{!17, !19} +!17 = !DILocalVariable(name: "b", arg: 1, scope: !13, file: !2, line: 4, type: !6) +!19 = !DILocalVariable(name: "loc", scope: !13, file: !2, line: 6, type: !6) +!20 = !DIExpression() +!21 = !DILocation(line: 4, scope: !13) +!22 = !DILocation(line: 6, scope: !13) +!23 = !{!24, !24, i64 0} +!24 = !{!"int", !25, i64 0} +!25 = !{!"omnipotent char", !26, i64 0} +!26 = !{!"Simple C/C++ TBAA"} +!27 = !DILocation(line: 7, scope: !28) +!28 = distinct !DILexicalBlock(scope: !13, file: !2, line: 7) +!29 = !DILocation(line: 7, scope: !13) +!30 = !DILocation(line: 8, scope: !31) +!31 = distinct !DILexicalBlock(scope: !28, file: !2, line: 7) +!32 = !DILocation(line: 9, scope: !31) +!33 = !DILocation(line: 10, scope: !31) +!34 = !DILocation(line: 12, scope: !13) +!35 = !DILocation(line: 11, scope: !13) 
diff --git a/llvm/test/DebugInfo/Inputs/check-fake-use.py b/llvm/test/DebugInfo/Inputs/check-fake-use.py
new file mode 100644
index 000000000000..7797e102419b
--- /dev/null
+++ b/llvm/test/DebugInfo/Inputs/check-fake-use.py
@@ -0,0 +1,115 @@
+#!/usr/bin/python3
+
+# Parse llvm-dwarfdump output (the file named by the first command-line
+# argument) to determine whether the location list for the parameter "b" covers
+# all of the function. The script searches the input for the function's
+# [prologue, epilogue) range and the location list range for "b", and checks
+# that the latter covers the entirety of the former. Every failed check raises
+# RuntimeError.
+import re
+import sys
+
+DebugInfoPattern = r"\.debug_info contents:"
+DebugLinePattern = r"\.debug_line contents:"
+ProloguePattern = r"^\s*0x([0-9a-f]+)\s.+prologue_end"
+EpiloguePattern = r"^\s*0x([0-9a-f]+)\s.+epilogue_begin"
+FormalPattern = r"^0x[0-9a-f]+:\s+DW_TAG_formal_parameter"
+LocationPattern = r"DW_AT_location\s+\[DW_FORM_([a-z_]+)\](?:.*0x([a-f0-9]+))"
+DebugLocPattern = r'\[0x([a-f0-9]+),\s+0x([a-f0-9]+)\) ".text": (.+)$'
+
+SeenDebugInfo = False
+SeenDebugLine = False
+# None until the parameter is found; then () for a whole-scope location or a
+# (start, end) address pair for a location range list.
+LocationRanges = None
+PrologueEnd = None
+EpilogueBegin = None
+
+# The dwarfdump output should contain the DW_AT_location for "b" first, then the
+# line table which should contain prologue_end and epilogue_begin entries.
+with open(sys.argv[1], "r") as dwarf_dump_file:
+    dwarf_iter = iter(dwarf_dump_file)
+    for line in dwarf_iter:
+        if not SeenDebugInfo and re.match(DebugInfoPattern, line):
+            SeenDebugInfo = True
+        if not SeenDebugLine and re.match(DebugLinePattern, line):
+            SeenDebugLine = True
+        # Get the range of DW_AT_location for "b".
+        if LocationRanges is None:
+            if match := re.match(FormalPattern, line):
+                # Go until we either find DW_AT_location or reach the end of this entry.
+                location_match = None
+                while location_match is None:
+                    # A blank line ends the entry. next() yields "" once the
+                    # input is exhausted; treat that as the end of the entry
+                    # too, otherwise this loop would never terminate.
+                    if not (line := next(dwarf_iter, "")).strip():
+                        raise RuntimeError(
+                            ".debug_info output is missing DW_AT_location for 'b'"
+                        )
+                    location_match = re.search(LocationPattern, line)
+                # Variable has whole-scope location, represented by an empty tuple.
+                if location_match.group(1) == "exprloc":
+                    LocationRanges = ()
+                    continue
+                if location_match.group(1) != "sec_offset":
+                    raise RuntimeError(
+                        f"Unhandled form for DW_AT_location: DW_FORM_{location_match.group(1)}"
+                    )
+                # Variable has location range list.
+                if (
+                    debug_loc_match := re.search(DebugLocPattern, next(dwarf_iter, ""))
+                ) is None:
+                    raise RuntimeError("Invalid location range list for 'b'")
+                LocationRanges = (
+                    int(debug_loc_match.group(1), 16),
+                    int(debug_loc_match.group(2), 16),
+                )
+                # Fold each following contiguous range into LocationRanges.
+                while (
+                    debug_loc_match := re.search(DebugLocPattern, next(dwarf_iter, ""))
+                ) is not None:
+                    match_loc_start = int(debug_loc_match.group(1), 16)
+                    match_loc_end = int(debug_loc_match.group(2), 16)
+                    match_expr = debug_loc_match.group(3)
+                    if match_loc_start != LocationRanges[1]:
+                        raise RuntimeError(
+                            f"Location list for 'b' is discontinuous from [0x{LocationRanges[1]:x}, 0x{match_loc_start:x})"
+                        )
+                    if "stack_value" in match_expr:
+                        raise RuntimeError(
+                            f"Location list for 'b' contains a stack_value expression: {match_expr}"
+                        )
+                    LocationRanges = (LocationRanges[0], match_loc_end)
+        # Get the prologue_end address.
+        elif PrologueEnd is None:
+            if match := re.match(ProloguePattern, line):
+                PrologueEnd = int(match.group(1), 16)
+        # Get the epilogue_begin address.
+        elif EpilogueBegin is None:
+            if match := re.match(EpiloguePattern, line):
+                EpilogueBegin = int(match.group(1), 16)
+                break
+
+if not SeenDebugInfo:
+    raise RuntimeError(".debug_info section not found.")
+if not SeenDebugLine:
+    raise RuntimeError(".debug_line section not found.")
+
+if LocationRanges is None:
+    raise RuntimeError(".debug_info output is missing parameter 'b'")
+if PrologueEnd is None:
+    raise RuntimeError(".debug_line output is missing prologue_end")
+if EpilogueBegin is None:
+    raise RuntimeError(".debug_line output is missing epilogue_begin")
+
+# An empty LocationRanges (whole-scope location) skips the coverage check.
+if len(LocationRanges) == 2 and (
+    LocationRanges[0] > PrologueEnd or LocationRanges[1] < EpilogueBegin
+):
+    raise RuntimeError(
+        f"""Location list for 'b' does not cover the whole function:
+    Prologue to Epilogue = [0x{PrologueEnd:x}, 0x{EpilogueBegin:x})
+    Location range = [0x{LocationRanges[0]:x}, 0x{LocationRanges[1]:x})
+"""
+    )
diff --git a/llvm/test/DebugInfo/X86/fake-use.ll b/llvm/test/DebugInfo/X86/fake-use.ll new file mode 100644 index 000000000000..f44aadfeef56 --- /dev/null +++ b/llvm/test/DebugInfo/X86/fake-use.ll @@ -0,0 +1,96 @@ +; REQUIRES: object-emission + +; Make sure the fake use of 'b' at the end of 'foo' causes location information for 'b' +; to extend all the way to the end of the function. + +; RUN: %llc_dwarf -O2 -filetype=obj -dwarf-linkage-names=Abstract < %s | llvm-dwarfdump --debug-info --debug-line -v - -o %t +; RUN: %python %p/../Inputs/check-fake-use.py %t +; RUN: sed -e 's,call void (...) 
@llvm.fake.use,;,' %s | %llc_dwarf - -O2 -filetype=obj -dwarf-linkage-names=Abstract | llvm-dwarfdump --debug-info --debug-line -v - -o %t +; RUN: not %python %p/../Inputs/check-fake-use.py %t + +; Generated with: +; clang -O2 -g -S -emit-llvm -fextend-this-ptr fake-use.c +; +; int glob[10]; +; extern void bar(); +; +; int foo(int b, int i) +; { +; int loc = glob[i] * 2; +; if (b) { +; glob[2] = loc; +; bar(); +; } +; return loc; +; } +; +; ModuleID = 't2.c' +source_filename = "t2.c" + +@glob = common local_unnamed_addr global [10 x i32] zeroinitializer, align 16, !dbg !0 + +; Function Attrs: nounwind sspstrong uwtable +define i32 @foo(i32 %b, i32 %i) local_unnamed_addr optdebug !dbg !13 { +entry: + #dbg_value(i32 %b, !17, !20, !21) + %c = add i32 %b, 42 + %tobool = icmp sgt i32 %c, 2, !dbg !27 + tail call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"() + tail call void (...) @bar() #2, !dbg !32 + %idxprom = sext i32 %i to i64, !dbg !22 + %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @glob, i64 0, i64 %idxprom, !dbg !22 + %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !23 + %mul = shl nsw i32 %0, 1, !dbg !22 + br i1 %tobool, label %if.end, label %if.then, !dbg !29 + +if.then: ; preds = %entry + store i32 %mul, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @glob, i64 0, i64 2), align 8, !dbg !30, !tbaa !23 + tail call void (...) @bar() #2, !dbg !32 + br label %if.end, !dbg !33 + +if.end: ; preds = %entry, %if.then + call void (...) @llvm.fake.use(i32 %b), !dbg !34 + ret i32 %mul, !dbg !35 +} + +declare void @bar(...) 
local_unnamed_addr + +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!9, !10, !11} +!llvm.ident = !{!12} + +!0 = distinct !DIGlobalVariableExpression(var: !DIGlobalVariable(name: "glob", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true), expr: !DIExpression()) +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 4.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4) +!2 = !DIFile(filename: "t2.c", directory: "/") +!3 = !{} +!4 = !{!0} +!5 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 320, align: 32, elements: !7) +!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!7 = !{!8} +!8 = !DISubrange(count: 10) +!9 = !{i32 2, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{i32 1, !"PIC Level", i32 2} +!12 = !{!"clang version 4.0.0"} +!13 = distinct !DISubprogram(name: "foo", scope: !2, file: !2, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !16) +!14 = !DISubroutineType(types: !15) +!15 = !{!6, !6, !6} +!16 = !{!17, !19} +!17 = !DILocalVariable(name: "b", arg: 1, scope: !13, file: !2, line: 4, type: !6) +!19 = !DILocalVariable(name: "loc", scope: !13, file: !2, line: 6, type: !6) +!20 = !DIExpression() +!21 = !DILocation(line: 4, scope: !13) +!22 = !DILocation(line: 6, scope: !13) +!23 = !{!24, !24, i64 0} +!24 = !{!"int", !25, i64 0} +!25 = !{!"omnipotent char", !26, i64 0} +!26 = !{!"Simple C/C++ TBAA"} +!27 = !DILocation(line: 7, scope: !28) +!28 = distinct !DILexicalBlock(scope: !13, file: !2, line: 7) +!29 = !DILocation(line: 7, scope: !13) +!30 = !DILocation(line: 8, scope: !31) +!31 = distinct !DILexicalBlock(scope: !28, file: !2, line: 7) +!32 = !DILocation(line: 9, scope: !31) +!33 = !DILocation(line: 10, scope: !31) +!34 = !DILocation(line: 12, scope: !13) +!35 = !DILocation(line: 11, scope: !13) 
diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td index b52849b6bc93..00601b7ae6e0 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td @@ -136,14 +136,14 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { // CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2([[#LOWER:]]), GIMT_Encode2([[#UPPER:]]), /*)*//*default:*//*Label 6*/ GIMT_Encode4([[#DEFAULT:]]), -// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(470), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(506), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(553), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(587), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(610), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(622), +// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(474), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(510), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(557), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(591), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(614), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(626), // CHECK-NEXT: // Label 0: @[[#%u, mul(UPPER-LOWER, 4) + 10]] -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(494), // Rule ID 4 // +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(498), // Rule ID 4 // // CHECK-NEXT: GIM_CheckFeatures, GIMT_Encode2(GIFBS_HasAnswerToEverything), // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule3Enabled), // CHECK-NEXT: // MIs[0] a @@ -156,8 +156,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_CheckIsSafeToFold, /*NumInsns*/1, // CHECK-NEXT: // Combiner Rule #3: InstTest1 // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner2), -// CHECK-NEXT: // Label 7: @494 -// CHECK-NEXT: GIM_Try, /*On fail 
goto*//*Label 8*/ GIMT_Encode4(505), // Rule ID 3 // +// CHECK-NEXT: // Label 7: @498 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(509), // Rule ID 3 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), // CHECK-NEXT: // MIs[0] a // CHECK-NEXT: // No operand predicates @@ -165,10 +165,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // No operand predicates // CHECK-NEXT: // Combiner Rule #2: InstTest0 // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner1), -// CHECK-NEXT: // Label 8: @505 +// CHECK-NEXT: // Label 8: @509 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 1: @506 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(552), // Rule ID 6 // +// CHECK-NEXT: // Label 1: @510 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(556), // Rule ID 6 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule5Enabled), // CHECK-NEXT: GIM_RootCheckType, /*Op*/2, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[0] dst @@ -185,10 +185,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/1, /*OpIdx*/1, // z // CHECK-NEXT: GIR_EraseRootFromParent_Done, -// CHECK-NEXT: // Label 9: @552 +// CHECK-NEXT: // Label 9: @556 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 2: @553 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(586), // Rule ID 5 // +// CHECK-NEXT: // Label 2: @557 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(590), // Rule ID 5 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule4Enabled), // CHECK-NEXT: // MIs[0] tmp // CHECK-NEXT: GIM_RecordInsnIgnoreCopies, /*DefineMI*/1, /*MI*/0, /*OpIdx*/0, // MIs[1] @@ -204,29 +204,29 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/1, // ptr // 
CHECK-NEXT: GIR_MergeMemOperands, /*InsnID*/0, /*NumInsns*/2, /*MergeInsnID's*/0, 1, // CHECK-NEXT: GIR_EraseRootFromParent_Done, -// CHECK-NEXT: // Label 10: @586 +// CHECK-NEXT: // Label 10: @590 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 3: @587 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(598), // Rule ID 0 // +// CHECK-NEXT: // Label 3: @591 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(602), // Rule ID 0 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled), // CHECK-NEXT: // Combiner Rule #0: WipOpcodeTest0; wip_match_opcode 'G_TRUNC' // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), -// CHECK-NEXT: // Label 11: @598 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(609), // Rule ID 1 // +// CHECK-NEXT: // Label 11: @602 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(613), // Rule ID 1 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_TRUNC' // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), -// CHECK-NEXT: // Label 12: @609 +// CHECK-NEXT: // Label 12: @613 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 4: @610 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(621), // Rule ID 2 // +// CHECK-NEXT: // Label 4: @614 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(625), // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_SEXT' // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), -// CHECK-NEXT: // Label 13: @621 +// CHECK-NEXT: // Label 13: @625 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 5: @622 -// CHECK-NEXT: GIM_Try, /*On fail 
goto*//*Label 14*/ GIMT_Encode4(656), // Rule ID 7 // +// CHECK-NEXT: // Label 5: @626 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(660), // Rule ID 7 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule6Enabled), // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -240,7 +240,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0, // CHECK-NEXT: GIR_EraseRootFromParent_Done, -// CHECK-NEXT: // Label 14: @656 +// CHECK-NEXT: // Label 14: @660 // CHECK-NEXT: GIM_Reject, // CHECK-NEXT: // Label 6: @[[#%u, DEFAULT]] // CHECK-NEXT: GIM_Reject, diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-phi.ll new file mode 100644 index 000000000000..064d3f29dd9e --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-phi.ll @@ -0,0 +1,50 @@ +; RUN: opt < %s -passes='require<profile-summary>,function(codegenprepare)' -S -mtriple=x86_64 | FileCheck %s --implicit-check-not="llvm.fake.use" +; +; When performing return duplication to enable +; tail call optimization we clone fake uses that exist in the to-be-eliminated +; return block into the predecessor blocks. When doing this with fake uses +; of PHI-nodes, they cannot be easily copied, but require the correct operand. +; We are currently not able to do this correctly, so we suppress the cloning +; of such fake uses at the moment. +; +; There should be no fake use of a call result in any of the resulting return +; blocks. + +; Fake uses of `this` should be duplicated into both return blocks. 
+; CHECK: if.then: +; CHECK: @llvm.fake.use({{.*}}this +; CHECK: if.else: +; CHECK: @llvm.fake.use({{.*}}this + +; CHECK: declare void @llvm.fake.use + +source_filename = "test.ll" + +%class.a = type { i8 } + +declare i32 @foo(ptr nonnull dereferenceable(1)) local_unnamed_addr +declare i32 @bar(ptr nonnull dereferenceable(1)) local_unnamed_addr + +define hidden void @func(ptr nonnull dereferenceable(1) %this) local_unnamed_addr align 2 optdebug { +entry: + %b = getelementptr inbounds %class.a, ptr %this, i64 0, i32 0 + %0 = load i8, i8* %b, align 1 + %tobool.not = icmp eq i8 %0, 0 + br i1 %tobool.not, label %if.else, label %if.then + +if.then: ; preds = %entry + %call = tail call i32 @foo(ptr nonnull dereferenceable(1) %this) + %call2 = tail call i32 @bar(ptr nonnull dereferenceable(1) %this) + br label %if.end + +if.else: ; preds = %entry + %call4 = tail call i32 @bar(ptr nonnull dereferenceable(1) %this) + %call5 = tail call i32 @foo(ptr nonnull dereferenceable(1) %this) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %call4.sink = phi i32 [ %call4, %if.else ], [ %call, %if.then ] + notail call void (...) @llvm.fake.use(i32 %call4.sink) + notail call void (...) @llvm.fake.use(ptr nonnull %this) + ret void +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-split-ret.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-split-ret.ll new file mode 100644 index 000000000000..b2cf89f6f2dd --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/fake-use-split-ret.ll @@ -0,0 +1,37 @@ +; RUN: opt -mtriple=x86_64-unknown-unknown -S -codegenprepare <%s -o - | FileCheck %s +; +; Ensure return instruction splitting ignores fake uses. 
+; +; IR Generated with clang -O2 -S -emit-llvm -fextend-lifetimes test.cpp +; +;// test.cpp +;extern int bar(int); +; +;int foo2(int i) +;{ +; --i; +; if (i <= 0) +; return -1; +; return bar(i); +;} + +declare i32 @_Z3bari(i32) local_unnamed_addr + +define i32 @_Z4foo2i(i32 %i) local_unnamed_addr optdebug { +entry: + %dec = add nsw i32 %i, -1 + %cmp = icmp slt i32 %i, 2 + br i1 %cmp, label %cleanup, label %if.end + +if.end: ; preds = %entry + %call = tail call i32 @_Z3bari(i32 %dec) +; CHECK: ret i32 %call + br label %cleanup + +cleanup: ; preds = %entry, %if.end +; CHECK: cleanup: + %retval.0 = phi i32 [ %call, %if.end ], [ -1, %entry ] + tail call void (...) @llvm.fake.use(i32 %dec) +; CHECK: ret i32 -1 + ret i32 %retval.0 +} diff --git a/llvm/test/Transforms/GVN/fake-use-constprop.ll b/llvm/test/Transforms/GVN/fake-use-constprop.ll new file mode 100644 index 000000000000..1466f9f9fca2 --- /dev/null +++ b/llvm/test/Transforms/GVN/fake-use-constprop.ll @@ -0,0 +1,60 @@ +; RUN: opt -passes=gvn -S < %s | FileCheck %s +; +; The Global Value Numbering pass (GVN) propagates boolean values +; that are constant in dominated basic blocks to all the uses +; in these basic blocks. However, we don't want the constant propagated +; into fake.use intrinsics since this would render the intrinsic useless +; with respect to keeping the variable live up until the fake.use. +; This test checks that we don't generate any fake.uses with constant 0. 
+; +; Reduced from the following test case, generated with clang -O2 -S -emit-llvm -fextend-lifetimes test.c +; +; extern void func1(); +; extern int bar(); +; extern void baz(int); +; +; int foo(int i, float f, int *punused) +; { +; int j = 3*i; +; if (j > 0) { +; int m = bar(i); +; if (m) { +; char b = f; +; baz(b); +; if (b) +; goto lab; +; func1(); +; } +; lab: +; func1(); +; } +; return 1; +; } + +;; GVN should propagate a constant value through to a regular call, but not to +;; a fake use, which should continue to track the original value. +; CHECK: %[[CONV_VAR:[a-zA-Z0-9]+]] = fptosi +; CHECK: call {{.+}} @bees(i8 0) +; CHECK: call {{.+}} @llvm.fake.use(i8 %[[CONV_VAR]]) + +define i32 @foo(float %f) optdebug { + %conv = fptosi float %f to i8 + %tobool3 = icmp eq i8 %conv, 0 + br i1 %tobool3, label %if.end, label %lab + +if.end: + tail call void (...) @bees(i8 %conv) + tail call void (...) @llvm.fake.use(i8 %conv) + br label %lab + +lab: + ret i32 1 +} + +declare i32 @bar(...) + +declare void @baz(i32) + +declare void @bees(i32) + +declare void @func1(...) diff --git a/llvm/test/Transforms/SROA/fake-use-escape.ll b/llvm/test/Transforms/SROA/fake-use-escape.ll new file mode 100644 index 000000000000..5429d09740e5 --- /dev/null +++ b/llvm/test/Transforms/SROA/fake-use-escape.ll @@ -0,0 +1,21 @@ +; RUN: opt -S -passes=sroa %s | FileCheck %s +; +;; Check that we do not assert and that we retain the fake_use instruction that +;; uses the address of bar. +; +; CHECK: define{{.*}}foo +; CHECK: call{{.*llvm\.fake\.use.*}}(ptr %bar.addr) + +define void @_Z3fooPi(ptr %bar) { +entry: + %bar.addr = alloca ptr, align 8 + %baz = alloca ptr, align 8 + store ptr %bar, ptr %bar.addr, align 8 + store ptr %bar.addr, ptr %baz, align 8 + %0 = load ptr, ptr %bar.addr, align 8 + %1 = load ptr, ptr %baz, align 8 + call void (...) @llvm.fake.use(ptr %1) + ret void +} + +declare void @llvm.fake.use(...) 
diff --git a/llvm/test/Transforms/SROA/fake-use-sroa.ll b/llvm/test/Transforms/SROA/fake-use-sroa.ll new file mode 100644 index 000000000000..9e92df154875 --- /dev/null +++ b/llvm/test/Transforms/SROA/fake-use-sroa.ll @@ -0,0 +1,52 @@ +; RUN: opt -S -passes=sroa %s | FileCheck %s +; With fake use intrinsics generated for small aggregates, check that when +; SROA slices the aggregate, we generate individual fake use intrinsics for +; the individual values. + +; Generated from the following source: +; struct s { +; int i; +; int j; +; }; +; +; void foo(struct s S) { +; } +; +; void bar() { +; int arr[2] = {5, 6}; +; } +; +%struct.s = type { i32, i32 } +@__const.bar.arr = private unnamed_addr constant [2 x i32] [i32 5, i32 6], align 4 + +; A small struct passed as parameter +; CHECK-LABEL: define{{.*}}foo +; CHECK: %[[SLICE1:[^ ]+]] = trunc i64 +; CHECK: %[[SLICE2:[^ ]+]] = trunc i64 +; CHECK-DAG: call{{.*}} @llvm.fake.use(i32 %[[SLICE1]]) +; CHECK-DAG: call{{.*}} @llvm.fake.use(i32 %[[SLICE2]]) +define dso_local void @foo(i64 %S.coerce) optdebug { +entry: + %S = alloca %struct.s, align 4 + store i64 %S.coerce, ptr %S, align 4 + %fake.use = load %struct.s, ptr %S, align 4 + notail call void (...) @llvm.fake.use(%struct.s %fake.use) + ret void +} + +; A local variable with a small array type. +; CHECK-LABEL: define{{.*}}bar +; CHECK: %[[ARRAYSLICE1:[^ ]+]] = load +; CHECK: %[[ARRAYSLICE2:[^ ]+]] = load +; CHECK-DAG: call{{.*}} @llvm.fake.use(i32 %[[ARRAYSLICE1]]) +; CHECK-DAG: call{{.*}} @llvm.fake.use(i32 %[[ARRAYSLICE2]]) +define dso_local void @bar() optdebug { +entry: + %arr = alloca [2 x i32], align 4 + call void @llvm.memcpy.p0i8.p0i8.i64(ptr align 4 %arr, ptr align 4 bitcast (ptr @__const.bar.arr to ptr), i64 8, i1 false) + %fake.use = load [2 x i32], ptr %arr, align 4 + notail call void (...) 
@llvm.fake.use([2 x i32] %fake.use) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index bbd1f9af6509..3a4c7ea03b3a 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -201,6 +201,7 @@ static_library("CodeGen") { "RegisterPressure.cpp", "RegisterScavenging.cpp", "RegisterUsageInfo.cpp", + "RemoveLoadsIntoFakeUses.cpp", "RemoveRedundantDebugValues.cpp", "RenameIndependentSubregs.cpp", "ReplaceWithVeclib.cpp",