[CodeGen] Rename expand-fp to expand-ir-insts (#172681)

The pass now contains a non-fp expansion and should
be used for any similar expansions regardless of the
types involved. Hence a generic name seems apt.

Rename the source files, pass, and adjust the pass
description. Move all tests for the expansions
that have previously been merged into the pass
to a single directory.
This commit is contained in:
Frederik Harwath 2025-12-18 12:15:04 +01:00 committed by GitHub
parent 80e3548372
commit 5c05824d2b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
52 changed files with 146 additions and 138 deletions

View File

@ -673,7 +673,7 @@ default optimization pipelines, e.g. (the output has been trimmed):
ModulePass Manager
Pre-ISel Intrinsic Lowering
FunctionPass Manager
Expand fp
Expand IR instructions
Expand Atomic instructions
SVE intrinsics optimizations
FunctionPass Manager

View File

@ -1,4 +1,4 @@
//===- ExpandFp.h -----------------------------------*- C++ -*-===//
//===- ExpandIRInsts.h -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_EXPANDFP_H
#define LLVM_CODEGEN_EXPANDFP_H
#ifndef LLVM_CODEGEN_EXPANDIRINSTS_H
#define LLVM_CODEGEN_EXPANDIRINSTS_H
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CodeGen.h"
@ -16,13 +16,13 @@ namespace llvm {
class TargetMachine;
class ExpandFpPass : public PassInfoMixin<ExpandFpPass> {
class ExpandIRInstsPass : public PassInfoMixin<ExpandIRInstsPass> {
private:
const TargetMachine *TM;
CodeGenOptLevel OptLevel;
public:
explicit ExpandFpPass(const TargetMachine &TM, CodeGenOptLevel OptLevel);
explicit ExpandIRInstsPass(const TargetMachine &TM, CodeGenOptLevel OptLevel);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
static bool isRequired() { return true; }
@ -32,4 +32,4 @@ public:
} // end namespace llvm
#endif // LLVM_CODEGEN_EXPANDFP_H
#endif // LLVM_CODEGEN_EXPANDIRINSTS_H

View File

@ -547,7 +547,7 @@ LLVM_ABI FunctionPass *createExpandReductionsPass();
LLVM_ABI FunctionPass *createReplaceWithVeclibLegacyPass();
// Expands large div/rem instructions.
LLVM_ABI FunctionPass *createExpandFpPass();
LLVM_ABI FunctionPass *createExpandIRInstsPass();
// This pass expands memcmp() to load/stores.
LLVM_ABI FunctionPass *createExpandMemCmpLegacyPass();
@ -565,7 +565,7 @@ LLVM_ABI FunctionPass *createCFIFixup();
LLVM_ABI FunctionPass *createCFIInstrInserter();
// Expands certain IR instructions (large fp conversions, frem, large div/rem).
FunctionPass *createExpandFpPass(CodeGenOptLevel);
FunctionPass *createExpandIRInstsPass(CodeGenOptLevel);
/// Creates CFGuard longjmp target identification pass.
/// \see CFGuardLongjmp.cpp

View File

@ -2219,13 +2219,13 @@ public:
}
/// Returns the size in bits of the maximum div/rem the backend supports.
/// Larger operations will be expanded by ExpandFp.
/// Larger operations will be expanded by ExpandIRInsts.
unsigned getMaxDivRemBitWidthSupported() const {
return MaxDivRemBitWidthSupported;
}
/// Returns the size in bits of the maximum fp to/from int conversion the
/// backend supports. Larger operations will be expanded by ExpandFp.
/// backend supports. Larger operations will be expanded by ExpandIRInsts.
unsigned getMaxLargeFPConvertBitWidthSupported() const {
return MaxLargeFPConvertBitWidthSupported;
}
@ -2885,13 +2885,13 @@ protected:
}
/// Set the size in bits of the maximum div/rem the backend supports.
/// Larger operations will be expanded by ExpandFp.
/// Larger operations will be expanded by ExpandIRInsts.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits) {
MaxDivRemBitWidthSupported = SizeInBits;
}
/// Set the size in bits of the maximum fp to/from int conversion the backend
/// supports. Larger operations will be expanded by ExpandFp.
/// supports. Larger operations will be expanded by ExpandIRInsts.
void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) {
MaxLargeFPConvertBitWidthSupported = SizeInBits;
}
@ -3742,12 +3742,12 @@ private:
unsigned MaxAtomicSizeInBitsSupported;
/// Size in bits of the maximum div/rem size the backend supports.
/// Larger operations will be expanded by ExpandFp.
/// Larger operations will be expanded by ExpandIRInsts.
unsigned MaxDivRemBitWidthSupported;
/// Size in bits of the maximum fp to/from int conversion size the
/// backend supports. Larger operations will be expanded by
/// ExpandFp.
/// ExpandIRInsts.
unsigned MaxLargeFPConvertBitWidthSupported;
/// Size in bits of the minimum cmpxchg or ll/sc operation the

View File

@ -112,7 +112,7 @@ LLVM_ABI void initializeEarlyMachineLICMPass(PassRegistry &);
LLVM_ABI void initializeEarlyTailDuplicateLegacyPass(PassRegistry &);
LLVM_ABI void initializeEdgeBundlesWrapperLegacyPass(PassRegistry &);
LLVM_ABI void initializeEHContGuardTargetsPass(PassRegistry &);
LLVM_ABI void initializeExpandFpLegacyPassPass(PassRegistry &);
LLVM_ABI void initializeExpandIRInstsLegacyPassPass(PassRegistry &);
LLVM_ABI void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
LLVM_ABI void initializeExpandPostRALegacyPass(PassRegistry &);
LLVM_ABI void initializeExpandReductionsPass(PassRegistry &);

View File

@ -32,7 +32,7 @@
#include "llvm/CodeGen/DetectDeadLanes.h"
#include "llvm/CodeGen/DwarfEHPrepare.h"
#include "llvm/CodeGen/EarlyIfConversion.h"
#include "llvm/CodeGen/ExpandFp.h"
#include "llvm/CodeGen/ExpandIRInsts.h"
#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/CodeGen/ExpandPostRAPseudos.h"
#include "llvm/CodeGen/ExpandReductions.h"
@ -677,7 +677,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addISelPasses(
addModulePass(LowerEmuTLSPass(), PMW);
addModulePass(PreISelIntrinsicLoweringPass(&TM), PMW);
addFunctionPass(ExpandFpPass(TM, getOptLevel()), PMW);
addFunctionPass(ExpandIRInstsPass(TM, getOptLevel()), PMW);
derived().addIRPasses(PMW);
derived().addCodeGenPrepare(PMW);

View File

@ -57,7 +57,7 @@ add_llvm_component_library(LLVMCodeGen
EdgeBundles.cpp
EHContGuardTargets.cpp
ExecutionDomainFix.cpp
ExpandFp.cpp
ExpandIRInsts.cpp
ExpandMemCmp.cpp
ExpandPostRAPseudos.cpp
ExpandReductions.cpp

View File

@ -39,7 +39,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyIfPredicatorPass(Registry);
initializeEarlyMachineLICMPass(Registry);
initializeEarlyTailDuplicateLegacyPass(Registry);
initializeExpandFpLegacyPassPass(Registry);
initializeExpandIRInstsLegacyPassPass(Registry);
initializeExpandMemCmpLegacyPassPass(Registry);
initializeExpandPostRALegacyPass(Registry);
initializeFEntryInserterLegacyPass(Registry);

View File

@ -1,26 +1,33 @@
//===--- ExpandFp.cpp - Expand fp instructions ----------------------------===//
//===--- ExpandIRInsts.cpp - Expand IR instructions -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass expands certain floating point instructions at the IR level.
// This pass expands certain instructions at the IR level.
//
// It expands fptoui .. to, fptosi .. to, uitofp .. to, sitofp
// The following expansions are implemented:
// - Expansion of fptoui .. to, fptosi .. to, uitofp .. to, sitofp
// .. to instructions with a bitwidth above a threshold. This is
// useful for targets like x86_64 that cannot lower fp conversions
// with more than 128 bits.
//
// This pass also expands div/rem instructions with a bitwidth above a
// threshold into a call to auto-generated functions. This is useful
// for targets like x86_64 that cannot lower divisions with more than
// 128 bits or targets like x86_32 that cannot lower divisions with
// more than 64 bits.
// - Expansion of frem for types MVT::f16, MVT::f32, and MVT::f64 for
// targets which use "Expand" as the legalization action for the
// corresponding type.
//
// - Expansion of udiv, sdiv, urem, and srem instructions with
// a bitwidth above a threshold into a call to auto-generated
// functions. This is useful for targets like x86_64 that cannot
// lower divisions with more than 128 bits or targets like x86_32 that
// cannot lower divisions with more than 64 bits.
//
// Instructions with vector types are scalarized first if their scalar
// types can be expanded. Scalable vector types are not supported.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ExpandFp.h"
#include "llvm/CodeGen/ExpandIRInsts.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
@ -45,7 +52,7 @@
#include <llvm/Support/Casting.h>
#include <optional>
#define DEBUG_TYPE "expand-fp"
#define DEBUG_TYPE "expand-ir-insts"
using namespace llvm;
@ -1125,18 +1132,18 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
}
namespace {
class ExpandFpLegacyPass : public FunctionPass {
class ExpandIRInstsLegacyPass : public FunctionPass {
CodeGenOptLevel OptLevel;
public:
static char ID;
ExpandFpLegacyPass(CodeGenOptLevel OptLevel)
ExpandIRInstsLegacyPass(CodeGenOptLevel OptLevel)
: FunctionPass(ID), OptLevel(OptLevel) {
initializeExpandFpLegacyPassPass(*PassRegistry::getPassRegistry());
initializeExpandIRInstsLegacyPassPass(*PassRegistry::getPassRegistry());
}
ExpandFpLegacyPass() : ExpandFpLegacyPass(CodeGenOptLevel::None) {};
ExpandIRInstsLegacyPass() : ExpandIRInstsLegacyPass(CodeGenOptLevel::None) {};
bool runOnFunction(Function &F) override {
auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
@ -1165,19 +1172,21 @@ public:
};
} // namespace
ExpandFpPass::ExpandFpPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)
ExpandIRInstsPass::ExpandIRInstsPass(const TargetMachine &TM,
CodeGenOptLevel OptLevel)
: TM(&TM), OptLevel(OptLevel) {}
void ExpandFpPass::printPipeline(
void ExpandIRInstsPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<ExpandFpPass> *>(this)->printPipeline(
static_cast<PassInfoMixin<ExpandIRInstsPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
OS << '<';
OS << "O" << (int)OptLevel;
OS << '>';
}
PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) {
PreservedAnalyses ExpandIRInstsPass::run(Function &F,
FunctionAnalysisManager &FAM) {
const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
auto &TLI = *STI->getTargetLowering();
AssumptionCache *AC = nullptr;
@ -1202,12 +1211,13 @@ PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) {
: PreservedAnalyses::all();
}
char ExpandFpLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp",
char ExpandIRInstsLegacyPass::ID = 0;
// Opens the legacy pass registration for ExpandIRInstsLegacyPass. The
// description is kept identical to the one passed to INITIALIZE_PASS_END so
// the two halves of the registration agree after the rename from expand-fp.
INITIALIZE_PASS_BEGIN(ExpandIRInstsLegacyPass, "expand-ir-insts",
"Expand IR instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)
INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false)
INITIALIZE_PASS_END(ExpandIRInstsLegacyPass, "expand-ir-insts",
"Expand IR instructions", false, false)
FunctionPass *llvm::createExpandFpPass(CodeGenOptLevel OptLevel) {
return new ExpandFpLegacyPass(OptLevel);
FunctionPass *llvm::createExpandIRInstsPass(CodeGenOptLevel OptLevel) {
return new ExpandIRInstsLegacyPass(OptLevel);
}

View File

@ -1090,7 +1090,7 @@ bool TargetPassConfig::addISelPasses() {
PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
addPass(createPreISelIntrinsicLoweringPass());
addPass(createExpandFpPass(getOptLevel()));
addPass(createExpandIRInstsPass(getOptLevel()));
addIRPasses();
addCodeGenPrepare();
addPassesToHandleExceptions();

View File

@ -91,7 +91,7 @@
#include "llvm/CodeGen/DwarfEHPrepare.h"
#include "llvm/CodeGen/EarlyIfConversion.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/ExpandFp.h"
#include "llvm/CodeGen/ExpandIRInsts.h"
#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/CodeGen/ExpandPostRAPseudos.h"
#include "llvm/CodeGen/ExpandReductions.h"
@ -1632,7 +1632,7 @@ parseBoundsCheckingOptions(StringRef Params) {
return Options;
}
Expected<CodeGenOptLevel> parseExpandFpOptions(StringRef Param) {
Expected<CodeGenOptLevel> parseExpandIRInstsOptions(StringRef Param) {
if (Param.empty())
return CodeGenOptLevel::None;
@ -1641,13 +1641,13 @@ Expected<CodeGenOptLevel> parseExpandFpOptions(StringRef Param) {
uint8_t N;
if (!Prefix.empty() || Digit.getAsInteger(10, N))
return createStringError("invalid expand-fp pass parameter '%s'",
return createStringError("invalid expand-ir-insts pass parameter '%s'",
Param.str().c_str());
std::optional<CodeGenOptLevel> Level = CodeGenOpt::getLevel(N);
if (!Level.has_value())
return createStringError(
"invalid optimization level for expand-fp pass: %s",
"invalid optimization level for expand-ir-insts pass: %s",
Digit.str().c_str());
return *Level;

View File

@ -729,11 +729,11 @@ FUNCTION_PASS_WITH_PARAMS(
parseBoundsCheckingOptions,
"trap;rt;rt-abort;min-rt;min-rt-abort;merge;guard=N")
FUNCTION_PASS_WITH_PARAMS(
"expand-fp", "ExpandFpPass",
"expand-ir-insts", "ExpandIRInstsPass",
[TM = TM](CodeGenOptLevel OL) {
return ExpandFpPass(*TM, OL);
return ExpandIRInstsPass(*TM, OL);
},
parseExpandFpOptions, "O0;O1;O2;O3")
parseExpandIRInstsOptions, "O0;O1;O2;O3")
#undef FUNCTION_PASS_WITH_PARAMS

View File

@ -17,7 +17,7 @@
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: Module Verifier
; CHECK-NEXT: Lower Garbage Collection Instructions

View File

@ -21,7 +21,7 @@
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: SVE intrinsics optimizations
; CHECK-NEXT: FunctionPass Manager

View File

@ -579,8 +579,8 @@ define amdgpu_kernel void @fdiv_constant_sel_constants(ptr addrspace(1) %p, i1 %
ret void
}
; ExpandFp now expands frem before it reaches dagcombine.
; TODO Implement this optimization in/before ExpandFP
; ExpandIRInsts now expands frem before it reaches dagcombine.
; TODO Implement this optimization in/before ExpandIRInsts?
define amdgpu_kernel void @frem_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
; GFX9-LABEL: frem_constant_sel_constants:
; GFX9: ; %bb.0:

View File

@ -2,7 +2,7 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,SDAG %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck -check-prefix=GISEL %s
; FIXME: GISEL can't handle the "fptrunc float to bfloat" that expand-fp emits.
; FIXME: GISEL can't handle the "fptrunc float to bfloat" that expand-ir-insts emits.
; GISEL: unable to translate instruction: fptrunc

View File

@ -9,11 +9,11 @@
; RUN: | FileCheck -check-prefix=GCN-O3 %s
; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-fp<O0>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-mode-register,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation))),require<reg-usage>,cgscc(function(machine-function(reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-ir-insts<O0>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-mode-register,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation))),require<reg-usage>,cgscc(function(machine-function(reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-fp<O2>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-
usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-mode-register,si-insert-hard-clauses,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation))),require<reg-usage>,cgscc(function(machine-function(reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-ir-insts<O2>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),requir
e<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-mode-register,si-insert-hard-clauses,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation))),require<reg-usage>,cgscc(function(machine-function(reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-fp<O3>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc
(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-mode-register,si-insert-hard-clauses,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation))),require<reg-usage>,cgscc(function(machine-function(reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-ir-insts<O3>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>
,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-mode-register,si-insert-hard-clauses,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation))),require<reg-usage>,cgscc(function(machine-function(reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function))
define void @empty() {
ret void

View File

@ -28,7 +28,7 @@
; GCN-O0-NEXT: ModulePass Manager
; GCN-O0-NEXT: Pre-ISel Intrinsic Lowering
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Expand fp
; GCN-O0-NEXT: Expand IR instructions
; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O0-NEXT: AMDGPU Printf lowering
; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU
@ -182,7 +182,7 @@
; GCN-O1-NEXT: ModulePass Manager
; GCN-O1-NEXT: Pre-ISel Intrinsic Lowering
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Expand fp
; GCN-O1-NEXT: Expand IR instructions
; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O1-NEXT: AMDGPU Printf lowering
; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU
@ -475,7 +475,7 @@
; GCN-O1-OPTS-NEXT: ModulePass Manager
; GCN-O1-OPTS-NEXT: Pre-ISel Intrinsic Lowering
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Expand fp
; GCN-O1-OPTS-NEXT: Expand IR instructions
; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering
; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU
@ -796,7 +796,7 @@
; GCN-O2-NEXT: ModulePass Manager
; GCN-O2-NEXT: Pre-ISel Intrinsic Lowering
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Expand fp
; GCN-O2-NEXT: Expand IR instructions
; GCN-O2-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O2-NEXT: AMDGPU Printf lowering
; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU
@ -1122,7 +1122,7 @@
; GCN-O3-NEXT: ModulePass Manager
; GCN-O3-NEXT: Pre-ISel Intrinsic Lowering
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Expand fp
; GCN-O3-NEXT: Expand IR instructions
; GCN-O3-NEXT: AMDGPU Remove Incompatible Functions
; GCN-O3-NEXT: AMDGPU Printf lowering
; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU

View File

@ -5,7 +5,7 @@
; CHECK: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: Simplify the CFG
; CHECK-NEXT: Dominator Tree Construction

View File

@ -21,7 +21,7 @@
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: Module Verifier
; CHECK-NEXT: Lower Garbage Collection Instructions

View File

@ -33,7 +33,7 @@
; LAXX-NEXT: ModulePass Manager
; LAXX-NEXT: Pre-ISel Intrinsic Lowering
; LAXX-NEXT: FunctionPass Manager
; LAXX-NEXT: Expand fp
; LAXX-NEXT: Expand IR instructions
; LAXX-NEXT: Expand Atomic instructions
; LAXX-NEXT: Module Verifier
; LAXX-NEXT: Dominator Tree Construction

View File

@ -2,7 +2,7 @@
; CHECK: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: Module Verifier
; CHECK-NEXT: Dominator Tree Construction

View File

@ -18,7 +18,7 @@
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: PPC Lower MASS Entries
; CHECK-NEXT: FunctionPass Manager

View File

@ -21,7 +21,7 @@
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Convert i1 constants to i32/i64 if they are returned
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: PPC Lower MASS Entries

View File

@ -21,7 +21,7 @@
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: RISC-V Zacas ABI fix
; CHECK-NEXT: Module Verifier

View File

@ -25,7 +25,7 @@
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: RISC-V Zacas ABI fix
; CHECK-NEXT: Dominator Tree Construction

View File

@ -23,7 +23,7 @@
; SPIRV-O0-NEXT: ModulePass Manager
; SPIRV-O0-NEXT: Pre-ISel Intrinsic Lowering
; SPIRV-O0-NEXT: FunctionPass Manager
; SPIRV-O0-NEXT: Expand fp
; SPIRV-O0-NEXT: Expand IR instructions
; SPIRV-O0-NEXT: Lower Garbage Collection Instructions
; SPIRV-O0-NEXT: Shadow Stack GC Lowering
; SPIRV-O0-NEXT: Remove unreachable blocks from the CFG
@ -99,7 +99,7 @@
; SPIRV-Opt-NEXT: ModulePass Manager
; SPIRV-Opt-NEXT: Pre-ISel Intrinsic Lowering
; SPIRV-Opt-NEXT: FunctionPass Manager
; SPIRV-Opt-NEXT: Expand fp
; SPIRV-Opt-NEXT: Expand IR instructions
; SPIRV-Opt-NEXT: Dominator Tree Construction
; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
; SPIRV-Opt-NEXT: Natural Loop Information

View File

@ -19,7 +19,7 @@
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: Lower AMX intrinsics
; CHECK-NEXT: Lower AMX type for load/store

View File

@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s
; expand-fp must also run with optnone
; expand-ir-insts must also run with optnone
; Function Attrs: noinline optnone
define double @main(i224 %0) #0 {

View File

@ -29,7 +29,7 @@
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand fp
; CHECK-NEXT: Expand IR instructions
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: Lower AMX intrinsics
; CHECK-NEXT: Lower AMX type for load/store

View File

@ -1,20 +0,0 @@
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<O0>" %s -S -disable-output
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<O1>" %s -S -disable-output
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<O2>" %s -S -disable-output
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<O3>" %s -S -disable-output
; RUN: not opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<O4>" %s -S -disable-output 2>&1 | FileCheck --check-prefix=TOO-LARGE %s
; TOO-LARGE: {{.*}}invalid optimization level for expand-fp pass: 4
; RUN: not opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<Os>" %s -S -disable-output 2>&1 | FileCheck --check-prefix=NON-NUMERIC %s
; NON-NUMERIC: {{.*}}invalid expand-fp pass parameter
; RUN: not opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<O-1>" %s -S -disable-output 2>&1 | FileCheck --check-prefix=NEGATIVE %s
; NEGATIVE: {{.*}}invalid expand-fp pass parameter 'O-1'
; RUN: not opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<foo>" %s -S -disable-output 2>&1 | FileCheck --check-prefix=NO-O-PREFIX %s
; NO-O-PREFIX: {{.*}}invalid expand-fp pass parameter 'foo'
define void @empty() {
ret void
}

View File

@ -1,5 +1,5 @@
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<O0>" %s -S -o - | FileCheck --check-prefixes CHECK %s
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<O1>" %s -S -o - | FileCheck --check-prefixes CHECK,OPT1 %s
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<O0>" %s -S -o - | FileCheck --check-prefixes CHECK %s
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<O1>" %s -S -o - | FileCheck --check-prefixes CHECK,OPT1 %s
; Check the handling of potentially infinite numerators in the frem
; expansion at different optimization levels and with different

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-fp<O1>" %s -S -o - | FileCheck %s
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<O1>" %s -S -o - | FileCheck %s
define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @frem_f16(

View File

@ -1,4 +1,4 @@
; RUN: not opt -mtriple=amdgcn -passes=expand-fp -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -mtriple=amdgcn -passes=expand-ir-insts -disable-output %s 2>&1 | FileCheck %s
; CHECK: 'LibcallLoweringModuleAnalysis' analysis required
define void @empty() {

View File

@ -0,0 +1,20 @@
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<O0>" %s -S -disable-output
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<O1>" %s -S -disable-output
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<O2>" %s -S -disable-output
; RUN: opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<O3>" %s -S -disable-output
; RUN: not opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<O4>" %s -S -disable-output 2>&1 | FileCheck --check-prefix=TOO-LARGE %s
; TOO-LARGE: {{.*}}invalid optimization level for expand-ir-insts pass: 4
; RUN: not opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<Os>" %s -S -disable-output 2>&1 | FileCheck --check-prefix=NON-NUMERIC %s
; NON-NUMERIC: {{.*}}invalid expand-ir-insts pass parameter
; RUN: not opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<O-1>" %s -S -disable-output 2>&1 | FileCheck --check-prefix=NEGATIVE %s
; NEGATIVE: {{.*}}invalid expand-ir-insts pass parameter 'O-1'
; RUN: not opt -mtriple=amdgcn -passes="require<libcall-lowering-info>,expand-ir-insts<foo>" %s -S -disable-output 2>&1 | FileCheck --check-prefix=NO-O-PREFIX %s
; NO-O-PREFIX: {{.*}}invalid expand-ir-insts pass parameter 'foo'
define void @empty() {
ret void
}

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=x86_64-- --expand-fp < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-fp' < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
define i129 @halftosi129(half %a) {
; CHECK-LABEL: @halftosi129(

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=x86_64-- --expand-fp < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-fp' < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
define i129 @halftoui129(half %a) {
; CHECK-LABEL: @halftoui129(

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=x86_64-- --expand-fp < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-fp' < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
define half @si129tohalf(i129 %a) {
; CHECK-LABEL: @si129tohalf(

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=x86_64-- --expand-fp < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-fp' < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
define half @ui129tohalf(i129 %a) {
; CHECK-LABEL: @ui129tohalf(

View File

@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=x86_64-- --expand-fp < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-fp' < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
; expand-fp must also run with optnone
; expand-ir-insts must also run with optnone
; Function Attrs: noinline optnone
define double @main(i224 %0) #0 {

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=x86_64-- -expand-fp -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-fp' -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -expand-ir-insts -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' -expand-div-rem-bits 128 < %s | FileCheck %s
define void @sdiv129(ptr %ptr, ptr %out) nounwind {
; CHECK-LABEL: @sdiv129(

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=x86_64-- -expand-fp -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-fp' -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -expand-ir-insts -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' -expand-div-rem-bits 128 < %s | FileCheck %s
define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-LABEL: @test(

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=x86_64-- -expand-fp -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-fp' -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -expand-ir-insts -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' -expand-div-rem-bits 128 < %s | FileCheck %s
define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-LABEL: @test(

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=x86_64-- -expand-fp -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-fp' -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -expand-ir-insts -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' -expand-div-rem-bits 128 < %s | FileCheck %s
define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-LABEL: @test(

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -mtriple=x86_64-- -expand-fp -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-fp' -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -expand-ir-insts -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' -expand-div-rem-bits 128 < %s | FileCheck %s
define <2 x i129> @sdiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-LABEL: define <2 x i129> @sdiv129(

View File

@ -1,2 +0,0 @@
if not "X86" in config.root.targets:
config.unsupported = True

View File

@ -3,7 +3,7 @@
; RUN: not opt -passes=codegenprepare -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes=complex-deinterleaving -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes=dwarf-eh-prepare -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes=expand-fp -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes=expand-ir-insts -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes=expand-memcmp -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes=indirectbr-expand -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes=interleaved-access -disable-output %s 2>&1 | FileCheck %s
@ -12,7 +12,7 @@
; RUN: not opt -passes=select-optimize -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes=stack-protector -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes=typepromotion -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes='expand-fp<O1>' -disable-output %s 2>&1 | FileCheck %s
; RUN: not opt -passes='expand-ir-insts<O1>' -disable-output %s 2>&1 | FileCheck %s
define void @foo() { ret void }
; CHECK: pass '{{.+}}' requires TargetMachine
;requires TargetMachine

View File

@ -377,7 +377,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"select-optimize",
"structurizecfg",
"fix-irreducible",
"expand-fp",
"expand-ir-insts",
"callbrprepare",
"scalarizer",
};
@ -428,7 +428,7 @@ optMain(int argc, char **argv,
initializeTarget(Registry);
// For codegen passes, only passes that do IR to IR transformation are
// supported.
initializeExpandFpLegacyPassPass(Registry);
initializeExpandIRInstsLegacyPassPass(Registry);
initializeExpandMemCmpLegacyPassPass(Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeSelectOptimizePass(Registry);

View File

@ -57,7 +57,7 @@ static_library("CodeGen") {
"EarlyIfConversion.cpp",
"EdgeBundles.cpp",
"ExecutionDomainFix.cpp",
"ExpandFp.cpp",
"ExpandIRInsts.cpp",
"ExpandMemCmp.cpp",
"ExpandPostRAPseudos.cpp",
"ExpandReductions.cpp",

View File

@ -134,15 +134,15 @@ Transforms/CorrelatedValuePropagation/urem.ll
Transforms/CrossDSOCFI/basic.ll
Transforms/CrossDSOCFI/cfi_functions.ll
Transforms/CrossDSOCFI/thumb.ll
Transforms/ExpandLargeDivRem/X86/sdiv129.ll
Transforms/ExpandLargeDivRem/X86/srem129.ll
Transforms/ExpandLargeDivRem/X86/udiv129.ll
Transforms/ExpandLargeDivRem/X86/urem129.ll
Transforms/ExpandLargeDivRem/X86/vector.ll
Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll
Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll
Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll
Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll
Transforms/ExpandIRInsts/X86/sdiv129.ll
Transforms/ExpandIRInsts/X86/srem129.ll
Transforms/ExpandIRInsts/X86/udiv129.ll
Transforms/ExpandIRInsts/X86/urem129.ll
Transforms/ExpandIRInsts/X86/vector.ll
Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
Transforms/ExpandIRInsts/X86/expand-large-fp-convert-si129tofp.ll
Transforms/ExpandIRInsts/X86/expand-large-fp-convert-ui129tofp.ll
Transforms/FixIrreducible/basic.ll
Transforms/FixIrreducible/bug45623.ll
Transforms/FixIrreducible/callbr.ll