[llc] Add -mtune option (#186998)

This patch adds a Clang-compatible -mtune option to llc, to enable
decoupled ISA and microarchitecture targeting, which is especially
important for backend development. For example, it makes it easy to test
the effects of a subtarget feature or scheduling model on codegen across a
variety of workloads in the IR corpus benchmark:
https://github.com/dtcxzyw/llvm-codegen-benchmark.

The implementation adds an isolated generic codegen flag, to establish a
base for wider usage - the plan is to add it to `opt` as well in a
followup patch. Then `llc` consumes it, and sets `tune-cpu` attributes
for functions, which are further consumed by the backend.
This commit is contained in:
Tomer Shafir 2026-03-20 10:20:40 +02:00 committed by GitHub
parent 4df296733d
commit 69cd746bd2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 149 additions and 32 deletions

View File

@ -133,8 +133,8 @@ static std::string OptLLVM(const std::string &IR, CodeGenOptLevel OLvl) {
if (!TM)
ErrorAndExit("Could not create target machine");
codegen::setFunctionAttributes(codegen::getCPUStr(),
codegen::getFeaturesStr(), *M);
codegen::setFunctionAttributes(*M, codegen::getCPUStr(),
codegen::getFeaturesStr());
// Add a pass that writes the optimized IR to an output stream
std::string outString;

View File

@ -81,6 +81,17 @@ End-user Options
llvm-as < /dev/null | llc -march=xyz -mcpu=help
.. option:: -mtune=<cpuname>
Specify a specific chip microarchitecture in the current architecture
to tune code for. By default this is inferred from the target triple and
autodetected to the current architecture. For a list of available tuning
CPUs, use:
.. code-block:: none
llvm-as < /dev/null | llc -march=xyz -mtune=help
.. option:: -filetype=<output file type>
Specify what kind of output ``llc`` should generate. Options are: ``asm``

View File

@ -219,6 +219,7 @@ Changes to the LLVM tools
* `llvm-objcopy` no longer corrupts the symbol table when `--update-section` is called for ELF files.
* `FileCheck` option `-check-prefix` now accepts a comma-separated list of
prefixes, making it an alias of the existing `-check-prefixes` option.
* Add `-mtune` option to `llc`.
Changes to LLDB
---------------

View File

@ -37,6 +37,8 @@ LLVM_ABI std::string getMArch();
LLVM_ABI std::string getMCPU();
LLVM_ABI std::string getMTune();
LLVM_ABI std::vector<std::string> getMAttrs();
LLVM_ABI Reloc::Model getRelocModel();
@ -162,6 +164,12 @@ struct RegisterCodeGenFlags {
LLVM_ABI RegisterCodeGenFlags();
};
/// Tools that support subtarget tuning should create this object with static
/// storage to register the -mtune command line option.
struct RegisterMTuneFlag {
LLVM_ABI RegisterMTuneFlag();
};
/// Tools that support stats saving should create this object with static
/// storage to register the --save-stats command line option.
struct RegisterSaveStatsFlag {
@ -184,21 +192,23 @@ InitTargetOptionsFromCodeGenFlags(const llvm::Triple &TheTriple);
LLVM_ABI std::string getCPUStr();
LLVM_ABI std::string getTuneCPUStr();
LLVM_ABI std::string getFeaturesStr();
LLVM_ABI std::vector<std::string> getFeatureList();
LLVM_ABI void renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val);
/// Set function attributes of function \p F based on CPU, Features, and command
/// line flags.
LLVM_ABI void setFunctionAttributes(StringRef CPU, StringRef Features,
Function &F);
/// Set function attributes of function \p F based on CPU, TuneCPU, Features,
/// and command line flags.
LLVM_ABI void setFunctionAttributes(Function &F, StringRef CPU,
StringRef Features, StringRef TuneCPU = "");
/// Set function attributes of functions in Module M based on CPU,
/// Features, and command line flags.
LLVM_ABI void setFunctionAttributes(StringRef CPU, StringRef Features,
Module &M);
/// TuneCPU, Features, and command line flags.
LLVM_ABI void setFunctionAttributes(Module &M, StringRef CPU,
StringRef Features, StringRef TuneCPU = "");
/// Should value-tracking variable locations / instruction referencing be
/// enabled by default for this triple?

View File

@ -66,6 +66,7 @@ using namespace llvm;
CGOPT(std::string, MArch)
CGOPT(std::string, MCPU)
CGOPT(std::string, MTune)
CGLIST(std::string, MAttrs)
CGOPT_EXP(Reloc::Model, RelocModel)
CGOPT(ThreadModel::Model, ThreadModel)
@ -541,6 +542,15 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
mc::RegisterMCTargetOptionsFlags();
}
/// Defines the "mtune" command line option as a function-local static string
/// option (empty by default) and passes it to CGBINDOPT(MTune).
/// NOTE(review): presumably CGBINDOPT is what makes the value visible through
/// codegen::getMTune() -- confirm against the macro definition.
codegen::RegisterMTuneFlag::RegisterMTuneFlag() {
static cl::opt<std::string> MTune(
"mtune",
cl::desc("Tune for a specific CPU microarchitecture (-mtune=help for "
"details)"),
cl::value_desc("tune-cpu-name"), cl::init(""));
CGBINDOPT(MTune);
}
codegen::RegisterSaveStatsFlag::RegisterSaveStatsFlag() {
static cl::opt<SaveStatsMode> SaveStats(
"save-stats",
@ -638,8 +648,8 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
std::string codegen::getCPUStr() {
std::string MCPU = getMCPU();
// If user asked for the 'native' CPU, autodetect here. If autodection fails,
// this will set the CPU to an empty string which tells the target to
// If user asked for the 'native' CPU, autodetect here. If auto-detection
// fails, this will set the CPU to an empty string which tells the target to
// pick a basic default.
if (MCPU == "native")
return std::string(sys::getHostCPUName());
@ -647,6 +657,18 @@ std::string codegen::getCPUStr() {
return MCPU;
}
/// Returns the tune CPU name obtained from getMTune(), substituting the host
/// CPU name reported by sys::getHostCPUName() when the value is "native". Any
/// other value (including the empty string) is returned unchanged.
std::string codegen::getTuneCPUStr() {
std::string TuneCPU = getMTune();
// If user asked for the 'native' tune CPU, autodetect here. If auto-detection
// fails, this will set the tune CPU to an empty string which tells the target
// to pick a basic default.
if (TuneCPU == "native")
return std::string(sys::getHostCPUName());
return TuneCPU;
}
std::string codegen::getFeaturesStr() {
SubtargetFeatures Features;
@ -691,16 +713,16 @@ void codegen::renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val) {
renderBoolStringAttr(NewAttrs, AttrName, *CL); \
} while (0)
/// Set function attributes of function \p F based on CPU, Features, and command
/// line flags.
void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
Function &F) {
void codegen::setFunctionAttributes(Function &F, StringRef CPU,
StringRef Features, StringRef TuneCPU) {
auto &Ctx = F.getContext();
AttributeList Attrs = F.getAttributes();
AttrBuilder NewAttrs(Ctx);
if (!CPU.empty() && !F.hasFnAttribute("target-cpu"))
NewAttrs.addAttribute("target-cpu", CPU);
if (!TuneCPU.empty() && !F.hasFnAttribute("tune-cpu"))
NewAttrs.addAttribute("tune-cpu", TuneCPU);
if (!Features.empty()) {
// Append the command line features to any that are already on the function.
StringRef OldFeatures =
@ -761,12 +783,10 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
F.setAttributes(Attrs.addFnAttributes(Ctx, NewAttrs));
}
/// Set function attributes of functions in Module M based on CPU,
/// Features, and command line flags.
void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
Module &M) {
void codegen::setFunctionAttributes(Module &M, StringRef CPU,
StringRef Features, StringRef TuneCPU) {
for (Function &F : M)
setFunctionAttributes(CPU, Features, F);
setFunctionAttributes(F, CPU, Features, TuneCPU);
}
Expected<std::unique_ptr<TargetMachine>>

View File

@ -0,0 +1,69 @@
; REQUIRES: aarch64-registered-target
;; There shouldn't be a default -mtune.
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOTUNE
; RUN: llc < %s -mtriple=aarch64 -mtune=generic | FileCheck %s --check-prefixes=CHECK-TUNE-GENERIC
; RUN: llc < %s -mtriple=aarch64 -mtune=apple-m5 | FileCheck %s --check-prefixes=CHECK-TUNE-APPLE-M5
;; Check interaction between mcpu and mtune.
; RUN: llc < %s -mtriple=aarch64 -mcpu=apple-m5 | FileCheck %s --check-prefixes=CHECK-TUNE-APPLE-M5
; RUN: llc < %s -mtriple=aarch64 -mcpu=apple-m5 -mtune=generic | FileCheck %s --check-prefixes=CHECK-TUNE-GENERIC
;; Test -mtune=help
; RUN: llc -mtriple=aarch64 -mtune=help 2>&1 | FileCheck %s --check-prefixes=CHECK-TUNE-HELP,CHECK-TUNE-HELP-NO-COMPILE
; RUN: llc -mtriple=aarch64 -mtune=help -o %t.s 2>&1 | FileCheck %s --check-prefixes=CHECK-TUNE-HELP,CHECK-TUNE-HELP-NO-COMPILE
; RUN: llc < %s -mtriple=aarch64 -mtune=help 2>&1 | FileCheck %s --check-prefixes=CHECK-TUNE-HELP,CHECK-TUNE-HELP-NO-COMPILE
; RUN: llc < %s -mtriple=aarch64 -mtune=help -o %t.s 2>&1 | FileCheck %s --check-prefixes=CHECK-TUNE-HELP,CHECK-TUNE-HELP-NO-COMPILE
;; Missing target triple for -mtune=help
; RUN: not llc -mtune=help 2>&1 | FileCheck %s --check-prefixes=CHECK-TUNE-HELP-MISSING-TRIPLE
; RUN: not llc < %s -mtune=help 2>&1 | FileCheck %s --check-prefixes=CHECK-TUNE-HELP-MISSING-TRIPLE
; CHECK-TUNE-HELP: Available CPUs for this target:
; CHECK-TUNE-HELP: Available features for this target:
;; Check that we don't compile the file
; CHECK-TUNE-HELP-NO-COMPILE-NOT: zero_cycle_regmove_FPR32:
; CHECK-TUNE-HELP-MISSING-TRIPLE: error: unable to get target for 'unknown', see --version and --triple.
;; A test case that depends on `FeatureZCRegMoveFPR128` tuning feature, to enable -mtune verification
;; through codegen effects. Taken from: llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmove-fpr.ll
define void @zero_cycle_regmove_FPR32(float %a, float %b, float %c, float %d) {
; CHECK-NOTUNE-LABEL: zero_cycle_regmove_FPR32:
; CHECK-NOTUNE: fmov s0, s2
; CHECK-NOTUNE-NEXT: fmov s1, s3
; CHECK-NOTUNE-NEXT: fmov s8, s3
; CHECK-NOTUNE-NEXT: fmov s9, s2
; CHECK-NOTUNE-NEXT: bl foo_float
; CHECK-NOTUNE-NEXT: fmov s0, s9
; CHECK-NOTUNE-NEXT: fmov s1, s8
; CHECK-NOTUNE-NEXT: bl foo_float
;
; CHECK-TUNE-GENERIC-LABEL: zero_cycle_regmove_FPR32:
; CHECK-TUNE-GENERIC: fmov s0, s2
; CHECK-TUNE-GENERIC-NEXT: fmov s1, s3
; CHECK-TUNE-GENERIC-NEXT: fmov s8, s3
; CHECK-TUNE-GENERIC-NEXT: fmov s9, s2
; CHECK-TUNE-GENERIC-NEXT: bl foo_float
; CHECK-TUNE-GENERIC-NEXT: fmov s0, s9
; CHECK-TUNE-GENERIC-NEXT: fmov s1, s8
; CHECK-TUNE-GENERIC-NEXT: bl foo_float
;
; CHECK-TUNE-APPLE-M5-LABEL: zero_cycle_regmove_FPR32:
; CHECK-TUNE-APPLE-M5: mov v8.16b, v3.16b
; CHECK-TUNE-APPLE-M5-NEXT: mov v9.16b, v2.16b
; CHECK-TUNE-APPLE-M5-NEXT: mov v0.16b, v2.16b
; CHECK-TUNE-APPLE-M5-NEXT: mov v1.16b, v3.16b
; CHECK-TUNE-APPLE-M5-NEXT: bl foo_float
; CHECK-TUNE-APPLE-M5-NEXT: mov v0.16b, v9.16b
; CHECK-TUNE-APPLE-M5-NEXT: mov v1.16b, v8.16b
; CHECK-TUNE-APPLE-M5-NEXT: bl foo_float
entry:
%call = call float @foo_float(float %c, float %d)
%call1 = call float @foo_float(float %c, float %d)
unreachable
}
declare float @foo_float(float, float)

View File

@ -67,6 +67,7 @@
using namespace llvm;
static codegen::RegisterCodeGenFlags CGF;
static codegen::RegisterMTuneFlag MTF;
static codegen::RegisterSaveStatsFlag SSF;
// General options for llc. Other pass-specific options are specified
@ -501,16 +502,19 @@ static int compileModule(char **argv, SmallVectorImpl<PassPlugin> &PluginList,
std::unique_ptr<Module> M;
std::unique_ptr<MIRParser> MIR;
Triple TheTriple;
std::string CPUStr = codegen::getCPUStr(),
FeaturesStr = codegen::getFeaturesStr();
std::string CPUStr = codegen::getCPUStr();
std::string TuneCPUStr = codegen::getTuneCPUStr();
std::string FeaturesStr = codegen::getFeaturesStr();
// Set attributes on functions as loaded from MIR from command line arguments.
auto setMIRFunctionAttributes = [&CPUStr, &FeaturesStr](Function &F) {
codegen::setFunctionAttributes(CPUStr, FeaturesStr, F);
auto setMIRFunctionAttributes = [&CPUStr, &TuneCPUStr,
&FeaturesStr](Function &F) {
codegen::setFunctionAttributes(F, CPUStr, FeaturesStr, TuneCPUStr);
};
auto MAttrs = codegen::getMAttrs();
bool SkipModule = CPUStr == "help" || is_contained(MAttrs, "help");
bool SkipModule =
CPUStr == "help" || TuneCPUStr == "help" || is_contained(MAttrs, "help");
CodeGenOptLevel OLvl;
if (auto Level = CodeGenOpt::parseLevel(OptLevel)) {
@ -658,8 +662,10 @@ static int compileModule(char **argv, SmallVectorImpl<PassPlugin> &PluginList,
}
InitializeOptions(TheTriple);
// Pass "help" as CPU for -mtune=help
std::string SkipModuleCPU = (TuneCPUStr == "help" ? "help" : CPUStr);
Target = std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
TheTriple, CPUStr, FeaturesStr, Options, RM, CM, OLvl));
TheTriple, SkipModuleCPU, FeaturesStr, Options, RM, CM, OLvl));
assert(Target && "Could not allocate target machine!");
// Set PGO options based on command line flags
@ -711,9 +717,9 @@ static int compileModule(char **argv, SmallVectorImpl<PassPlugin> &PluginList,
if (!NoVerify && verifyModule(*M, &errs()))
reportError("input module cannot be verified", InputFilename);
// Override function attributes based on CPUStr, FeaturesStr, and command line
// flags.
codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M);
// Override function attributes based on CPUStr, TuneCPUStr, FeaturesStr, and
// command line flags.
codegen::setFunctionAttributes(*M, CPUStr, FeaturesStr, TuneCPUStr);
for (auto &Plugin : PluginList) {
CodeGenFileType CGFT = codegen::getFileType();

View File

@ -126,8 +126,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
M->setTargetTriple(TM->getTargetTriple());
M->setDataLayout(TM->createDataLayout());
codegen::setFunctionAttributes(TM->getTargetCPU(),
TM->getTargetFeatureString(), *M);
codegen::setFunctionAttributes(*M, TM->getTargetCPU(),
TM->getTargetFeatureString());
// Create pass pipeline
//

View File

@ -657,7 +657,7 @@ optMain(int argc, char **argv,
// Override function attributes based on CPUStr, FeaturesStr, and command line
// flags.
codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M);
codegen::setFunctionAttributes(*M, CPUStr, FeaturesStr);
// If the output is set to be emitted to standard out, and standard out is a
// console, print out a warning message and refuse to do it. We don't