[FMV][AIX] Implement target_clones (cpu-only) (#177428)

This PR implements Function Multi-versioning on AIX using `__attribute__
((target_clones(<feature-list>)))`.
Initially, we will only support specifying a cpu in the version list. 
Feature strings (such as "altivec" or "isel") on target_clones will be
implemented in a future PR.

Accepted syntax:
```
__attribute__((target_clones(<OPTIONS>)))
```
where `<OPTIONS>` is a comma separated list of strings, each string is
either:
1) the default string `"default"`
2) a cpu string `"cpu=<CPU>"`, where `<CPU>` is a value accepted by the
`-mcpu` flag.
For example, specifying the following on a function
```
__attribute__((target_clones("default", "cpu=power8", "cpu=power9")))
int foo(int x) { return x + 1; }
```
would generate 3 versions of `foo`: (1) `foo.default`, (2)
`foo.cpu_pwr8`, and (3) `foo.cpu_pwr9` (CPU names are normalized, so
`power8` is mangled as `pwr8`),
an IFUNC `foo`, and the resolver function `foo.resolver` for the IFUNC,
which chooses one of the three versions at runtime.

---------

Co-authored-by: Wael Yehia <wyehia@ca.ibm.com>
This commit is contained in:
Wael Yehia 2026-03-17 23:15:15 -04:00 committed by GitHub
parent 3661bf74cd
commit 495c518b96
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 633 additions and 74 deletions

View File

@ -3294,6 +3294,12 @@ multiversioned function would have if it had been declared without the attribute
For backward compatibility with earlier Clang releases, a function alias with an
``.ifunc`` suffix is also emitted. The ``.ifunc`` suffixed symbol is a deprecated
feature and support for it may be removed in the future.
For PowerPC targets, ``target_clones`` is supported on AIX only. Only CPU
(specified as ``cpu=CPU``) and ``default`` options are allowed. IFUNC is supported
on AIX in Clang, so dispatch is implemented similarly to other targets that use
IFUNC. An FMV function that is only declared (and not defined) in a translation
unit is treated as a non-FMV function. The resolver and the function clones are
given internal linkage.
}];
}

View File

@ -1571,7 +1571,7 @@ public:
/// which requires support for cpu_supports and cpu_is functionality.
bool supportsMultiVersioning() const {
return getTriple().isX86() || getTriple().isAArch64() ||
getTriple().isRISCV();
getTriple().isRISCV() || getTriple().isOSAIX();
}
/// Identify whether this target supports IFuncs.

View File

@ -53,6 +53,11 @@ public:
// vector double vec_xxpermdi(vector double, vector double, int);
// vector short vec_xxsldwi(vector short, vector short, int);
bool BuiltinVSX(CallExpr *TheCall);
bool checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params,
const SmallVectorImpl<SourceLocation> &Locs,
SmallVectorImpl<SmallString<64>> &NewParams,
SourceLocation AttrLoc);
};
} // namespace clang

View File

@ -15247,6 +15247,14 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else if (Target->getTriple().isOSAIX()) {
std::vector<std::string> Features;
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
if (VersionStr.starts_with("cpu="))
TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1);
else
assert(VersionStr == "default");
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else {
std::vector<std::string> Features;
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());

View File

@ -678,6 +678,57 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
}
}
// Splits a comma separated target/target_clones version string into its
// "cpu=", "tune=" and feature components. The literal "default" yields an
// empty result.
ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
  ParsedTargetAttr Parsed;
  if (Features == "default")
    return Parsed;

  SmallVector<StringRef, 1> Pieces;
  Features.split(Pieces, ",");

  for (StringRef Piece : Pieces) {
    // Surrounding whitespace is tolerated rather than diagnosed or kept as
    // part of the name.
    Piece = Piece.trim();

    if (Piece.starts_with("cpu=")) {
      // Only the first cpu is kept; a repeated key is recorded as a duplicate.
      if (Parsed.CPU.empty())
        Parsed.CPU = Piece.split("=").second.trim();
      else
        Parsed.Duplicate = "cpu=";
      continue;
    }

    if (Piece.starts_with("tune=")) {
      // Same first-one-wins rule as for "cpu=".
      if (Parsed.Tune.empty())
        Parsed.Tune = Piece.split("=").second.trim();
      else
        Parsed.Duplicate = "tune=";
      continue;
    }

    // Anything else is a feature toggle for the backend: "no-<f>" turns the
    // feature off ("-<f>"), a bare "<f>" turns it on ("+<f>").
    if (Piece.starts_with("no-"))
      Parsed.Features.push_back("-" + Piece.split("-").second.str());
    else
      Parsed.Features.push_back("+" + Piece.str());
  }
  return Parsed;
}
// Returns the 32-bit priority of one multiversion variant: 0 when no feature
// string is given, 1 through 5 for pwr7 through pwr11, and 0 for any other
// cpu name. Exactly one feature/cpu string per clone is expected.
// NOTE(review): presumably the caller orders resolver checks from highest to
// lowest priority -- confirm against the FMV sorting code.
llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
  const llvm::APInt LowestPriority(32, 0);
  if (Features.empty())
    return LowestPriority;
  assert(Features.size() == 1 && "one feature/cpu per clone on PowerPC");

  const ParsedTargetAttr Version = parseTargetAttr(Features[0]);
  if (Version.CPU.empty()) {
    // Feature-only versions are not supported yet.
    assert(false && "unimplemented");
    return LowestPriority;
  }

  const int Rank = llvm::StringSwitch<int>(Version.CPU)
                       .Case("pwr7", 1)
                       .Case("pwr8", 2)
                       .Case("pwr9", 3)
                       .Case("pwr10", 4)
                       .Case("pwr11", 5)
                       .Default(0);
  return llvm::APInt(32, Rank);
}
// Make sure that registers are added in the correct array index which should be
// the DWARF number for PPC registers.
const char *const PPCTargetInfo::GCCRegNames[] = {

View File

@ -199,6 +199,10 @@ public:
bool supportsTargetAttributeTune() const override { return true; }
ParsedTargetAttr parseTargetAttr(StringRef Str) const override;
llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override;
ArrayRef<const char *> getGCCRegNames() const override;
ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;

View File

@ -45,6 +45,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/SipHash.h"
@ -3075,12 +3076,86 @@ void CodeGenFunction::EmitMultiVersionResolver(
case llvm::Triple::riscv64be:
EmitRISCVMultiVersionResolver(Resolver, Options);
return;
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
if (getContext().getTargetInfo().getTriple().isOSAIX()) {
EmitPPCAIXMultiVersionResolver(Resolver, Options);
return;
}
[[fallthrough]];
default:
assert(false && "Only implemented for x86, AArch64 and RISC-V targets");
assert(false &&
"Only implemented for x86, AArch64, RISC-V, and PowerPC AIX");
}
}
/**
 * Emits the body of an AIX function-multiversioning resolver: a chain of
 * __builtin_cpu_supports tests, one per non-default option in the order given
 * by Options, each returning the matching clone, followed by an unconditional
 * return of the default clone.
 *
 * define internal ptr @foo.resolver() {
 * entry:
 *   %is_version_1 = __builtin_cpu_supports(version_1)
 *   br i1 %is_version_1, label %if.version_1, label %if.else_2
 *
 * if.version_1:
 *   ret ptr @foo.version_1
 *
 * if.else_2:
 *   %is_version_2 = __builtin_cpu_supports(version_2)
 *   ...
 * if.else:                                      ; preds = %entry
 *   ret ptr @foo.default
 * }
 *
 * Resolver is the function whose body is filled in. Options must end with the
 * default (no architecture, no features) entry; every other entry must carry
 * exactly one "cpu=<name>" feature string.
 */
void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
    llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {

  // entry:
  llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver);

  for (const FMVResolverOption &RO : Options) {
    Builder.SetInsertPoint(CurBlock);
    // The 'default' or 'generic' case.
    if (!RO.Architecture && RO.Features.empty()) {
      // if.else:
      //   ret ptr @foo.default
      assert(&RO == Options.end() - 1 &&
             "Default or Generic case must be last");
      Builder.CreateRet(RO.Function);
      return;
    }
    // if.else_n:
    //   %is_version_n = __builtin_cpu_supports(version_n)
    //   br i1 %is_version_n, label %if.version_n, label %if.else_n+1
    //
    // if.version_n:
    //   ret ptr @foo_version_n

    assert(RO.Features.size() == 1 &&
           "for now one feature requirement per version");

    assert(RO.Features[0].starts_with("cpu="));
    StringRef CPU = RO.Features[0].split("=").second.trim();
    // Translate the cpu name into the ISA-level feature string that is handed
    // to __builtin_cpu_supports. pwr10 and pwr11 both map to "arch_3_1". A cpu
    // outside this list yields the placeholder "error", which is not a known
    // feature name.
    StringRef Feature = llvm::StringSwitch<StringRef>(CPU)
                            .Case("pwr7", "arch_2_06")
                            .Case("pwr8", "arch_2_07")
                            .Case("pwr9", "arch_3_00")
                            .Case("pwr10", "arch_3_1")
                            .Case("pwr11", "arch_3_1")
                            .Default("error");

    llvm::Value *Condition = EmitPPCBuiltinCpu(
        Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), Feature);

    // The "then" block returns this clone; the "else" block becomes the
    // insertion point for the next option's test.
    llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver);
    CurBlock = createBasicBlock("if.else", Resolver);
    Builder.CreateCondBr(Condition, ThenBlock, CurBlock);

    Builder.SetInsertPoint(ThenBlock);
    Builder.CreateRet(RO.Function);
  }

  // Only reached when Options had no default entry.
  llvm_unreachable("Default case missing");
}
void CodeGenFunction::EmitRISCVMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {

View File

@ -4912,6 +4912,8 @@ public:
llvm::Value *BuildVector(ArrayRef<llvm::Value *> Ops);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType,
StringRef CPUStr);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
@ -5578,6 +5580,8 @@ public:
ArrayRef<FMVResolverOption> Options);
void EmitRISCVMultiVersionResolver(llvm::Function *Resolver,
ArrayRef<FMVResolverOption> Options);
void EmitPPCAIXMultiVersionResolver(llvm::Function *Resolver,
ArrayRef<FMVResolverOption> Options);
Address EmitAddressOfPFPField(Address RecordPtr, const PFPField &Field);
Address EmitAddressOfPFPField(Address RecordPtr, Address FieldPtr,

View File

@ -3060,11 +3060,13 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// Now add the target-cpu and target-features to the function.
// While we populated the feature map above, we still need to
// get and parse the target attribute so we can get the cpu for
// the function.
if (TD) {
ParsedTargetAttr ParsedAttr =
Target.parseTargetAttr(TD->getFeaturesStr());
// get and parse the target/target_clones attribute so we can
// get the cpu for the function.
StringRef FeatureStr = TD ? TD->getFeaturesStr() : StringRef();
if (TC && (getTriple().isOSAIX() || getTriple().isX86()))
FeatureStr = TC->getFeatureStr(GD.getMultiVersionIndex());
if (!FeatureStr.empty()) {
ParsedTargetAttr ParsedAttr = Target.parseTargetAttr(FeatureStr);
if (!ParsedAttr.CPU.empty() &&
getTarget().isValidCPUName(ParsedAttr.CPU)) {
TargetCPU = ParsedAttr.CPU;
@ -4769,7 +4771,7 @@ getFMVPriority(const TargetInfo &TI,
static llvm::GlobalValue::LinkageTypes
getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
if (FD->getFormalLinkage() == Linkage::Internal)
if (FD->getFormalLinkage() == Linkage::Internal || CGM.getTriple().isOSAIX())
return llvm::GlobalValue::InternalLinkage;
return llvm::GlobalValue::WeakODRLinkage;
}
@ -4803,7 +4805,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
// For AArch64, a resolver is only emitted if a function marked with
// target_version("default")) or target_clones("default") is defined
// in this TU. For other architectures it is always emitted.
bool ShouldEmitResolver = !getTarget().getTriple().isAArch64();
bool ShouldEmitResolver = !getTriple().isAArch64();
SmallVector<CodeGenFunction::FMVResolverOption, 10> Options;
llvm::DenseMap<llvm::Function *, const FunctionDecl *> DeclMap;
@ -4855,7 +4857,8 @@ void CodeGenModule::emitMultiVersionFunctions() {
if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant)) {
ResolverConstant = IFunc->getResolver();
if (FD->isTargetClonesMultiVersion() &&
!getTarget().getTriple().isAArch64()) {
!getTarget().getTriple().isAArch64() &&
!getTarget().getTriple().isOSAIX()) {
std::string MangledName = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
if (!GetGlobalValue(MangledName + ".ifunc")) {
@ -5155,9 +5158,14 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
llvm::Constant *Resolver = GetOrCreateLLVMFunction(
MangledName + ".resolver", ResolverType, GlobalDecl{},
/*ForVTable=*/false);
llvm::GlobalIFunc *GIF =
llvm::GlobalIFunc::create(DeclTy, AS, getMultiversionLinkage(*this, GD),
"", Resolver, &getModule());
// On AIX, the FMV attribute is ignored on a declaration. The ifunc is only
// generated for FMV definitions, so it does not need to be weak.
auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD)
: getMultiversionLinkage(*this, GD);
llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, AS, Linkage, "",
Resolver, &getModule());
GIF->setName(ResolverName);
SetCommonAttributes(FD, GIF);
if (ResolverGV)
@ -5176,6 +5184,7 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
void CodeGenModule::setMultiVersionResolverAttributes(llvm::Function *Resolver,
GlobalDecl GD) {
const NamedDecl *D = dyn_cast_or_null<NamedDecl>(GD.getDecl());
Resolver->setLinkage(getMultiversionLinkage(*this, GD));
// Function body has to be emitted before calling setGlobalVisibility
@ -5255,6 +5264,15 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
AddDeferredMultiVersionResolverToEmit(GD);
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
}
// On AIX, a declared (but not defined) FMV shall be treated like a
// regular non-FMV function. If a definition is later seen, then
// GetOrCreateMultiVersionResolver will get called (when processing said
// definition) which will replace the IR declaration we're creating here
// with the FMV ifunc (see replaceDeclarationWith).
else if (getTriple().isOSAIX() && !FD->isDefined()) {
NameWithoutMultiVersionMangling = getMangledNameImpl(
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
} else
return GetOrCreateMultiVersionResolver(GD);
}
@ -6713,6 +6731,9 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
auto *Fn = cast<llvm::Function>(GV);
setFunctionLinkage(GD, Fn);
if (getTriple().isOSAIX() && D->isTargetClonesMultiVersion())
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
// FIXME: this is redundant with part of setFunctionDefinitionAttributes
setGVProperties(Fn, GD);

View File

@ -70,31 +70,21 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
return CI;
}
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
// Do not emit the builtin arguments in the arguments of a function call,
// because the evaluation order of function arguments is not specified in C++.
// This is important when testing to ensure the arguments are emitted in the
// same order every time. Eg:
// Instead of:
// return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
// EmitScalarExpr(E->getArg(1)), "swdiv");
// Use:
// Value *Op0 = EmitScalarExpr(E->getArg(0));
// Value *Op1 = EmitScalarExpr(E->getArg(1));
// return Builder.CreateFDiv(Op0, Op1, "swdiv")
Intrinsic::ID ID = Intrinsic::not_intrinsic;
Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID,
llvm::Type *ReturnType,
StringRef CPUStr) {
assert(BuiltinID == Builtin::BI__builtin_cpu_is ||
BuiltinID == Builtin::BI__builtin_cpu_supports);
#include "llvm/TargetParser/PPCTargetParser.def"
auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
unsigned Mask, CmpInst::Predicate CompOp,
unsigned OpValue) -> Value * {
if (SupportMethod == BUILTIN_PPC_FALSE)
return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
return llvm::ConstantInt::getFalse(ReturnType);
if (SupportMethod == BUILTIN_PPC_TRUE)
return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
return llvm::ConstantInt::getTrue(ReturnType);
assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
@ -137,12 +127,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
};
switch (BuiltinID) {
default: return nullptr;
case Builtin::BI__builtin_cpu_is: {
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
if (BuiltinID == Builtin::BI__builtin_cpu_is) {
llvm::Triple Triple = getTarget().getTriple();
typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
@ -170,7 +155,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
"Invalid CPU name. Missed by SemaChecking?");
if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
return llvm::ConstantInt::getFalse(ReturnType);
Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
@ -178,47 +163,71 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
return Builder.CreateICmpEQ(TheCall,
llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
}
case Builtin::BI__builtin_cpu_supports: {
llvm::Triple Triple = getTarget().getTriple();
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
if (Triple.isOSAIX()) {
typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
unsigned>
CPUSupportType;
auto [SupportMethod, FieldIdx, Mask, CompOp, Value] =
static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
// else BuiltinID == Builtin::BI__builtin_cpu_supports
llvm::Triple Triple = getTarget().getTriple();
if (Triple.isOSAIX()) {
typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
unsigned>
CPUSupportType;
auto [SupportMethod, FieldIdx, Mask, CompOp, Value] =
static_cast<CPUSupportType>(
StringSwitch<CPUSupportType>(CPUStr)
#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
VALUE) \
.Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
#include "llvm/TargetParser/PPCTargetParser.def"
.Default({BUILTIN_PPC_FALSE, 0, 0,
CmpInst::Predicate(), 0}));
return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
Value);
}
.Default({BUILTIN_PPC_FALSE, 0, 0, CmpInst::Predicate(), 0}));
return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
Value);
}
assert(Triple.isOSLinux() &&
"__builtin_cpu_supports() is only supported for AIX and Linux.");
auto [FeatureWord, BitMask] =
StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
assert(Triple.isOSLinux() &&
"__builtin_cpu_supports() is only supported for AIX and Linux.");
auto [FeatureWord, BitMask] =
StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
.Case(Name, {FA_WORD, Bitmask})
#include "llvm/TargetParser/PPCTargetParser.def"
.Default({0, 0});
if (!BitMask)
return Builder.getFalse();
Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
Value *Mask =
Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
.Default({0, 0});
if (!BitMask)
return Builder.getFalse();
Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
Value *Mask =
Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
#undef PPC_FAWORD_HWCAP
#undef PPC_FAWORD_HWCAP2
#undef PPC_FAWORD_CPUID
}
}
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
// Do not emit the builtin arguments in the arguments of a function call,
// because the evaluation order of function arguments is not specified in C++.
// This is important when testing to ensure the arguments are emitted in the
// same order every time. Eg:
// Instead of:
// return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
// EmitScalarExpr(E->getArg(1)), "swdiv");
// Use:
// Value *Op0 = EmitScalarExpr(E->getArg(0));
// Value *Op1 = EmitScalarExpr(E->getArg(1));
// return Builder.CreateFDiv(Op0, Op1, "swdiv")
Intrinsic::ID ID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
default:
return nullptr;
case Builtin::BI__builtin_cpu_is:
case Builtin::BI__builtin_cpu_supports: {
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
return EmitPPCBuiltinCpu(BuiltinID, ConvertType(E->getType()), CPUStr);
}
// __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
// call __builtin_readcyclecounter.
case PPC::BI__builtin_ppc_get_timebase:

View File

@ -128,8 +128,38 @@ public:
RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
AggValueSlot Slot) const override;
using ABIInfo::appendAttributeMangling;
void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
raw_ostream &Out) const override;
void appendAttributeMangling(StringRef AttrStr,
raw_ostream &Out) const override;
};
// Appends the mangling suffix for the version of a target_clones attribute
// selected by Index, delegating to the string-based overload.
void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
                                         raw_ostream &Out) const {
  StringRef VersionStr = Attr->getFeatureStr(Index);
  appendAttributeMangling(VersionStr, Out);
}
// Appends the mangling suffix for a single version string: ".default" for the
// default version, ".cpu_<CPU>" for a "cpu=<CPU>" version. Feature-only
// versions are not supported on AIX and trip an assertion.
void AIXABIInfo::appendAttributeMangling(StringRef AttrStr,
                                         raw_ostream &Out) const {
  if (AttrStr == "default") {
    Out << ".default";
    return;
  }

  const ParsedTargetAttr Version = CGT.getTarget().parseTargetAttr(AttrStr);
  if (Version.CPU.empty()) {
    assert(0 && "specifying target features on an FMV is unsupported on AIX");
    return;
  }

  assert(Version.Features.empty() && "cannot have both a CPU and a feature");
  Out << ".cpu_" << Version.CPU;
}
class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
const bool Is64Bit;

View File

@ -55,6 +55,7 @@
#include "clang/Sema/SemaObjC.h"
#include "clang/Sema/SemaOpenCL.h"
#include "clang/Sema/SemaOpenMP.h"
#include "clang/Sema/SemaPPC.h"
#include "clang/Sema/SemaRISCV.h"
#include "clang/Sema/SemaSYCL.h"
#include "clang/Sema/SemaSwift.h"
@ -3631,6 +3632,10 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (S.X86().checkTargetClonesAttr(Params, Locations, NewParams,
AL.getLoc()))
return;
} else if (S.Context.getTargetInfo().getTriple().isOSAIX()) {
if (S.PPC().checkTargetClonesAttr(Params, Locations, NewParams,
AL.getLoc()))
return;
}
Params.clear();
for (auto &SmallStr : NewParams)

View File

@ -22,6 +22,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/Sema/Sema.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/TargetParser/PPCTargetParser.h"
namespace clang {
@ -573,4 +574,72 @@ bool SemaPPC::BuiltinVSX(CallExpr *TheCall) {
return false;
}
bool SemaPPC::checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params,
const SmallVectorImpl<SourceLocation> &Locs,
SmallVectorImpl<SmallString<64>> &NewParams,
SourceLocation AttrLoc) {
using namespace DiagAttrParams;
assert(Params.size() == Locs.size() &&
"Mismatch between number of string parameters and locations");
auto &TargetInfo = getASTContext().getTargetInfo();
bool HasDefault = false;
bool HasComma = false;
for (unsigned I = 0, E = Params.size(); I < E; ++I) {
const StringRef Param = Params[I].trim();
const SourceLocation &Loc = Locs[I];
if (Param.empty() || Param.ends_with(','))
return Diag(Loc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << "" << TargetClones;
if (Param.contains(','))
HasComma = true;
StringRef LHS;
StringRef RHS = Param;
do {
std::tie(LHS, RHS) = RHS.split(',');
LHS = LHS.trim();
const SourceLocation &CurLoc =
Loc.getLocWithOffset(LHS.data() - Param.data());
if (LHS.starts_with("cpu=")) {
StringRef CPUStr = LHS.drop_front(sizeof("cpu=") - 1);
if (!TargetInfo.isValidCPUName(CPUStr))
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unknown << CPU << CPUStr << TargetClones;
else if (!TargetInfo.validateCpuIs(CPUStr))
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << CPU << CPUStr << TargetClones;
} else if (LHS == "default") {
HasDefault = true;
} else {
// it's a feature string, but not supported yet.
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << LHS << TargetClones;
}
SmallString<64> CPU;
if (LHS.starts_with("cpu=")) {
CPU.append("cpu=");
CPU.append(
llvm::PPC::normalizeCPUName(LHS.drop_front(sizeof("cpu=") - 1)));
LHS = CPU.str();
}
if (llvm::is_contained(NewParams, LHS)) {
Diag(CurLoc, diag::warn_target_clone_duplicate_options);
continue;
}
NewParams.push_back(LHS);
} while (!RHS.empty());
}
if (HasComma && Params.size() > 1)
Diag(Locs[0], diag::warn_target_clone_mixed_values);
if (!HasDefault)
return Diag(AttrLoc, diag::err_target_clone_must_have_default);
return false;
}
} // namespace clang

View File

@ -0,0 +1,142 @@
// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -target-cpu pwr7 -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -target-cpu pwr7 -emit-llvm %s -o - | FileCheck %s
// CHECK: @internal = internal ifunc i32 (), ptr @internal.resolver
// CHECK: @foo = ifunc i32 (), ptr @foo.resolver
// CHECK: @foo_dupes = ifunc void (), ptr @foo_dupes.resolver
// CHECK: @unused = ifunc void (), ptr @unused.resolver
// CHECK: @foo_inline = linkonce ifunc i32 (), ptr @foo_inline.resolver
// CHECK: @foo_ref_then_def = ifunc i32 (), ptr @foo_ref_then_def.resolver
// CHECK: @foo_priority = ifunc i32 (i32), ptr @foo_priority.resolver
// CHEECK: @isa_level = ifunc i32 (i32), ptr @isa_level.resolver
static int __attribute__((target_clones("cpu=power10, default"))) internal(void) { return 0; }
int use(void) { return internal(); }
// CHECK: define internal ptr @internal.resolver()
// test all supported cpus
int __attribute__((target_clones("cpu=power10, cpu=power11, cpu=pwr9, cpu=pwr7, cpu=power8, default"))) foo(void) { return 0; }
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr10() #[[#ATTR_P10:]]
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr11() #[[#ATTR_P11:]]
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr9() #[[#ATTR_P9:]]
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr7() #[[#ATTR_P7:]]
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr8() #[[#ATTR_P8:]]
// CHECK: define internal {{.*}}i32 @foo.default() #[[#ATTR_P7:]]
// CHECK: define internal ptr @foo.resolver()
// CHECK: ret ptr @foo.cpu_pwr11
// CHECK: ret ptr @foo.cpu_pwr10
// CHECK: ret ptr @foo.cpu_pwr9
// CHECK: ret ptr @foo.cpu_pwr8
// CHECK: ret ptr @foo.cpu_pwr7
// CHECK: ret ptr @foo.default
__attribute__((target_clones("default,default ,cpu=pwr8"))) void foo_dupes(void) {}
// CHECK: define internal void @foo_dupes.default() #[[#ATTR_P7]]
// CHECK: define internal void @foo_dupes.cpu_pwr8() #[[#ATTR_P8:]]
// CHECK: define internal ptr @foo_dupes.resolver()
// CHECK: ret ptr @foo_dupes.cpu_pwr8
// CHECK: ret ptr @foo_dupes.default
void bar2(void) {
// CHECK: define {{.*}}void @bar2()
foo_dupes();
// CHECK: call void @foo_dupes()
}
int bar(void) {
// CHECK: define {{.*}}i32 @bar()
return foo();
// CHECK: call {{.*}}i32 @foo()
}
void __attribute__((target_clones("default, cpu=pwr9"))) unused(void) {}
// CHECK: define internal void @unused.default() #[[#ATTR_P7]]
// CHECK: define internal void @unused.cpu_pwr9() #[[#ATTR_P9:]]
// CHECK: define internal ptr @unused.resolver()
// CHECK: ret ptr @unused.cpu_pwr9
// CHECK: ret ptr @unused.default
int __attribute__((target_clones("cpu=power10, default"))) inherited(void);
int inherited(void) { return 0; }
// CHECK: define internal {{.*}}i32 @inherited.cpu_pwr10() #[[#ATTR_P10]]
// CHECK: define internal {{.*}}i32 @inherited.default() #[[#ATTR_P7]]
// CHECK: define internal ptr @inherited.resolver()
// CHECK: ret ptr @inherited.cpu_pwr10
// CHECK: ret ptr @inherited.default
int test_inherited(void) {
// CHECK: define {{.*}}i32 @test_inherited()
return inherited();
// CHECK: call {{.*}}i32 @inherited()
}
inline int __attribute__((target_clones("default,cpu=pwr8")))
foo_inline(void) { return 0; }
int __attribute__((target_clones("cpu=pwr7,default")))
foo_ref_then_def(void);
int bar3(void) {
// CHECK: define {{.*}}i32 @bar3()
return foo_inline() + foo_ref_then_def();
// CHECK: call {{.*}}i32 @foo_inline()
// CHECK: call {{.*}}i32 @foo_ref_then_def()
}
// CHECK: define internal ptr @foo_inline.resolver()
// CHECK: ret ptr @foo_inline.cpu_pwr8
// CHECK: ret ptr @foo_inline.default
int __attribute__((target_clones("cpu=pwr7,default")))
foo_ref_then_def(void){ return 0; }
// CHECK: define internal ptr @foo_ref_then_def.resolver()
// CHECK: ret ptr @foo_ref_then_def.cpu_pwr7
// CHECK: ret ptr @foo_ref_then_def.default
int __attribute__((target_clones("default", "cpu=pwr8")))
foo_unused_no_defn(void);
// CHECK-NOT: foo_unused_no_defn
int __attribute__((target_clones("default", "cpu=pwr9")))
foo_used_no_defn(void);
int test_foo_used_no_defn(void) {
// CHECK: define {{.*}}i32 @test_foo_used_no_defn()
return foo_used_no_defn();
// CHECK: call {{.*}}i32 @foo_used_no_defn()
}
// CHECK: declare {{.*}}i32 @foo_used_no_defn()
// Test that the CPU conditions are checked from the most to the least
// restrictive (highest to lowest CPU). Also test the codegen for the
// conditions
int __attribute__((target_clones("cpu=pwr10", "cpu=pwr7", "cpu=pwr9", "default", "cpu=pwr8")))
foo_priority(int x) { return x & (x - 1); }
// CHECK: define internal ptr @foo_priority.resolver()
// CHECK-NEXT: entry
// if (__builtin_cpu_supports("arch_3_1")) return &foo_priority.cpu_pwr10;
// CHECK-NEXT: %[[#L1:]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_system_configuration, {{i32|i64}} 4)
// CHECK-NEXT: icmp uge i32 %[[#L1]], 262144
// CHECK: ret ptr @foo_priority.cpu_pwr10
// if (__builtin_cpu_supports("arch_3_00")) return &foo_priority.cpu_pwr9;
// CHECK: %[[#L2:]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_system_configuration, {{i32|i64}} 4)
// CHECK-NEXT: icmp uge i32 %[[#L2]], 131072
// CHECK: ret ptr @foo_priority.cpu_pwr9
// if (__builtin_cpu_supports("arch_2_07")) return &foo_priority.cpu_pwr8;
// CHECK: %[[#L3:]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_system_configuration, {{i32|i64}} 4)
// CHECK-NEXT: icmp uge i32 %[[#L3]], 65536
// CHECK: ret ptr @foo_priority.cpu_pwr8
// if (__builtin_cpu_supports("arch_2_06")) return &foo_priority.cpu_pwr7;
// CHECK: %[[#L4:]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_system_configuration, {{i32|i64}} 4)
// CHECK-NEXT: icmp uge i32 %[[#L4]], 32768
// CHECK: ret ptr @foo_priority.cpu_pwr7
// CHECK: ret ptr @foo_priority.default
// CHECK: attributes #[[#ATTR_P7]] = {{.*}} "target-cpu"="pwr7"
// CHECK: attributes #[[#ATTR_P10]] = {{.*}} "target-cpu"="pwr10"
// CHECK: attributes #[[#ATTR_P11]] = {{.*}} "target-cpu"="pwr11"
// CHECK: attributes #[[#ATTR_P9]] = {{.*}} "target-cpu"="pwr9"
// CHECK: attributes #[[#ATTR_P8]] = {{.*}} "target-cpu"="pwr8"

View File

@ -0,0 +1,132 @@
// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -fsyntax-only -verify %s
// expected-error@+1 {{'target_clones' multiversioning requires a default target}}
void __attribute__((target_clones("cpu=pwr7")))
no_default(void);
// expected-error@+2 {{'target_clones' and 'target' attributes are not compatible}}
// expected-note@+1 {{conflicting attribute is here}}
void __attribute__((target("cpu=pwr7"), target_clones("cpu=pwr8")))
ignored_attr(void);
// expected-error@+2 {{'target' and 'target_clones' attributes are not compatible}}
// expected-note@+1 {{conflicting attribute is here}}
void __attribute__((target_clones("default", "cpu=pwr8"), target("cpu=pwr7")))
ignored_attr2(void);
// Redeclaring with a different version list (pwr9 vs. pwr7) is an error.
int __attribute__((target_clones("cpu=pwr9", "default"))) redecl4(void);
// expected-error@+3 {{'target_clones' attribute does not match previous declaration}}
// expected-note@-2 {{previous declaration is here}}
int __attribute__((target_clones("cpu=pwr7", "default")))
redecl4(void) { return 1; }
// A 'target_clones' declaration followed by a 'target' definition of the same
// function mixes two multiversioning attributes and is rejected.
int __attribute__((target_clones("cpu=pwr7", "default"))) redecl7(void);
// expected-error@+2 {{multiversioning attributes cannot be combined}}
// expected-note@-2 {{previous declaration is here}}
int __attribute__((target("cpu=pwr8"))) redecl7(void) { return 1; }
// The reverse order: a 'target' definition followed by a 'target_clones'
// definition of the same function is rejected with the same diagnostic.
int __attribute__((target("cpu=pwr9"))) redef2(void) { return 1; }
// expected-error@+2 {{multiversioning attributes cannot be combined}}
// expected-note@-2 {{previous declaration is here}}
int __attribute__((target_clones("cpu=pwr7", "default"))) redef2(void) { return 1; }
// Two definitions carrying the identical version list are an ordinary
// redefinition.
int __attribute__((target_clones("cpu=pwr9,default"))) redef3(void) { return 1; }
// expected-error@+2 {{redefinition of 'redef3'}}
// expected-note@-2 {{previous definition is here}}
int __attribute__((target_clones("cpu=pwr9,default"))) redef3(void) { return 1; }
// Duplicates are allowed: they draw warnings only, not errors.
// The list below mixes comma-separated entries inside one string with
// separate string arguments, which draws its own compatibility warning.
// The duplicate-entry warning is emitted twice for this declaration.
// NOTE(review): presumably "pwr9" and "power9" name the same CPU and are
// therefore counted as duplicates -- confirm against the CPU alias table.
// expected-warning@+2 {{mixing 'target_clones' specifier mechanisms is permitted for GCC compatibility}}
// expected-warning@+1 2 {{version list contains duplicate entries}}
int __attribute__((target_clones("cpu=pwr9,cpu=power9", "cpu=power9, default")))
dupes(void) { return 1; }
// An empty entry anywhere in the version list draws an "unsupported ''"
// warning. The cases below cover: a lone empty string, a leading comma, a
// trailing comma, a whitespace-only entry between commas, and an empty string
// passed as a separate argument.
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
void __attribute__((target_clones("")))
empty_target_1(void);
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
void __attribute__((target_clones(",default")))
empty_target_2(void);
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
void __attribute__((target_clones("default,")))
empty_target_3(void);
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
void __attribute__((target_clones("default, ,cpu=pwr7")))
empty_target_4(void);
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
void __attribute__((target_clones("default,cpu=pwr7", "")))
empty_target_5(void);
// Listing "default" twice is only a duplicate-entry warning.
// expected-warning@+1 {{version list contains duplicate entries}}
void __attribute__((target_clones("default", "default")))
dupe_default(void);
// "cpu=pwr9" and "cpu=power9" in the same list draw the duplicate-entry warning.
// expected-warning@+1 {{version list contains duplicate entries}}
void __attribute__((target_clones("cpu=pwr9,cpu=power9,default")))
dupe_normal(void);
// Two separate 'target_clones' attributes on one declaration are an error,
// unlike duplicate entries inside a single attribute.
// expected-error@+2 {{attribute 'target_clones' cannot appear more than once on a declaration}}
// expected-note@+1 {{conflicting attribute is here}}
void __attribute__((target_clones("cpu=pwr7,default"), target_clones("cpu=pwr8,default")))
dupe_normal2(void);
// A function that was declared without the attribute and already called
// cannot later be turned into a multiversioned function.
int mv_after_use(void);
int useage(void) {
return mv_after_use();
}
// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}}
int __attribute__((target_clones("cpu=pwr9", "default"))) mv_after_use(void) { return 1; }
// 'target_clones' does not permit overloading in C: a second declaration with
// a different parameter list is reported as a type conflict.
// Case 1: attributed declaration first, plain definition second.
void bad_overload1(void) __attribute__((target_clones("cpu=pwr8", "default")));
// expected-error@+2 {{conflicting types for 'bad_overload1'}}
// expected-note@-2 {{previous declaration is here}}
void bad_overload1(int p) {}
// Case 2: plain definition first, attributed declaration second.
void bad_overload2(int p) {}
// expected-error@+2 {{conflicting types for 'bad_overload2'}}
// expected-note@-2 {{previous definition is here}}
void bad_overload2(void) __attribute__((target_clones("cpu=pwr8", "default")));
// Case 3: both declarations carry the same attribute.
void bad_overload3(void) __attribute__((target_clones("cpu=pwr8", "default")));
// expected-error@+2 {{conflicting types for 'bad_overload3'}}
// expected-note@-2 {{previous declaration is here}}
void bad_overload3(int) __attribute__((target_clones("cpu=pwr8", "default")));
// Interaction with __overloadable__. Only good_overload1 is accepted without
// diagnostics: there the overloadable declaration does not itself carry
// 'target_clones'.
void good_overload1(void) __attribute__((target_clones("cpu=pwr7", "cpu=power10", "default")));
void __attribute__((__overloadable__)) good_overload1(int p) {}
// Despite the good_ prefix, every declaration below that has both
// __overloadable__ and 'target_clones' is diagnosed as an error.
// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
void __attribute__((__overloadable__)) good_overload2(void) __attribute__((target_clones("cpu=pwr7", "default")));
void good_overload2(int p) {}
// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
void __attribute__((__overloadable__)) good_overload3(void) __attribute__((target_clones("cpu=pwr7", "default")));
// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
void __attribute__((__overloadable__)) good_overload3(int) __attribute__((target_clones("cpu=pwr7", "default")));
void good_overload4(void) __attribute__((target_clones("cpu=pwr7", "default")));
// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
void __attribute__((__overloadable__)) good_overload4(int) __attribute__((target_clones("cpu=pwr7", "default")));
// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
void __attribute__((__overloadable__)) good_overload5(void) __attribute__((target_clones("cpu=pwr7", "default")));
void good_overload5(int) __attribute__((target_clones("cpu=pwr7", "default")));
// CPU-name validation. A list of pwr7 through pwr10 plus "default" is accepted
// without diagnostics.
void good_isa_level(int) __attribute__((target_clones("default", "cpu=pwr7", "cpu=pwr8", "cpu=pwr9", "cpu=pwr10")));
// An unrecognised CPU name draws an "unknown CPU" warning, while pwr3 draws a
// distinct "unsupported CPU" warning; in both cases the attribute is ignored.
// Both declarations reuse the name bad_cpu.
// expected-warning@+1 {{unknown CPU 'bad-cpu' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
void bad_cpu(int) __attribute__((target_clones("default", "cpu=bad-cpu")));
// expected-warning@+1 {{unsupported CPU 'pwr3' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
void bad_cpu(int) __attribute__((target_clones("default", "cpu=pwr3")));
// An attribute with an empty argument list, or with no argument list at all,
// is reported as missing the default target.
// NOTE(review): the gh173684 prefix presumably refers to a GitHub issue
// number -- confirm.
// expected-error@+1 {{'target_clones' multiversioning requires a default target}}
void __attribute__((target_clones()))
gh173684_empty_attribute_args(void);
// expected-error@+1 {{'target_clones' multiversioning requires a default target}}
void __attribute__((target_clones))
gh173684_empty_attribute_args_2(void);

View File

@ -75,15 +75,13 @@ int __attribute__((target("tune=pwr8"))) baz(void) { return 4; }
//expected-warning@+1 {{unsupported 'fpmath=' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("fpmath=387"))) walrus(void) { return 4; }
//expected-warning@+1 {{unknown CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("float128,arch=hiss"))) meow(void) { return 4; }
int __attribute__((target("float128,cpu=hiss"))) meow(void) { return 4; }
// no warning, same as saying 'nothing'.
int __attribute__((target("arch="))) turtle(void) { return 4; }
int __attribute__((target("cpu="))) turtle(void) { return 4; }
//expected-warning@+1 {{unknown CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("arch=hiss,arch=woof"))) pine_tree(void) { return 4; }
//expected-warning@+1 {{duplicate 'arch=' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("arch=pwr9,arch=pwr10"))) oak_tree(void) { return 4; }
//expected-warning@+1 {{unsupported 'branch-protection' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("branch-protection=none"))) birch_tree(void) { return 5; }
int __attribute__((target("cpu=hiss,cpu=woof"))) pine_tree(void) { return 4; }
//expected-warning@+1 {{duplicate 'cpu=' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("cpu=pwr9,cpu=pwr10"))) oak_tree(void) { return 4; }
//expected-warning@+1 {{unknown tune CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
int __attribute__((target("tune=hiss,tune=woof"))) apple_tree(void) { return 4; }