[FMV][AIX] Implement target_clones (cpu-only) (#177428)
This PR implements Function Multi-versioning on AIX using `__attribute__
((target_clones(<feature-list>)))`.
Initially, we will only support specifying a cpu in the version list.
Feature strings (such as "altivec" or "isel") on target_clones will be
implemented in a future PR.
Accepted syntax:
```
__attribute__((target_clones(<OPTIONS>)))
```
where `<OPTIONS>` is a comma-separated list of strings, each of which is
either:
1) the default string `"default"`
2) a cpu string `"cpu=<CPU>"`, where `<CPU>` is a value accepted by the
`-mcpu` flag.
For example, specifying the following on a function
```
__attribute__((target_clones("default", "cpu=power8", "cpu=power9")))
int foo(int x) { return x + 1; }
```
would generate three versions of `foo`: (1) `foo.default`, (2)
`foo.cpu_pwr8`, and (3) `foo.cpu_pwr9`,
an IFUNC `foo`, and the resolver function `foo.resolver` for the IFUNC,
which chooses one of the three versions at runtime.
---------
Co-authored-by: Wael Yehia <wyehia@ca.ibm.com>
This commit is contained in:
parent
3661bf74cd
commit
495c518b96
@ -3294,6 +3294,12 @@ multiversioned function would have if it had been declared without the attribute
|
||||
For backward compatibility with earlier Clang releases, a function alias with an
|
||||
``.ifunc`` suffix is also emitted. The ``.ifunc`` suffixed symbol is a deprecated
|
||||
feature and support for it may be removed in the future.
|
||||
|
||||
For PowerPC targets, ``target_clones`` is supported on AIX only. Only CPU
|
||||
(specified as ``cpu=CPU``) and ``default`` options are allowed. IFUNC is supported
|
||||
on AIX in Clang, so dispatch is implemented similarly to other targets using IFUNC.
|
||||
An FMV function that is only declared in a translation unit is treated as a
|
||||
non-FMV function. The resolver and the function clones are given internal linkage.
|
||||
}];
|
||||
}
|
||||
|
||||
|
||||
@ -1571,7 +1571,7 @@ public:
|
||||
/// which requires support for cpu_supports and cpu_is functionality.
|
||||
bool supportsMultiVersioning() const {
|
||||
return getTriple().isX86() || getTriple().isAArch64() ||
|
||||
getTriple().isRISCV();
|
||||
getTriple().isRISCV() || getTriple().isOSAIX();
|
||||
}
|
||||
|
||||
/// Identify whether this target supports IFuncs.
|
||||
|
||||
@ -53,6 +53,11 @@ public:
|
||||
// vector double vec_xxpermdi(vector double, vector double, int);
|
||||
// vector short vec_xxsldwi(vector short, vector short, int);
|
||||
bool BuiltinVSX(CallExpr *TheCall);
|
||||
|
||||
bool checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params,
|
||||
const SmallVectorImpl<SourceLocation> &Locs,
|
||||
SmallVectorImpl<SmallString<64>> &NewParams,
|
||||
SourceLocation AttrLoc);
|
||||
};
|
||||
} // namespace clang
|
||||
|
||||
|
||||
@ -15247,6 +15247,14 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
|
||||
Target->getTargetOpts().FeaturesAsWritten.begin(),
|
||||
Target->getTargetOpts().FeaturesAsWritten.end());
|
||||
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
|
||||
} else if (Target->getTriple().isOSAIX()) {
|
||||
std::vector<std::string> Features;
|
||||
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
|
||||
if (VersionStr.starts_with("cpu="))
|
||||
TargetCPU = VersionStr.drop_front(sizeof("cpu=") - 1);
|
||||
else
|
||||
assert(VersionStr == "default");
|
||||
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
|
||||
} else {
|
||||
std::vector<std::string> Features;
|
||||
StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
|
||||
|
||||
@ -678,6 +678,57 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
|
||||
}
|
||||
}
|
||||
|
||||
/// Split a comma-separated target / target_clones version string into its
/// CPU, tune-CPU and feature components.
///
/// The literal string "default" yields an empty result. Otherwise each
/// comma-separated entry is trimmed and classified:
///   - "cpu=<name>"  sets CPU; a second one is recorded in Duplicate instead.
///   - "tune=<name>" sets Tune; a second one is recorded in Duplicate instead.
///   - "no-<feat>"   is appended to Features as "-<feat>".
///   - anything else is appended to Features as "+<entry>".
ParsedTargetAttr PPCTargetInfo::parseTargetAttr(StringRef Features) const {
  ParsedTargetAttr Parsed;
  if (Features == "default")
    return Parsed;

  SmallVector<StringRef, 1> Entries;
  Features.split(Entries, ",");

  for (StringRef Entry : Entries) {
    // Tolerate stray whitespace around an entry instead of rejecting it or
    // letting it leak into the feature name.
    Entry = Entry.trim();

    if (Entry.starts_with("cpu=")) {
      // Only the first cpu= wins; later ones are reported as duplicates.
      if (Parsed.CPU.empty())
        Parsed.CPU = Entry.drop_front(sizeof("cpu=") - 1).trim();
      else
        Parsed.Duplicate = "cpu=";
      continue;
    }

    if (Entry.starts_with("tune=")) {
      // Same first-one-wins rule as for cpu=.
      if (Parsed.Tune.empty())
        Parsed.Tune = Entry.drop_front(sizeof("tune=") - 1).trim();
      else
        Parsed.Duplicate = "tune=";
      continue;
    }

    // Remaining entries are feature toggles: "no-x" disables x, anything
    // else is enabled. The backend expects a leading '-' or '+'.
    if (Entry.starts_with("no-"))
      Parsed.Features.push_back("-" +
                                Entry.drop_front(sizeof("no-") - 1).str());
    else
      Parsed.Features.push_back("+" + Entry.str());
  }
  return Parsed;
}
|
||||
|
||||
/// Compute the dispatch priority of one function-multiversioning clone.
///
/// An empty feature list gets priority 0. Otherwise exactly one version
/// string is expected; if it names a CPU, the priority grows with the CPU
/// generation (pwr7 = 1 ... pwr11 = 5) and any other CPU name gets 0.
/// Version strings without a CPU are not implemented: they assert in
/// assertion-enabled builds and yield 0 otherwise.
llvm::APInt PPCTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
  constexpr unsigned PriorityBits = 32;

  if (Features.empty())
    return llvm::APInt(PriorityBits, 0);

  assert(Features.size() == 1 && "one feature/cpu per clone on PowerPC");
  const ParsedTargetAttr Version = parseTargetAttr(Features[0]);

  // Feature-based (non-CPU) versions are not handled yet.
  if (Version.CPU.empty()) {
    assert(false && "unimplemented");
    return llvm::APInt(PriorityBits, 0);
  }

  // Newer CPUs rank higher.
  const int Rank = llvm::StringSwitch<int>(Version.CPU)
                       .Case("pwr7", 1)
                       .Case("pwr8", 2)
                       .Case("pwr9", 3)
                       .Case("pwr10", 4)
                       .Case("pwr11", 5)
                       .Default(0);
  return llvm::APInt(PriorityBits, Rank);
}
|
||||
|
||||
// Make sure that registers are added in the correct array index which should be
|
||||
// the DWARF number for PPC registers.
|
||||
const char *const PPCTargetInfo::GCCRegNames[] = {
|
||||
|
||||
@ -199,6 +199,10 @@ public:
|
||||
|
||||
bool supportsTargetAttributeTune() const override { return true; }
|
||||
|
||||
ParsedTargetAttr parseTargetAttr(StringRef Str) const override;
|
||||
|
||||
llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override;
|
||||
|
||||
ArrayRef<const char *> getGCCRegNames() const override;
|
||||
|
||||
ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;
|
||||
|
||||
@ -45,6 +45,7 @@
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/IntrinsicsPowerPC.h"
|
||||
#include "llvm/IR/MDBuilder.h"
|
||||
#include "llvm/Support/CRC.h"
|
||||
#include "llvm/Support/SipHash.h"
|
||||
@ -3075,12 +3076,86 @@ void CodeGenFunction::EmitMultiVersionResolver(
|
||||
case llvm::Triple::riscv64be:
|
||||
EmitRISCVMultiVersionResolver(Resolver, Options);
|
||||
return;
|
||||
|
||||
case llvm::Triple::ppc:
|
||||
case llvm::Triple::ppc64:
|
||||
if (getContext().getTargetInfo().getTriple().isOSAIX()) {
|
||||
EmitPPCAIXMultiVersionResolver(Resolver, Options);
|
||||
return;
|
||||
}
|
||||
[[fallthrough]];
|
||||
default:
|
||||
assert(false && "Only implemented for x86, AArch64 and RISC-V targets");
|
||||
assert(false &&
|
||||
"Only implemented for x86, AArch64, RISC-V, and PowerPC AIX");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* define internal ptr @foo.resolver() {
|
||||
* entry:
|
||||
* %is_version_1 = __builtin_cpu_supports(version_1)
|
||||
* br i1 %1, label %if.version_1, label %if.else_2
|
||||
*
|
||||
* if.version_1:
|
||||
* ret ptr @foo.version_1
|
||||
*
|
||||
* if.else_2:
|
||||
* %is_version_2 = __builtin_cpu_supports(version_2)
|
||||
* ...
|
||||
* if.else: ; preds = %entry
|
||||
* ret ptr @foo.default
|
||||
* }
|
||||
*/
|
||||
void CodeGenFunction::EmitPPCAIXMultiVersionResolver(
|
||||
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
|
||||
|
||||
// entry:
|
||||
llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver);
|
||||
|
||||
SmallVector<std::pair<llvm::Value *, llvm::BasicBlock *>, 3> PhiArgs;
|
||||
for (const FMVResolverOption &RO : Options) {
|
||||
Builder.SetInsertPoint(CurBlock);
|
||||
// The 'default' or 'generic' case.
|
||||
if (!RO.Architecture && RO.Features.empty()) {
|
||||
// if.else:
|
||||
// ret ptr @foo.default
|
||||
assert(&RO == Options.end() - 1 &&
|
||||
"Default or Generic case must be last");
|
||||
Builder.CreateRet(RO.Function);
|
||||
return;
|
||||
}
|
||||
// if.else_n:
|
||||
// %is_version_n = __builtin_cpu_supports(version_n)
|
||||
// br i1 %is_version_n, label %if.version_n, label %if.else_n+1
|
||||
//
|
||||
// if.version_n:
|
||||
// ret ptr @foo_version_n
|
||||
assert(RO.Features.size() == 1 &&
|
||||
"for now one feature requirement per version");
|
||||
|
||||
assert(RO.Features[0].starts_with("cpu="));
|
||||
StringRef CPU = RO.Features[0].split("=").second.trim();
|
||||
StringRef Feature = llvm::StringSwitch<StringRef>(CPU)
|
||||
.Case("pwr7", "arch_2_06")
|
||||
.Case("pwr8", "arch_2_07")
|
||||
.Case("pwr9", "arch_3_00")
|
||||
.Case("pwr10", "arch_3_1")
|
||||
.Case("pwr11", "arch_3_1")
|
||||
.Default("error");
|
||||
|
||||
llvm::Value *Condition = EmitPPCBuiltinCpu(
|
||||
Builtin::BI__builtin_cpu_supports, Builder.getInt1Ty(), Feature);
|
||||
|
||||
llvm::BasicBlock *ThenBlock = createBasicBlock("if.version", Resolver);
|
||||
CurBlock = createBasicBlock("if.else", Resolver);
|
||||
Builder.CreateCondBr(Condition, ThenBlock, CurBlock);
|
||||
|
||||
Builder.SetInsertPoint(ThenBlock);
|
||||
Builder.CreateRet(RO.Function);
|
||||
}
|
||||
|
||||
llvm_unreachable("Default case missing");
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitRISCVMultiVersionResolver(
|
||||
llvm::Function *Resolver, ArrayRef<FMVResolverOption> Options) {
|
||||
|
||||
|
||||
@ -4912,6 +4912,8 @@ public:
|
||||
|
||||
llvm::Value *BuildVector(ArrayRef<llvm::Value *> Ops);
|
||||
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
|
||||
llvm::Value *EmitPPCBuiltinCpu(unsigned BuiltinID, llvm::Type *ReturnType,
|
||||
StringRef CPUStr);
|
||||
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
|
||||
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
|
||||
llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
|
||||
@ -5578,6 +5580,8 @@ public:
|
||||
ArrayRef<FMVResolverOption> Options);
|
||||
void EmitRISCVMultiVersionResolver(llvm::Function *Resolver,
|
||||
ArrayRef<FMVResolverOption> Options);
|
||||
void EmitPPCAIXMultiVersionResolver(llvm::Function *Resolver,
|
||||
ArrayRef<FMVResolverOption> Options);
|
||||
|
||||
Address EmitAddressOfPFPField(Address RecordPtr, const PFPField &Field);
|
||||
Address EmitAddressOfPFPField(Address RecordPtr, Address FieldPtr,
|
||||
|
||||
@ -3060,11 +3060,13 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
|
||||
|
||||
// Now add the target-cpu and target-features to the function.
|
||||
// While we populated the feature map above, we still need to
|
||||
// get and parse the target attribute so we can get the cpu for
|
||||
// the function.
|
||||
if (TD) {
|
||||
ParsedTargetAttr ParsedAttr =
|
||||
Target.parseTargetAttr(TD->getFeaturesStr());
|
||||
// get and parse the target/target_clones attribute so we can
|
||||
// get the cpu for the function.
|
||||
StringRef FeatureStr = TD ? TD->getFeaturesStr() : StringRef();
|
||||
if (TC && (getTriple().isOSAIX() || getTriple().isX86()))
|
||||
FeatureStr = TC->getFeatureStr(GD.getMultiVersionIndex());
|
||||
if (!FeatureStr.empty()) {
|
||||
ParsedTargetAttr ParsedAttr = Target.parseTargetAttr(FeatureStr);
|
||||
if (!ParsedAttr.CPU.empty() &&
|
||||
getTarget().isValidCPUName(ParsedAttr.CPU)) {
|
||||
TargetCPU = ParsedAttr.CPU;
|
||||
@ -4769,7 +4771,7 @@ getFMVPriority(const TargetInfo &TI,
|
||||
static llvm::GlobalValue::LinkageTypes
|
||||
getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) {
|
||||
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
|
||||
if (FD->getFormalLinkage() == Linkage::Internal)
|
||||
if (FD->getFormalLinkage() == Linkage::Internal || CGM.getTriple().isOSAIX())
|
||||
return llvm::GlobalValue::InternalLinkage;
|
||||
return llvm::GlobalValue::WeakODRLinkage;
|
||||
}
|
||||
@ -4803,7 +4805,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
|
||||
// For AArch64, a resolver is only emitted if a function marked with
|
||||
// target_version("default")) or target_clones("default") is defined
|
||||
// in this TU. For other architectures it is always emitted.
|
||||
bool ShouldEmitResolver = !getTarget().getTriple().isAArch64();
|
||||
bool ShouldEmitResolver = !getTriple().isAArch64();
|
||||
SmallVector<CodeGenFunction::FMVResolverOption, 10> Options;
|
||||
llvm::DenseMap<llvm::Function *, const FunctionDecl *> DeclMap;
|
||||
|
||||
@ -4855,7 +4857,8 @@ void CodeGenModule::emitMultiVersionFunctions() {
|
||||
if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant)) {
|
||||
ResolverConstant = IFunc->getResolver();
|
||||
if (FD->isTargetClonesMultiVersion() &&
|
||||
!getTarget().getTriple().isAArch64()) {
|
||||
!getTarget().getTriple().isAArch64() &&
|
||||
!getTarget().getTriple().isOSAIX()) {
|
||||
std::string MangledName = getMangledNameImpl(
|
||||
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
|
||||
if (!GetGlobalValue(MangledName + ".ifunc")) {
|
||||
@ -5155,9 +5158,14 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
|
||||
llvm::Constant *Resolver = GetOrCreateLLVMFunction(
|
||||
MangledName + ".resolver", ResolverType, GlobalDecl{},
|
||||
/*ForVTable=*/false);
|
||||
llvm::GlobalIFunc *GIF =
|
||||
llvm::GlobalIFunc::create(DeclTy, AS, getMultiversionLinkage(*this, GD),
|
||||
"", Resolver, &getModule());
|
||||
|
||||
// on AIX, the FMV is ignored on a declaration, and so we don't need the
|
||||
// ifunc, which is only generated on FMV definitions, to be weak.
|
||||
auto Linkage = getTriple().isOSAIX() ? getFunctionLinkage(GD)
|
||||
: getMultiversionLinkage(*this, GD);
|
||||
|
||||
llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, AS, Linkage, "",
|
||||
Resolver, &getModule());
|
||||
GIF->setName(ResolverName);
|
||||
SetCommonAttributes(FD, GIF);
|
||||
if (ResolverGV)
|
||||
@ -5176,6 +5184,7 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
|
||||
void CodeGenModule::setMultiVersionResolverAttributes(llvm::Function *Resolver,
|
||||
GlobalDecl GD) {
|
||||
const NamedDecl *D = dyn_cast_or_null<NamedDecl>(GD.getDecl());
|
||||
|
||||
Resolver->setLinkage(getMultiversionLinkage(*this, GD));
|
||||
|
||||
// Function body has to be emitted before calling setGlobalVisibility
|
||||
@ -5255,6 +5264,15 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
|
||||
AddDeferredMultiVersionResolverToEmit(GD);
|
||||
NameWithoutMultiVersionMangling = getMangledNameImpl(
|
||||
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
|
||||
}
|
||||
// On AIX, a declared (but not defined) FMV shall be treated like a
|
||||
// regular non-FMV function. If a definition is later seen, then
|
||||
// GetOrCreateMultiVersionResolver will get called (when processing said
|
||||
// definition) which will replace the IR declaration we're creating here
|
||||
// with the FMV ifunc (see replaceDeclarationWith).
|
||||
else if (getTriple().isOSAIX() && !FD->isDefined()) {
|
||||
NameWithoutMultiVersionMangling = getMangledNameImpl(
|
||||
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
|
||||
} else
|
||||
return GetOrCreateMultiVersionResolver(GD);
|
||||
}
|
||||
@ -6713,6 +6731,9 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
|
||||
auto *Fn = cast<llvm::Function>(GV);
|
||||
setFunctionLinkage(GD, Fn);
|
||||
|
||||
if (getTriple().isOSAIX() && D->isTargetClonesMultiVersion())
|
||||
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
|
||||
|
||||
// FIXME: this is redundant with part of setFunctionDefinitionAttributes
|
||||
setGVProperties(Fn, GD);
|
||||
|
||||
|
||||
@ -70,31 +70,21 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
|
||||
return CI;
|
||||
}
|
||||
|
||||
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
||||
const CallExpr *E) {
|
||||
// Do not emit the builtin arguments in the arguments of a function call,
|
||||
// because the evaluation order of function arguments is not specified in C++.
|
||||
// This is important when testing to ensure the arguments are emitted in the
|
||||
// same order every time. Eg:
|
||||
// Instead of:
|
||||
// return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
|
||||
// EmitScalarExpr(E->getArg(1)), "swdiv");
|
||||
// Use:
|
||||
// Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
// Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
// return Builder.CreateFDiv(Op0, Op1, "swdiv")
|
||||
|
||||
Intrinsic::ID ID = Intrinsic::not_intrinsic;
|
||||
Value *CodeGenFunction::EmitPPCBuiltinCpu(unsigned BuiltinID,
|
||||
llvm::Type *ReturnType,
|
||||
StringRef CPUStr) {
|
||||
assert(BuiltinID == Builtin::BI__builtin_cpu_is ||
|
||||
BuiltinID == Builtin::BI__builtin_cpu_supports);
|
||||
|
||||
#include "llvm/TargetParser/PPCTargetParser.def"
|
||||
auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
|
||||
unsigned Mask, CmpInst::Predicate CompOp,
|
||||
unsigned OpValue) -> Value * {
|
||||
if (SupportMethod == BUILTIN_PPC_FALSE)
|
||||
return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
|
||||
return llvm::ConstantInt::getFalse(ReturnType);
|
||||
|
||||
if (SupportMethod == BUILTIN_PPC_TRUE)
|
||||
return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
|
||||
return llvm::ConstantInt::getTrue(ReturnType);
|
||||
|
||||
assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
|
||||
|
||||
@ -137,12 +127,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
||||
ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
|
||||
};
|
||||
|
||||
switch (BuiltinID) {
|
||||
default: return nullptr;
|
||||
|
||||
case Builtin::BI__builtin_cpu_is: {
|
||||
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
|
||||
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
|
||||
if (BuiltinID == Builtin::BI__builtin_cpu_is) {
|
||||
llvm::Triple Triple = getTarget().getTriple();
|
||||
|
||||
typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
|
||||
@ -170,7 +155,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
||||
"Invalid CPU name. Missed by SemaChecking?");
|
||||
|
||||
if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
|
||||
return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
|
||||
return llvm::ConstantInt::getFalse(ReturnType);
|
||||
|
||||
Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
|
||||
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
|
||||
@ -178,47 +163,71 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
||||
return Builder.CreateICmpEQ(TheCall,
|
||||
llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
|
||||
}
|
||||
case Builtin::BI__builtin_cpu_supports: {
|
||||
llvm::Triple Triple = getTarget().getTriple();
|
||||
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
|
||||
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
|
||||
if (Triple.isOSAIX()) {
|
||||
typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
|
||||
unsigned>
|
||||
CPUSupportType;
|
||||
auto [SupportMethod, FieldIdx, Mask, CompOp, Value] =
|
||||
static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
|
||||
// else BuiltinID == Builtin::BI__builtin_cpu_supports
|
||||
llvm::Triple Triple = getTarget().getTriple();
|
||||
if (Triple.isOSAIX()) {
|
||||
typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
|
||||
unsigned>
|
||||
CPUSupportType;
|
||||
auto [SupportMethod, FieldIdx, Mask, CompOp, Value] =
|
||||
static_cast<CPUSupportType>(
|
||||
StringSwitch<CPUSupportType>(CPUStr)
|
||||
#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
|
||||
VALUE) \
|
||||
.Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
|
||||
#include "llvm/TargetParser/PPCTargetParser.def"
|
||||
.Default({BUILTIN_PPC_FALSE, 0, 0,
|
||||
CmpInst::Predicate(), 0}));
|
||||
return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
|
||||
Value);
|
||||
}
|
||||
.Default({BUILTIN_PPC_FALSE, 0, 0, CmpInst::Predicate(), 0}));
|
||||
return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
|
||||
Value);
|
||||
}
|
||||
|
||||
assert(Triple.isOSLinux() &&
|
||||
"__builtin_cpu_supports() is only supported for AIX and Linux.");
|
||||
auto [FeatureWord, BitMask] =
|
||||
StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
|
||||
assert(Triple.isOSLinux() &&
|
||||
"__builtin_cpu_supports() is only supported for AIX and Linux.");
|
||||
auto [FeatureWord, BitMask] =
|
||||
StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
|
||||
#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
|
||||
.Case(Name, {FA_WORD, Bitmask})
|
||||
#include "llvm/TargetParser/PPCTargetParser.def"
|
||||
.Default({0, 0});
|
||||
if (!BitMask)
|
||||
return Builder.getFalse();
|
||||
Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
|
||||
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
|
||||
Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
|
||||
Value *Mask =
|
||||
Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
|
||||
return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
|
||||
.Default({0, 0});
|
||||
if (!BitMask)
|
||||
return Builder.getFalse();
|
||||
Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
|
||||
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
|
||||
Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
|
||||
Value *Mask =
|
||||
Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
|
||||
return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
|
||||
#undef PPC_FAWORD_HWCAP
|
||||
#undef PPC_FAWORD_HWCAP2
|
||||
#undef PPC_FAWORD_CPUID
|
||||
}
|
||||
}
|
||||
|
||||
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
||||
const CallExpr *E) {
|
||||
// Do not emit the builtin arguments in the arguments of a function call,
|
||||
// because the evaluation order of function arguments is not specified in C++.
|
||||
// This is important when testing to ensure the arguments are emitted in the
|
||||
// same order every time. Eg:
|
||||
// Instead of:
|
||||
// return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
|
||||
// EmitScalarExpr(E->getArg(1)), "swdiv");
|
||||
// Use:
|
||||
// Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
// Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
// return Builder.CreateFDiv(Op0, Op1, "swdiv")
|
||||
|
||||
Intrinsic::ID ID = Intrinsic::not_intrinsic;
|
||||
|
||||
switch (BuiltinID) {
|
||||
default:
|
||||
return nullptr;
|
||||
|
||||
case Builtin::BI__builtin_cpu_is:
|
||||
case Builtin::BI__builtin_cpu_supports: {
|
||||
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
|
||||
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
|
||||
return EmitPPCBuiltinCpu(BuiltinID, ConvertType(E->getType()), CPUStr);
|
||||
}
|
||||
// __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
|
||||
// call __builtin_readcyclecounter.
|
||||
case PPC::BI__builtin_ppc_get_timebase:
|
||||
|
||||
@ -128,8 +128,38 @@ public:
|
||||
|
||||
RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
|
||||
AggValueSlot Slot) const override;
|
||||
|
||||
using ABIInfo::appendAttributeMangling;
|
||||
void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
|
||||
raw_ostream &Out) const override;
|
||||
void appendAttributeMangling(StringRef AttrStr,
|
||||
raw_ostream &Out) const override;
|
||||
};
|
||||
|
||||
/// Append the mangling suffix for the target_clones version at position
/// Index of Attr by forwarding its version string to the StringRef overload.
void AIXABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
                                         raw_ostream &Out) const {
  StringRef VersionStr = Attr->getFeatureStr(Index);
  appendAttributeMangling(VersionStr, Out);
}
|
||||
|
||||
/// Write the symbol-name suffix for one multiversion string to Out.
///
/// "default" becomes ".default"; a string that parses to a CPU becomes
/// ".cpu_<CPU>". A string with no CPU is unsupported: it asserts in
/// assertion-enabled builds and writes nothing otherwise.
void AIXABIInfo::appendAttributeMangling(StringRef AttrStr,
                                         raw_ostream &Out) const {
  if (AttrStr != "default") {
    const ParsedTargetAttr Parsed = CGT.getTarget().parseTargetAttr(AttrStr);

    if (Parsed.CPU.empty()) {
      assert(0 && "specifying target features on an FMV is unsupported on AIX");
      return;
    }

    // A version names either a CPU or features, never both.
    assert(Parsed.Features.empty() && "cannot have both a CPU and a feature");
    Out << ".cpu_" << Parsed.CPU;
    return;
  }

  Out << ".default";
}
|
||||
|
||||
class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
|
||||
const bool Is64Bit;
|
||||
|
||||
|
||||
@ -55,6 +55,7 @@
|
||||
#include "clang/Sema/SemaObjC.h"
|
||||
#include "clang/Sema/SemaOpenCL.h"
|
||||
#include "clang/Sema/SemaOpenMP.h"
|
||||
#include "clang/Sema/SemaPPC.h"
|
||||
#include "clang/Sema/SemaRISCV.h"
|
||||
#include "clang/Sema/SemaSYCL.h"
|
||||
#include "clang/Sema/SemaSwift.h"
|
||||
@ -3631,6 +3632,10 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
|
||||
if (S.X86().checkTargetClonesAttr(Params, Locations, NewParams,
|
||||
AL.getLoc()))
|
||||
return;
|
||||
} else if (S.Context.getTargetInfo().getTriple().isOSAIX()) {
|
||||
if (S.PPC().checkTargetClonesAttr(Params, Locations, NewParams,
|
||||
AL.getLoc()))
|
||||
return;
|
||||
}
|
||||
Params.clear();
|
||||
for (auto &SmallStr : NewParams)
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "clang/Sema/Sema.h"
|
||||
#include "llvm/ADT/APSInt.h"
|
||||
#include "llvm/TargetParser/PPCTargetParser.h"
|
||||
|
||||
namespace clang {
|
||||
|
||||
@ -573,4 +574,72 @@ bool SemaPPC::BuiltinVSX(CallExpr *TheCall) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SemaPPC::checkTargetClonesAttr(const SmallVectorImpl<StringRef> &Params,
|
||||
const SmallVectorImpl<SourceLocation> &Locs,
|
||||
SmallVectorImpl<SmallString<64>> &NewParams,
|
||||
SourceLocation AttrLoc) {
|
||||
using namespace DiagAttrParams;
|
||||
|
||||
assert(Params.size() == Locs.size() &&
|
||||
"Mismatch between number of string parameters and locations");
|
||||
|
||||
auto &TargetInfo = getASTContext().getTargetInfo();
|
||||
bool HasDefault = false;
|
||||
bool HasComma = false;
|
||||
for (unsigned I = 0, E = Params.size(); I < E; ++I) {
|
||||
const StringRef Param = Params[I].trim();
|
||||
const SourceLocation &Loc = Locs[I];
|
||||
|
||||
if (Param.empty() || Param.ends_with(','))
|
||||
return Diag(Loc, diag::warn_unsupported_target_attribute)
|
||||
<< Unsupported << None << "" << TargetClones;
|
||||
|
||||
if (Param.contains(','))
|
||||
HasComma = true;
|
||||
|
||||
StringRef LHS;
|
||||
StringRef RHS = Param;
|
||||
do {
|
||||
std::tie(LHS, RHS) = RHS.split(',');
|
||||
LHS = LHS.trim();
|
||||
const SourceLocation &CurLoc =
|
||||
Loc.getLocWithOffset(LHS.data() - Param.data());
|
||||
|
||||
if (LHS.starts_with("cpu=")) {
|
||||
StringRef CPUStr = LHS.drop_front(sizeof("cpu=") - 1);
|
||||
if (!TargetInfo.isValidCPUName(CPUStr))
|
||||
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
|
||||
<< Unknown << CPU << CPUStr << TargetClones;
|
||||
else if (!TargetInfo.validateCpuIs(CPUStr))
|
||||
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
|
||||
<< Unsupported << CPU << CPUStr << TargetClones;
|
||||
} else if (LHS == "default") {
|
||||
HasDefault = true;
|
||||
} else {
|
||||
// it's a feature string, but not supported yet.
|
||||
return Diag(CurLoc, diag::warn_unsupported_target_attribute)
|
||||
<< Unsupported << None << LHS << TargetClones;
|
||||
}
|
||||
SmallString<64> CPU;
|
||||
if (LHS.starts_with("cpu=")) {
|
||||
CPU.append("cpu=");
|
||||
CPU.append(
|
||||
llvm::PPC::normalizeCPUName(LHS.drop_front(sizeof("cpu=") - 1)));
|
||||
LHS = CPU.str();
|
||||
}
|
||||
if (llvm::is_contained(NewParams, LHS)) {
|
||||
Diag(CurLoc, diag::warn_target_clone_duplicate_options);
|
||||
continue;
|
||||
}
|
||||
NewParams.push_back(LHS);
|
||||
} while (!RHS.empty());
|
||||
}
|
||||
if (HasComma && Params.size() > 1)
|
||||
Diag(Locs[0], diag::warn_target_clone_mixed_values);
|
||||
|
||||
if (!HasDefault)
|
||||
return Diag(AttrLoc, diag::err_target_clone_must_have_default);
|
||||
|
||||
return false;
|
||||
}
|
||||
} // namespace clang
|
||||
|
||||
142
clang/test/CodeGen/PowerPC/attr-target-clones.c
Normal file
142
clang/test/CodeGen/PowerPC/attr-target-clones.c
Normal file
@ -0,0 +1,142 @@
|
||||
// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -target-cpu pwr7 -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -target-cpu pwr7 -emit-llvm %s -o - | FileCheck %s
|
||||
|
||||
// CHECK: @internal = internal ifunc i32 (), ptr @internal.resolver
|
||||
// CHECK: @foo = ifunc i32 (), ptr @foo.resolver
|
||||
// CHECK: @foo_dupes = ifunc void (), ptr @foo_dupes.resolver
|
||||
// CHECK: @unused = ifunc void (), ptr @unused.resolver
|
||||
// CHECK: @foo_inline = linkonce ifunc i32 (), ptr @foo_inline.resolver
|
||||
// CHECK: @foo_ref_then_def = ifunc i32 (), ptr @foo_ref_then_def.resolver
|
||||
// CHECK: @foo_priority = ifunc i32 (i32), ptr @foo_priority.resolver
|
||||
// CHECK: @isa_level = ifunc i32 (i32), ptr @isa_level.resolver
|
||||
|
||||
|
||||
static int __attribute__((target_clones("cpu=power10, default"))) internal(void) { return 0; }
|
||||
int use(void) { return internal(); }
|
||||
// CHECK: define internal ptr @internal.resolver()
|
||||
|
||||
// test all supported cpus
|
||||
int __attribute__((target_clones("cpu=power10, cpu=power11, cpu=pwr9, cpu=pwr7, cpu=power8, default"))) foo(void) { return 0; }
|
||||
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr10() #[[#ATTR_P10:]]
|
||||
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr11() #[[#ATTR_P11:]]
|
||||
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr9() #[[#ATTR_P9:]]
|
||||
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr7() #[[#ATTR_P7:]]
|
||||
// CHECK: define internal {{.*}}i32 @foo.cpu_pwr8() #[[#ATTR_P8:]]
|
||||
// CHECK: define internal {{.*}}i32 @foo.default() #[[#ATTR_P7:]]
|
||||
// CHECK: define internal ptr @foo.resolver()
|
||||
// CHECK: ret ptr @foo.cpu_pwr11
|
||||
// CHECK: ret ptr @foo.cpu_pwr10
|
||||
// CHECK: ret ptr @foo.cpu_pwr9
|
||||
// CHECK: ret ptr @foo.cpu_pwr8
|
||||
// CHECK: ret ptr @foo.cpu_pwr7
|
||||
// CHECK: ret ptr @foo.default
|
||||
|
||||
__attribute__((target_clones("default,default ,cpu=pwr8"))) void foo_dupes(void) {}
|
||||
// CHECK: define internal void @foo_dupes.default() #[[#ATTR_P7]]
|
||||
// CHECK: define internal void @foo_dupes.cpu_pwr8() #[[#ATTR_P8:]]
|
||||
// CHECK: define internal ptr @foo_dupes.resolver()
|
||||
// CHECK: ret ptr @foo_dupes.cpu_pwr8
|
||||
// CHECK: ret ptr @foo_dupes.default
|
||||
|
||||
void bar2(void) {
|
||||
// CHECK: define {{.*}}void @bar2()
|
||||
foo_dupes();
|
||||
// CHECK: call void @foo_dupes()
|
||||
}
|
||||
|
||||
int bar(void) {
|
||||
// CHECK: define {{.*}}i32 @bar()
|
||||
return foo();
|
||||
// CHECK: call {{.*}}i32 @foo()
|
||||
}
|
||||
|
||||
void __attribute__((target_clones("default, cpu=pwr9"))) unused(void) {}
|
||||
// CHECK: define internal void @unused.default() #[[#ATTR_P7]]
|
||||
// CHECK: define internal void @unused.cpu_pwr9() #[[#ATTR_P9:]]
|
||||
// CHECK: define internal ptr @unused.resolver()
|
||||
// CHECK: ret ptr @unused.cpu_pwr9
|
||||
// CHECK: ret ptr @unused.default
|
||||
|
||||
int __attribute__((target_clones("cpu=power10, default"))) inherited(void);
|
||||
int inherited(void) { return 0; }
|
||||
// CHECK: define internal {{.*}}i32 @inherited.cpu_pwr10() #[[#ATTR_P10]]
|
||||
// CHECK: define internal {{.*}}i32 @inherited.default() #[[#ATTR_P7]]
|
||||
// CHECK: define internal ptr @inherited.resolver()
|
||||
// CHECK: ret ptr @inherited.cpu_pwr10
|
||||
// CHECK: ret ptr @inherited.default
|
||||
|
||||
|
||||
int test_inherited(void) {
|
||||
// CHECK: define {{.*}}i32 @test_inherited()
|
||||
return inherited();
|
||||
// CHECK: call {{.*}}i32 @inherited()
|
||||
}
|
||||
|
||||
inline int __attribute__((target_clones("default,cpu=pwr8")))
|
||||
foo_inline(void) { return 0; }
|
||||
int __attribute__((target_clones("cpu=pwr7,default")))
|
||||
foo_ref_then_def(void);
|
||||
|
||||
int bar3(void) {
|
||||
// CHECK: define {{.*}}i32 @bar3()
|
||||
return foo_inline() + foo_ref_then_def();
|
||||
// CHECK: call {{.*}}i32 @foo_inline()
|
||||
// CHECK: call {{.*}}i32 @foo_ref_then_def()
|
||||
}
|
||||
|
||||
// CHECK: define internal ptr @foo_inline.resolver()
|
||||
// CHECK: ret ptr @foo_inline.cpu_pwr8
|
||||
// CHECK: ret ptr @foo_inline.default
|
||||
|
||||
int __attribute__((target_clones("cpu=pwr7,default")))
|
||||
foo_ref_then_def(void){ return 0; }
|
||||
// CHECK: define internal ptr @foo_ref_then_def.resolver()
|
||||
// CHECK: ret ptr @foo_ref_then_def.cpu_pwr7
|
||||
// CHECK: ret ptr @foo_ref_then_def.default
|
||||
|
||||
int __attribute__((target_clones("default", "cpu=pwr8")))
|
||||
foo_unused_no_defn(void);
|
||||
// CHECK-NOT: foo_unused_no_defn
|
||||
|
||||
int __attribute__((target_clones("default", "cpu=pwr9")))
|
||||
foo_used_no_defn(void);
|
||||
|
||||
int test_foo_used_no_defn(void) {
|
||||
// CHECK: define {{.*}}i32 @test_foo_used_no_defn()
|
||||
return foo_used_no_defn();
|
||||
// CHECK: call {{.*}}i32 @foo_used_no_defn()
|
||||
}
|
||||
// CHECK: declare {{.*}}i32 @foo_used_no_defn()
|
||||
|
||||
// Test that the CPU conditions are checked from the most to the least
|
||||
// restrictive (highest to lowest CPU). Also test the codegen for the
|
||||
// conditions
|
||||
int __attribute__((target_clones("cpu=pwr10", "cpu=pwr7", "cpu=pwr9", "default", "cpu=pwr8")))
|
||||
foo_priority(int x) { return x & (x - 1); }
|
||||
// CHECK: define internal ptr @foo_priority.resolver()
|
||||
// CHECK-NEXT: entry
|
||||
// if (__builtin_cpu_supports("arch_3_1")) return &foo_priority.cpu_pwr10;
|
||||
// CHECK-NEXT: %[[#L1:]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_system_configuration, {{i32|i64}} 4)
|
||||
// CHECK-NEXT: icmp uge i32 %[[#L1]], 262144
|
||||
// CHECK: ret ptr @foo_priority.cpu_pwr10
|
||||
// if (__builtin_cpu_supports("arch_3_00")) return &foo_priority.cpu_pwr9;
|
||||
// CHECK: %[[#L2:]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_system_configuration, {{i32|i64}} 4)
|
||||
// CHECK-NEXT: icmp uge i32 %[[#L2]], 131072
|
||||
// CHECK: ret ptr @foo_priority.cpu_pwr9
|
||||
// if (__builtin_cpu_supports("arch_2_07")) return &foo_priority.cpu_pwr8;
|
||||
// CHECK: %[[#L3:]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_system_configuration, {{i32|i64}} 4)
|
||||
// CHECK-NEXT: icmp uge i32 %[[#L3]], 65536
|
||||
// CHECK: ret ptr @foo_priority.cpu_pwr8
|
||||
// if (__builtin_cpu_supports("arch_2_06")) return &foo_priority.cpu_pwr7;
|
||||
// CHECK: %[[#L4:]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_system_configuration, {{i32|i64}} 4)
|
||||
// CHECK-NEXT: icmp uge i32 %[[#L4]], 32768
|
||||
// CHECK: ret ptr @foo_priority.cpu_pwr7
|
||||
// CHECK: ret ptr @foo_priority.default
|
||||
|
||||
|
||||
// CHECK: attributes #[[#ATTR_P7]] = {{.*}} "target-cpu"="pwr7"
|
||||
// CHECK: attributes #[[#ATTR_P10]] = {{.*}} "target-cpu"="pwr10"
|
||||
// CHECK: attributes #[[#ATTR_P11]] = {{.*}} "target-cpu"="pwr11"
|
||||
// CHECK: attributes #[[#ATTR_P9]] = {{.*}} "target-cpu"="pwr9"
|
||||
// CHECK: attributes #[[#ATTR_P8]] = {{.*}} "target-cpu"="pwr8"
|
||||
|
||||
132
clang/test/Sema/PowerPC/attr-target-clones.c
Normal file
132
clang/test/Sema/PowerPC/attr-target-clones.c
Normal file
@ -0,0 +1,132 @@
|
||||
// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -fsyntax-only -verify %s
|
||||
// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -fsyntax-only -verify %s
|
||||
|
||||
// expected-error@+1 {{'target_clones' multiversioning requires a default target}}
|
||||
void __attribute__((target_clones("cpu=pwr7")))
|
||||
no_default(void);
|
||||
|
||||
// expected-error@+2 {{'target_clones' and 'target' attributes are not compatible}}
|
||||
// expected-note@+1 {{conflicting attribute is here}}
|
||||
void __attribute__((target("cpu=pwr7"), target_clones("cpu=pwr8")))
|
||||
ignored_attr(void);
|
||||
|
||||
// expected-error@+2 {{'target' and 'target_clones' attributes are not compatible}}
|
||||
// expected-note@+1 {{conflicting attribute is here}}
|
||||
void __attribute__((target_clones("default", "cpu=pwr8"), target("cpu=pwr7")))
|
||||
ignored_attr2(void);
|
||||
|
||||
int __attribute__((target_clones("cpu=pwr9", "default"))) redecl4(void);
|
||||
// expected-error@+3 {{'target_clones' attribute does not match previous declaration}}
|
||||
// expected-note@-2 {{previous declaration is here}}
|
||||
int __attribute__((target_clones("cpu=pwr7", "default")))
|
||||
redecl4(void) { return 1; }
|
||||
|
||||
int __attribute__((target_clones("cpu=pwr7", "default"))) redecl7(void);
|
||||
// expected-error@+2 {{multiversioning attributes cannot be combined}}
|
||||
// expected-note@-2 {{previous declaration is here}}
|
||||
int __attribute__((target("cpu=pwr8"))) redecl7(void) { return 1; }
|
||||
|
||||
int __attribute__((target("cpu=pwr9"))) redef2(void) { return 1; }
|
||||
// expected-error@+2 {{multiversioning attributes cannot be combined}}
|
||||
// expected-note@-2 {{previous declaration is here}}
|
||||
int __attribute__((target_clones("cpu=pwr7", "default"))) redef2(void) { return 1; }
|
||||
|
||||
int __attribute__((target_clones("cpu=pwr9,default"))) redef3(void) { return 1; }
|
||||
// expected-error@+2 {{redefinition of 'redef3'}}
|
||||
// expected-note@-2 {{previous definition is here}}
|
||||
int __attribute__((target_clones("cpu=pwr9,default"))) redef3(void) { return 1; }
|
||||
|
||||
// Duplicates are allowed, but are diagnosed with a warning.
|
||||
// expected-warning@+2 {{mixing 'target_clones' specifier mechanisms is permitted for GCC compatibility}}
|
||||
// expected-warning@+1 2 {{version list contains duplicate entries}}
|
||||
int __attribute__((target_clones("cpu=pwr9,cpu=power9", "cpu=power9, default")))
|
||||
dupes(void) { return 1; }
|
||||
|
||||
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
|
||||
void __attribute__((target_clones("")))
|
||||
empty_target_1(void);
|
||||
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
|
||||
void __attribute__((target_clones(",default")))
|
||||
empty_target_2(void);
|
||||
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
|
||||
void __attribute__((target_clones("default,")))
|
||||
empty_target_3(void);
|
||||
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
|
||||
void __attribute__((target_clones("default, ,cpu=pwr7")))
|
||||
empty_target_4(void);
|
||||
|
||||
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
|
||||
void __attribute__((target_clones("default,cpu=pwr7", "")))
|
||||
empty_target_5(void);
|
||||
|
||||
// expected-warning@+1 {{version list contains duplicate entries}}
|
||||
void __attribute__((target_clones("default", "default")))
|
||||
dupe_default(void);
|
||||
|
||||
// expected-warning@+1 {{version list contains duplicate entries}}
|
||||
void __attribute__((target_clones("cpu=pwr9,cpu=power9,default")))
|
||||
dupe_normal(void);
|
||||
|
||||
// expected-error@+2 {{attribute 'target_clones' cannot appear more than once on a declaration}}
|
||||
// expected-note@+1 {{conflicting attribute is here}}
|
||||
void __attribute__((target_clones("cpu=pwr7,default"), target_clones("cpu=pwr8,default")))
|
||||
dupe_normal2(void);
|
||||
|
||||
int mv_after_use(void);
|
||||
int useage(void) {
|
||||
return mv_after_use();
|
||||
}
|
||||
// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}}
|
||||
int __attribute__((target_clones("cpu=pwr9", "default"))) mv_after_use(void) { return 1; }
|
||||
|
||||
void bad_overload1(void) __attribute__((target_clones("cpu=pwr8", "default")));
|
||||
// expected-error@+2 {{conflicting types for 'bad_overload1'}}
|
||||
// expected-note@-2 {{previous declaration is here}}
|
||||
void bad_overload1(int p) {}
|
||||
|
||||
void bad_overload2(int p) {}
|
||||
// expected-error@+2 {{conflicting types for 'bad_overload2'}}
|
||||
// expected-note@-2 {{previous definition is here}}
|
||||
void bad_overload2(void) __attribute__((target_clones("cpu=pwr8", "default")));
|
||||
|
||||
void bad_overload3(void) __attribute__((target_clones("cpu=pwr8", "default")));
|
||||
// expected-error@+2 {{conflicting types for 'bad_overload3'}}
|
||||
// expected-note@-2 {{previous declaration is here}}
|
||||
void bad_overload3(int) __attribute__((target_clones("cpu=pwr8", "default")));
|
||||
|
||||
|
||||
void good_overload1(void) __attribute__((target_clones("cpu=pwr7", "cpu=power10", "default")));
|
||||
void __attribute__((__overloadable__)) good_overload1(int p) {}
|
||||
|
||||
// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
|
||||
void __attribute__((__overloadable__)) good_overload2(void) __attribute__((target_clones("cpu=pwr7", "default")));
|
||||
void good_overload2(int p) {}
|
||||
// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
|
||||
void __attribute__((__overloadable__)) good_overload3(void) __attribute__((target_clones("cpu=pwr7", "default")));
|
||||
// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
|
||||
void __attribute__((__overloadable__)) good_overload3(int) __attribute__((target_clones("cpu=pwr7", "default")));
|
||||
|
||||
void good_overload4(void) __attribute__((target_clones("cpu=pwr7", "default")));
|
||||
// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
|
||||
void __attribute__((__overloadable__)) good_overload4(int) __attribute__((target_clones("cpu=pwr7", "default")));
|
||||
|
||||
// expected-error@+1 {{attribute 'target_clones' multiversioning cannot be combined with attribute 'overloadable'}}
|
||||
void __attribute__((__overloadable__)) good_overload5(void) __attribute__((target_clones("cpu=pwr7", "default")));
|
||||
void good_overload5(int) __attribute__((target_clones("cpu=pwr7", "default")));
|
||||
|
||||
|
||||
void good_isa_level(int) __attribute__((target_clones("default", "cpu=pwr7", "cpu=pwr8", "cpu=pwr9", "cpu=pwr10")));
|
||||
|
||||
// expected-warning@+1 {{unknown CPU 'bad-cpu' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
|
||||
void bad_cpu(int) __attribute__((target_clones("default", "cpu=bad-cpu")));
|
||||
|
||||
// expected-warning@+1 {{unsupported CPU 'pwr3' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
|
||||
void bad_cpu(int) __attribute__((target_clones("default", "cpu=pwr3")));
|
||||
|
||||
// expected-error@+1 {{'target_clones' multiversioning requires a default target}}
|
||||
void __attribute__((target_clones()))
|
||||
gh173684_empty_attribute_args(void);
|
||||
|
||||
// expected-error@+1 {{'target_clones' multiversioning requires a default target}}
|
||||
void __attribute__((target_clones))
|
||||
gh173684_empty_attribute_args_2(void);
|
||||
@ -75,15 +75,13 @@ int __attribute__((target("tune=pwr8"))) baz(void) { return 4; }
|
||||
//expected-warning@+1 {{unsupported 'fpmath=' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("fpmath=387"))) walrus(void) { return 4; }
|
||||
//expected-warning@+1 {{unknown CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("float128,arch=hiss"))) meow(void) { return 4; }
|
||||
int __attribute__((target("float128,cpu=hiss"))) meow(void) { return 4; }
|
||||
// no warning, same as saying 'nothing'.
|
||||
int __attribute__((target("arch="))) turtle(void) { return 4; }
|
||||
int __attribute__((target("cpu="))) turtle(void) { return 4; }
|
||||
//expected-warning@+1 {{unknown CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("arch=hiss,arch=woof"))) pine_tree(void) { return 4; }
|
||||
//expected-warning@+1 {{duplicate 'arch=' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("arch=pwr9,arch=pwr10"))) oak_tree(void) { return 4; }
|
||||
//expected-warning@+1 {{unsupported 'branch-protection' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("branch-protection=none"))) birch_tree(void) { return 5; }
|
||||
int __attribute__((target("cpu=hiss,cpu=woof"))) pine_tree(void) { return 4; }
|
||||
//expected-warning@+1 {{duplicate 'cpu=' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("cpu=pwr9,cpu=pwr10"))) oak_tree(void) { return 4; }
|
||||
//expected-warning@+1 {{unknown tune CPU 'hiss' in the 'target' attribute string; 'target' attribute ignored}}
|
||||
int __attribute__((target("tune=hiss,tune=woof"))) apple_tree(void) { return 4; }
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user