[clang] Simplify device kernel attributes (#137882)
We have multiple different attributes in clang representing device kernels for specific targets/languages. Refactor them into one attribute with different spellings to make it more easily scalable for new languages/targets. --------- Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
This commit is contained in:
parent
b2379bd5d5
commit
3b9ebe9201
@ -16,14 +16,14 @@ namespace clang::tidy::altera {
|
||||
|
||||
void SingleWorkItemBarrierCheck::registerMatchers(MatchFinder *Finder) {
|
||||
// Find any function that calls barrier but does not call an ID function.
|
||||
// hasAttr(attr::Kind::OpenCLKernel) restricts it to only kernel functions.
|
||||
// hasAttr(attr::Kind::DeviceKernel) restricts it to only kernel functions.
|
||||
// FIXME: Have it accept all functions but check for a parameter that gets an
|
||||
// ID from one of the four ID functions.
|
||||
Finder->addMatcher(
|
||||
// Find function declarations...
|
||||
functionDecl(
|
||||
// That are OpenCL kernels...
|
||||
hasAttr(attr::Kind::OpenCLKernel),
|
||||
// That are device kernels...
|
||||
hasAttr(attr::Kind::DeviceKernel),
|
||||
// And call a barrier function (either 1.x or 2.x version)...
|
||||
forEachDescendant(callExpr(callee(functionDecl(hasAnyName(
|
||||
"barrier", "work_group_barrier"))))
|
||||
|
@ -164,7 +164,7 @@ public:
|
||||
}
|
||||
|
||||
static KernelReferenceKind getDefaultKernelReference(const FunctionDecl *D) {
|
||||
return (D->hasAttr<OpenCLKernelAttr>() || D->getLangOpts().CUDAIsDevice)
|
||||
return (D->hasAttr<DeviceKernelAttr>() || D->getLangOpts().CUDAIsDevice)
|
||||
? KernelReferenceKind::Kernel
|
||||
: KernelReferenceKind::Stub;
|
||||
}
|
||||
|
@ -196,7 +196,9 @@ def FunctionPointer : SubsetSubject<DeclBase,
|
||||
"functions pointers">;
|
||||
|
||||
def OpenCLKernelFunction
|
||||
: SubsetSubject<Function, [{S->hasAttr<OpenCLKernelAttr>()}],
|
||||
: SubsetSubject<Function, [{S->getASTContext().getLangOpts().OpenCL &&
|
||||
DeviceKernelAttr::isOpenCLSpelling(
|
||||
S->getAttr<DeviceKernelAttr>()}],
|
||||
"kernel functions">;
|
||||
|
||||
// HasFunctionProto is a more strict version of FunctionLike, so it should
|
||||
@ -1515,12 +1517,6 @@ def CUDAGridConstant : InheritableAttr {
|
||||
let Documentation = [CUDAGridConstantAttrDocs];
|
||||
}
|
||||
|
||||
def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
|
||||
let Spellings = [Clang<"nvptx_kernel">];
|
||||
let Subjects = SubjectList<[Function]>;
|
||||
let Documentation = [Undocumented];
|
||||
}
|
||||
|
||||
def HIPManaged : InheritableAttr {
|
||||
let Spellings = [GNU<"managed">, Declspec<"__managed__">];
|
||||
let Subjects = SubjectList<[Var]>;
|
||||
@ -1555,11 +1551,52 @@ def CUDAShared : InheritableAttr {
|
||||
}
|
||||
def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>;
|
||||
|
||||
def SYCLKernel : InheritableAttr {
|
||||
let Spellings = [Clang<"sycl_kernel">];
|
||||
let Subjects = SubjectList<[FunctionTmpl]>;
|
||||
let LangOpts = [SYCLDevice];
|
||||
let Documentation = [SYCLKernelDocs];
|
||||
def DeviceKernel : DeclOrTypeAttr {
|
||||
let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">,
|
||||
Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">,
|
||||
CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
|
||||
let Documentation = [DeviceKernelDocs];
|
||||
let AdditionalMembers =
|
||||
[{
|
||||
static inline bool isAMDGPUSpelling(const AttributeCommonInfo& A) {
|
||||
return A.getAttributeSpellingListIndex() == GNU_amdgpu_kernel ||
|
||||
A.getAttributeSpellingListIndex() == CXX11_clang_amdgpu_kernel ||
|
||||
A.getAttributeSpellingListIndex() == C23_clang_amdgpu_kernel;
|
||||
}
|
||||
static inline bool isAMDGPUSpelling(const AttributeCommonInfo* A) {
|
||||
if(!A) return false;
|
||||
return isAMDGPUSpelling(*A);
|
||||
}
|
||||
static inline bool isNVPTXSpelling(const AttributeCommonInfo& A) {
|
||||
return A.getAttributeSpellingListIndex() == GNU_nvptx_kernel ||
|
||||
A.getAttributeSpellingListIndex() == CXX11_clang_nvptx_kernel ||
|
||||
A.getAttributeSpellingListIndex() == C23_clang_nvptx_kernel;
|
||||
}
|
||||
static inline bool isNVPTXSpelling(const AttributeCommonInfo* A) {
|
||||
if(!A) return false;
|
||||
return isNVPTXSpelling(*A);
|
||||
}
|
||||
static inline bool isSYCLSpelling(const AttributeCommonInfo& A) {
|
||||
return A.getAttributeSpellingListIndex() == GNU_sycl_kernel ||
|
||||
A.getAttributeSpellingListIndex() == CXX11_clang_sycl_kernel ||
|
||||
A.getAttributeSpellingListIndex() == C23_clang_sycl_kernel;
|
||||
}
|
||||
static inline bool isSYCLSpelling(const AttributeCommonInfo* A) {
|
||||
if(!A) return false;
|
||||
return isSYCLSpelling(*A);
|
||||
}
|
||||
static inline bool isOpenCLSpelling(const AttributeCommonInfo& A) {
|
||||
// Tablegen trips underscores from spellings to build the spelling
|
||||
// list, but here we have the same spelling with unscores and without,
|
||||
// so handle that case manually.
|
||||
return A.getAttributeSpellingListIndex() == Keyword_kernel ||
|
||||
A.getAttrName()->getName() == "kernel";
|
||||
}
|
||||
static inline bool isOpenCLSpelling(const AttributeCommonInfo* A) {
|
||||
if (!A) return false;
|
||||
return isOpenCLSpelling(*A);
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
def SYCLKernelEntryPoint : InheritableAttr {
|
||||
@ -1625,15 +1662,6 @@ def Allocating : TypeAttr {
|
||||
let Documentation = [AllocatingDocs];
|
||||
}
|
||||
|
||||
// Similar to CUDA, OpenCL attributes do not receive a [[]] spelling because
|
||||
// the specification does not expose them with one currently.
|
||||
def OpenCLKernel : InheritableAttr {
|
||||
let Spellings = [CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
|
||||
let Subjects = SubjectList<[Function], ErrorDiag>;
|
||||
let Documentation = [Undocumented];
|
||||
let SimpleHandler = 1;
|
||||
}
|
||||
|
||||
def OpenCLUnrollHint : StmtAttr {
|
||||
let Spellings = [GNU<"opencl_unroll_hint">];
|
||||
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
|
||||
@ -2370,11 +2398,6 @@ def AMDGPUMaxNumWorkGroups : InheritableAttr {
|
||||
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
|
||||
}
|
||||
|
||||
def AMDGPUKernelCall : DeclOrTypeAttr {
|
||||
let Spellings = [Clang<"amdgpu_kernel">];
|
||||
let Documentation = [Undocumented];
|
||||
}
|
||||
|
||||
def BPFPreserveAccessIndex : InheritableAttr,
|
||||
TargetSpecificAttr<TargetBPF> {
|
||||
let Spellings = [Clang<"preserve_access_index">];
|
||||
|
@ -396,9 +396,13 @@ any option of a multiversioned function is undefined.
|
||||
}];
|
||||
}
|
||||
|
||||
def SYCLKernelDocs : Documentation {
|
||||
def DeviceKernelDocs : Documentation {
|
||||
let Category = DocCatFunction;
|
||||
let Heading = "device_kernel, sycl_kernel, nvptx_kernel, amdgpu_kernel, "
|
||||
"kernel, __kernel";
|
||||
let Content = [{
|
||||
These attributes specify that the function represents a kernel for device offloading.
|
||||
The specific semantics depend on the offloading language, target, and attribute spelling.
|
||||
The ``sycl_kernel`` attribute specifies that a function template will be used
|
||||
to outline device code and to generate an OpenCL kernel.
|
||||
Here is a code example of the SYCL program, which demonstrates the compiler's
|
||||
|
@ -289,14 +289,13 @@ namespace clang {
|
||||
CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp")))
|
||||
CC_IntelOclBicc, // __attribute__((intel_ocl_bicc))
|
||||
CC_SpirFunction, // default for OpenCL functions on SPIR target
|
||||
CC_OpenCLKernel, // inferred for OpenCL kernels
|
||||
CC_DeviceKernel, // __attribute__((device_kernel))
|
||||
CC_Swift, // __attribute__((swiftcall))
|
||||
CC_SwiftAsync, // __attribute__((swiftasynccall))
|
||||
CC_PreserveMost, // __attribute__((preserve_most))
|
||||
CC_PreserveAll, // __attribute__((preserve_all))
|
||||
CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs))
|
||||
CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs))
|
||||
CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel))
|
||||
CC_M68kRTD, // __attribute__((m68k_rtd))
|
||||
CC_PreserveNone, // __attribute__((preserve_none))
|
||||
CC_RISCVVectorCall, // __attribute__((riscv_vector_cc))
|
||||
@ -326,7 +325,7 @@ namespace clang {
|
||||
case CC_X86Pascal:
|
||||
case CC_X86VectorCall:
|
||||
case CC_SpirFunction:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel:
|
||||
case CC_Swift:
|
||||
case CC_SwiftAsync:
|
||||
case CC_M68kRTD:
|
||||
|
@ -3541,7 +3541,7 @@ bool FunctionDecl::isExternC() const {
|
||||
}
|
||||
|
||||
bool FunctionDecl::isInExternCContext() const {
|
||||
if (hasAttr<OpenCLKernelAttr>())
|
||||
if (DeviceKernelAttr::isOpenCLSpelling(getAttr<DeviceKernelAttr>()))
|
||||
return true;
|
||||
return getLexicalDeclContext()->isExternCContext();
|
||||
}
|
||||
@ -5510,7 +5510,8 @@ FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) {
|
||||
}
|
||||
|
||||
bool FunctionDecl::isReferenceableKernel() const {
|
||||
return hasAttr<CUDAGlobalAttr>() || hasAttr<OpenCLKernelAttr>();
|
||||
return hasAttr<CUDAGlobalAttr>() ||
|
||||
DeviceKernelAttr::isOpenCLSpelling(getAttr<DeviceKernelAttr>());
|
||||
}
|
||||
|
||||
BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) {
|
||||
|
@ -1557,7 +1557,8 @@ void CXXNameMangler::mangleUnqualifiedName(
|
||||
FD && FD->hasAttr<CUDAGlobalAttr>() &&
|
||||
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
|
||||
bool IsOCLDeviceStub =
|
||||
FD && FD->hasAttr<OpenCLKernelAttr>() &&
|
||||
FD &&
|
||||
DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) &&
|
||||
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
|
||||
if (IsDeviceStub)
|
||||
mangleDeviceStubName(II);
|
||||
@ -3532,10 +3533,9 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
|
||||
case CC_AAPCS_VFP:
|
||||
case CC_AArch64VectorCall:
|
||||
case CC_AArch64SVEPCS:
|
||||
case CC_AMDGPUKernelCall:
|
||||
case CC_IntelOclBicc:
|
||||
case CC_SpirFunction:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel:
|
||||
case CC_PreserveMost:
|
||||
case CC_PreserveAll:
|
||||
case CC_M68kRTD:
|
||||
|
@ -1164,7 +1164,9 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
|
||||
->hasAttr<CUDAGlobalAttr>())) &&
|
||||
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
|
||||
bool IsOCLDeviceStub =
|
||||
ND && isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>() &&
|
||||
ND && isa<FunctionDecl>(ND) &&
|
||||
DeviceKernelAttr::isOpenCLSpelling(
|
||||
ND->getAttr<DeviceKernelAttr>()) &&
|
||||
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
|
||||
if (IsDeviceStub)
|
||||
mangleSourceName(
|
||||
|
@ -3606,14 +3606,12 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
|
||||
return "aarch64_vector_pcs";
|
||||
case CC_AArch64SVEPCS:
|
||||
return "aarch64_sve_pcs";
|
||||
case CC_AMDGPUKernelCall:
|
||||
return "amdgpu_kernel";
|
||||
case CC_IntelOclBicc:
|
||||
return "intel_ocl_bicc";
|
||||
case CC_SpirFunction:
|
||||
return "spir_function";
|
||||
case CC_OpenCLKernel:
|
||||
return "opencl_kernel";
|
||||
case CC_DeviceKernel:
|
||||
return "device_kernel";
|
||||
case CC_Swift:
|
||||
return "swiftcall";
|
||||
case CC_SwiftAsync:
|
||||
@ -4328,7 +4326,7 @@ bool AttributedType::isCallingConv() const {
|
||||
case attr::VectorCall:
|
||||
case attr::AArch64VectorPcs:
|
||||
case attr::AArch64SVEPcs:
|
||||
case attr::AMDGPUKernelCall:
|
||||
case attr::DeviceKernel:
|
||||
case attr::Pascal:
|
||||
case attr::MSABI:
|
||||
case attr::SysVABI:
|
||||
|
@ -1100,8 +1100,8 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
|
||||
case CC_AArch64SVEPCS:
|
||||
OS << "__attribute__((aarch64_sve_pcs))";
|
||||
break;
|
||||
case CC_AMDGPUKernelCall:
|
||||
OS << "__attribute__((amdgpu_kernel))";
|
||||
case CC_DeviceKernel:
|
||||
OS << "__attribute__((device_kernel))";
|
||||
break;
|
||||
case CC_IntelOclBicc:
|
||||
OS << " __attribute__((intel_ocl_bicc))";
|
||||
@ -1116,7 +1116,6 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
|
||||
OS << " __attribute__((regcall))";
|
||||
break;
|
||||
case CC_SpirFunction:
|
||||
case CC_OpenCLKernel:
|
||||
// Do nothing. These CCs are not available as attributes.
|
||||
break;
|
||||
case CC_Swift:
|
||||
@ -2069,7 +2068,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
|
||||
}
|
||||
case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
|
||||
case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
|
||||
case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break;
|
||||
case attr::DeviceKernel:
|
||||
OS << T->getAttr()->getSpelling();
|
||||
break;
|
||||
case attr::IntelOclBicc: OS << "inteloclbicc"; break;
|
||||
case attr::PreserveMost:
|
||||
OS << "preserve_most";
|
||||
|
@ -1400,7 +1400,7 @@ AArch64TargetInfo::checkCallingConvention(CallingConv CC) const {
|
||||
case CC_PreserveMost:
|
||||
case CC_PreserveAll:
|
||||
case CC_PreserveNone:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel:
|
||||
case CC_AArch64VectorCall:
|
||||
case CC_AArch64SVEPCS:
|
||||
case CC_Win64:
|
||||
@ -1758,7 +1758,7 @@ WindowsARM64TargetInfo::checkCallingConvention(CallingConv CC) const {
|
||||
case CC_X86FastCall:
|
||||
return CCCR_Ignore;
|
||||
case CC_C:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel:
|
||||
case CC_PreserveMost:
|
||||
case CC_PreserveAll:
|
||||
case CC_PreserveNone:
|
||||
|
@ -415,8 +415,7 @@ public:
|
||||
default:
|
||||
return CCCR_Warning;
|
||||
case CC_C:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_AMDGPUKernelCall:
|
||||
case CC_DeviceKernel:
|
||||
return CCCR_OK;
|
||||
}
|
||||
}
|
||||
|
@ -1404,7 +1404,7 @@ ARMTargetInfo::checkCallingConvention(CallingConv CC) const {
|
||||
case CC_AAPCS_VFP:
|
||||
case CC_Swift:
|
||||
case CC_SwiftAsync:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel:
|
||||
return CCCR_OK;
|
||||
default:
|
||||
return CCCR_Warning;
|
||||
@ -1479,7 +1479,7 @@ WindowsARMTargetInfo::checkCallingConvention(CallingConv CC) const {
|
||||
case CC_X86VectorCall:
|
||||
return CCCR_Ignore;
|
||||
case CC_C:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel:
|
||||
case CC_PreserveMost:
|
||||
case CC_PreserveAll:
|
||||
case CC_Swift:
|
||||
|
@ -94,7 +94,7 @@ public:
|
||||
default:
|
||||
return CCCR_Warning;
|
||||
case CC_C:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel:
|
||||
return CCCR_OK;
|
||||
}
|
||||
}
|
||||
|
@ -336,7 +336,7 @@ WindowsMipsTargetInfo::checkCallingConvention(CallingConv CC) const {
|
||||
case CC_X86VectorCall:
|
||||
return CCCR_Ignore;
|
||||
case CC_C:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel:
|
||||
case CC_PreserveMost:
|
||||
case CC_PreserveAll:
|
||||
case CC_Swift:
|
||||
|
@ -193,7 +193,7 @@ public:
|
||||
}
|
||||
|
||||
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
|
||||
return (CC == CC_SpirFunction || CC == CC_OpenCLKernel) ? CCCR_OK
|
||||
return (CC == CC_SpirFunction || CC == CC_DeviceKernel) ? CCCR_OK
|
||||
: CCCR_Warning;
|
||||
}
|
||||
|
||||
|
@ -245,7 +245,7 @@ public:
|
||||
switch (CC) {
|
||||
case CC_C:
|
||||
case CC_Swift:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel:
|
||||
return CCCR_OK;
|
||||
case CC_SwiftAsync:
|
||||
return CCCR_Error;
|
||||
|
@ -409,10 +409,11 @@ public:
|
||||
case CC_Swift:
|
||||
case CC_X86Pascal:
|
||||
case CC_IntelOclBicc:
|
||||
case CC_OpenCLKernel:
|
||||
return CCCR_OK;
|
||||
case CC_SwiftAsync:
|
||||
return CCCR_Error;
|
||||
case CC_DeviceKernel:
|
||||
return IsOpenCL ? CCCR_OK : CCCR_Warning;
|
||||
default:
|
||||
return CCCR_Warning;
|
||||
}
|
||||
@ -440,7 +441,13 @@ public:
|
||||
uint64_t getPointerAlignV(LangAS AddrSpace) const override {
|
||||
return getPointerWidthV(AddrSpace);
|
||||
}
|
||||
void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
|
||||
TargetInfo::adjust(Diags, Opts);
|
||||
IsOpenCL = Opts.OpenCL;
|
||||
}
|
||||
|
||||
private:
|
||||
bool IsOpenCL = false;
|
||||
};
|
||||
|
||||
// X86-32 generic target
|
||||
@ -786,8 +793,9 @@ public:
|
||||
case CC_PreserveAll:
|
||||
case CC_PreserveNone:
|
||||
case CC_X86RegCall:
|
||||
case CC_OpenCLKernel:
|
||||
return CCCR_OK;
|
||||
case CC_DeviceKernel:
|
||||
return IsOpenCL ? CCCR_OK : CCCR_Warning;
|
||||
default:
|
||||
return CCCR_Warning;
|
||||
}
|
||||
@ -818,7 +826,6 @@ public:
|
||||
return X86TargetInfo::validateGlobalRegisterVariable(RegName, RegSize,
|
||||
HasSizeMismatch);
|
||||
}
|
||||
|
||||
void setMaxAtomicWidth() override {
|
||||
if (hasFeature("cx16"))
|
||||
MaxAtomicInlineWidth = 128;
|
||||
@ -830,6 +837,14 @@ public:
|
||||
size_t getMaxBitIntWidth() const override {
|
||||
return llvm::IntegerType::MAX_INT_BITS;
|
||||
}
|
||||
|
||||
void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
|
||||
TargetInfo::adjust(Diags, Opts);
|
||||
IsOpenCL = Opts.OpenCL;
|
||||
}
|
||||
|
||||
private:
|
||||
bool IsOpenCL = false;
|
||||
};
|
||||
|
||||
// x86-64 UEFI target
|
||||
@ -915,7 +930,7 @@ public:
|
||||
case CC_Swift:
|
||||
case CC_SwiftAsync:
|
||||
case CC_X86RegCall:
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel:
|
||||
return CCCR_OK;
|
||||
default:
|
||||
return CCCR_Warning;
|
||||
|
@ -81,12 +81,19 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
|
||||
return llvm::CallingConv::AArch64_VectorCall;
|
||||
case CC_AArch64SVEPCS:
|
||||
return llvm::CallingConv::AArch64_SVE_VectorCall;
|
||||
case CC_AMDGPUKernelCall:
|
||||
return llvm::CallingConv::AMDGPU_KERNEL;
|
||||
case CC_SpirFunction:
|
||||
return llvm::CallingConv::SPIR_FUNC;
|
||||
case CC_OpenCLKernel:
|
||||
case CC_DeviceKernel: {
|
||||
if (CGM.getLangOpts().OpenCL)
|
||||
return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
|
||||
if (CGM.getTriple().isSPIROrSPIRV())
|
||||
return llvm::CallingConv::SPIR_KERNEL;
|
||||
if (CGM.getTriple().isAMDGPU())
|
||||
return llvm::CallingConv::AMDGPU_KERNEL;
|
||||
if (CGM.getTriple().isNVPTX())
|
||||
return llvm::CallingConv::PTX_Kernel;
|
||||
llvm_unreachable("Unknown kernel calling convention");
|
||||
}
|
||||
case CC_PreserveMost:
|
||||
return llvm::CallingConv::PreserveMost;
|
||||
case CC_PreserveAll:
|
||||
@ -284,8 +291,8 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
|
||||
if (D->hasAttr<AArch64SVEPcsAttr>())
|
||||
return CC_AArch64SVEPCS;
|
||||
|
||||
if (D->hasAttr<AMDGPUKernelCallAttr>())
|
||||
return CC_AMDGPUKernelCall;
|
||||
if (D->hasAttr<DeviceKernelAttr>())
|
||||
return CC_DeviceKernel;
|
||||
|
||||
if (D->hasAttr<IntelOclBiccAttr>())
|
||||
return CC_IntelOclBicc;
|
||||
@ -533,7 +540,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const GlobalDecl GD) {
|
||||
assert(isa<FunctionType>(FTy));
|
||||
setCUDAKernelCallingConvention(FTy, CGM, FD);
|
||||
|
||||
if (FD->hasAttr<OpenCLKernelAttr>() &&
|
||||
if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) &&
|
||||
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
|
||||
const FunctionType *FT = FTy->getAs<FunctionType>();
|
||||
CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT);
|
||||
@ -761,7 +768,7 @@ CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType,
|
||||
|
||||
return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
|
||||
argTypes,
|
||||
FunctionType::ExtInfo(CC_OpenCLKernel),
|
||||
FunctionType::ExtInfo(CC_DeviceKernel),
|
||||
/*paramInfos=*/{}, RequiredArgs::All);
|
||||
}
|
||||
|
||||
@ -2536,7 +2543,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
|
||||
NumElemsParam);
|
||||
}
|
||||
|
||||
if (TargetDecl->hasAttr<OpenCLKernelAttr>() &&
|
||||
if (DeviceKernelAttr::isOpenCLSpelling(
|
||||
TargetDecl->getAttr<DeviceKernelAttr>()) &&
|
||||
CallingConv != CallingConv::CC_C &&
|
||||
CallingConv != CallingConv::CC_SpirFunction) {
|
||||
// Check CallingConv to avoid adding uniform-work-group-size attribute to
|
||||
@ -2919,7 +2927,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
|
||||
// > For arguments to a __kernel function declared to be a pointer to a
|
||||
// > data type, the OpenCL compiler can assume that the pointee is always
|
||||
// > appropriately aligned as required by the data type.
|
||||
if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
|
||||
if (TargetDecl &&
|
||||
DeviceKernelAttr::isOpenCLSpelling(
|
||||
TargetDecl->getAttr<DeviceKernelAttr>()) &&
|
||||
ParamType->isPointerType()) {
|
||||
QualType PTy = ParamType->getPointeeType();
|
||||
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
|
||||
|
@ -1692,9 +1692,8 @@ static unsigned getDwarfCC(CallingConv CC) {
|
||||
return llvm::dwarf::DW_CC_LLVM_IntelOclBicc;
|
||||
case CC_SpirFunction:
|
||||
return llvm::dwarf::DW_CC_LLVM_SpirFunction;
|
||||
case CC_OpenCLKernel:
|
||||
case CC_AMDGPUKernelCall:
|
||||
return llvm::dwarf::DW_CC_LLVM_OpenCLKernel;
|
||||
case CC_DeviceKernel:
|
||||
return llvm::dwarf::DW_CC_LLVM_DeviceKernel;
|
||||
case CC_Swift:
|
||||
return llvm::dwarf::DW_CC_LLVM_Swift;
|
||||
case CC_SwiftAsync:
|
||||
|
@ -5944,7 +5944,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
|
||||
}
|
||||
|
||||
static GlobalDecl getGlobalDeclForDirectCall(const FunctionDecl *FD) {
|
||||
if (FD->hasAttr<OpenCLKernelAttr>())
|
||||
if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()))
|
||||
return GlobalDecl(FD, KernelReferenceKind::Stub);
|
||||
return GlobalDecl(FD);
|
||||
}
|
||||
@ -6375,7 +6375,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
|
||||
const auto *FnType = cast<FunctionType>(PointeeType);
|
||||
|
||||
if (const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
|
||||
FD && FD->hasAttr<OpenCLKernelAttr>())
|
||||
FD && DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()))
|
||||
CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FnType);
|
||||
|
||||
bool CFIUnchecked =
|
||||
|
@ -626,7 +626,7 @@ CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const {
|
||||
|
||||
void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
|
||||
llvm::Function *Fn) {
|
||||
if (!FD->hasAttr<OpenCLKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>())
|
||||
if (!FD->hasAttr<DeviceKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>())
|
||||
return;
|
||||
|
||||
llvm::LLVMContext &Context = getLLVMContext();
|
||||
@ -1598,7 +1598,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
|
||||
// Implicit copy-assignment gets the same special treatment as implicit
|
||||
// copy-constructors.
|
||||
emitImplicitAssignmentOperatorBody(Args);
|
||||
} else if (FD->hasAttr<OpenCLKernelAttr>() &&
|
||||
} else if (DeviceKernelAttr::isOpenCLSpelling(
|
||||
FD->getAttr<DeviceKernelAttr>()) &&
|
||||
GD.getKernelReferenceKind() == KernelReferenceKind::Kernel) {
|
||||
CallArgList CallArgs;
|
||||
for (unsigned i = 0; i < Args.size(); ++i) {
|
||||
|
@ -1913,7 +1913,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
|
||||
} else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
|
||||
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
|
||||
Out << "__device_stub__" << II->getName();
|
||||
} else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
|
||||
} else if (FD &&
|
||||
DeviceKernelAttr::isOpenCLSpelling(
|
||||
FD->getAttr<DeviceKernelAttr>()) &&
|
||||
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
|
||||
Out << "__clang_ocl_kern_imp_" << II->getName();
|
||||
} else {
|
||||
@ -3930,7 +3932,8 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
|
||||
|
||||
// Ignore declarations, they will be emitted on their first use.
|
||||
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
|
||||
if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody())
|
||||
if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) &&
|
||||
FD->doesThisDeclarationHaveABody())
|
||||
addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));
|
||||
|
||||
// Update deferred annotations with the latest declaration if the function
|
||||
@ -4895,7 +4898,7 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable,
|
||||
if (!Ty) {
|
||||
const auto *FD = cast<FunctionDecl>(GD.getDecl());
|
||||
Ty = getTypes().ConvertType(FD->getType());
|
||||
if (FD->hasAttr<OpenCLKernelAttr>() &&
|
||||
if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) &&
|
||||
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
|
||||
const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
|
||||
Ty = getTypes().GetFunctionType(FI);
|
||||
@ -6195,7 +6198,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
|
||||
(CodeGenOpts.OptimizationLevel == 0) &&
|
||||
!D->hasAttr<MinSizeAttr>();
|
||||
|
||||
if (D->hasAttr<OpenCLKernelAttr>()) {
|
||||
if (DeviceKernelAttr::isOpenCLSpelling(D->getAttr<DeviceKernelAttr>())) {
|
||||
if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub &&
|
||||
!D->hasAttr<NoInlineAttr>() &&
|
||||
!Fn->hasFnAttribute(llvm::Attribute::NoInline) &&
|
||||
|
@ -191,7 +191,7 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel(
|
||||
auto *F = llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, Name,
|
||||
&CGF.CGM.getModule());
|
||||
llvm::CallingConv::ID KernelCC =
|
||||
CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel);
|
||||
CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_DeviceKernel);
|
||||
F->setCallingConv(KernelCC);
|
||||
|
||||
llvm::AttrBuilder KernelAttrs(C);
|
||||
|
@ -337,7 +337,7 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D,
|
||||
return false;
|
||||
|
||||
return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
|
||||
(D->hasAttr<OpenCLKernelAttr>() ||
|
||||
(D->hasAttr<DeviceKernelAttr>() ||
|
||||
(isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
|
||||
(isa<VarDecl>(D) &&
|
||||
(D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
|
||||
@ -350,7 +350,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
|
||||
const auto *ReqdWGS =
|
||||
M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
|
||||
const bool IsOpenCLKernel =
|
||||
M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
|
||||
M.getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>();
|
||||
const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
|
||||
|
||||
const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
|
||||
@ -572,7 +572,7 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
|
||||
void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
|
||||
const FunctionType *&FT) const {
|
||||
FT = getABIInfo().getContext().adjustFunctionType(
|
||||
FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
|
||||
FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel));
|
||||
}
|
||||
|
||||
/// Return IR struct type for rtinfo struct in rocm-device-libs used for device
|
||||
|
@ -260,24 +260,15 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
|
||||
|
||||
llvm::Function *F = cast<llvm::Function>(GV);
|
||||
|
||||
// Perform special handling in OpenCL mode
|
||||
if (M.getLangOpts().OpenCL) {
|
||||
// Use OpenCL function attributes to check for kernel functions
|
||||
// Perform special handling in OpenCL/CUDA mode
|
||||
if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
|
||||
// Use function attributes to check for kernel functions
|
||||
// By default, all functions are device functions
|
||||
if (FD->hasAttr<OpenCLKernelAttr>()) {
|
||||
// OpenCL __kernel functions get kernel metadata
|
||||
if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
|
||||
// OpenCL/CUDA kernel functions get kernel metadata
|
||||
// Create !{<func-ref>, metadata !"kernel", i32 1} node
|
||||
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
|
||||
// And kernel functions are not subject to inlining
|
||||
F->addFnAttr(llvm::Attribute::NoInline);
|
||||
}
|
||||
}
|
||||
|
||||
// Perform special handling in CUDA mode.
|
||||
if (M.getLangOpts().CUDA) {
|
||||
// CUDA __global__ functions get a kernel metadata entry. Since
|
||||
// __global__ functions cannot be called from the device, we do not
|
||||
// need to set the noinline attribute.
|
||||
if (FD->hasAttr<CUDAGlobalAttr>()) {
|
||||
SmallVector<int, 10> GCI;
|
||||
for (auto IV : llvm::enumerate(FD->parameters()))
|
||||
@ -291,9 +282,9 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
|
||||
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
|
||||
M.handleCUDALaunchBoundsAttr(F, Attr);
|
||||
}
|
||||
|
||||
}
|
||||
// Attach kernel metadata directly if compiling for NVPTX.
|
||||
if (FD->hasAttr<NVPTXKernelAttr>()) {
|
||||
if (FD->hasAttr<DeviceKernelAttr>()) {
|
||||
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
|
||||
}
|
||||
}
|
||||
|
@ -228,7 +228,7 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
|
||||
// Convert HIP kernels to SPIR-V kernels.
|
||||
if (getABIInfo().getContext().getLangOpts().HIP) {
|
||||
FT = getABIInfo().getContext().adjustFunctionType(
|
||||
FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
|
||||
FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ void TCETargetCodeGenInfo::setTargetAttributes(
|
||||
llvm::Function *F = cast<llvm::Function>(GV);
|
||||
|
||||
if (M.getLangOpts().OpenCL) {
|
||||
if (FD->hasAttr<OpenCLKernelAttr>()) {
|
||||
if (FD->hasAttr<DeviceKernelAttr>()) {
|
||||
// OpenCL C Kernel functions are not subject to inlining
|
||||
F->addFnAttr(llvm::Attribute::NoInline);
|
||||
const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>();
|
||||
|
@ -8789,7 +8789,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
|
||||
FunctionDecl *FD = getCurFunctionDecl();
|
||||
// OpenCL v1.1 s6.5.2 and s6.5.3: no local or constant variables
|
||||
// in functions.
|
||||
if (FD && !FD->hasAttr<OpenCLKernelAttr>()) {
|
||||
if (FD && !FD->hasAttr<DeviceKernelAttr>()) {
|
||||
if (T.getAddressSpace() == LangAS::opencl_constant)
|
||||
Diag(NewVD->getLocation(), diag::err_opencl_function_variable)
|
||||
<< 0 /*non-kernel only*/ << "constant";
|
||||
@ -8801,7 +8801,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
|
||||
}
|
||||
// OpenCL v2.0 s6.5.2 and s6.5.3: local and constant variables must be
|
||||
// in the outermost scope of a kernel function.
|
||||
if (FD && FD->hasAttr<OpenCLKernelAttr>()) {
|
||||
if (FD && FD->hasAttr<DeviceKernelAttr>()) {
|
||||
if (!getCurScope()->isFunctionScope()) {
|
||||
if (T.getAddressSpace() == LangAS::opencl_constant)
|
||||
Diag(NewVD->getLocation(), diag::err_opencl_addrspace_scope)
|
||||
@ -10930,9 +10930,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
|
||||
|
||||
MarkUnusedFileScopedDecl(NewFD);
|
||||
|
||||
|
||||
|
||||
if (getLangOpts().OpenCL && NewFD->hasAttr<OpenCLKernelAttr>()) {
|
||||
if (getLangOpts().OpenCL && NewFD->hasAttr<DeviceKernelAttr>()) {
|
||||
// OpenCL v1.2 s6.8 static is invalid for kernel functions.
|
||||
if (SC == SC_Static) {
|
||||
Diag(D.getIdentifierLoc(), diag::err_static_kernel);
|
||||
@ -12437,7 +12435,7 @@ void Sema::CheckMain(FunctionDecl *FD, const DeclSpec &DS) {
|
||||
|
||||
if (getLangOpts().OpenCL) {
|
||||
Diag(FD->getLocation(), diag::err_opencl_no_main)
|
||||
<< FD->hasAttr<OpenCLKernelAttr>();
|
||||
<< FD->hasAttr<DeviceKernelAttr>();
|
||||
FD->setInvalidDecl();
|
||||
return;
|
||||
}
|
||||
@ -15713,7 +15711,7 @@ ShouldWarnAboutMissingPrototype(const FunctionDecl *FD,
|
||||
return false;
|
||||
|
||||
// Don't warn for OpenCL kernels.
|
||||
if (FD->hasAttr<OpenCLKernelAttr>())
|
||||
if (FD->hasAttr<DeviceKernelAttr>())
|
||||
return false;
|
||||
|
||||
// Don't warn on explicitly deleted functions.
|
||||
@ -20607,7 +20605,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD,
|
||||
|
||||
// SYCL functions can be template, so we check if they have appropriate
|
||||
// attribute prior to checking if it is a template.
|
||||
if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>())
|
||||
if (LangOpts.SYCLIsDevice && FD->hasAttr<DeviceKernelAttr>())
|
||||
return FunctionEmissionStatus::Emitted;
|
||||
|
||||
// Templates are emitted when they're instantiated.
|
||||
|
@ -5108,8 +5108,8 @@ static void handleGlobalAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
|
||||
if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
|
||||
S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
|
||||
|
||||
if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
|
||||
D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
|
||||
if (AL.getKind() == ParsedAttr::AT_DeviceKernel)
|
||||
D->addAttr(::new (S.Context) DeviceKernelAttr(S.Context, AL));
|
||||
else
|
||||
D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
|
||||
// In host compilation the kernel is emitted as a stub function, which is
|
||||
@ -5244,9 +5244,11 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
|
||||
case ParsedAttr::AT_AArch64SVEPcs:
|
||||
D->addAttr(::new (S.Context) AArch64SVEPcsAttr(S.Context, AL));
|
||||
return;
|
||||
case ParsedAttr::AT_AMDGPUKernelCall:
|
||||
D->addAttr(::new (S.Context) AMDGPUKernelCallAttr(S.Context, AL));
|
||||
case ParsedAttr::AT_DeviceKernel: {
|
||||
// The attribute should already be applied.
|
||||
assert(D->hasAttr<DeviceKernelAttr>() && "Expected attribute");
|
||||
return;
|
||||
}
|
||||
case ParsedAttr::AT_IntelOclBicc:
|
||||
D->addAttr(::new (S.Context) IntelOclBiccAttr(S.Context, AL));
|
||||
return;
|
||||
@ -5289,6 +5291,33 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
|
||||
}
|
||||
}
|
||||
|
||||
static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
|
||||
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
|
||||
bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate();
|
||||
if (S.getLangOpts().SYCLIsDevice) {
|
||||
if (!IsFunctionTemplate) {
|
||||
S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str)
|
||||
<< AL << AL.isRegularKeywordAttribute() << "function templates";
|
||||
} else {
|
||||
S.SYCL().handleKernelAttr(D, AL);
|
||||
}
|
||||
} else if (DeviceKernelAttr::isSYCLSpelling(AL)) {
|
||||
S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL;
|
||||
} else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) {
|
||||
handleGlobalAttr(S, D, AL);
|
||||
} else {
|
||||
// OpenCL C++ will throw a more specific error.
|
||||
if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
|
||||
S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str)
|
||||
<< AL << AL.isRegularKeywordAttribute() << "functions";
|
||||
}
|
||||
handleSimpleAttribute<DeviceKernelAttr>(S, D, AL);
|
||||
}
|
||||
// Make sure we validate the CC with the target
|
||||
// and warn/error if necessary.
|
||||
handleCallConvAttr(S, D, AL);
|
||||
}
|
||||
|
||||
static void handleSuppressAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
|
||||
if (AL.getAttributeSpellingListIndex() == SuppressAttr::CXX11_gsl_suppress) {
|
||||
// Suppression attribute with GSL spelling requires at least 1 argument.
|
||||
@ -5453,9 +5482,6 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
|
||||
case ParsedAttr::AT_AArch64SVEPcs:
|
||||
CC = CC_AArch64SVEPCS;
|
||||
break;
|
||||
case ParsedAttr::AT_AMDGPUKernelCall:
|
||||
CC = CC_AMDGPUKernelCall;
|
||||
break;
|
||||
case ParsedAttr::AT_RegCall:
|
||||
CC = CC_X86RegCall;
|
||||
break;
|
||||
@ -5525,6 +5551,11 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
|
||||
llvm::Log2_64(ABIVLen) - 5);
|
||||
break;
|
||||
}
|
||||
case ParsedAttr::AT_DeviceKernel: {
|
||||
// Validation was handled in handleDeviceKernelAttr.
|
||||
CC = CC_DeviceKernel;
|
||||
break;
|
||||
}
|
||||
default: llvm_unreachable("unexpected attribute kind");
|
||||
}
|
||||
|
||||
@ -7148,9 +7179,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
|
||||
case ParsedAttr::AT_EnumExtensibility:
|
||||
handleEnumExtensibilityAttr(S, D, AL);
|
||||
break;
|
||||
case ParsedAttr::AT_SYCLKernel:
|
||||
S.SYCL().handleKernelAttr(D, AL);
|
||||
break;
|
||||
case ParsedAttr::AT_SYCLKernelEntryPoint:
|
||||
S.SYCL().handleKernelEntryPointAttr(D, AL);
|
||||
break;
|
||||
@ -7175,7 +7203,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
|
||||
case ParsedAttr::AT_CalledOnce:
|
||||
handleCalledOnceAttr(S, D, AL);
|
||||
break;
|
||||
case ParsedAttr::AT_NVPTXKernel:
|
||||
case ParsedAttr::AT_CUDAGlobal:
|
||||
handleGlobalAttr(S, D, AL);
|
||||
break;
|
||||
@ -7439,13 +7466,15 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
|
||||
case ParsedAttr::AT_PreserveAll:
|
||||
case ParsedAttr::AT_AArch64VectorPcs:
|
||||
case ParsedAttr::AT_AArch64SVEPcs:
|
||||
case ParsedAttr::AT_AMDGPUKernelCall:
|
||||
case ParsedAttr::AT_M68kRTD:
|
||||
case ParsedAttr::AT_PreserveNone:
|
||||
case ParsedAttr::AT_RISCVVectorCC:
|
||||
case ParsedAttr::AT_RISCVVLSCC:
|
||||
handleCallConvAttr(S, D, AL);
|
||||
break;
|
||||
case ParsedAttr::AT_DeviceKernel:
|
||||
handleDeviceKernelAttr(S, D, AL);
|
||||
break;
|
||||
case ParsedAttr::AT_Suppress:
|
||||
handleSuppressAttr(S, D, AL);
|
||||
break;
|
||||
@ -7764,9 +7793,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
|
||||
|
||||
static bool isKernelDecl(Decl *D) {
|
||||
const FunctionType *FnTy = D->getFunctionType();
|
||||
return D->hasAttr<OpenCLKernelAttr>() ||
|
||||
(FnTy && FnTy->getCallConv() == CallingConv::CC_AMDGPUKernelCall) ||
|
||||
D->hasAttr<CUDAGlobalAttr>() || D->getAttr<NVPTXKernelAttr>();
|
||||
return D->hasAttr<DeviceKernelAttr>() ||
|
||||
(FnTy && FnTy->getCallConv() == CallingConv::CC_DeviceKernel) ||
|
||||
D->hasAttr<CUDAGlobalAttr>();
|
||||
}
|
||||
|
||||
void Sema::ProcessDeclAttributeList(
|
||||
@ -7793,7 +7822,7 @@ void Sema::ProcessDeclAttributeList(
|
||||
// good to have a way to specify "these attributes must appear as a group",
|
||||
// for these. Additionally, it would be good to have a way to specify "these
|
||||
// attribute must never appear as a group" for attributes like cold and hot.
|
||||
if (!(D->hasAttr<OpenCLKernelAttr>() ||
|
||||
if (!(D->hasAttr<DeviceKernelAttr>() ||
|
||||
(D->hasAttr<CUDAGlobalAttr>() &&
|
||||
Context.getTargetInfo().getTriple().isSPIRV()))) {
|
||||
// These attributes cannot be applied to a non-kernel function.
|
||||
|
@ -199,7 +199,7 @@ void SemaSYCL::handleKernelAttr(Decl *D, const ParsedAttr &AL) {
|
||||
return;
|
||||
}
|
||||
|
||||
handleSimpleAttribute<SYCLKernelAttr>(*this, D, AL);
|
||||
handleSimpleAttribute<DeviceKernelAttr>(*this, D, AL);
|
||||
}
|
||||
|
||||
void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) {
|
||||
|
@ -676,9 +676,9 @@ static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr(
|
||||
// This doesn't take any template parameters, but we have a custom action that
|
||||
// needs to happen when the kernel itself is instantiated. We need to run the
|
||||
// ItaniumMangler to mark the names required to name this kernel.
|
||||
static void instantiateDependentSYCLKernelAttr(
|
||||
static void instantiateDependentDeviceKernelAttr(
|
||||
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
|
||||
const SYCLKernelAttr &Attr, Decl *New) {
|
||||
const DeviceKernelAttr &Attr, Decl *New) {
|
||||
New->addAttr(Attr.clone(S.getASTContext()));
|
||||
}
|
||||
|
||||
@ -920,8 +920,8 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (auto *A = dyn_cast<SYCLKernelAttr>(TmplAttr)) {
|
||||
instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New);
|
||||
if (auto *A = dyn_cast<DeviceKernelAttr>(TmplAttr)) {
|
||||
instantiateDependentDeviceKernelAttr(*this, TemplateArgs, *A, New);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -134,7 +134,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
|
||||
case ParsedAttr::AT_VectorCall: \
|
||||
case ParsedAttr::AT_AArch64VectorPcs: \
|
||||
case ParsedAttr::AT_AArch64SVEPcs: \
|
||||
case ParsedAttr::AT_AMDGPUKernelCall: \
|
||||
case ParsedAttr::AT_DeviceKernel: \
|
||||
case ParsedAttr::AT_MSABI: \
|
||||
case ParsedAttr::AT_SysVABI: \
|
||||
case ParsedAttr::AT_Pcs: \
|
||||
@ -3755,18 +3755,7 @@ static CallingConv getCCForDeclaratorChunk(
|
||||
CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic,
|
||||
IsCXXInstanceMethod);
|
||||
|
||||
// Attribute AT_OpenCLKernel affects the calling convention for SPIR
|
||||
// and AMDGPU targets, hence it cannot be treated as a calling
|
||||
// convention attribute. This is the simplest place to infer
|
||||
// calling convention for OpenCL kernels.
|
||||
if (S.getLangOpts().OpenCL) {
|
||||
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
|
||||
if (AL.getKind() == ParsedAttr::AT_OpenCLKernel) {
|
||||
CC = CC_OpenCLKernel;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (S.getLangOpts().CUDA) {
|
||||
if (S.getLangOpts().CUDA) {
|
||||
// If we're compiling CUDA/HIP code and targeting HIPSPV we need to make
|
||||
// sure the kernels will be marked with the right calling convention so that
|
||||
// they will be visible by the APIs that ingest SPIR-V. We do not do this
|
||||
@ -3775,13 +3764,20 @@ static CallingConv getCCForDeclaratorChunk(
|
||||
if (Triple.isSPIRV() && Triple.getVendor() != llvm::Triple::AMD) {
|
||||
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
|
||||
if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) {
|
||||
CC = CC_OpenCLKernel;
|
||||
CC = CC_DeviceKernel;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!S.getLangOpts().isSYCL()) {
|
||||
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
|
||||
if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
|
||||
CC = CC_DeviceKernel;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return CC;
|
||||
}
|
||||
|
||||
@ -7532,8 +7528,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
|
||||
return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
|
||||
case ParsedAttr::AT_ArmStreaming:
|
||||
return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr);
|
||||
case ParsedAttr::AT_AMDGPUKernelCall:
|
||||
return createSimpleAttr<AMDGPUKernelCallAttr>(Ctx, Attr);
|
||||
case ParsedAttr::AT_DeviceKernel:
|
||||
return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr);
|
||||
case ParsedAttr::AT_Pcs: {
|
||||
// The attribute may have had a fixit applied where we treated an
|
||||
// identifier as a string literal. The contents of the string are valid,
|
||||
@ -8742,6 +8738,16 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State,
|
||||
}
|
||||
}
|
||||
|
||||
static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) {
|
||||
// The DeviceKernel attribute is shared for many targets, and
|
||||
// it is only allowed to be a type attribute with the AMDGPU
|
||||
// spelling, so skip processing the attr as a type attr
|
||||
// unless it has that spelling.
|
||||
if (Attr.getKind() != ParsedAttr::AT_DeviceKernel)
|
||||
return true;
|
||||
return DeviceKernelAttr::isAMDGPUSpelling(Attr);
|
||||
}
|
||||
|
||||
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
|
||||
TypeAttrLocation TAL,
|
||||
const ParsedAttributesView &attrs,
|
||||
@ -8995,6 +9001,9 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
|
||||
break;
|
||||
[[fallthrough]];
|
||||
FUNCTION_TYPE_ATTRS_CASELIST:
|
||||
if (!isMultiSubjectAttrAllowedOnType(attr))
|
||||
break;
|
||||
|
||||
attr.setUsedAsTypeAttr();
|
||||
|
||||
// Attributes with standard syntax have strict rules for what they
|
||||
|
@ -109,7 +109,6 @@
|
||||
// CHECK-NEXT: NSConsumed (SubjectMatchRule_variable_is_parameter)
|
||||
// CHECK-NEXT: NSConsumesSelf (SubjectMatchRule_objc_method)
|
||||
// CHECK-NEXT: NSErrorDomain (SubjectMatchRule_enum)
|
||||
// CHECK-NEXT: NVPTXKernel (SubjectMatchRule_function)
|
||||
// CHECK-NEXT: Naked (SubjectMatchRule_function)
|
||||
// CHECK-NEXT: NoBuiltin (SubjectMatchRule_function)
|
||||
// CHECK-NEXT: NoCommon (SubjectMatchRule_variable)
|
||||
|
@ -732,8 +732,8 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) {
|
||||
TCALLINGCONV(RISCVVLSCall_32768);
|
||||
TCALLINGCONV(RISCVVLSCall_65536);
|
||||
case CC_SpirFunction: return CXCallingConv_Unexposed;
|
||||
case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed;
|
||||
case CC_OpenCLKernel: return CXCallingConv_Unexposed;
|
||||
case CC_DeviceKernel:
|
||||
return CXCallingConv_Unexposed;
|
||||
break;
|
||||
}
|
||||
#undef TCALLINGCONV
|
||||
|
@ -1117,7 +1117,7 @@ HANDLE_DW_CC(0xc3, LLVM_AAPCS)
|
||||
HANDLE_DW_CC(0xc4, LLVM_AAPCS_VFP)
|
||||
HANDLE_DW_CC(0xc5, LLVM_IntelOclBicc)
|
||||
HANDLE_DW_CC(0xc6, LLVM_SpirFunction)
|
||||
HANDLE_DW_CC(0xc7, LLVM_OpenCLKernel)
|
||||
HANDLE_DW_CC(0xc7, LLVM_DeviceKernel)
|
||||
HANDLE_DW_CC(0xc8, LLVM_Swift)
|
||||
HANDLE_DW_CC(0xc9, LLVM_PreserveMost)
|
||||
HANDLE_DW_CC(0xca, LLVM_PreserveAll)
|
||||
|
@ -734,13 +734,15 @@ void DWARFTypePrinter<DieType>::appendSubroutineNameAfter(
|
||||
OS << " __attribute__((intel_ocl_bicc))";
|
||||
break;
|
||||
case dwarf::CallingConvention::DW_CC_LLVM_SpirFunction:
|
||||
case dwarf::CallingConvention::DW_CC_LLVM_OpenCLKernel:
|
||||
// These aren't available as attributes, but maybe we should still
|
||||
// render them somehow? (Clang doesn't render them, but that's an issue
|
||||
// This isn't available as an attribute, but maybe we should still
|
||||
// render it somehow? (Clang doesn't render it, but that's an issue
|
||||
// for template names too - since then the DWARF names of templates
|
||||
// instantiated with function types with these calling conventions won't
|
||||
// have distinct names - so we'd need to fix that too)
|
||||
break;
|
||||
case dwarf::CallingConvention::DW_CC_LLVM_DeviceKernel:
|
||||
OS << " __attribute__((device_kernel))";
|
||||
break;
|
||||
case dwarf::CallingConvention::DW_CC_LLVM_Swift:
|
||||
// SwiftAsync missing
|
||||
OS << " __attribute__((swiftcall))";
|
||||
|
@ -28,6 +28,6 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo
|
||||
!2 = !{i32 7, !"Dwarf Version", i32 5}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
|
||||
!5 = !DISubroutineType(cc: DW_CC_LLVM_OpenCLKernel, types: !6)
|
||||
!5 = !DISubroutineType(cc: DW_CC_LLVM_DeviceKernel, types: !6)
|
||||
!6 = !{null}
|
||||
!7 = !{i32 1024, i32 1, i32 1}
|
||||
|
Loading…
x
Reference in New Issue
Block a user