[Clang][OpenCL][AMDGPU] Allow a kernel to call another kernel (#115821)
Calling one kernel from another is currently not supported by the compiler. To enable it, we emit a stub version of each kernel function body under a different name-mangling scheme and replace the respective kernel call sites accordingly. Fixes https://github.com/llvm/llvm-project/issues/60313. D120566 was an earlier attempt to upstream a solution for this issue. --------- Co-authored-by: anikelal <anikelal@amd.com>
This commit is contained in:
parent 65cede26a6
commit 642481a428
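Before the individual hunks, here is a minimal sketch of the lowering described in the commit message, written as plain host C++ rather than OpenCL or Clang code. The kernel names caller and callee are hypothetical; only the __clang_ocl_kern_imp_ naming comes from this patch. Each kernel keeps its entry point, its body moves into a stub, and a kernel-to-kernel call is redirected to the callee's stub rather than its entry point.

#include <cstdio>

// Stub carrying the real body of kernel `callee`. In the real lowering it uses an
// ordinary device-function calling convention, not the kernel convention.
// (The double-underscore names only mirror the compiler-generated symbols.)
static void __clang_ocl_kern_imp_callee(int x) { std::printf("callee body: %d\n", x); }

// Kernel entry point for `callee`: a thin wrapper that forwards to the stub.
static void callee(int x) { __clang_ocl_kern_imp_callee(x); }

// Stub carrying the body of kernel `caller`. Its kernel-to-kernel call targets
// `callee`'s stub, never `callee`'s entry point.
static void __clang_ocl_kern_imp_caller(int x) { __clang_ocl_kern_imp_callee(x + 1); }

// Kernel entry point for `caller`.
static void caller(int x) { __clang_ocl_kern_imp_caller(x); }

int main() {
  caller(41); // host analogue of launching the `caller` kernel
  return 0;
}

In the real lowering the entry points keep the amdgpu_kernel or spir_kernel calling convention and the kernel metadata, while the stubs are ordinary device functions, as the CodeGen changes and test updates below show.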
@@ -3048,6 +3048,8 @@ public:
  static FunctionDecl *castFromDeclContext(const DeclContext *DC) {
    return static_cast<FunctionDecl *>(const_cast<DeclContext*>(DC));
  }

  bool isReferenceableKernel() const;
};

/// Represents a member of a struct/union/class.
@@ -70,15 +70,15 @@ public:
  GlobalDecl(const VarDecl *D) { Init(D);}
  GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0)
      : MultiVersionIndex(MVIndex) {
    if (!D->hasAttr<CUDAGlobalAttr>()) {
      Init(D);
    if (D->isReferenceableKernel()) {
      Value.setPointerAndInt(D, unsigned(getDefaultKernelReference(D)));
      return;
    }
    Value.setPointerAndInt(D, unsigned(getDefaultKernelReference(D)));
    Init(D);
  }
  GlobalDecl(const FunctionDecl *D, KernelReferenceKind Kind)
      : Value(D, unsigned(Kind)) {
    assert(D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!");
    assert(D->isReferenceableKernel() && "Decl is not a GPU kernel!");
  }
  GlobalDecl(const NamedDecl *D) { Init(D); }
  GlobalDecl(const BlockDecl *D) { Init(D); }

@@ -131,12 +131,13 @@ public:

  KernelReferenceKind getKernelReferenceKind() const {
    assert(((isa<FunctionDecl>(getDecl()) &&
             cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
             cast<FunctionDecl>(getDecl())->isReferenceableKernel()) ||
            (isa<FunctionTemplateDecl>(getDecl()) &&
             cast<FunctionTemplateDecl>(getDecl())
                 ->getTemplatedDecl()
                 ->hasAttr<CUDAGlobalAttr>())) &&
           "Decl is not a GPU kernel!");

    return static_cast<KernelReferenceKind>(Value.getInt());
  }

@@ -160,8 +161,9 @@ public:
  }

  static KernelReferenceKind getDefaultKernelReference(const FunctionDecl *D) {
    return D->getLangOpts().CUDAIsDevice ? KernelReferenceKind::Kernel
                                         : KernelReferenceKind::Stub;
    return (D->hasAttr<OpenCLKernelAttr>() || D->getLangOpts().CUDAIsDevice)
               ? KernelReferenceKind::Kernel
               : KernelReferenceKind::Stub;
  }

  GlobalDecl getWithDecl(const Decl *D) {
@@ -197,7 +199,7 @@ public:

  GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind) {
    assert(isa<FunctionDecl>(getDecl()) &&
           cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
           cast<FunctionDecl>(getDecl())->isReferenceableKernel() &&
           "Decl is not a GPU kernel!");
    GlobalDecl Result(*this);
    Result.Value.setInt(unsigned(Kind));
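The GlobalDecl changes above decide whether a plain reference to a kernel FunctionDecl means the kernel entry point or its stub. A small standalone sketch of that decision, mirroring getDefaultKernelReference after this patch (my reading of the hunk above, not Clang API):

#include <cstdio>

enum class KernelReferenceKind { Kernel, Stub };

// An OpenCL kernel referenced without an explicit kind defaults to the kernel
// entry point, as does any kernel during CUDA device compilation; otherwise
// the reference means the stub.
static KernelReferenceKind defaultKernelReference(bool hasOpenCLKernelAttr,
                                                  bool cudaIsDevice) {
  return (hasOpenCLKernelAttr || cudaIsDevice) ? KernelReferenceKind::Kernel
                                               : KernelReferenceKind::Stub;
}

int main() {
  // OpenCL kernel: defaults to Kernel.
  std::printf("%d\n", defaultKernelReference(true, false) == KernelReferenceKind::Kernel);
  // CUDA __global__ referenced from host code: defaults to Stub.
  std::printf("%d\n", defaultKernelReference(false, false) == KernelReferenceKind::Stub);
  return 0;
}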
@@ -5468,6 +5468,10 @@ FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) {
                              /*TrailingRequiresClause=*/{});
}

bool FunctionDecl::isReferenceableKernel() const {
  return hasAttr<CUDAGlobalAttr>() || hasAttr<OpenCLKernelAttr>();
}

BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) {
  return new (C, DC) BlockDecl(DC, L);
}
@@ -695,9 +695,9 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
        GD = GlobalDecl(CD, Ctor_Base);
      else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(ND))
        GD = GlobalDecl(DD, Dtor_Base);
      else if (ND->hasAttr<CUDAGlobalAttr>())
        GD = GlobalDecl(cast<FunctionDecl>(ND));
      else
      else if (auto FD = dyn_cast<FunctionDecl>(ND)) {
        GD = FD->isReferenceableKernel() ? GlobalDecl(FD) : GlobalDecl(ND);
      } else
        GD = GlobalDecl(ND);
      MC->mangleName(GD, Out);
@@ -526,6 +526,7 @@ private:
  void mangleSourceName(const IdentifierInfo *II);
  void mangleRegCallName(const IdentifierInfo *II);
  void mangleDeviceStubName(const IdentifierInfo *II);
  void mangleOCLDeviceStubName(const IdentifierInfo *II);
  void mangleSourceNameWithAbiTags(
      const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr);
  void mangleLocalName(GlobalDecl GD,
@@ -1561,8 +1562,13 @@ void CXXNameMangler::mangleUnqualifiedName(
    bool IsDeviceStub =
        FD && FD->hasAttr<CUDAGlobalAttr>() &&
        GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
    bool IsOCLDeviceStub =
        FD && FD->hasAttr<OpenCLKernelAttr>() &&
        GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
    if (IsDeviceStub)
      mangleDeviceStubName(II);
    else if (IsOCLDeviceStub)
      mangleOCLDeviceStubName(II);
    else if (IsRegCall)
      mangleRegCallName(II);
    else
@@ -1780,6 +1786,15 @@ void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
      << II->getName();
}

void CXXNameMangler::mangleOCLDeviceStubName(const IdentifierInfo *II) {
  //  <source-name> ::= <positive length number> __clang_ocl_kern_imp_ <identifier>
  //  <number> ::= [n] <non-negative decimal integer>
  //  <identifier> ::= <unqualified source code identifier>
  StringRef OCLDeviceStubNamePrefix = "__clang_ocl_kern_imp_";
  Out << II->getLength() + OCLDeviceStubNamePrefix.size()
      << OCLDeviceStubNamePrefix << II->getName();
}

void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) {
  // <source-name> ::= <positive length number> <identifier>
  // <number> ::= [n] <non-negative decimal integer>
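As a concrete illustration of mangleOCLDeviceStubName, the following standalone sketch builds the Itanium <source-name> the same way: the decimal length covers both the 21-character __clang_ocl_kern_imp_ prefix and the identifier. This is an illustration of the scheme, not Clang code, and the kernel name bar is made up.

#include <iostream>
#include <string>

// Builds the Itanium <source-name> for an OpenCL kernel stub:
// <length><prefix><identifier>, as in the mangler code above.
static std::string oclStubSourceName(const std::string &Identifier) {
  const std::string Prefix = "__clang_ocl_kern_imp_"; // 21 characters
  return std::to_string(Identifier.size() + Prefix.size()) + Prefix + Identifier;
}

int main() {
  // For a kernel named "bar" this prints "24__clang_ocl_kern_imp_bar";
  // the full mangled name additionally carries the usual _Z prefix, any
  // nesting, and the parameter encodings produced by the surrounding mangler.
  std::cout << oclStubSourceName("bar") << '\n';
  return 0;
}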
@@ -540,9 +540,9 @@ private:
      GD = GlobalDecl(CtorD, Ctor_Complete);
    else if (const auto *DtorD = dyn_cast<CXXDestructorDecl>(D))
      GD = GlobalDecl(DtorD, Dtor_Complete);
    else if (D->hasAttr<CUDAGlobalAttr>())
      GD = GlobalDecl(cast<FunctionDecl>(D));
    else
    else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
      GD = FD->isReferenceableKernel() ? GlobalDecl(FD) : GlobalDecl(D);
    } else
      GD = GlobalDecl(D);
    MC->mangleName(GD, OS);
    return false;
@@ -1162,9 +1162,15 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
                       ->getTemplatedDecl()
                       ->hasAttr<CUDAGlobalAttr>())) &&
          GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
      bool IsOCLDeviceStub =
          ND && isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>() &&
          GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
      if (IsDeviceStub)
        mangleSourceName(
            (llvm::Twine("__device_stub__") + II->getName()).str());
      else if (IsOCLDeviceStub)
        mangleSourceName(
            (llvm::Twine("__clang_ocl_kern_imp_") + II->getName()).str());
      else
        mangleSourceName(II->getName());
      break;
@@ -499,7 +499,8 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
/// Arrange the argument and result information for the declaration or
/// definition of the given function.
const CGFunctionInfo &
CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
CodeGenTypes::arrangeFunctionDeclaration(const GlobalDecl GD) {
  const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
  if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
    if (MD->isImplicitObjectMemberFunction())
      return arrangeCXXMethodDeclaration(MD);

@@ -509,6 +510,13 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
  assert(isa<FunctionType>(FTy));
  setCUDAKernelCallingConvention(FTy, CGM, FD);

  if (FD->hasAttr<OpenCLKernelAttr>() &&
      GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
    const FunctionType *FT = FTy->getAs<FunctionType>();
    CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT);
    FTy = FT->getCanonicalTypeUnqualified();
  }

  // When declaring a function without a prototype, always use a
  // non-variadic type.
  if (CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>()) {
@@ -581,13 +589,11 @@ CodeGenTypes::arrangeUnprototypedObjCMessageSend(QualType returnType,
const CGFunctionInfo &
CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) {
  // FIXME: Do we need to handle ObjCMethodDecl?
  const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());

  if (isa<CXXConstructorDecl>(GD.getDecl()) ||
      isa<CXXDestructorDecl>(GD.getDecl()))
    return arrangeCXXStructorDeclaration(GD);

  return arrangeFunctionDeclaration(FD);
  return arrangeFunctionDeclaration(GD);
}

/// Arrange a thunk that takes 'this' as the first parameter followed by
@@ -2391,7 +2397,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
  // Collect function IR attributes from the callee prototype if we have one.
  AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
                                     CalleeInfo.getCalleeFunctionProtoType());

  const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();

  // Attach assumption attributes to the declaration. If this is a call

@@ -2498,7 +2503,11 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
                                 NumElemsParam);
    }

    if (TargetDecl->hasAttr<OpenCLKernelAttr>()) {
    if (TargetDecl->hasAttr<OpenCLKernelAttr>() &&
        CallingConv != CallingConv::CC_C &&
        CallingConv != CallingConv::CC_SpirFunction) {
      // Check CallingConv to avoid adding uniform-work-group-size attribute to
      // OpenCL Kernel Stub
      if (getLangOpts().OpenCLVersion <= 120) {
        // OpenCL v1.2 Work groups are always uniform
        FuncAttrs.addAttribute("uniform-work-group-size", "true");
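The new condition above attaches uniform-work-group-size only to real OpenCL kernel entry points; the stub, which is lowered to the plain C or SPIR function calling convention, must not receive it. A tiny sketch of that predicate, with a simplified calling-convention enum standing in for Clang's:

enum class CC { C, SpirFunction, AmdgpuKernel, SpirKernel };

// True when the callee is an OpenCL kernel entry point (kernel calling
// convention), false for the stub that was lowered to an ordinary function.
static bool addsUniformWorkGroupSize(bool hasOpenCLKernelAttr, CC Conv) {
  return hasOpenCLKernelAttr && Conv != CC::C && Conv != CC::SpirFunction;
}

int main() {
  bool kernelEntry = addsUniformWorkGroupSize(true, CC::AmdgpuKernel); // true
  bool kernelStub  = addsUniformWorkGroupSize(true, CC::C);            // false
  return (kernelEntry && !kernelStub) ? 0 : 1;
}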
@@ -5752,6 +5752,12 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
  return CGCallee::forDirect(CalleePtr, GD);
}

static GlobalDecl getGlobalDeclForDirectCall(const FunctionDecl *FD) {
  if (FD->hasAttr<OpenCLKernelAttr>())
    return GlobalDecl(FD, KernelReferenceKind::Stub);
  return GlobalDecl(FD);
}

CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
  E = E->IgnoreParens();

@@ -5765,7 +5771,7 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
  // Resolve direct calls.
  } else if (auto DRE = dyn_cast<DeclRefExpr>(E)) {
    if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
      return EmitDirectCallee(*this, FD);
      return EmitDirectCallee(*this, getGlobalDeclForDirectCall(FD));
    }
  } else if (auto ME = dyn_cast<MemberExpr>(E)) {
    if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {
@@ -6134,6 +6140,10 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,

  const auto *FnType = cast<FunctionType>(PointeeType);

  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
      FD && FD->hasAttr<OpenCLKernelAttr>())
    CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FnType);

  // If we are checking indirect calls and this call is indirect, check that the
  // function pointer is a member of the bit set for the function type.
  if (SanOpts.has(SanitizerKind::CFIICall) &&
@@ -1595,6 +1595,26 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
    // Implicit copy-assignment gets the same special treatment as implicit
    // copy-constructors.
    emitImplicitAssignmentOperatorBody(Args);
  } else if (FD->hasAttr<OpenCLKernelAttr>() &&
             GD.getKernelReferenceKind() == KernelReferenceKind::Kernel) {
    CallArgList CallArgs;
    for (unsigned i = 0; i < Args.size(); ++i) {
      Address ArgAddr = GetAddrOfLocalVar(Args[i]);
      QualType ArgQualType = Args[i]->getType();
      RValue ArgRValue = convertTempToRValue(ArgAddr, ArgQualType, Loc);
      CallArgs.add(ArgRValue, ArgQualType);
    }
    GlobalDecl GDStub = GlobalDecl(FD, KernelReferenceKind::Stub);
    const FunctionType *FT = cast<FunctionType>(FD->getType());
    CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT);
    const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall(
        CallArgs, FT, /*ChainCall=*/false);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FnInfo);
    llvm::Constant *GDStubFunctionPointer =
        CGM.getRawFunctionPointer(GDStub, FTy);
    CGCallee GDStubCallee = CGCallee::forDirect(GDStubFunctionPointer, GDStub);
    EmitCall(FnInfo, GDStubCallee, ReturnValueSlot(), CallArgs, nullptr, false,
             Loc);
  } else if (Body) {
    EmitFunctionBody(Body);
  } else
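The branch above is what turns the kernel definition itself into a forwarding wrapper: each parameter is reloaded from its local slot, collected into a CallArgList, and passed to the stub looked up as GlobalDecl(FD, KernelReferenceKind::Stub) with the stub calling convention applied. A host-C++ sketch of the resulting shape for a by-value struct parameter, loosely mirroring the KernelOneMember test further down (names are illustrative, not generated code):

#include <cstdio>

struct StructOneMember { int x[2]; };

// Stub: carries the original kernel body.
static void __clang_ocl_kern_imp_KernelOneMember(StructOneMember u) {
  u.x[0] = 0;
  u.x[1] = 0;
  std::printf("stub ran\n");
}

// Kernel entry: materializes its argument locally, then forwards it by value
// to the stub, analogous to the CallArgList built in GenerateCode above.
static void KernelOneMember(StructOneMember u) {
  StructOneMember local = u; // stands in for GetAddrOfLocalVar + convertTempToRValue
  __clang_ocl_kern_imp_KernelOneMember(local);
}

int main() {
  StructOneMember s{{1, 2}};
  KernelOneMember(s);
  return 0;
}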
@@ -1903,6 +1903,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
    } else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
               GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
      Out << "__device_stub__" << II->getName();
    } else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
               GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
      Out << "__clang_ocl_kern_imp_" << II->getName();
    } else {
      Out << II->getName();
    }
@@ -3890,6 +3893,9 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {

  // Ignore declarations, they will be emitted on their first use.
  if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
    if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody())
      addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));

    // Update deferred annotations with the latest declaration if the function
    // function was already used or defined.
    if (FD->hasAttr<AnnotateAttr>()) {
@@ -4857,6 +4863,11 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable,
  if (!Ty) {
    const auto *FD = cast<FunctionDecl>(GD.getDecl());
    Ty = getTypes().ConvertType(FD->getType());
    if (FD->hasAttr<OpenCLKernelAttr>() &&
        GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
      const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
      Ty = getTypes().GetFunctionType(FI);
    }
  }

  // Devirtualized destructor calls may come through here instead of via
@@ -207,7 +207,7 @@ public:

  /// Free functions are functions that are compatible with an ordinary
  /// C function pointer type.
  const CGFunctionInfo &arrangeFunctionDeclaration(const FunctionDecl *FD);
  const CGFunctionInfo &arrangeFunctionDeclaration(const GlobalDecl GD);
  const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args,
                                                const FunctionType *Ty,
                                                bool ChainCall);
@@ -117,6 +117,12 @@ unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
  return llvm::CallingConv::SPIR_KERNEL;
}

void TargetCodeGenInfo::setOCLKernelStubCallingConvention(
    const FunctionType *&FT) const {
  FT = getABIInfo().getContext().adjustFunctionType(
      FT, FT->getExtInfo().withCallingConv(CC_C));
}

llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
    llvm::PointerType *T, QualType QT) const {
  return llvm::ConstantPointerNull::get(T);
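The default hook above gives the stub the plain C calling convention; targets may override it, and the SPIR hook later in this diff picks CC_SpirFunction instead. A standalone sketch of that per-target choice, with simplified enums standing in for Clang's types:

#include <cstdio>

enum class StubCC { C, SpirFunction };

struct TargetCodeGenInfoSketch {
  // Default behaviour mirroring TargetCodeGenInfo::setOCLKernelStubCallingConvention.
  virtual StubCC oclKernelStubCC() const { return StubCC::C; }
  virtual ~TargetCodeGenInfoSketch() = default;
};

struct CommonSPIRSketch : TargetCodeGenInfoSketch {
  // Mirrors the CommonSPIRTargetCodeGenInfo override below, which uses CC_SpirFunction.
  StubCC oclKernelStubCC() const override { return StubCC::SpirFunction; }
};

int main() {
  TargetCodeGenInfoSketch defaultTarget; // e.g. a target with no override uses C
  CommonSPIRSketch spir;
  std::printf("%d %d\n", static_cast<int>(defaultTarget.oclKernelStubCC()),
              static_cast<int>(spir.oclKernelStubCC()));
  return 0;
}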
@@ -400,7 +400,7 @@ public:
  virtual bool shouldEmitDWARFBitFieldSeparators() const { return false; }

  virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}

  virtual void setOCLKernelStubCallingConvention(const FunctionType *&FT) const;
  /// Return the device-side type for the CUDA device builtin surface type.
  virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const {
    // By default, no change from the original one.
@@ -58,6 +58,8 @@ public:
  llvm::Type *getSPIRVImageTypeFromHLSLResource(
      const HLSLAttributedResourceType::Attributes &attributes,
      llvm::Type *ElementType, llvm::LLVMContext &Ctx) const;
  void
  setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
};
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
public:

@@ -230,6 +232,12 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
  }
}

void CommonSPIRTargetCodeGenInfo::setOCLKernelStubCallingConvention(
    const FunctionType *&FT) const {
  FT = getABIInfo().getContext().adjustFunctionType(
      FT, FT->getExtInfo().withCallingConv(CC_SpirFunction));
}

LangAS
SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
                                                 const VarDecl *D) const {
File diff suppressed because it is too large
@@ -1,4 +1,4 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 --include-generated-funcs
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefix=AMDGCN %s

typedef int int2 __attribute__((ext_vector_type(2)));
@@ -42,6 +42,78 @@ struct LargeStructOneMember g_s;
#endif

Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
  Mat4X4 out;
  return out;
}

// Expect two mem copies: one for the argument "in", and one for
// the return value.

kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
  out[0] = foo(in[1]);
}

Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
  Mat64X64 out;
  return out;
}

kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
  out[0] = foo_large(in[1]);
}

void FuncOneMember(struct StructOneMember u) {
  u.x = (int2)(0, 0);
}

void FuncOneLargeMember(struct LargeStructOneMember u) {
  u.x[0] = (int2)(0, 0);
}

#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
void test_indirect_arg_globl(void) {
  FuncOneLargeMember(g_s);
}
#endif

kernel void test_indirect_arg_local(void) {
  local struct LargeStructOneMember l_s;
  FuncOneLargeMember(l_s);
}

void test_indirect_arg_private(void) {
  struct LargeStructOneMember p_s;
  FuncOneLargeMember(p_s);
}

kernel void KernelOneMember(struct StructOneMember u) {
  FuncOneMember(u);
}

kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
  FuncOneMember(*u);
}

kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
  FuncOneLargeMember(u);
}

void FuncTwoMember(struct StructTwoMember u) {
  u.y = (int2)(0, 0);
}

void FuncLargeTwoMember(struct LargeStructTwoMember u) {
  u.y[0] = (int2)(0, 0);
}

kernel void KernelTwoMember(struct StructTwoMember u) {
  FuncTwoMember(u);
}

kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
  FuncLargeTwoMember(u);
}
// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo(
// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
@ -54,19 +126,27 @@ struct LargeStructOneMember g_s;
|
||||
// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
|
||||
// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
|
||||
//
|
||||
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
|
||||
Mat4X4 out;
|
||||
return out;
|
||||
}
|
||||
|
||||
// Expect two mem copies: one for the argument "in", and one for
|
||||
// the return value.
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker(
|
||||
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
|
||||
// AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
||||
// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ker(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4:[0-9]+]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_ker(
|
||||
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
|
||||
// AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
|
||||
// AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
||||
@ -78,17 +158,14 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
|
||||
// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
|
||||
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
|
||||
// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
|
||||
// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
|
||||
// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
|
||||
out[0] = foo(in[1]);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @foo_large(
|
||||
// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
@ -97,16 +174,27 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
|
||||
Mat64X64 out;
|
||||
return out;
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large(
|
||||
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
|
||||
// AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
||||
// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ker_large(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_ker_large(
|
||||
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
|
||||
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
|
||||
// AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
|
||||
@ -118,14 +206,11 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
|
||||
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
|
||||
// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
|
||||
// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
|
||||
out[0] = foo_large(in[1]);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @FuncOneMember(
|
||||
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
@ -141,10 +226,7 @@ kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
|
||||
// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
void FuncOneMember(struct StructOneMember u) {
|
||||
u.x = (int2)(0, 0);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember(
|
||||
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
@ -160,37 +242,32 @@ void FuncOneMember(struct StructOneMember u) {
|
||||
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
void FuncOneLargeMember(struct LargeStructOneMember u) {
|
||||
u.x[0] = (int2)(0, 0);
|
||||
}
|
||||
|
||||
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @test_indirect_arg_globl(
|
||||
// AMDGCN-SAME: ) #[[ATTR0]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
|
||||
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
|
||||
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
void test_indirect_arg_globl(void) {
|
||||
FuncOneLargeMember(g_s);
|
||||
}
|
||||
#endif
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
|
||||
// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
|
||||
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
|
||||
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_test_indirect_arg_local() #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
kernel void test_indirect_arg_local(void) {
|
||||
local struct LargeStructOneMember l_s;
|
||||
FuncOneLargeMember(l_s);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_test_indirect_arg_local(
|
||||
// AMDGCN-SAME: ) #[[ATTR2]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
|
||||
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private(
|
||||
// AMDGCN-SAME: ) #[[ATTR0]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
@ -198,14 +275,10 @@ kernel void test_indirect_arg_local(void) {
|
||||
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
|
||||
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
|
||||
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
void test_indirect_arg_private(void) {
|
||||
struct LargeStructOneMember p_s;
|
||||
FuncOneLargeMember(p_s);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
|
||||
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
@ -215,13 +288,23 @@ void test_indirect_arg_private(void) {
|
||||
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
|
||||
// AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8
|
||||
// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
|
||||
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
kernel void KernelOneMember(struct StructOneMember u) {
|
||||
FuncOneMember(u);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMember(
|
||||
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
|
||||
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
|
||||
// AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8
|
||||
// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
|
||||
// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
@ -229,32 +312,48 @@ kernel void KernelOneMember(struct StructOneMember u) {
|
||||
// AMDGCN-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr
|
||||
// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
|
||||
// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
|
||||
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMemberSpir(ptr addrspace(1) noundef align 8 [[TMP0]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
|
||||
FuncOneMember(*u);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMemberSpir(
|
||||
// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META14]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr
|
||||
// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8
|
||||
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
|
||||
// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
|
||||
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
|
||||
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
|
||||
// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false)
|
||||
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
|
||||
// AMDGCN-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5)
|
||||
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
|
||||
FuncOneLargeMember(u);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeOneMember(
|
||||
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U]], i64 800, i1 false)
|
||||
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @FuncTwoMember(
|
||||
// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
@ -272,10 +371,7 @@ kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
|
||||
// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
void FuncTwoMember(struct StructTwoMember u) {
|
||||
u.y = (int2)(0, 0);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember(
|
||||
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
@ -291,10 +387,7 @@ void FuncTwoMember(struct StructTwoMember u) {
|
||||
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
void FuncLargeTwoMember(struct LargeStructTwoMember u) {
|
||||
u.y[0] = (int2)(0, 0);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
|
||||
// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
@ -310,18 +403,31 @@ void FuncLargeTwoMember(struct LargeStructTwoMember u) {
|
||||
// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
|
||||
// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
|
||||
// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
|
||||
// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
|
||||
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
kernel void KernelTwoMember(struct StructTwoMember u) {
|
||||
FuncTwoMember(u);
|
||||
}
|
||||
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelTwoMember(
|
||||
// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
|
||||
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8
|
||||
// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
|
||||
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8
|
||||
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[TMP2]], align 8
|
||||
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
|
||||
// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
|
||||
// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
|
||||
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
|
||||
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
|
||||
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
|
||||
@ -329,13 +435,22 @@ kernel void KernelTwoMember(struct StructTwoMember u) {
|
||||
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
|
||||
// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
|
||||
// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false)
|
||||
// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
|
||||
// AMDGCN-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5)
|
||||
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeTwoMember(
|
||||
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
|
||||
// AMDGCN-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
|
||||
// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
|
||||
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U]], i64 480, i1 false)
|
||||
// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
|
||||
// AMDGCN-NEXT: ret void
|
||||
//
|
||||
kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
|
||||
FuncLargeTwoMember(u);
|
||||
}
|
||||
//.
|
||||
// AMDGCN: [[META4]] = !{i32 1, i32 1}
|
||||
// AMDGCN: [[META5]] = !{!"none", !"none"}
|
||||
|
@@ -423,7 +423,7 @@ struct_char_arr32 func_ret_struct_char_arr32()
  return s;
}

// CHECK: define{{.*}} i32 @func_transparent_union_ret() local_unnamed_addr #1 {
// CHECK: define{{.*}} i32 @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
// CHECK: ret i32 0
transparent_u func_transparent_union_ret()
{
@@ -1,4 +1,4 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --prefix-filecheck-ir-name VAR
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 5
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefixes=CHECK,NOCPU

// // Check no-optnone and target-cpu behavior
@ -70,9 +70,9 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
// CHECK: @llvm.used = appending addrspace(1) global [10 x ptr] [ptr @__test_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr @__test_block_invoke_2_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr @__test_block_invoke_3_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr @__test_block_invoke_4_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr)], section "llvm.metadata"
|
||||
//.
|
||||
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// NOCPU-LABEL: define {{[^@]+}}@callee
|
||||
// NOCPU-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// NOCPU-NEXT: entry:
|
||||
// NOCPU-LABEL: define dso_local void @callee(
|
||||
// NOCPU-SAME: i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr
|
||||
@ -88,9 +88,33 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
//
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// NOCPU-LABEL: define {{[^@]+}}@test
|
||||
// NOCPU-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
|
||||
// NOCPU-NEXT: entry:
|
||||
// NOCPU-LABEL: define dso_local amdgpu_kernel void @test(
|
||||
// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
|
||||
// NOCPU-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
|
||||
// NOCPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
|
||||
// NOCPU-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
|
||||
// NOCPU-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
|
||||
// NOCPU-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1
|
||||
// NOCPU-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
|
||||
// NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR10:[0-9]+]]
|
||||
// NOCPU-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test(
|
||||
// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
|
||||
// NOCPU-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
@ -213,10 +237,46 @@ kernel void test_target_features_kernel(global int *i) {
|
||||
// NOCPU-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// NOCPU-LABEL: define dso_local amdgpu_kernel void @test_target_features_kernel(
|
||||
// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
|
||||
// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR10]]
|
||||
// NOCPU-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel(
|
||||
// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
|
||||
// NOCPU-NEXT: [[ENTRY:.*:]]
|
||||
// NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
|
||||
// NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
|
||||
// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
|
||||
// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
|
||||
// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
|
||||
// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
|
||||
// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
|
||||
// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8
|
||||
// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4
|
||||
// NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
|
||||
// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
|
||||
// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
|
||||
// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
|
||||
// NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
// NOCPU-NEXT: ret void
//
//
// NOCPU: Function Attrs: convergent noinline nounwind optnone
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke
// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4:[0-9]+]] {
// NOCPU-NEXT: entry:
// NOCPU-LABEL: define internal void @__test_block_invoke(
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7:[0-9]+]] {
// NOCPU-NEXT: [[ENTRY:.*:]]
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
@@ -233,9 +293,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// NOCPU: Function Attrs: convergent nounwind
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !associated [[META7:![0-9]+]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] {
// NOCPU-NEXT: entry:
// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(
// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] !associated [[META11:![0-9]+]] !kernel_arg_addr_space [[META12:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
// NOCPU-NEXT: [[ENTRY:.*:]]
// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
@@ -244,9 +304,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// NOCPU: Function Attrs: convergent noinline nounwind optnone
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2
// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
// NOCPU-NEXT: entry:
// NOCPU-LABEL: define internal void @__test_block_invoke_2(
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] {
// NOCPU-NEXT: [[ENTRY:.*:]]
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
@@ -269,9 +329,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// NOCPU: Function Attrs: convergent nounwind
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !associated [[META12:![0-9]+]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] {
// NOCPU-NEXT: entry:
// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(
// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META14:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
// NOCPU-NEXT: [[ENTRY:.*:]]
// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
@@ -280,9 +340,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// NOCPU: Function Attrs: convergent noinline nounwind optnone
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3
// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR4]] {
// NOCPU-NEXT: entry:
// NOCPU-LABEL: define internal void @__test_block_invoke_3(
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR7]] {
// NOCPU-NEXT: [[ENTRY:.*:]]
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// NOCPU-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -311,9 +371,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// NOCPU: Function Attrs: convergent nounwind
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !associated [[META13:![0-9]+]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META15:![0-9]+]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META17:![0-9]+]] {
// NOCPU-NEXT: entry:
// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(
// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR8]] !associated [[META15:![0-9]+]] !kernel_arg_addr_space [[META16:![0-9]+]] !kernel_arg_access_qual [[META17:![0-9]+]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META19:![0-9]+]] {
// NOCPU-NEXT: [[ENTRY:.*:]]
// NOCPU-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
// NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
// NOCPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
@@ -322,9 +382,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// NOCPU: Function Attrs: convergent noinline nounwind optnone
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4
// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
// NOCPU-NEXT: entry:
// NOCPU-LABEL: define internal void @__test_block_invoke_4(
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] {
// NOCPU-NEXT: [[ENTRY:.*:]]
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
@@ -335,14 +395,14 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8
// NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8:[0-9]+]]
// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR10]]
// NOCPU-NEXT: ret void
//
//
// NOCPU: Function Attrs: convergent nounwind
// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
// NOCPU-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !associated [[META18:![0-9]+]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] {
// NOCPU-NEXT: entry:
// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(
// NOCPU-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META20:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
// NOCPU-NEXT: [[ENTRY:.*:]]
// NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
// NOCPU-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
@@ -350,34 +410,10 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU-NEXT: ret void
//
//
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
// NOCPU-LABEL: define {{[^@]+}}@test_target_features_kernel
// NOCPU-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space [[META19:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META20:![0-9]+]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META11]] {
// NOCPU-NEXT: entry:
// NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
// NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
// NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8
// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4
// NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
// NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
// NOCPU-NEXT: ret void
//
//
// NOCPU: Function Attrs: convergent noinline nounwind optnone
// NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
// NOCPU-NEXT: entry:
// NOCPU-LABEL: define internal void @__test_target_features_kernel_block_invoke(
// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] {
// NOCPU-NEXT: [[ENTRY:.*:]]
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
@@ -389,9 +425,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// NOCPU: Function Attrs: convergent nounwind
// NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
// NOCPU-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !associated [[META21:![0-9]+]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] {
// NOCPU-NEXT: entry:
// NOCPU-LABEL: define internal amdgpu_kernel void @__test_target_features_kernel_block_invoke_kernel(
// NOCPU-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR8]] !associated [[META21:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
// NOCPU-NEXT: [[ENTRY:.*:]]
// NOCPU-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
// NOCPU-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
@@ -412,10 +448,12 @@ kernel void test_target_features_kernel(global int *i) {
//
//
//
//
//
// GFX900: Function Attrs: convergent norecurse nounwind
// GFX900-LABEL: define {{[^@]+}}@callee
// GFX900-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define dso_local void @callee(
// GFX900-SAME: i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
// GFX900-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr
@@ -431,9 +469,33 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// GFX900: Function Attrs: convergent norecurse nounwind
// GFX900-LABEL: define {{[^@]+}}@test
// GFX900-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define dso_local amdgpu_kernel void @test(
// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
// GFX900-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
// GFX900-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14:![0-9]+]]
// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16:![0-9]+]]
// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
// GFX900-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR8:[0-9]+]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent norecurse nounwind
// GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test(
// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
// GFX900-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
@@ -468,14 +530,14 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr
// GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
// GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr
// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14:![0-9]+]]
// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16:![0-9]+]]
// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR7:[0-9]+]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9:[0-9]+]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]]
// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17:![0-9]+]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19:![0-9]+]]
// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]]
@@ -535,12 +597,12 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5
// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]]
// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0
// GFX900-NEXT: store i64 100, ptr [[TMP18]], align 8
// GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]]
// GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0
// GFX900-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8
// GFX900-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1
@@ -559,17 +621,59 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]]
// GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr [[BLOCK21_ASCAST]])
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent norecurse nounwind
// GFX900-LABEL: define dso_local amdgpu_kernel void @test_target_features_kernel(
// GFX900-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26:![0-9]+]]
// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26]]
// GFX900-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR8]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent norecurse nounwind
// GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel(
// GFX900-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
// GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
// GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]]
// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
// GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define internal void @__test_block_invoke(
// GFX900-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR6:[0-9]+]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
@@ -583,9 +687,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5]] !associated [[META22:![0-9]+]] !kernel_arg_addr_space [[META23:![0-9]+]] !kernel_arg_access_qual [[META24:![0-9]+]] !kernel_arg_type [[META25:![0-9]+]] !kernel_arg_base_type [[META25]] !kernel_arg_type_qual [[META26:![0-9]+]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(
// GFX900-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META28:![0-9]+]] !kernel_arg_addr_space [[META29:![0-9]+]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META30:![0-9]+]] !kernel_arg_base_type [[META30]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
// GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
@@ -594,9 +698,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define internal void @__test_block_invoke_2(
// GFX900-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR6]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
@@ -616,9 +720,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !associated [[META27:![0-9]+]] !kernel_arg_addr_space [[META23]] !kernel_arg_access_qual [[META24]] !kernel_arg_type [[META25]] !kernel_arg_base_type [[META25]] !kernel_arg_type_qual [[META26]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(
// GFX900-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META31:![0-9]+]] !kernel_arg_addr_space [[META29]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META30]] !kernel_arg_base_type [[META30]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
// GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
@@ -627,15 +731,15 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR5]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define internal void @__test_block_invoke_3(
// GFX900-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR6]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA28:![0-9]+]]
// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA32:![0-9]+]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
// GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
@@ -648,16 +752,16 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0
// GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA28]]
// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA32]]
// GFX900-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0
// GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !associated [[META29:![0-9]+]] !kernel_arg_addr_space [[META30:![0-9]+]] !kernel_arg_access_qual [[META31:![0-9]+]] !kernel_arg_type [[META32:![0-9]+]] !kernel_arg_base_type [[META32]] !kernel_arg_type_qual [[META33:![0-9]+]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(
// GFX900-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR6]] !associated [[META33:![0-9]+]] !kernel_arg_addr_space [[META34:![0-9]+]] !kernel_arg_access_qual [[META35:![0-9]+]] !kernel_arg_type [[META36:![0-9]+]] !kernel_arg_base_type [[META36]] !kernel_arg_type_qual [[META37:![0-9]+]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
// GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
// GFX900-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr
@@ -666,9 +770,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define internal void @__test_block_invoke_4(
// GFX900-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR6]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
@@ -676,14 +780,14 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8:[0-9]+]]
// GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
// GFX900-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !associated [[META34:![0-9]+]] !kernel_arg_addr_space [[META23]] !kernel_arg_access_qual [[META24]] !kernel_arg_type [[META25]] !kernel_arg_base_type [[META25]] !kernel_arg_type_qual [[META26]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(
// GFX900-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META38:![0-9]+]] !kernel_arg_addr_space [[META29]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META30]] !kernel_arg_base_type [[META30]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
// GFX900-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
@@ -691,40 +795,10 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent norecurse nounwind
// GFX900-LABEL: define {{[^@]+}}@test_target_features_kernel
// GFX900-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META35:![0-9]+]] !kernel_arg_access_qual [[META24]] !kernel_arg_type [[META36:![0-9]+]] !kernel_arg_base_type [[META36]] !kernel_arg_type_qual [[META26]] {
// GFX900-NEXT: entry:
// GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
// GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
// GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA37:![0-9]+]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR7]]
// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR7]]
// GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR7]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR7]]
// GFX900-NEXT: ret void
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define internal void @__test_target_features_kernel_block_invoke(
// GFX900-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR6]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr
// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8
@@ -733,9 +807,9 @@ kernel void test_target_features_kernel(global int *i) {
//
//
// GFX900: Function Attrs: convergent nounwind
// GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
// GFX900-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !associated [[META39:![0-9]+]] !kernel_arg_addr_space [[META23]] !kernel_arg_access_qual [[META24]] !kernel_arg_type [[META25]] !kernel_arg_base_type [[META25]] !kernel_arg_type_qual [[META26]] {
// GFX900-NEXT: entry:
// GFX900-LABEL: define internal amdgpu_kernel void @__test_target_features_kernel_block_invoke_kernel(
// GFX900-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR6]] !associated [[META39:![0-9]+]] !kernel_arg_addr_space [[META29]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META30]] !kernel_arg_base_type [[META30]] !kernel_arg_type_qual [[META25]] {
// GFX900-NEXT: [[ENTRY:.*:]]
// GFX900-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
// GFX900-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
// GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
@@ -746,22 +820,25 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
// NOCPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
// NOCPU: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
// NOCPU: attributes #[[ATTR4]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR6]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" }
// NOCPU: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
// NOCPU: attributes #[[ATTR8]] = { convergent nounwind }
// NOCPU: attributes #[[ATTR3]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR4]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" }
// NOCPU: attributes #[[ATTR5]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" }
// NOCPU: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
// NOCPU: attributes #[[ATTR7]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR8]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// NOCPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
// NOCPU: attributes #[[ATTR10]] = { convergent nounwind }
//.
// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
// GFX900: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
// GFX900: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
// GFX900: attributes #[[ATTR7]] = { nounwind }
// GFX900: attributes #[[ATTR3]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
// GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
// GFX900: attributes #[[ATTR8]] = { convergent nounwind }
// GFX900: attributes #[[ATTR9]] = { nounwind }
//.
// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
@@ -770,20 +847,20 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: [[META4]] = !{!"none", !"none", !"none", !"none"}
// NOCPU: [[META5]] = !{!"char*", !"char", !"long*", !"long"}
// NOCPU: [[META6]] = !{!"", !"", !"", !""}
// NOCPU: [[META7]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle}
// NOCPU: [[META8]] = !{i32 0}
// NOCPU: [[META9]] = !{!"none"}
// NOCPU: [[META10]] = !{!"__block_literal"}
// NOCPU: [[META11]] = !{!""}
// NOCPU: [[META12]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle}
// NOCPU: [[META13]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle}
// NOCPU: [[META14]] = !{i32 0, i32 3}
// NOCPU: [[META15]] = !{!"none", !"none"}
// NOCPU: [[META16]] = !{!"__block_literal", !"void*"}
// NOCPU: [[META17]] = !{!"", !""}
// NOCPU: [[META18]] = !{ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle}
// NOCPU: [[META19]] = !{i32 1}
// NOCPU: [[META20]] = !{!"int*"}
// NOCPU: [[META7]] = !{i32 1}
// NOCPU: [[META8]] = !{!"none"}
// NOCPU: [[META9]] = !{!"int*"}
// NOCPU: [[META10]] = !{!""}
// NOCPU: [[META11]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle}
// NOCPU: [[META12]] = !{i32 0}
// NOCPU: [[META13]] = !{!"__block_literal"}
// NOCPU: [[META14]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle}
// NOCPU: [[META15]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle}
// NOCPU: [[META16]] = !{i32 0, i32 3}
// NOCPU: [[META17]] = !{!"none", !"none"}
// NOCPU: [[META18]] = !{!"__block_literal", !"void*"}
// NOCPU: [[META19]] = !{!"", !""}
// NOCPU: [[META20]] = !{ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle}
// NOCPU: [[META21]] = !{ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle}
//.
// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
@@ -808,23 +885,23 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900: [[TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0}
// GFX900: [[META20]] = !{!"queue_t", [[META5]], i64 0}
// GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[TBAA17]]}
// GFX900: [[META22]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle}
// GFX900: [[META23]] = !{i32 0}
// GFX900: [[META24]] = !{!"none"}
// GFX900: [[META25]] = !{!"__block_literal"}
// GFX900: [[META26]] = !{!""}
// GFX900: [[META27]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle}
// GFX900: [[TBAA28]] = !{[[META9]], [[META9]], i64 0}
// GFX900: [[META29]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle}
// GFX900: [[META30]] = !{i32 0, i32 3}
// GFX900: [[META31]] = !{!"none", !"none"}
// GFX900: [[META32]] = !{!"__block_literal", !"void*"}
// GFX900: [[META33]] = !{!"", !""}
// GFX900: [[META34]] = !{ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle}
// GFX900: [[META35]] = !{i32 1}
// GFX900: [[META36]] = !{!"int*"}
// GFX900: [[TBAA37]] = !{[[META38:![0-9]+]], [[META38]], i64 0}
// GFX900: [[META38]] = !{!"p1 int", [[META9]], i64 0}
// GFX900: [[META22]] = !{i32 1}
// GFX900: [[META23]] = !{!"none"}
// GFX900: [[META24]] = !{!"int*"}
// GFX900: [[META25]] = !{!""}
// GFX900: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0}
// GFX900: [[META27]] = !{!"p1 int", [[META9]], i64 0}
// GFX900: [[META28]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle}
// GFX900: [[META29]] = !{i32 0}
// GFX900: [[META30]] = !{!"__block_literal"}
// GFX900: [[META31]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle}
// GFX900: [[TBAA32]] = !{[[META9]], [[META9]], i64 0}
// GFX900: [[META33]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle}
// GFX900: [[META34]] = !{i32 0, i32 3}
// GFX900: [[META35]] = !{!"none", !"none"}
// GFX900: [[META36]] = !{!"__block_literal", !"void*"}
// GFX900: [[META37]] = !{!"", !""}
// GFX900: [[META38]] = !{ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle}
// GFX900: [[META39]] = !{ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle}
//.
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
@@ -1,43 +1,86 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 4
// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
// CHECK-LABEL: @test_printf_noargs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str) #[[ATTR4:[0-9]+]]
// CHECK-NEXT: ret void
//
__kernel void test_printf_noargs() {
printf("");
}
// CHECK-LABEL: @test_printf_int(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: store i32 [[I:%.*]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8:![0-9]+]]
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]]
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR4]]
// CHECK-NEXT: ret void
//
__kernel void test_printf_int(int i) {
printf("%d", i);
}
// CHECK-LABEL: @test_printf_str_int(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5)
// CHECK-NEXT: store i32 [[I:%.*]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]]
// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR5:[0-9]+]]
// CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false)
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]]
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP2]]) #[[ATTR4]]
|
||||
// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR5]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
__kernel void test_printf_str_int(int i) {
|
||||
char s[] = "foo";
|
||||
printf("%s:%d", s, i);
|
||||
}
|
||||
// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_noargs(
|
||||
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_noargs() #[[ATTR5:[0-9]+]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_noargs(
|
||||
// CHECK-SAME: ) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str) #[[ATTR6:[0-9]+]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_int(
|
||||
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5:![0-9]+]] !kernel_arg_access_qual [[META6:![0-9]+]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8:![0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9:![0-9]+]]
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
|
||||
// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_int(i32 noundef [[TMP0]]) #[[ATTR5]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_int(
|
||||
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
|
||||
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR6]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_str_int(
|
||||
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
|
||||
// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_str_int(i32 noundef [[TMP0]]) #[[ATTR5]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_str_int(
|
||||
// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5)
|
||||
// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
|
||||
// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR7:[0-9]+]]
|
||||
// CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false)
|
||||
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]]
|
||||
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP0]]) #[[ATTR6]]
|
||||
// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR7]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//.
|
||||
// CHECK: [[META4]] = !{}
|
||||
// CHECK: [[META5]] = !{i32 0}
|
||||
// CHECK: [[META6]] = !{!"none"}
|
||||
// CHECK: [[META7]] = !{!"int"}
|
||||
// CHECK: [[META8]] = !{!""}
|
||||
// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
|
||||
// CHECK: [[META10]] = !{!"int", [[META11:![0-9]+]], i64 0}
|
||||
// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0}
|
||||
// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"}
|
||||
//.
|
||||
|
@ -4,14 +4,20 @@
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM

kernel void ker() {};
// CHECK: define{{.*}}@ker() #0
// CHECK: define{{.*}}@ker() #[[ATTR0:[0-9]+]]
// CHECK: call void @__clang_ocl_kern_imp_ker() #[[ATTR2:[0-9]+]]

// CHECK: define{{.*}}@__clang_ocl_kern_imp_ker() #[[ATTR1:[0-9]+]]

void foo() {};
// CHECK: define{{.*}}@foo() #1
// CHECK: define{{.*}}@foo() #[[ATTR1:[0-9]+]]

// CHECK-LABEL: attributes #0
// CHECK: attributes #[[ATTR0]]
// CHECK-UNIFORM: "uniform-work-group-size"="true"
// CHECK-NONUNIFORM: "uniform-work-group-size"="false"

// CHECK-LABEL: attributes #1
// CHECK: attributes #[[ATTR1]]
// CHECK-NOT: uniform-work-group-size

// CHECK: attributes #[[ATTR2]]
// CHECK-NOT: uniform-work-group-size
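
// Reading the checks above together (an illustrative C-level view, not literal
// compiler output; the stub name follows the __clang_ocl_kern_imp_<kernel>
// scheme this patch introduces):
//
//   void __clang_ocl_kern_imp_ker(void) { }   // body stub: attribute set #[[ATTR1]],
//                                             // no "uniform-work-group-size"
//   kernel void ker(void) {                   // entry point: attribute set #[[ATTR0]],
//     __clang_ocl_kern_imp_ker();             // which carries "uniform-work-group-size"
//   }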
@ -1,4 +1,4 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --version 5
|
||||
// RUN: %clang_cc1 -fno-ident -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" -fdenormal-fp-math-f32=preserve-sign -cl-uniform-work-group-size | FileCheck --check-prefix=SPIR32 %s
|
||||
// RUN: %clang_cc1 -fno-ident -ffp-exception-behavior=strict -fexperimental-strict-floating-point -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck --check-prefix=STRICTFP %s
|
||||
|
||||
@ -21,9 +21,26 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
|
||||
});
|
||||
}
|
||||
// SPIR32: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// SPIR32-LABEL: define {{[^@]+}}@device_side_enqueue
|
||||
// SPIR32-SAME: (ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META2:![0-9]+]] !kernel_arg_access_qual [[META3:![0-9]+]] !kernel_arg_type [[META4:![0-9]+]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5:![0-9]+]] {
|
||||
// SPIR32-NEXT: entry:
|
||||
// SPIR32-LABEL: define dso_local spir_kernel void @device_side_enqueue(
|
||||
// SPIR32-SAME: ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META2:![0-9]+]] !kernel_arg_access_qual [[META3:![0-9]+]] !kernel_arg_type [[META4:![0-9]+]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5:![0-9]+]] {
|
||||
// SPIR32-NEXT: [[ENTRY:.*:]]
|
||||
// SPIR32-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
||||
// SPIR32-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
||||
// SPIR32-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4
|
||||
// SPIR32-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 4
|
||||
// SPIR32-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 4
|
||||
// SPIR32-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
|
||||
// SPIR32-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 4
|
||||
// SPIR32-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 4
|
||||
// SPIR32-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ADDR]], align 4
|
||||
// SPIR32-NEXT: call spir_func void @__clang_ocl_kern_imp_device_side_enqueue(ptr addrspace(1) align 4 [[TMP0]], ptr addrspace(1) align 4 [[TMP1]], i32 [[TMP2]]) #[[ATTR5:[0-9]+]]
|
||||
// SPIR32-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// SPIR32: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// SPIR32-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_device_side_enqueue(
|
||||
// SPIR32-SAME: ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META2]] !kernel_arg_access_qual [[META3]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5]] {
|
||||
// SPIR32-NEXT: [[ENTRY:.*:]]
|
||||
// SPIR32-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
||||
// SPIR32-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
||||
// SPIR32-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4
|
||||
@ -60,9 +77,9 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
|
||||
//
|
||||
//
|
||||
// SPIR32: Function Attrs: convergent noinline nounwind optnone
|
||||
// SPIR32-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke
|
||||
// SPIR32-SAME: (ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
// SPIR32-NEXT: entry:
|
||||
// SPIR32-LABEL: define internal spir_func void @__device_side_enqueue_block_invoke(
|
||||
// SPIR32-SAME: ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
// SPIR32-NEXT: [[ENTRY:.*:]]
|
||||
// SPIR32-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr addrspace(4), align 4
|
||||
// SPIR32-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr addrspace(4), align 4
|
||||
// SPIR32-NEXT: store ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4
|
||||
@ -84,17 +101,34 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
|
||||
//
|
||||
//
|
||||
// SPIR32: Function Attrs: convergent nounwind
|
||||
// SPIR32-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke_kernel
|
||||
// SPIR32-SAME: (ptr addrspace(4) [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] {
|
||||
// SPIR32-NEXT: entry:
|
||||
// SPIR32-LABEL: define spir_kernel void @__device_side_enqueue_block_invoke_kernel(
|
||||
// SPIR32-SAME: ptr addrspace(4) [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] {
|
||||
// SPIR32-NEXT: [[ENTRY:.*:]]
|
||||
// SPIR32-NEXT: call spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) [[TMP0]])
|
||||
// SPIR32-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// STRICTFP: Function Attrs: convergent noinline norecurse nounwind optnone strictfp
|
||||
// STRICTFP-LABEL: define {{[^@]+}}@device_side_enqueue
|
||||
// STRICTFP-SAME: (ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META2:![0-9]+]] !kernel_arg_access_qual [[META3:![0-9]+]] !kernel_arg_type [[META4:![0-9]+]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5:![0-9]+]] {
|
||||
// STRICTFP-NEXT: entry:
|
||||
// STRICTFP-LABEL: define dso_local spir_kernel void @device_side_enqueue(
|
||||
// STRICTFP-SAME: ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META2:![0-9]+]] !kernel_arg_access_qual [[META3:![0-9]+]] !kernel_arg_type [[META4:![0-9]+]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5:![0-9]+]] {
|
||||
// STRICTFP-NEXT: [[ENTRY:.*:]]
|
||||
// STRICTFP-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
||||
// STRICTFP-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
||||
// STRICTFP-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4
|
||||
// STRICTFP-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 4
|
||||
// STRICTFP-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 4
|
||||
// STRICTFP-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
|
||||
// STRICTFP-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 4
|
||||
// STRICTFP-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 4
|
||||
// STRICTFP-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ADDR]], align 4
|
||||
// STRICTFP-NEXT: call spir_func void @__clang_ocl_kern_imp_device_side_enqueue(ptr addrspace(1) align 4 [[TMP0]], ptr addrspace(1) align 4 [[TMP1]], i32 [[TMP2]]) #[[ATTR5:[0-9]+]]
|
||||
// STRICTFP-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// STRICTFP: Function Attrs: convergent noinline norecurse nounwind optnone strictfp
|
||||
// STRICTFP-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_device_side_enqueue(
|
||||
// STRICTFP-SAME: ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META2]] !kernel_arg_access_qual [[META3]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5]] {
|
||||
// STRICTFP-NEXT: [[ENTRY:.*:]]
|
||||
// STRICTFP-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
||||
// STRICTFP-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 4
|
||||
// STRICTFP-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4
|
||||
@ -109,7 +143,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
|
||||
// STRICTFP-NEXT: store i32 0, ptr [[FLAGS]], align 4
|
||||
// STRICTFP-NEXT: [[TMP0:%.*]] = load target("spirv.Queue"), ptr [[DEFAULT_QUEUE]], align 4
|
||||
// STRICTFP-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS]], align 4
|
||||
// STRICTFP-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP]], ptr align 4 [[NDRANGE]], i32 4, i1 false) #[[ATTR5:[0-9]+]]
|
||||
// STRICTFP-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP]], ptr align 4 [[NDRANGE]], i32 4, i1 false) #[[ATTR6:[0-9]+]]
|
||||
// STRICTFP-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 0
|
||||
// STRICTFP-NEXT: store i32 24, ptr [[BLOCK_SIZE]], align 4
|
||||
// STRICTFP-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 1
|
||||
@ -126,14 +160,14 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
|
||||
// STRICTFP-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 4
|
||||
// STRICTFP-NEXT: store ptr addrspace(1) [[TMP4]], ptr [[BLOCK_CAPTURED2]], align 4
|
||||
// STRICTFP-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[BLOCK]] to ptr addrspace(4)
|
||||
// STRICTFP-NEXT: [[TMP6:%.*]] = call spir_func i32 @__enqueue_kernel_basic(target("spirv.Queue") [[TMP0]], i32 [[TMP1]], ptr [[TMP]], ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_kernel to ptr addrspace(4)), ptr addrspace(4) [[TMP5]]) #[[ATTR5]]
|
||||
// STRICTFP-NEXT: [[TMP6:%.*]] = call spir_func i32 @__enqueue_kernel_basic(target("spirv.Queue") [[TMP0]], i32 [[TMP1]], ptr [[TMP]], ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_kernel to ptr addrspace(4)), ptr addrspace(4) [[TMP5]]) #[[ATTR6]]
|
||||
// STRICTFP-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// STRICTFP: Function Attrs: convergent noinline nounwind optnone strictfp
|
||||
// STRICTFP-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke
|
||||
// STRICTFP-SAME: (ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
// STRICTFP-NEXT: entry:
|
||||
// STRICTFP-LABEL: define internal spir_func void @__device_side_enqueue_block_invoke(
|
||||
// STRICTFP-SAME: ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
// STRICTFP-NEXT: [[ENTRY:.*:]]
|
||||
// STRICTFP-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr addrspace(4), align 4
|
||||
// STRICTFP-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr addrspace(4), align 4
|
||||
// STRICTFP-NEXT: store ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4
|
||||
@ -144,7 +178,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
|
||||
// STRICTFP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[BLOCK_CAPTURE_ADDR1]], align 4
|
||||
// STRICTFP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP0]], i32 [[TMP1]]
|
||||
// STRICTFP-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX]], align 4
|
||||
// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR5]]
|
||||
// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR6]]
|
||||
// STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3
|
||||
// STRICTFP-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[BLOCK_CAPTURE_ADDR2]], align 4
|
||||
// STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
|
||||
@ -155,10 +189,10 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
|
||||
//
|
||||
//
|
||||
// STRICTFP: Function Attrs: convergent nounwind
|
||||
// STRICTFP-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke_kernel
|
||||
// STRICTFP-SAME: (ptr addrspace(4) [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] {
|
||||
// STRICTFP-NEXT: entry:
|
||||
// STRICTFP-NEXT: call spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) [[TMP0]]) #[[ATTR5]]
|
||||
// STRICTFP-LABEL: define spir_kernel void @__device_side_enqueue_block_invoke_kernel(
|
||||
// STRICTFP-SAME: ptr addrspace(4) [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] {
|
||||
// STRICTFP-NEXT: [[ENTRY:.*:]]
|
||||
// STRICTFP-NEXT: call spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) [[TMP0]]) #[[ATTR6]]
|
||||
// STRICTFP-NEXT: ret void
|
||||
//
|
||||
//.
|
||||
@ -167,13 +201,15 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
|
||||
// SPIR32: attributes #[[ATTR2]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// SPIR32: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
// SPIR32: attributes #[[ATTR4]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
// SPIR32: attributes #[[ATTR5]] = { convergent nounwind "uniform-work-group-size"="true" }
|
||||
//.
|
||||
// STRICTFP: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone strictfp "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
|
||||
// STRICTFP: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
||||
// STRICTFP: attributes #[[ATTR2]] = { convergent noinline nounwind optnone strictfp "stack-protector-buffer-size"="8" }
|
||||
// STRICTFP: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) }
|
||||
// STRICTFP: attributes #[[ATTR4]] = { convergent nounwind "stack-protector-buffer-size"="8" }
|
||||
// STRICTFP: attributes #[[ATTR5]] = { strictfp }
|
||||
// STRICTFP: attributes #[[ATTR5]] = { convergent nounwind strictfp "uniform-work-group-size"="false" }
|
||||
// STRICTFP: attributes #[[ATTR6]] = { strictfp }
|
||||
//.
|
||||
// SPIR32: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
|
||||
// SPIR32: [[META1:![0-9]+]] = !{i32 2, i32 0}
|
||||
|
@ -1,12 +1,12 @@
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefix=CHECK-LIFETIMES
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86
|
||||
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLEX86
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
|
||||
|
||||
@ -39,7 +39,13 @@ void callee(int id, __global int *out) {
  out[id] = id;
}

// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
// TRIPLESPIR: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
// TRIPLESPIR: call spir_func void @__clang_ocl_kern_imp_device_side_enqueue({{.*}})

// TRIPLEX86: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
// TRIPLEX86: call void @__clang_ocl_kern_imp_device_side_enqueue({{.*}})

// COMMON-LABEL: define{{.*}} void @__clang_ocl_kern_imp_device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
kernel void device_side_enqueue(global int *a, global int *b, int i) {
// SPIR: %default_queue = alloca target("spirv.Queue")
// X86: %default_queue = alloca ptr

@ -9,8 +9,15 @@
typedef struct {int a;} ndrange_t;

kernel void test(int i) {

// AMDGPU-LABEL: define {{.*}} amdgpu_kernel void @test
// AMDGPU-LABEL: call void @__clang_ocl_kern_imp_test(i32 noundef %0)

// SPIR-LABEL: define {{.*}} spir_kernel void @test
// SPIR-LABEL: call spir_func void @__clang_ocl_kern_imp_test(i32 noundef %0)

// AMDGPU-LABEL: define {{.*}} void @__clang_ocl_kern_imp_test
// SPIR-LABEL: define {{.*}} spir_func void @__clang_ocl_kern_imp_test

// COMMON-LABEL: entry:
// AMDGPU: %block_sizes = alloca [1 x i64]
@ -36,6 +43,6 @@ kernel void test(int i) {
// Check that the temporary is scoped to the `if`

// CHECK-DEBUG: ![[TESTFILE:[0-9]+]] = !DIFile(filename: "<stdin>"
// CHECK-DEBUG: ![[TESTSCOPE:[0-9]+]] = distinct !DISubprogram(name: "test", {{.*}} file: ![[TESTFILE]]
// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 26)
// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 27, scope: ![[IFSCOPE]])
// CHECK-DEBUG: ![[TESTSCOPE:[0-9]+]] = distinct !DISubprogram(name: "test", linkageName: "__clang_ocl_kern_imp_test", {{.*}} file: ![[TESTFILE]]
// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 33)
// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 34, scope: ![[IFSCOPE]])

@ -1,4 +1,4 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s

// Check there's no assertion when passing a pointer to an address space
@ -8,19 +8,6 @@ extern void private_ptr(__private int *);
|
||||
extern void local_ptr(__local int *);
|
||||
extern void generic_ptr(__generic int *);
|
||||
|
||||
// CHECK-LABEL: define dso_local void @use_of_private_var(
|
||||
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
|
||||
// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR4:[0-9]+]]
|
||||
// CHECK-NEXT: store i32 0, ptr [[X_ASCAST]], align 4, !tbaa [[TBAA4:![0-9]+]]
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[X_ASCAST]] to ptr addrspace(5)
|
||||
// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[TMP0]]) #[[ATTR5:[0-9]+]]
|
||||
// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ASCAST]]) #[[ATTR5]]
|
||||
// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR4]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void use_of_private_var()
|
||||
{
|
||||
int x = 0 ;
|
||||
@ -28,30 +15,12 @@ void use_of_private_var()
|
||||
generic_ptr(&x);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local void @addr_of_arg(
|
||||
// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
|
||||
// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4, !tbaa [[TBAA4]]
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[X_ADDR_ASCAST]] to ptr addrspace(5)
|
||||
// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[TMP0]]) #[[ATTR5]]
|
||||
// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ADDR_ASCAST]]) #[[ATTR5]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void addr_of_arg(int x)
|
||||
{
|
||||
private_ptr(&x);
|
||||
generic_ptr(&x);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local amdgpu_kernel void @use_of_local_var(
|
||||
// CHECK-SAME: ) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: call void @local_ptr(ptr addrspace(3) noundef @use_of_local_var.x) #[[ATTR5]]
|
||||
// CHECK-NEXT: call void @generic_ptr(ptr noundef addrspacecast (ptr addrspace(3) @use_of_local_var.x to ptr)) #[[ATTR5]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
__kernel void use_of_local_var()
|
||||
{
|
||||
__local int x;
|
||||
@ -59,6 +28,46 @@ __kernel void use_of_local_var()
|
||||
generic_ptr(&x);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define dso_local void @use_of_private_var(
|
||||
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
|
||||
// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR5:[0-9]+]]
|
||||
// CHECK-NEXT: store i32 0, ptr [[X_ASCAST]], align 4, !tbaa [[TBAA4:![0-9]+]]
|
||||
// CHECK-NEXT: [[X_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[X_ASCAST]] to ptr addrspace(5)
|
||||
// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X_ASCAST_ASCAST]]) #[[ATTR6:[0-9]+]]
|
||||
// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ASCAST]]) #[[ATTR6]]
|
||||
// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR5]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define dso_local void @addr_of_arg(
|
||||
// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
|
||||
// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4, !tbaa [[TBAA4]]
|
||||
// CHECK-NEXT: [[X_ADDR_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR_ASCAST]] to ptr addrspace(5)
|
||||
// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X_ADDR_ASCAST_ASCAST]]) #[[ATTR6]]
|
||||
// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ADDR_ASCAST]]) #[[ATTR6]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define dso_local amdgpu_kernel void @use_of_local_var(
|
||||
// CHECK-SAME: ) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: call void @__clang_ocl_kern_imp_use_of_local_var() #[[ATTR6]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_use_of_local_var(
|
||||
// CHECK-SAME: ) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] {
|
||||
// CHECK-NEXT: [[ENTRY:.*:]]
|
||||
// CHECK-NEXT: call void @local_ptr(ptr addrspace(3) noundef @use_of_local_var.x) #[[ATTR6]]
|
||||
// CHECK-NEXT: call void @generic_ptr(ptr noundef addrspacecast (ptr addrspace(3) @use_of_local_var.x to ptr)) #[[ATTR6]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
//.
|
||||
// CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
|
||||
// CHECK: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0}
|
||||
|
@ -21,6 +21,7 @@ kernel void foo(global int * globalintp, global int * restrict globalintrestrict
|
||||
*globalintrestrictp = constint + volatileint;
|
||||
}
|
||||
// CHECK: define{{.*}} spir_kernel void @foo{{[^!]+}}
|
||||
// ARGINFO: define{{.*}} spir_kernel void @foo{{[^!]+}}
|
||||
// CHECK: !kernel_arg_addr_space ![[MD11:[0-9]+]]
|
||||
// CHECK: !kernel_arg_access_qual ![[MD12:[0-9]+]]
|
||||
// CHECK: !kernel_arg_type ![[MD13:[0-9]+]]
|
||||
@ -32,6 +33,7 @@ kernel void foo(global int * globalintp, global int * restrict globalintrestrict
|
||||
kernel void foo2(read_only image1d_t img1, image2d_t img2, write_only image2d_array_t img3, read_write image1d_t img4) {
|
||||
}
|
||||
// CHECK: define{{.*}} spir_kernel void @foo2{{[^!]+}}
|
||||
// ARGINFO: define{{.*}} spir_kernel void @foo2{{[^!]+}}
|
||||
// CHECK: !kernel_arg_addr_space ![[MD21:[0-9]+]]
|
||||
// CHECK: !kernel_arg_access_qual ![[MD22:[0-9]+]]
|
||||
// CHECK: !kernel_arg_type ![[MD23:[0-9]+]]
|
||||
@ -43,6 +45,7 @@ kernel void foo2(read_only image1d_t img1, image2d_t img2, write_only image2d_ar
|
||||
kernel void foo3(__global half * X) {
|
||||
}
|
||||
// CHECK: define{{.*}} spir_kernel void @foo3{{[^!]+}}
|
||||
// ARGINFO: define{{.*}} spir_kernel void @foo3{{[^!]+}}
|
||||
// CHECK: !kernel_arg_addr_space ![[MD31:[0-9]+]]
|
||||
// CHECK: !kernel_arg_access_qual ![[MD32:[0-9]+]]
|
||||
// CHECK: !kernel_arg_type ![[MD33:[0-9]+]]
|
||||
@ -55,6 +58,7 @@ typedef unsigned int myunsignedint;
|
||||
kernel void foo4(__global unsigned int * X, __global myunsignedint * Y) {
|
||||
}
|
||||
// CHECK: define{{.*}} spir_kernel void @foo4{{[^!]+}}
|
||||
// ARGINFO: define{{.*}} spir_kernel void @foo4{{[^!]+}}
|
||||
// CHECK: !kernel_arg_addr_space ![[MD41:[0-9]+]]
|
||||
// CHECK: !kernel_arg_access_qual ![[MD42:[0-9]+]]
|
||||
// CHECK: !kernel_arg_type ![[MD43:[0-9]+]]
|
||||
@ -67,6 +71,7 @@ typedef image1d_t myImage;
|
||||
kernel void foo5(myImage img1, write_only image1d_t img2) {
|
||||
}
|
||||
// CHECK: define{{.*}} spir_kernel void @foo5{{[^!]+}}
|
||||
// ARGINFO: define{{.*}} spir_kernel void @foo5{{[^!]+}}
|
||||
// CHECK: !kernel_arg_addr_space ![[MD41:[0-9]+]]
|
||||
// CHECK: !kernel_arg_access_qual ![[MD51:[0-9]+]]
|
||||
// CHECK: !kernel_arg_type ![[MD52:[0-9]+]]
|
||||
@ -77,6 +82,8 @@ kernel void foo5(myImage img1, write_only image1d_t img2) {
|
||||
|
||||
typedef char char16 __attribute__((ext_vector_type(16)));
|
||||
__kernel void foo6(__global char16 arg[]) {}
|
||||
// CHECK: define{{.*}} spir_kernel void @foo6{{[^!]+}}
|
||||
// ARGINFO: define{{.*}} spir_kernel void @foo6{{[^!]+}}
|
||||
// CHECK: !kernel_arg_type ![[MD61:[0-9]+]]
|
||||
// ARGINFO: !kernel_arg_name ![[MD62:[0-9]+]]
|
||||
|
||||
@ -87,6 +94,7 @@ kernel void foo7(ROImage ro, WOImage wo, RWImage rw) {
|
||||
}
|
||||
|
||||
// CHECK: define{{.*}} spir_kernel void @foo7{{[^!]+}}
|
||||
// ARGINFO: define{{.*}} spir_kernel void @foo7{{[^!]+}}
|
||||
// CHECK: !kernel_arg_addr_space ![[MD71:[0-9]+]]
|
||||
// CHECK: !kernel_arg_access_qual ![[MD72:[0-9]+]]
|
||||
// CHECK: !kernel_arg_type ![[MD73:[0-9]+]]
|
||||
@ -99,6 +107,7 @@ typedef unsigned char uchar;
|
||||
typedef uchar uchar2 __attribute__((ext_vector_type(2)));
|
||||
kernel void foo8(pipe int p1, pipe uchar p2, pipe uchar2 p3, const pipe uchar p4, write_only pipe uchar p5) {}
|
||||
// CHECK: define{{.*}} spir_kernel void @foo8{{[^!]+}}
|
||||
// ARGINFO: define{{.*}} spir_kernel void @foo8{{[^!]+}}
|
||||
// CHECK: !kernel_arg_addr_space ![[PIPE_AS_QUAL:[0-9]+]]
|
||||
// CHECK: !kernel_arg_access_qual ![[PIPE_ACCESS_QUAL:[0-9]+]]
|
||||
// CHECK: !kernel_arg_type ![[PIPE_TY:[0-9]+]]
|
||||
@ -109,6 +118,7 @@ kernel void foo8(pipe int p1, pipe uchar p2, pipe uchar2 p3, const pipe uchar p4
|
||||
|
||||
kernel void foo9(signed char sc1, global const signed char* sc2) {}
|
||||
// CHECK: define{{.*}} spir_kernel void @foo9{{[^!]+}}
|
||||
// ARGINFO: define{{.*}} spir_kernel void @foo9{{[^!]+}}
|
||||
// CHECK: !kernel_arg_addr_space ![[SCHAR_AS_QUAL:[0-9]+]]
|
||||
// CHECK: !kernel_arg_access_qual ![[MD42]]
|
||||
// CHECK: !kernel_arg_type ![[SCHAR_TY:[0-9]+]]
|
||||
clang/test/CodeGenOpenCL/opencl-kernel-call.cl (new file, 959 lines)
@ -0,0 +1,959 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --include-generated-funcs --version 4
// RUN: %clang_cc1 -O0 -triple i686-pc-darwin -emit-llvm -o - %s | FileCheck -check-prefix=X86 %s
// RUN: %clang_cc1 -O0 -triple amdgcn -emit-llvm -o - %s | FileCheck -check-prefix=AMDGCN %s

#pragma OPENCL EXTENSION __cl_clang_function_pointers : enable

typedef int int2 __attribute__((ext_vector_type(2)));

typedef struct {
  int cells[9];
} Mat3X3;

typedef struct {
  int cells[16];
} Mat4X4;

typedef struct {
  int cells[1024];
} Mat32X32;

typedef struct {
  int cells[4096];
} Mat64X64;

struct StructOneMember {
  int2 x;
};

struct StructTwoMember {
  int2 x;
  int2 y;
};

struct LargeStructOneMember {
  int2 x[100];
};

struct LargeStructTwoMember {
  int2 x[40];
  int2 y[20];
};

Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
  Mat4X4 out;
  return out;
}

Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
  Mat64X64 out;
  return out;
}

void FuncOneMember(struct StructOneMember u) {
  u.x = (int2)(0, 0);
}

void FuncOneLargeMember(struct LargeStructOneMember u) {
  u.x[0] = (int2)(0, 0);
}

void FuncTwoMember(struct StructTwoMember u) {
  u.y = (int2)(0, 0);
}

void FuncLargeTwoMember(struct LargeStructTwoMember u) {
  u.y[0] = (int2)(0, 0);
}

__attribute__((noinline)) kernel void callee_kern(global int *A){
  *A = 1;
}

kernel void callee_kern_Mat3X3(global Mat3X3 *in, global Mat4X4 *out) {
  out[0] = foo(in[1]);
}

kernel void callee_kern_Mat32X32(global Mat32X32 *in, global Mat64X64 *out) {
  out[0] = foo_large(in[1]);
}

kernel void KernelOneMember(struct StructOneMember u) {
  FuncOneMember(u);
}

kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
  FuncOneLargeMember(u);
}

kernel void KernelTwoMember(struct StructTwoMember u) {
  FuncTwoMember(u);
}

kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
  FuncLargeTwoMember(u);
}

__attribute__((noinline)) kernel void ext_callee_kern(global int *A);

kernel void ext_callee_kern_Mat3X3(global Mat3X3 *in, global Mat4X4 *out);

kernel void ext_callee_kern_Mat32X32(global Mat32X32 *in, global Mat64X64 *out);

kernel void ext_KernelOneMember(struct StructOneMember u);

kernel void ext_KernelLargeOneMember(struct LargeStructOneMember u);

kernel void ext_KernelTwoMember(struct StructTwoMember u);

kernel void ext_KernelLargeTwoMember(struct LargeStructTwoMember u);

kernel void caller_kern(global int* A, global Mat3X3 *mat3X3, global Mat4X4 *mat4X4, global Mat32X32 *mat32X32, global Mat64X64 *mat64X64){
  callee_kern(A);
  ext_callee_kern(A);

  callee_kern_Mat3X3(mat3X3, mat4X4);
  callee_kern_Mat32X32(mat32X32, mat64X64);
  ext_callee_kern_Mat3X3(mat3X3, mat4X4);
  ext_callee_kern_Mat32X32(mat32X32, mat64X64);
}

kernel void caller_kern2(struct StructOneMember structOneMem, global struct StructOneMember* global_structOneMem, struct StructTwoMember structTwoMem){
  KernelOneMember(structOneMem);
  ext_KernelOneMember(structOneMem);
  KernelTwoMember(structTwoMem);
  ext_KernelTwoMember(structTwoMem);
}

kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct LargeStructTwoMember largeStructTwoMem){
  KernelLargeOneMember(largeStructOneMem);
  KernelLargeTwoMember(largeStructTwoMem);
  ext_KernelLargeOneMember(largeStructOneMem);
  ext_KernelLargeTwoMember(largeStructTwoMem);
}
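
// The autogenerated X86/AMDGCN checks below all follow one pattern, sketched here
// in illustrative OpenCL C (the __clang_ocl_kern_imp_* functions are emitted by
// CodeGen under that mangling scheme; they are not written by hand):
//
//   void __clang_ocl_kern_imp_callee_kern(global int *A);   // stub holding the body
//
//   kernel void callee_kern(global int *A) {                // device entry point is a
//     __clang_ocl_kern_imp_callee_kern(A);                  // thin forwarding wrapper
//   }
//
//   kernel void caller_kern(global int *A /* ... */) {
//     __clang_ocl_kern_imp_callee_kern(A);   // a kernel-to-kernel call such as
//   }                                        // callee_kern(A) is lowered to a direct
//                                            // call to the callee's stub, not to the
//                                            // callee's entry point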
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define void @foo(
|
||||
// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define void @foo_large(
|
||||
// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define void @FuncOneMember(
|
||||
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
|
||||
// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
|
||||
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false)
|
||||
// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
|
||||
// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
|
||||
// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
|
||||
// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[X]], align 8
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define void @FuncOneLargeMember(
|
||||
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
|
||||
// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
|
||||
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false)
|
||||
// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
|
||||
// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
|
||||
// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
|
||||
// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0
|
||||
// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define void @FuncTwoMember(
|
||||
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
|
||||
// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
|
||||
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false)
|
||||
// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
|
||||
// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
|
||||
// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
|
||||
// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[Y]], align 8
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define void @FuncLargeTwoMember(
|
||||
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8
|
||||
// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
|
||||
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false)
|
||||
// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
|
||||
// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
|
||||
// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
|
||||
// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0
|
||||
// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define spir_kernel void @callee_kern(
|
||||
// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
|
||||
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
|
||||
// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR4:[0-9]+]]
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern(
|
||||
// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
|
||||
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
|
||||
// X86-NEXT: store i32 1, ptr [[TMP0]], align 4
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define spir_kernel void @callee_kern_Mat3X3(
|
||||
// X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4
|
||||
// X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4
|
||||
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
|
||||
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
|
||||
// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR4]]
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern_Mat3X3(
|
||||
// X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4
|
||||
// X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4
|
||||
// X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4
|
||||
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
|
||||
// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr [[TMP0]], i32 0
|
||||
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
|
||||
// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr [[TMP1]], i32 1
|
||||
// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[ARRAYIDX1]]) #[[ATTR4]]
|
||||
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false)
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define spir_kernel void @callee_kern_Mat32X32(
|
||||
// X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4
|
||||
// X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4
|
||||
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
|
||||
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
|
||||
// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR4]]
|
||||
// X86-NEXT: ret void
|
||||
//
|
||||
//
|
||||
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
|
||||
// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern_Mat32X32(
|
||||
// X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] {
|
||||
// X86-NEXT: entry:
|
||||
// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
|
||||
// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4
|
||||
// X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4
|
||||
// X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4
|
||||
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
|
||||
// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr [[TMP0]], i32 0
|
||||
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
|
||||
// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32:%.*]], ptr [[TMP1]], i32 1
|
||||
// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[ARRAYIDX1]]) #[[ATTR4]]
|
||||
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false)
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define spir_kernel void @KernelOneMember(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define void @__clang_ocl_kern_imp_KernelOneMember(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false)
// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define spir_kernel void @KernelLargeOneMember(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define void @__clang_ocl_kern_imp_KernelLargeOneMember(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false)
// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define spir_kernel void @KernelTwoMember(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define void @__clang_ocl_kern_imp_KernelTwoMember(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false)
// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define spir_kernel void @KernelLargeTwoMember(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define void @__clang_ocl_kern_imp_KernelLargeTwoMember(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false)
// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define spir_kernel void @caller_kern(
// X86-SAME: ptr noundef align 4 [[A:%.*]], ptr noundef align 4 [[MAT3X3:%.*]], ptr noundef align 4 [[MAT4X4:%.*]], ptr noundef align 4 [[MAT32X32:%.*]], ptr noundef align 4 [[MAT64X64:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META18:![0-9]+]] !kernel_arg_access_qual [[META19:![0-9]+]] !kernel_arg_type [[META20:![0-9]+]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21:![0-9]+]] {
// X86-NEXT: entry:
// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// X86-NEXT: store ptr [[MAT3X3]], ptr [[MAT3X3_ADDR]], align 4
// X86-NEXT: store ptr [[MAT4X4]], ptr [[MAT4X4_ADDR]], align 4
// X86-NEXT: store ptr [[MAT32X32]], ptr [[MAT32X32_ADDR]], align 4
// X86-NEXT: store ptr [[MAT64X64]], ptr [[MAT64X64_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
// X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
// X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]], ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]], ptr noundef align 4 [[TMP4]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern(
// X86-SAME: ptr noundef align 4 [[A:%.*]], ptr noundef align 4 [[MAT3X3:%.*]], ptr noundef align 4 [[MAT4X4:%.*]], ptr noundef align 4 [[MAT32X32:%.*]], ptr noundef align 4 [[MAT64X64:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META18]] !kernel_arg_access_qual [[META19]] !kernel_arg_type [[META20]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21]] {
// X86-NEXT: entry:
// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// X86-NEXT: store ptr [[MAT3X3]], ptr [[MAT3X3_ADDR]], align 4
// X86-NEXT: store ptr [[MAT4X4]], ptr [[MAT4X4_ADDR]], align 4
// X86-NEXT: store ptr [[MAT32X32]], ptr [[MAT32X32_ADDR]], align 4
// X86-NEXT: store ptr [[MAT64X64]], ptr [[MAT64X64_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR4]]
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr noundef align 4 [[TMP1]]) #[[ATTR4]]
// X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
// X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]]) #[[ATTR4]]
// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
// X86-NEXT: [[TMP5:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP4]], ptr noundef align 4 [[TMP5]]) #[[ATTR4]]
// X86-NEXT: [[TMP6:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
// X86-NEXT: [[TMP7:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr noundef align 4 [[TMP6]], ptr noundef align 4 [[TMP7]]) #[[ATTR4]]
// X86-NEXT: [[TMP8:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
// X86-NEXT: [[TMP9:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr noundef align 4 [[TMP8]], ptr noundef align 4 [[TMP9]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define spir_kernel void @caller_kern2(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[STRUCTONEMEM:%.*]], ptr noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[STRUCTTWOMEM:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] {
// X86-NEXT: entry:
// X86-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4
// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]], ptr noundef align 8 [[TMP0]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern2(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]], ptr noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP1:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
// X86-NEXT: entry:
// X86-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
// X86-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
// X86-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTONEMEM]], ptr align 4 [[TMP0]], i32 8, i1 false)
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTTWOMEM]], ptr align 4 [[TMP1]], i32 16, i1 false)
// X86-NEXT: store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4
// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR4]]
// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR4]]
// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]]
// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define spir_kernel void @caller_kern3(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[LARGESTRUCTONEMEM:%.*]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[LARGESTRUCTTWOMEM:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] {
// X86-NEXT: entry:
// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// X86: Function Attrs: convergent noinline norecurse nounwind optnone
// X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern3(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP1:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META26]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] {
// X86-NEXT: entry:
// X86-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
// X86-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[LARGESTRUCTONEMEM]], ptr align 4 [[TMP0]], i32 800, i1 false)
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[LARGESTRUCTTWOMEM]], ptr align 4 [[TMP1]], i32 480, i1 false)
// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]]) #[[ATTR4]]
// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR4]]
// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]]) #[[ATTR4]]
// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR4]]
// X86-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo(
// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @foo_large(
// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @FuncOneMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember(
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @FuncTwoMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember(
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR5:[0-9]+]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8
// AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_Mat3X3(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern_Mat3X3(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR5]]
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_Mat32X32(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern_Mat32X32(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR5]]
// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeOneMember(
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelTwoMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP2]], align 8
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeTwoMember(
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]], ptr addrspace(1) noundef align 4 [[MAT3X3:%.*]], ptr addrspace(1) noundef align 4 [[MAT4X4:%.*]], ptr addrspace(1) noundef align 4 [[MAT32X32:%.*]], ptr addrspace(1) noundef align 4 [[MAT64X64:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META18:![0-9]+]] !kernel_arg_access_qual [[META19:![0-9]+]] !kernel_arg_type [[META20:![0-9]+]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21:![0-9]+]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[MAT3X3]], ptr addrspace(5) [[MAT3X3_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[MAT4X4]], ptr addrspace(5) [[MAT4X4_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[MAT32X32]], ptr addrspace(5) [[MAT32X32_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[MAT64X64]], ptr addrspace(5) [[MAT64X64_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8
// AMDGCN-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8
// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]], ptr addrspace(1) noundef align 4 [[TMP2]], ptr addrspace(1) noundef align 4 [[TMP3]], ptr addrspace(1) noundef align 4 [[TMP4]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_caller_kern(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]], ptr addrspace(1) noundef align 4 [[MAT3X3:%.*]], ptr addrspace(1) noundef align 4 [[MAT4X4:%.*]], ptr addrspace(1) noundef align 4 [[MAT32X32:%.*]], ptr addrspace(1) noundef align 4 [[MAT64X64:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META18]] !kernel_arg_access_qual [[META19]] !kernel_arg_type [[META20]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[MAT3X3]], ptr addrspace(5) [[MAT3X3_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[MAT4X4]], ptr addrspace(5) [[MAT4X4_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[MAT32X32]], ptr addrspace(5) [[MAT32X32_ADDR]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[MAT64X64]], ptr addrspace(5) [[MAT64X64_ADDR]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR5]]
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR5]]
// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8
// AMDGCN-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP2]], ptr addrspace(1) noundef align 4 [[TMP3]]) #[[ATTR5]]
// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8
// AMDGCN-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP4]], ptr addrspace(1) noundef align 4 [[TMP5]]) #[[ATTR5]]
// AMDGCN-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8
// AMDGCN-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP6]], ptr addrspace(1) noundef align 4 [[TMP7]]) #[[ATTR5]]
// AMDGCN-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8
// AMDGCN-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP8]], ptr addrspace(1) noundef align 4 [[TMP9]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern2(
// AMDGCN-SAME: <2 x i32> [[STRUCTONEMEM_COERCE:%.*]], ptr addrspace(1) noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], [[STRUCT_STRUCTTWOMEMBER:%.*]] [[STRUCTTWOMEM_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[STRUCTONEMEM_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[STRUCTTWOMEM_COERCE]], 0
// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[STRUCTTWOMEM_COERCE]], 1
// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[GLOBAL_STRUCTONEMEM]], ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8
// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8
// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
// AMDGCN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP8]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(<2 x i32> [[TMP5]], ptr addrspace(1) noundef align 8 [[TMP4]], <2 x i32> [[TMP7]], <2 x i32> [[TMP9]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_caller_kern2(
// AMDGCN-SAME: <2 x i32> [[STRUCTONEMEM_COERCE:%.*]], ptr addrspace(1) noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], <2 x i32> [[STRUCTTWOMEM_COERCE0:%.*]], <2 x i32> [[STRUCTTWOMEM_COERCE1:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[STRUCTONEMEM_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[STRUCTTWOMEM_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1
// AMDGCN-NEXT: store <2 x i32> [[STRUCTTWOMEM_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
// AMDGCN-NEXT: store ptr addrspace(1) [[GLOBAL_STRUCTONEMEM]], ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8
// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP2]]) #[[ATTR5]]
// AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE2]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(<2 x i32> [[TMP3]]) #[[ATTR5]]
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR5]]
// AMDGCN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP8]], align 8
// AMDGCN-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP11:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP10]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(<2 x i32> [[TMP9]], <2 x i32> [[TMP11]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern3(
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[LARGESTRUCTONEMEM_COERCE:%.*]], [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[LARGESTRUCTTWOMEM_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[LARGESTRUCTONEMEM]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[LARGESTRUCTONEMEM_COERCE]], 0
// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[LARGESTRUCTTWOMEM]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[LARGESTRUCTTWOMEM_COERCE]], 0
// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[LARGESTRUCTTWOMEM]], i32 0, i32 1
// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[LARGESTRUCTTWOMEM_COERCE]], 1
// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP5]], ptr addrspace(5) [[TMP4]], align 8
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_caller_kern3(
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]], ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP1:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META26]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[LARGESTRUCTTWOMEM]], ptr addrspace(5) align 8 [[TMP1]], i64 480, i1 false)
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]]) #[[ATTR5]]
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]]
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]]) #[[ATTR5]]
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//.
// X86: [[META4]] = !{i32 1}
// X86: [[META5]] = !{!"none"}
// X86: [[META6]] = !{!"int*"}
// X86: [[META7]] = !{!""}
// X86: [[META8]] = !{i32 1, i32 1}
// X86: [[META9]] = !{!"none", !"none"}
// X86: [[META10]] = !{!"Mat3X3*", !"Mat4X4*"}
// X86: [[META11]] = !{!"", !""}
// X86: [[META12]] = !{!"Mat32X32*", !"Mat64X64*"}
// X86: [[META13]] = !{i32 0}
// X86: [[META14]] = !{!"struct StructOneMember"}
// X86: [[META15]] = !{!"struct LargeStructOneMember"}
// X86: [[META16]] = !{!"struct StructTwoMember"}
// X86: [[META17]] = !{!"struct LargeStructTwoMember"}
// X86: [[META18]] = !{i32 1, i32 1, i32 1, i32 1, i32 1}
// X86: [[META19]] = !{!"none", !"none", !"none", !"none", !"none"}
// X86: [[META20]] = !{!"int*", !"Mat3X3*", !"Mat4X4*", !"Mat32X32*", !"Mat64X64*"}
// X86: [[META21]] = !{!"", !"", !"", !"", !""}
// X86: [[META22]] = !{i32 0, i32 1, i32 0}
// X86: [[META23]] = !{!"none", !"none", !"none"}
// X86: [[META24]] = !{!"struct StructOneMember", !"struct StructOneMember*", !"struct StructTwoMember"}
// X86: [[META25]] = !{!"", !"", !""}
// X86: [[META26]] = !{i32 0, i32 0}
// X86: [[META27]] = !{!"struct LargeStructOneMember", !"struct LargeStructTwoMember"}
//.
// AMDGCN: [[META4]] = !{i32 1}
// AMDGCN: [[META5]] = !{!"none"}
// AMDGCN: [[META6]] = !{!"int*"}
// AMDGCN: [[META7]] = !{!""}
// AMDGCN: [[META8]] = !{i32 1, i32 1}
// AMDGCN: [[META9]] = !{!"none", !"none"}
// AMDGCN: [[META10]] = !{!"Mat3X3*", !"Mat4X4*"}
// AMDGCN: [[META11]] = !{!"", !""}
// AMDGCN: [[META12]] = !{!"Mat32X32*", !"Mat64X64*"}
// AMDGCN: [[META13]] = !{i32 0}
// AMDGCN: [[META14]] = !{!"struct StructOneMember"}
// AMDGCN: [[META15]] = !{!"struct LargeStructOneMember"}
// AMDGCN: [[META16]] = !{!"struct StructTwoMember"}
// AMDGCN: [[META17]] = !{!"struct LargeStructTwoMember"}
// AMDGCN: [[META18]] = !{i32 1, i32 1, i32 1, i32 1, i32 1}
// AMDGCN: [[META19]] = !{!"none", !"none", !"none", !"none", !"none"}
// AMDGCN: [[META20]] = !{!"int*", !"Mat3X3*", !"Mat4X4*", !"Mat32X32*", !"Mat64X64*"}
// AMDGCN: [[META21]] = !{!"", !"", !"", !"", !""}
// AMDGCN: [[META22]] = !{i32 0, i32 1, i32 0}
// AMDGCN: [[META23]] = !{!"none", !"none", !"none"}
// AMDGCN: [[META24]] = !{!"struct StructOneMember", !"struct StructOneMember*", !"struct StructTwoMember"}
// AMDGCN: [[META25]] = !{!"", !"", !""}
// AMDGCN: [[META26]] = !{i32 0, i32 0}
// AMDGCN: [[META27]] = !{!"struct LargeStructOneMember", !"struct LargeStructTwoMember"}
//.
|
@ -1,6 +1,13 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 4
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s

bool device_function() {
return __nvvm_reflect("__CUDA_ARCH") >= 700;
}

__kernel void kernel_function(__global int *i) {
*i = device_function();
}
// CHECK-LABEL: define dso_local zeroext i1 @device_function(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
@ -8,24 +15,28 @@
// CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[TMP0]], 700
// CHECK-NEXT: ret i1 [[CMP]]
//
bool device_function() {
return __nvvm_reflect("__CUDA_ARCH") >= 700;
}

//
// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3:[0-9]+]]
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4
// CHECK-NEXT: call void @__clang_ocl_kern_imp_kernel_function(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define dso_local ptx_kernel void @__clang_ocl_kern_imp_kernel_function(
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3]]
// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4
// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP0]], align 4
// CHECK-NEXT: ret void
//
__kernel void kernel_function(__global int *i) {
*i = device_function();
}
//.
// CHECK: [[META3]] = !{i32 1}
// CHECK: [[META4]] = !{!"none"}

@ -36,8 +36,12 @@ void fnc4smp(sampler_t s) {}

kernel void foo(sampler_t smp_par) {
// CHECK-SPIR-LABEL: define{{.*}} spir_kernel void @foo(target("spirv.Sampler") %smp_par)
// CHECK-SPIR: call spir_func void @__clang_ocl_kern_imp_foo(target("spirv.Sampler") %0)
// CHECK-SPIR-LABEL: define{{.*}} spir_func void @__clang_ocl_kern_imp_foo(target("spirv.Sampler") %smp_par)
// CHECK-SPIR: [[smp_par_ptr:%[A-Za-z0-9_\.]+]] = alloca target("spirv.Sampler")
// CHECK-X86-LABEL: define{{.*}} spir_kernel void @foo(ptr %smp_par)
// CHECK-X86: call void @__clang_ocl_kern_imp_foo(ptr %0)
// CHECK-X86-LABEL: define{{.*}} void @__clang_ocl_kern_imp_foo(ptr %smp_par)
// CHECK-X86: [[smp_par_ptr:%[A-Za-z0-9_\.]+]] = alloca ptr

// Case 2b

@ -4,15 +4,18 @@ int get_dummy_id(int D);

kernel void bar(global int *A);

//CHECK: define{{.*}} spir_kernel void @foo(ptr addrspace(1) noundef align 4 %A)
//CHECK: tail call spir_func void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4 %A)

kernel void foo(global int *A)
// CHECK: define{{.*}} spir_kernel void @foo(ptr addrspace(1) noundef align 4 %A)
// CHECK: define{{.*}} spir_func void @__clang_ocl_kern_imp_foo(ptr addrspace(1) noundef align 4 %A)
{
int id = get_dummy_id(0);
// CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
A[id] = id;
bar(A);
// CHECK: tail call spir_kernel void @bar(ptr addrspace(1) noundef align 4 %A)
// CHECK: tail call spir_func void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4 %A)
}

// CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
// CHECK: declare spir_kernel void @bar(ptr addrspace(1) noundef align 4)
// CHECK: declare spir_func void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4)

@ -37,22 +37,33 @@ __attribute__((visibility("protected"))) extern int ext_protected;
// FVIS-PROTECTED: @ext_default = external local_unnamed_addr
// FVIS-HIDDEN: @ext_default = external local_unnamed_addr
__attribute__((visibility("default"))) extern int ext_default;

// FVIS-DEFAULT: define{{.*}} amdgpu_kernel void @kern()
// FVIS-PROTECTED: define protected amdgpu_kernel void @kern()
// FVIS-HIDDEN: define protected amdgpu_kernel void @kern()
// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern()
// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern()
// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern()
kernel void kern() {}
// FVIS-DEFAULT: define protected amdgpu_kernel void @kern_hidden()
// FVIS-PROTECTED: define protected amdgpu_kernel void @kern_hidden()
// FVIS-HIDDEN: define protected amdgpu_kernel void @kern_hidden()
// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_hidden()
// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_hidden()
// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_hidden()
__attribute__((visibility("hidden"))) kernel void kern_hidden() {}
// FVIS-DEFAULT: define protected amdgpu_kernel void @kern_protected()
// FVIS-PROTECTED: define protected amdgpu_kernel void @kern_protected()
// FVIS-HIDDEN: define protected amdgpu_kernel void @kern_protected()
// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_protected()
// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_protected()
// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_protected()
__attribute__((visibility("protected"))) kernel void kern_protected() {}
// FVIS-DEFAULT: define{{.*}} amdgpu_kernel void @kern_default()
// FVIS-PROTECTED: define{{.*}} amdgpu_kernel void @kern_default()
// FVIS-HIDDEN: define{{.*}} amdgpu_kernel void @kern_default()
// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
// FVIS-PROTECTED: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
// FVIS-HIDDEN: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
__attribute__((visibility("default"))) kernel void kern_default() {}

// FVIS-DEFAULT: define{{.*}} void @func()

@ -85,31 +96,42 @@ __attribute__((visibility("default"))) extern void ext_func_default();
void use() {
glob = ext + ext_hidden + ext_protected + ext_default;
ext_kern();
// FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern()
// FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern()
// FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern()
ext_kern_hidden();
// FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
// FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
// FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
ext_kern_protected();
// FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
// FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
// FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
ext_kern_default();
// FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_default()
// FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_default()
// FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_default()
ext_func();
ext_func_hidden();
ext_func_protected();
ext_func_default();
}

// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern()
// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern()
// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern()
// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern()
// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern()
// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern()

// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden()
// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden()
// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden()
// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()

// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected()
// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected()
// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected()

// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default()
// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default()
// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default()
// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()

// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern_default()
// FVIS-PROTECTED: declare void @__clang_ocl_kern_imp_ext_kern_default()
// FVIS-HIDDEN: declare void @__clang_ocl_kern_imp_ext_kern_default()

// FVIS-DEFAULT: declare void @ext_func()
// FVIS-PROTECTED: declare protected void @ext_func()

@ -126,3 +148,6 @@ void use() {
// FVIS-DEFAULT: declare void @ext_func_default()
// FVIS-PROTECTED: declare void @ext_func_default()
// FVIS-HIDDEN: declare void @ext_func_default()

@ -125,9 +125,6 @@ __kernel void test__global() {
// EXPL: call spir_func void @_ZNU3AS41CC1EOU3AS4S_(ptr addrspace(4) {{[^,]*}} [[C5GEN]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[CALL]])
// IMPL: call void @llvm.memcpy.p0.p4.i32(ptr {{.*}}%c5, ptr addrspace(4) {{.*}}[[CALL]]

// Tests address space of inline members
//COMMON: @_ZNU3AS41C3getEv(ptr addrspace(4) {{[^,]*}} %this)
//COMMON: @_ZNU3AS41CplERU3AS4KS_(ptr dead_on_unwind noalias writable sret(%class.C) align 4 %agg.result, ptr addrspace(4) {{[^,]*}} %this
#define TEST(AS) \
__kernel void test##AS() { \
AS C c; \

@ -190,6 +187,10 @@ TEST(__private)
// EXPL: call spir_func noundef align 4 dereferenceable(4) ptr addrspace(4) @_ZNU3AS41CaSERU3AS4KS_(ptr addrspace(4) {{[^,]*}} [[C2GEN]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[C1GEN]])
// IMPL: call void @llvm.memcpy.p4.p4.i32(ptr addrspace(4) {{.*}}[[C2GEN]], ptr addrspace(4) {{.*}}[[C1GEN]]

// Tests address space of inline members
//COMMON: @_ZNU3AS41C3getEv(ptr addrspace(4) {{[^,]*}} %this)
//COMMON: @_ZNU3AS41CplERU3AS4KS_(ptr dead_on_unwind noalias writable sret(%class.C) align 4 %agg.result, ptr addrspace(4) {{[^,]*}} %this

// Test that calling a const method from a non-const method does not crash Clang.
class ConstAndNonConstMethod {
public:

@ -1,5 +1,5 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O1 -verify %s -o /dev/null

// expected-remark@+10 {{Function Name: foo}}
// expected-remark@+9 {{ TotalSGPRs: 13}}