[CIR][AMDGPU] Add AMDGPU-specific function attributes for HIP kernels (#188007)
Upstreaming clangIR PR: https://github.com/llvm/clangir/pull/2091 This patch adds support for AMDGPU-specific function attributes for HIP kernels Added setTargetAttributes for AMDGPUTargetCIRGenInfo to set kernel attributes Added generic string attribute handler in amendFunction to translate string-values with "cir." prefix function attributes to LLVM function attributes Follows OGCG AMDGPU implementation from "clang/lib/CodeGen/Targets/AMDGPU.cpp".
This commit is contained in:
parent
accf41ef78
commit
83451d8d4d
@ -327,7 +327,6 @@ struct MissingFeatures {
|
||||
static bool setDLLStorageClass() { return false; }
|
||||
static bool setNonGC() { return false; }
|
||||
static bool setObjCGCLValueClass() { return false; }
|
||||
static bool setTargetAttributes() { return false; }
|
||||
static bool shouldSplitConstantStore() { return false; }
|
||||
static bool shouldUseBZeroPlusStoresToInitialize() { return false; }
|
||||
static bool shouldUseMemSetToInitialize() { return false; }
|
||||
|
||||
@ -669,7 +669,7 @@ void CIRGenModule::setNonAliasAttributes(GlobalDecl gd, mlir::Operation *op) {
|
||||
assert(!cir::MissingFeatures::opFuncCPUAndFeaturesAttributes());
|
||||
assert(!cir::MissingFeatures::opFuncSection());
|
||||
|
||||
assert(!cir::MissingFeatures::setTargetAttributes());
|
||||
getTargetCIRGenInfo().setTargetAttributes(gd.getDecl(), op, *this);
|
||||
}
|
||||
|
||||
std::optional<cir::SourceLanguage> CIRGenModule::getCIRSourceLanguage() const {
|
||||
@ -2560,12 +2560,15 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl,
|
||||
// represent them in dedicated ops. The correct attributes are ensured during
|
||||
// translation to LLVM. Thus, we don't need to check for them here.
|
||||
|
||||
const auto *funcDecl = cast<FunctionDecl>(globalDecl.getDecl());
|
||||
|
||||
if (!isIncompleteFunction)
|
||||
setCIRFunctionAttributes(globalDecl,
|
||||
getTypes().arrangeGlobalDeclaration(globalDecl),
|
||||
func, isThunk);
|
||||
|
||||
assert(!cir::MissingFeatures::setTargetAttributes());
|
||||
if (!isIncompleteFunction && func.isDeclaration())
|
||||
getTargetCIRGenInfo().setTargetAttributes(funcDecl, func, *this);
|
||||
|
||||
// TODO(cir): This needs a lot of work to better match CodeGen. That
|
||||
// ultimately ends up in setGlobalVisibility, which already has the linkage of
|
||||
@ -2577,17 +2580,16 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl,
|
||||
}
|
||||
|
||||
// If we plan on emitting this inline builtin, we can't treat it as a builtin.
|
||||
const auto *fd = cast<FunctionDecl>(globalDecl.getDecl());
|
||||
if (fd->isInlineBuiltinDeclaration()) {
|
||||
if (funcDecl->isInlineBuiltinDeclaration()) {
|
||||
const FunctionDecl *fdBody;
|
||||
bool hasBody = fd->hasBody(fdBody);
|
||||
bool hasBody = funcDecl->hasBody(fdBody);
|
||||
(void)hasBody;
|
||||
assert(hasBody && "Inline builtin declarations should always have an "
|
||||
"available body!");
|
||||
assert(!cir::MissingFeatures::attributeNoBuiltin());
|
||||
}
|
||||
|
||||
if (fd->isReplaceableGlobalAllocationFunction()) {
|
||||
if (funcDecl->isReplaceableGlobalAllocationFunction()) {
|
||||
// A replaceable global allocation function does not act like a builtin by
|
||||
// default, only if it is invoked by a new-expression or delete-expression.
|
||||
func->setAttr(cir::CIRDialect::getNoBuiltinAttrName(),
|
||||
|
||||
@ -52,6 +52,7 @@ add_clang_library(clangCIR
|
||||
CIRGenTypes.cpp
|
||||
CIRGenVTables.cpp
|
||||
TargetInfo.cpp
|
||||
Targets/AMDGPU.cpp
|
||||
|
||||
DEPENDS
|
||||
MLIRCIR
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
#include "TargetInfo.h"
|
||||
#include "ABIInfo.h"
|
||||
#include "CIRGenFunction.h"
|
||||
#include "CIRGenModule.h"
|
||||
#include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h"
|
||||
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
|
||||
#include "clang/CIR/Dialect/IR/CIRDialect.h"
|
||||
@ -53,6 +54,22 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo {
|
||||
public:
|
||||
AMDGPUTargetCIRGenInfo(CIRGenTypes &cgt)
|
||||
: TargetCIRGenInfo(std::make_unique<AMDGPUABIInfo>(cgt)) {}
|
||||
|
||||
void setTargetAttributes(const clang::Decl *decl, mlir::Operation *global,
|
||||
CIRGenModule &cgm) const override {
|
||||
if (auto func = mlir::dyn_cast<cir::FuncOp>(global)) {
|
||||
if (requiresAMDGPUProtectedVisibility(decl, func.getGlobalVisibility())) {
|
||||
func.setGlobalVisibility(cir::VisibilityKind::Protected);
|
||||
func.setDSOLocal(true);
|
||||
}
|
||||
setAMDGPUTargetFunctionAttributes(decl, func, cgm);
|
||||
} else if (auto gv = mlir::dyn_cast<cir::GlobalOp>(global)) {
|
||||
if (requiresAMDGPUProtectedVisibility(decl, gv.getGlobalVisibility())) {
|
||||
gv.setGlobalVisibility(cir::VisibilityKind::Protected);
|
||||
gv.setDSOLocal(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -19,6 +19,7 @@
|
||||
#include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h"
|
||||
#include "clang/Basic/AddressSpaces.h"
|
||||
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
|
||||
#include "clang/CIR/Dialect/IR/CIRDialect.h"
|
||||
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
|
||||
|
||||
#include <memory>
|
||||
@ -135,6 +136,14 @@ public:
|
||||
std::unique_ptr<TargetCIRGenInfo>
|
||||
createAMDGPUTargetCIRGenInfo(CIRGenTypes &cgt);
|
||||
|
||||
/// Check if AMDGPU protected visibility is required.
|
||||
bool requiresAMDGPUProtectedVisibility(const clang::Decl *d,
|
||||
cir::VisibilityKind visibility);
|
||||
|
||||
/// Set AMDGPU-specific function attributes for HIP kernels.
|
||||
void setAMDGPUTargetFunctionAttributes(const clang::Decl *decl,
|
||||
cir::FuncOp func, CIRGenModule &cgm);
|
||||
|
||||
std::unique_ptr<TargetCIRGenInfo> createX8664TargetCIRGenInfo(CIRGenTypes &cgt);
|
||||
|
||||
std::unique_ptr<TargetCIRGenInfo> createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt);
|
||||
|
||||
255
clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp
Normal file
255
clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp
Normal file
@ -0,0 +1,255 @@
|
||||
//===---- AMDGPU.cpp - AMDGPU-specific CIR CodeGen ------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This provides AMDGPU-specific CIR CodeGen logic for function attributes.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "../CIRGenModule.h"
|
||||
#include "../TargetInfo.h"
|
||||
|
||||
#include "clang/AST/Attr.h"
|
||||
#include "clang/AST/Decl.h"
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "clang/CIR/Dialect/IR/CIRDialect.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace clang;
|
||||
using namespace clang::CIRGen;
|
||||
|
||||
bool clang::CIRGen::requiresAMDGPUProtectedVisibility(
|
||||
const Decl *d, cir::VisibilityKind visibility) {
|
||||
if (visibility != cir::VisibilityKind::Hidden)
|
||||
return false;
|
||||
|
||||
return !d->hasAttr<OMPDeclareTargetDeclAttr>() &&
|
||||
(d->hasAttr<DeviceKernelAttr>() ||
|
||||
(isa<FunctionDecl>(d) && d->hasAttr<CUDAGlobalAttr>()) ||
|
||||
(isa<VarDecl>(d) &&
|
||||
(d->hasAttr<CUDADeviceAttr>() || d->hasAttr<CUDAConstantAttr>() ||
|
||||
cast<VarDecl>(d)->getType()->isCUDADeviceBuiltinSurfaceType() ||
|
||||
cast<VarDecl>(d)->getType()->isCUDADeviceBuiltinTextureType())));
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/// Handle amdgpu-flat-work-group-size attribute.
|
||||
static void
|
||||
handleAMDGPUFlatWorkGroupSizeAttr(const FunctionDecl *fd, cir::FuncOp func,
|
||||
CIRGenModule &cgm, CIRGenBuilderTy &builder,
|
||||
bool isOpenCLKernel, bool isHIPKernel) {
|
||||
const auto *flatWGS = fd->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
|
||||
const auto *reqdWGS =
|
||||
cgm.getLangOpts().OpenCL ? fd->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
|
||||
|
||||
if (flatWGS || reqdWGS) {
|
||||
unsigned min = 0, max = 0;
|
||||
if (flatWGS) {
|
||||
min = flatWGS->getMin()
|
||||
->EvaluateKnownConstInt(cgm.getASTContext())
|
||||
.getExtValue();
|
||||
max = flatWGS->getMax()
|
||||
->EvaluateKnownConstInt(cgm.getASTContext())
|
||||
.getExtValue();
|
||||
}
|
||||
if (reqdWGS && min == 0 && max == 0) {
|
||||
min = max = reqdWGS->getXDim()
|
||||
->EvaluateKnownConstInt(cgm.getASTContext())
|
||||
.getExtValue() *
|
||||
reqdWGS->getYDim()
|
||||
->EvaluateKnownConstInt(cgm.getASTContext())
|
||||
.getExtValue() *
|
||||
reqdWGS->getZDim()
|
||||
->EvaluateKnownConstInt(cgm.getASTContext())
|
||||
.getExtValue();
|
||||
}
|
||||
if (min != 0) {
|
||||
assert(min <= max && "Min must be less than or equal Max");
|
||||
std::string attrVal = llvm::utostr(min) + "," + llvm::utostr(max);
|
||||
func->setAttr("cir.amdgpu-flat-work-group-size",
|
||||
builder.getStringAttr(attrVal));
|
||||
} else {
|
||||
assert(max == 0 && "Max must be zero");
|
||||
}
|
||||
} else if (isOpenCLKernel || isHIPKernel) {
|
||||
// By default, restrict the maximum size to a value specified by
|
||||
// --gpu-max-threads-per-block=n or its default value for HIP.
|
||||
const unsigned openCLDefaultMaxWorkGroupSize = 256;
|
||||
const unsigned defaultMaxWorkGroupSize =
|
||||
isOpenCLKernel ? openCLDefaultMaxWorkGroupSize
|
||||
: cgm.getLangOpts().GPUMaxThreadsPerBlock;
|
||||
std::string attrVal =
|
||||
std::string("1,") + llvm::utostr(defaultMaxWorkGroupSize);
|
||||
func->setAttr("cir.amdgpu-flat-work-group-size",
|
||||
builder.getStringAttr(attrVal));
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle amdgpu-waves-per-eu attribute.
|
||||
static void handleAMDGPUWavesPerEUAttr(const FunctionDecl *fd, cir::FuncOp func,
|
||||
CIRGenModule &cgm,
|
||||
CIRGenBuilderTy &builder) {
|
||||
const auto *attr = fd->getAttr<AMDGPUWavesPerEUAttr>();
|
||||
if (!attr)
|
||||
return;
|
||||
unsigned min =
|
||||
attr->getMin()->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue();
|
||||
unsigned max = attr->getMax()
|
||||
? attr->getMax()
|
||||
->EvaluateKnownConstInt(cgm.getASTContext())
|
||||
.getExtValue()
|
||||
: 0;
|
||||
|
||||
if (min != 0) {
|
||||
assert((max == 0 || min <= max) && "Min must be less than or equal Max");
|
||||
std::string attrVal = llvm::utostr(min);
|
||||
if (max != 0)
|
||||
attrVal = attrVal + "," + llvm::utostr(max);
|
||||
func->setAttr("cir.amdgpu-waves-per-eu", builder.getStringAttr(attrVal));
|
||||
} else {
|
||||
assert(max == 0 && "Max must be zero");
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle amdgpu-num-sgpr attribute.
|
||||
static void handleAMDGPUNumSGPRAttr(const FunctionDecl *fd, cir::FuncOp func,
|
||||
CIRGenModule &cgm,
|
||||
CIRGenBuilderTy &builder) {
|
||||
const auto *attr = fd->getAttr<AMDGPUNumSGPRAttr>();
|
||||
if (!attr)
|
||||
return;
|
||||
|
||||
uint32_t numSGPR = attr->getNumSGPR();
|
||||
if (numSGPR != 0) {
|
||||
func->setAttr("cir.amdgpu-num-sgpr",
|
||||
builder.getStringAttr(llvm::utostr(numSGPR)));
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle amdgpu-num-vgpr attribute.
|
||||
static void handleAMDGPUNumVGPRAttr(const FunctionDecl *fd, cir::FuncOp func,
|
||||
CIRGenModule &cgm,
|
||||
CIRGenBuilderTy &builder) {
|
||||
const auto *attr = fd->getAttr<AMDGPUNumVGPRAttr>();
|
||||
if (!attr)
|
||||
return;
|
||||
|
||||
uint32_t numVGPR = attr->getNumVGPR();
|
||||
if (numVGPR != 0) {
|
||||
func->setAttr("cir.amdgpu-num-vgpr",
|
||||
builder.getStringAttr(llvm::utostr(numVGPR)));
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle amdgpu-max-num-workgroups attribute.
|
||||
static void handleAMDGPUMaxNumWorkGroupsAttr(const FunctionDecl *fd,
|
||||
cir::FuncOp func,
|
||||
CIRGenModule &cgm,
|
||||
CIRGenBuilderTy &builder) {
|
||||
const auto *attr = fd->getAttr<AMDGPUMaxNumWorkGroupsAttr>();
|
||||
if (!attr)
|
||||
return;
|
||||
uint32_t x = attr->getMaxNumWorkGroupsX()
|
||||
->EvaluateKnownConstInt(cgm.getASTContext())
|
||||
.getExtValue();
|
||||
uint32_t y = attr->getMaxNumWorkGroupsY()
|
||||
? attr->getMaxNumWorkGroupsY()
|
||||
->EvaluateKnownConstInt(cgm.getASTContext())
|
||||
.getExtValue()
|
||||
: 1;
|
||||
uint32_t z = attr->getMaxNumWorkGroupsZ()
|
||||
? attr->getMaxNumWorkGroupsZ()
|
||||
->EvaluateKnownConstInt(cgm.getASTContext())
|
||||
.getExtValue()
|
||||
: 1;
|
||||
|
||||
llvm::SmallString<32> attrVal;
|
||||
llvm::raw_svector_ostream os(attrVal);
|
||||
os << x << ',' << y << ',' << z;
|
||||
func->setAttr("cir.amdgpu-max-num-workgroups",
|
||||
builder.getStringAttr(attrVal.str()));
|
||||
}
|
||||
|
||||
/// Handle amdgpu-cluster-dims attribute.
|
||||
static void handleAMDGPUClusterDimsAttr(const FunctionDecl *fd,
|
||||
cir::FuncOp func, CIRGenModule &cgm,
|
||||
CIRGenBuilderTy &builder,
|
||||
bool isOpenCLKernel) {
|
||||
|
||||
if (const auto *attr = fd->getAttr<CUDAClusterDimsAttr>()) {
|
||||
auto getExprVal = [&](const Expr *e) {
|
||||
return e ? e->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue()
|
||||
: 1;
|
||||
};
|
||||
unsigned x = getExprVal(attr->getX());
|
||||
unsigned y = getExprVal(attr->getY());
|
||||
unsigned z = getExprVal(attr->getZ());
|
||||
|
||||
llvm::SmallString<32> attrVal;
|
||||
llvm::raw_svector_ostream os(attrVal);
|
||||
os << x << ',' << y << ',' << z;
|
||||
func->setAttr("cir.amdgpu-cluster-dims",
|
||||
builder.getStringAttr(attrVal.str()));
|
||||
}
|
||||
|
||||
const TargetInfo &targetInfo = cgm.getASTContext().getTargetInfo();
|
||||
if ((isOpenCLKernel &&
|
||||
targetInfo.hasFeatureEnabled(targetInfo.getTargetOpts().FeatureMap,
|
||||
"clusters")) ||
|
||||
fd->hasAttr<CUDANoClusterAttr>()) {
|
||||
func->setAttr("cir.amdgpu-cluster-dims", builder.getStringAttr("0,0,0"));
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle amdgpu-ieee attribute.
|
||||
static void handleAMDGPUIEEEAttr(cir::FuncOp func, CIRGenModule &cgm,
|
||||
CIRGenBuilderTy &builder) {
|
||||
if (!cgm.getCodeGenOpts().EmitIEEENaNCompliantInsts)
|
||||
func->setAttr("cir.amdgpu-ieee", builder.getStringAttr("false"));
|
||||
}
|
||||
|
||||
/// Handle amdgpu-expand-waitcnt-profiling attribute.
|
||||
static void handleAMDGPUExpandWaitcntProfilingAttr(cir::FuncOp func,
|
||||
CIRGenModule &cgm,
|
||||
CIRGenBuilderTy &builder) {
|
||||
if (cgm.getCodeGenOpts().AMDGPUExpandWaitcntProfiling)
|
||||
func->setAttr("cir.amdgpu-expand-waitcnt-profiling",
|
||||
builder.getStringAttr(""));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void clang::CIRGen::setAMDGPUTargetFunctionAttributes(const Decl *decl,
|
||||
cir::FuncOp func,
|
||||
CIRGenModule &cgm) {
|
||||
if (func.isDeclaration())
|
||||
return;
|
||||
|
||||
CIRGenBuilderTy &builder = cgm.getBuilder();
|
||||
|
||||
const auto *fd = dyn_cast_or_null<FunctionDecl>(decl);
|
||||
if (fd) {
|
||||
const bool isOpenCLKernel =
|
||||
cgm.getLangOpts().OpenCL && fd->hasAttr<DeviceKernelAttr>();
|
||||
const bool isHIPKernel =
|
||||
cgm.getLangOpts().HIP && fd->hasAttr<CUDAGlobalAttr>();
|
||||
|
||||
// TODO(CIR): Set amdgpu_kernel calling convention for HIP kernels.
|
||||
|
||||
handleAMDGPUFlatWorkGroupSizeAttr(fd, func, cgm, builder, isOpenCLKernel,
|
||||
isHIPKernel);
|
||||
handleAMDGPUWavesPerEUAttr(fd, func, cgm, builder);
|
||||
handleAMDGPUNumSGPRAttr(fd, func, cgm, builder);
|
||||
handleAMDGPUNumVGPRAttr(fd, func, cgm, builder);
|
||||
handleAMDGPUMaxNumWorkGroupsAttr(fd, func, cgm, builder);
|
||||
handleAMDGPUClusterDimsAttr(fd, func, cgm, builder, isOpenCLKernel);
|
||||
}
|
||||
handleAMDGPUIEEEAttr(func, cgm, builder);
|
||||
handleAMDGPUExpandWaitcntProfilingAttr(func, cgm, builder);
|
||||
}
|
||||
@ -15,9 +15,7 @@
|
||||
#include "mlir/IR/DialectRegistry.h"
|
||||
#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h"
|
||||
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
|
||||
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
|
||||
#include "clang/CIR/Dialect/IR/CIRDialect.h"
|
||||
#include "clang/CIR/MissingFeatures.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/IR/Constant.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
@ -54,7 +52,11 @@ public:
|
||||
mlir::Operation *op, llvm::ArrayRef<llvm::Instruction *> instructions,
|
||||
mlir::NamedAttribute attribute,
|
||||
mlir::LLVM::ModuleTranslation &moduleTranslation) const override {
|
||||
if (auto mod = dyn_cast<mlir::ModuleOp>(op)) {
|
||||
if (auto func = dyn_cast<mlir::LLVM::LLVMFuncOp>(op)) {
|
||||
if (mlir::failed(
|
||||
amendFunction(func, instructions, attribute, moduleTranslation)))
|
||||
return mlir::failure();
|
||||
} else if (auto mod = dyn_cast<mlir::ModuleOp>(op)) {
|
||||
if (mlir::failed(amendModule(mod, attribute, moduleTranslation)))
|
||||
return mlir::failure();
|
||||
}
|
||||
@ -62,6 +64,22 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
// Translate CIR function attributes to LLVM function attributes.
|
||||
mlir::LogicalResult
|
||||
amendFunction(mlir::LLVM::LLVMFuncOp func,
|
||||
llvm::ArrayRef<llvm::Instruction *> instructions,
|
||||
mlir::NamedAttribute attribute,
|
||||
mlir::LLVM::ModuleTranslation &moduleTranslation) const {
|
||||
llvm::Function *llvmFunc = moduleTranslation.lookupFunction(func.getName());
|
||||
llvm::StringRef attrName = attribute.getName().strref();
|
||||
|
||||
// Strip the "cir." prefix to get the LLVM attribute name.
|
||||
llvm::StringRef llvmAttrName = attrName.substr(strlen("cir."));
|
||||
if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(attribute.getValue()))
|
||||
llvmFunc->addFnAttr(llvmAttrName, strAttr.getValue());
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
// Translate CIR's module attributes to LLVM's module metadata
|
||||
mlir::LogicalResult
|
||||
amendModule(mlir::ModuleOp mod, mlir::NamedAttribute attribute,
|
||||
|
||||
98
clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip
Normal file
98
clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip
Normal file
@ -0,0 +1,98 @@
|
||||
#include "../CodeGenCUDA/Inputs/cuda.h"
|
||||
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
|
||||
// RUN: -fcuda-is-device -emit-cir %s -o %t.cir
|
||||
// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir
|
||||
|
||||
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
|
||||
// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll
|
||||
// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ll
|
||||
|
||||
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
|
||||
// RUN: -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll
|
||||
// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ogcg.ll
|
||||
|
||||
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
|
||||
// RUN: -fcuda-is-device -mamdgpu-expand-waitcnt-profiling \
|
||||
// RUN: -emit-cir %s -o %t.prof.cir
|
||||
// RUN: FileCheck --check-prefix=PROF %s --input-file=%t.prof.cir
|
||||
|
||||
// Test that AMDGPU-specific attributes are generated for HIP kernels
|
||||
|
||||
// Test: Default attributes for simple kernel
|
||||
// CIR: cir.func{{.*}} @_Z13kernel_simplev(){{.*}}"cir.amdgpu-flat-work-group-size" = "1,1024"
|
||||
// LLVM: define{{.*}} void @_Z13kernel_simplev(){{.*}} #[[SIMPLE_ATTR:[0-9]+]]
|
||||
__global__ void kernel_simple() {}
|
||||
|
||||
// Test: Explicit flat work group size attribute
|
||||
// CIR: cir.func{{.*}} @_Z21kernel_flat_wg_size_1v(){{.*}}"cir.amdgpu-flat-work-group-size" = "64,128"
|
||||
// LLVM: define{{.*}} void @_Z21kernel_flat_wg_size_1v(){{.*}} #[[FLAT_WG_ATTR:[0-9]+]]
|
||||
__attribute__((amdgpu_flat_work_group_size(64, 128)))
|
||||
__global__ void kernel_flat_wg_size_1() {}
|
||||
|
||||
// Test: Waves per EU attribute
|
||||
// CIR: cir.func{{.*}} @_Z19kernel_waves_per_euv(){{.*}}"cir.amdgpu-waves-per-eu" = "2"
|
||||
// LLVM: define{{.*}} void @_Z19kernel_waves_per_euv(){{.*}} #[[WAVES_ATTR:[0-9]+]]
|
||||
__attribute__((amdgpu_waves_per_eu(2)))
|
||||
__global__ void kernel_waves_per_eu() {}
|
||||
|
||||
// Test: Waves per EU with min and max
|
||||
// CIR: cir.func{{.*}} @_Z22kernel_waves_per_eu_mmv(){{.*}}"cir.amdgpu-waves-per-eu" = "2,4"
|
||||
// LLVM: define{{.*}} void @_Z22kernel_waves_per_eu_mmv(){{.*}} #[[WAVES_MM_ATTR:[0-9]+]]
|
||||
__attribute__((amdgpu_waves_per_eu(2, 4)))
|
||||
__global__ void kernel_waves_per_eu_mm() {}
|
||||
|
||||
// Test: Num SGPR attribute
|
||||
// CIR: cir.func{{.*}} @_Z15kernel_num_sgprv(){{.*}}"cir.amdgpu-num-sgpr" = "32"
|
||||
// LLVM: define{{.*}} void @_Z15kernel_num_sgprv(){{.*}} #[[SGPR_ATTR:[0-9]+]]
|
||||
__attribute__((amdgpu_num_sgpr(32)))
|
||||
__global__ void kernel_num_sgpr() {}
|
||||
|
||||
// Test: Num VGPR attribute
|
||||
// CIR: cir.func{{.*}} @_Z15kernel_num_vgprv(){{.*}}"cir.amdgpu-num-vgpr" = "64"
|
||||
// LLVM: define{{.*}} void @_Z15kernel_num_vgprv(){{.*}} #[[VGPR_ATTR:[0-9]+]]
|
||||
__attribute__((amdgpu_num_vgpr(64)))
|
||||
__global__ void kernel_num_vgpr() {}
|
||||
|
||||
// Test: Max num workgroups attribute
|
||||
// CIR: cir.func{{.*}} @_Z22kernel_max_num_wgroupsv(){{.*}}"cir.amdgpu-max-num-workgroups" = "8,4,2"
|
||||
// LLVM: define{{.*}} void @_Z22kernel_max_num_wgroupsv(){{.*}} #[[MAX_WG_ATTR:[0-9]+]]
|
||||
__attribute__((amdgpu_max_num_work_groups(8, 4, 2)))
|
||||
__global__ void kernel_max_num_wgroups() {}
|
||||
|
||||
// Test: Combined attributes
|
||||
// CIR: cir.func{{.*}} @_Z15kernel_combinedv(){{.*}}"cir.amdgpu-flat-work-group-size" = "256,256"{{.*}}"cir.amdgpu-num-sgpr" = "48"{{.*}}"cir.amdgpu-num-vgpr" = "32"{{.*}}"cir.amdgpu-waves-per-eu" = "1,2"
|
||||
// LLVM: define{{.*}} void @_Z15kernel_combinedv(){{.*}} #[[COMBINED_ATTR:[0-9]+]]
|
||||
__attribute__((amdgpu_flat_work_group_size(256, 256)))
|
||||
__attribute__((amdgpu_waves_per_eu(1, 2)))
|
||||
__attribute__((amdgpu_num_sgpr(48)))
|
||||
__attribute__((amdgpu_num_vgpr(32)))
|
||||
__global__ void kernel_combined() {}
|
||||
|
||||
// Test: Device function should NOT have kernel attributes
|
||||
// CIR: cir.func{{.*}} @_Z9device_fnv()
|
||||
// CIR-NOT: cir.amdgpu-flat-work-group-size
|
||||
// LLVM: define{{.*}} void @_Z9device_fnv()
|
||||
__device__ void device_fn() {}
|
||||
|
||||
// Verify LLVM attributes
|
||||
// LLVM-DAG: attributes #[[SIMPLE_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
|
||||
// LLVM-DAG: attributes #[[FLAT_WG_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="64,128"
|
||||
// LLVM-DAG: attributes #[[WAVES_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2"
|
||||
// LLVM-DAG: attributes #[[WAVES_MM_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2,4"
|
||||
// LLVM-DAG: attributes #[[SGPR_ATTR]] = {{.*}}"amdgpu-num-sgpr"="32"
|
||||
// LLVM-DAG: attributes #[[VGPR_ATTR]] = {{.*}}"amdgpu-num-vgpr"="64"
|
||||
// LLVM-DAG: attributes #[[MAX_WG_ATTR]] = {{.*}}"amdgpu-max-num-workgroups"="8,4,2"
|
||||
// LLVM-DAG: attributes #[[COMBINED_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="256,256"{{.*}}"amdgpu-num-sgpr"="48"{{.*}}"amdgpu-num-vgpr"="32"{{.*}}"amdgpu-waves-per-eu"="1,2"
|
||||
|
||||
// Test: amdgpu-expand-waitcnt-profiling is set on all functions when enabled
|
||||
// PROF: cir.func{{.*}} @_Z13kernel_simplev(){{.*}}"cir.amdgpu-expand-waitcnt-profiling"
|
||||
// PROF: cir.func{{.*}} @_Z21kernel_flat_wg_size_1v(){{.*}}"cir.amdgpu-expand-waitcnt-profiling"
|
||||
// PROF: cir.func{{.*}} @_Z19kernel_waves_per_euv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling"
|
||||
// PROF: cir.func{{.*}} @_Z22kernel_waves_per_eu_mmv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling"
|
||||
// PROF: cir.func{{.*}} @_Z15kernel_num_sgprv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling"
|
||||
// PROF: cir.func{{.*}} @_Z15kernel_num_vgprv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling"
|
||||
// PROF: cir.func{{.*}} @_Z22kernel_max_num_wgroupsv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling"
|
||||
// PROF: cir.func{{.*}} @_Z15kernel_combinedv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling"
|
||||
// PROF: cir.func{{.*}} @_Z9device_fnv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling"
|
||||
Loading…
x
Reference in New Issue
Block a user