llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
Jon Chesterfield 76bfbb74d3 [libomptarget][amdgpu] Call into deviceRTL instead of ockl
[libomptarget][amdgpu] Call into deviceRTL instead of ockl

Amdgpu codegen presently emits a call into ockl. The same functionality
is already present in the deviceRTL. Adds an amdgpu specific entry point
to avoid the dependency. This lets simple openmp code (specifically, that
which doesn't use libm) run without rocm device libraries installed.

Reviewed By: ronlieb

Differential Revision: https://reviews.llvm.org/D93356
2021-01-04 16:48:47 +00:00

61 lines
2.2 KiB
C++

//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation specialized to
// AMDGCN targets from generalized CGOpenMPRuntimeGPU class.
//
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntimeAMDGCN.h"
#include "CGOpenMPRuntimeGPU.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM)
: CGOpenMPRuntimeGPU(CGM) {
if (!CGM.getLangOpts().OpenMPIsDevice)
llvm_unreachable("OpenMP AMDGCN can only handle device code.");
}
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
// return constant compile-time target-specific warp size
unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
return Bld.getInt32(WarpSize);
}
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Function *F =
CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x);
return Bld.CreateCall(F, llvm::None, "nvptx_tid");
}
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Module *M = &CGF.CGM.getModule();
const char *LocSize = "__kmpc_amdgcn_gpu_num_threads";
llvm::Function *F = M->getFunction(LocSize);
if (!F) {
F = llvm::Function::Create(
llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
}
return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
}