[clang][CGCUDANV] Unify PointerType members of CGNVCUDARuntime (NFC) (#75668)
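Unify the three `llvm::PointerType *` members (`CharPtrTy`, `VoidPtrTy`, `VoidPtrPtrTy`), which all refer to the same LLVM type, into a single `PtrTy` member. This is part of the opaque-pointer clean-up effort.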
This commit is contained in:
parent
82ab0f7f36
commit
f49e2b05bf
@ -39,7 +39,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
|
||||
private:
|
||||
llvm::IntegerType *IntTy, *SizeTy;
|
||||
llvm::Type *VoidTy;
|
||||
llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
|
||||
llvm::PointerType *PtrTy;
|
||||
|
||||
/// Convenience reference to LLVM Context
|
||||
llvm::LLVMContext &Context;
|
||||
@ -232,15 +232,12 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
|
||||
VoidTy = CGM.VoidTy;
|
||||
Zeros[0] = llvm::ConstantInt::get(SizeTy, 0);
|
||||
Zeros[1] = Zeros[0];
|
||||
|
||||
CharPtrTy = CGM.UnqualPtrTy;
|
||||
VoidPtrTy = CGM.UnqualPtrTy;
|
||||
VoidPtrPtrTy = CGM.UnqualPtrTy;
|
||||
PtrTy = CGM.UnqualPtrTy;
|
||||
}
|
||||
|
||||
llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
|
||||
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
|
||||
llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
|
||||
llvm::Type *Params[] = {PtrTy, SizeTy, SizeTy};
|
||||
return CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(IntTy, Params, false),
|
||||
addPrefixToName("SetupArgument"));
|
||||
@ -250,24 +247,24 @@ llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
|
||||
if (CGM.getLangOpts().HIP) {
|
||||
// hipError_t hipLaunchByPtr(char *);
|
||||
return CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
|
||||
llvm::FunctionType::get(IntTy, PtrTy, false), "hipLaunchByPtr");
|
||||
}
|
||||
// cudaError_t cudaLaunch(char *);
|
||||
return CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
|
||||
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, PtrTy, false),
|
||||
"cudaLaunch");
|
||||
}
|
||||
|
||||
llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
|
||||
return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
|
||||
return llvm::FunctionType::get(VoidTy, PtrTy, false);
|
||||
}
|
||||
|
||||
llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
|
||||
return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
|
||||
return llvm::FunctionType::get(VoidTy, PtrTy, false);
|
||||
}
|
||||
|
||||
llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
|
||||
llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), VoidPtrTy,
|
||||
VoidPtrTy, llvm::PointerType::getUnqual(Context)};
|
||||
llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), PtrTy, PtrTy,
|
||||
llvm::PointerType::getUnqual(Context)};
|
||||
return llvm::FunctionType::get(VoidTy, Params, false);
|
||||
}
|
||||
|
||||
@ -330,15 +327,15 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
|
||||
// args, allocate a single pointer so we still have a valid pointer to the
|
||||
// argument array that we can pass to runtime, even if it will be unused.
|
||||
Address KernelArgs = CGF.CreateTempAlloca(
|
||||
VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args",
|
||||
PtrTy, CharUnits::fromQuantity(16), "kernel_args",
|
||||
llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
|
||||
// Store pointers to the arguments in a locally allocated launch_args.
|
||||
for (unsigned i = 0; i < Args.size(); ++i) {
|
||||
llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer();
|
||||
llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy);
|
||||
llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, PtrTy);
|
||||
CGF.Builder.CreateDefaultAlignedStore(
|
||||
VoidVarPtr,
|
||||
CGF.Builder.CreateConstGEP1_32(VoidPtrTy, KernelArgs.getPointer(), i));
|
||||
CGF.Builder.CreateConstGEP1_32(PtrTy, KernelArgs.getPointer(), i));
|
||||
}
|
||||
|
||||
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
|
||||
@ -386,8 +383,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
|
||||
CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim");
|
||||
Address ShmemSize =
|
||||
CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size");
|
||||
Address Stream =
|
||||
CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream");
|
||||
Address Stream = CGF.CreateTempAlloca(PtrTy, CGM.getPointerAlign(), "stream");
|
||||
llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(IntTy,
|
||||
{/*gridDim=*/GridDim.getType(),
|
||||
@ -402,8 +398,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
|
||||
ShmemSize.getPointer(), Stream.getPointer()});
|
||||
|
||||
// Emit the call to cudaLaunch
|
||||
llvm::Value *Kernel = CGF.Builder.CreatePointerCast(
|
||||
KernelHandles[CGF.CurFn->getName()], VoidPtrTy);
|
||||
llvm::Value *Kernel =
|
||||
CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
|
||||
CallArgList LaunchKernelArgs;
|
||||
LaunchKernelArgs.add(RValue::get(Kernel),
|
||||
cudaLaunchKernelFD->getParamDecl(0)->getType());
|
||||
@ -443,7 +439,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
|
||||
Offset = Offset.alignTo(TInfo.Align);
|
||||
llvm::Value *Args[] = {
|
||||
CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
|
||||
VoidPtrTy),
|
||||
PtrTy),
|
||||
llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
|
||||
llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
|
||||
};
|
||||
@ -458,8 +454,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
|
||||
|
||||
// Emit the call to cudaLaunch
|
||||
llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
|
||||
llvm::Value *Arg = CGF.Builder.CreatePointerCast(
|
||||
KernelHandles[CGF.CurFn->getName()], CharPtrTy);
|
||||
llvm::Value *Arg =
|
||||
CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
|
||||
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
|
||||
CGF.EmitBranch(EndBlock);
|
||||
|
||||
@ -537,11 +533,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
|
||||
// void __cudaRegisterFunction(void **, const char *, char *, const char *,
|
||||
// int, uint3*, uint3*, dim3*, dim3*, int*)
|
||||
llvm::Type *RegisterFuncParams[] = {
|
||||
VoidPtrPtrTy, CharPtrTy,
|
||||
CharPtrTy, CharPtrTy,
|
||||
IntTy, VoidPtrTy,
|
||||
VoidPtrTy, VoidPtrTy,
|
||||
VoidPtrTy, llvm::PointerType::getUnqual(Context)};
|
||||
PtrTy, PtrTy, PtrTy, PtrTy, IntTy,
|
||||
PtrTy, PtrTy, PtrTy, PtrTy, llvm::PointerType::getUnqual(Context)};
|
||||
llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
|
||||
addUnderscoredPrefixToName("RegisterFunction"));
|
||||
@ -553,7 +546,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
|
||||
for (auto &&I : EmittedKernels) {
|
||||
llvm::Constant *KernelName =
|
||||
makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
|
||||
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
|
||||
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(PtrTy);
|
||||
llvm::Value *Args[] = {
|
||||
&GpuBinaryHandlePtr,
|
||||
KernelHandles[I.Kernel->getName()],
|
||||
@ -576,16 +569,15 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
|
||||
|
||||
// void __cudaRegisterVar(void **, char *, char *, const char *,
|
||||
// int, int, int, int)
|
||||
llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
|
||||
CharPtrTy, IntTy, VarSizeTy,
|
||||
IntTy, IntTy};
|
||||
llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
|
||||
IntTy, VarSizeTy, IntTy, IntTy};
|
||||
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
|
||||
addUnderscoredPrefixToName("RegisterVar"));
|
||||
// void __hipRegisterManagedVar(void **, char *, char *, const char *,
|
||||
// size_t, unsigned)
|
||||
llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
|
||||
CharPtrTy, VarSizeTy, IntTy};
|
||||
llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy,
|
||||
PtrTy, VarSizeTy, IntTy};
|
||||
llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
|
||||
addUnderscoredPrefixToName("RegisterManagedVar"));
|
||||
@ -593,16 +585,13 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
|
||||
// const void **, const char *, int, int);
|
||||
llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(
|
||||
VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
|
||||
false),
|
||||
VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy}, false),
|
||||
addUnderscoredPrefixToName("RegisterSurface"));
|
||||
// void __cudaRegisterTexture(void **, const struct textureReference *,
|
||||
// const void **, const char *, int, int, int)
|
||||
llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(
|
||||
VoidTy,
|
||||
{VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
|
||||
false),
|
||||
VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy}, false),
|
||||
addUnderscoredPrefixToName("RegisterTexture"));
|
||||
for (auto &&Info : DeviceVars) {
|
||||
llvm::GlobalVariable *Var = Info.Var;
|
||||
@ -713,11 +702,11 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
||||
|
||||
// void ** __{cuda|hip}RegisterFatBinary(void *);
|
||||
llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
|
||||
llvm::FunctionType::get(PtrTy, PtrTy, false),
|
||||
addUnderscoredPrefixToName("RegisterFatBinary"));
|
||||
// struct { int magic, int version, void * gpu_binary, void * dont_care };
|
||||
llvm::StructType *FatbinWrapperTy =
|
||||
llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
|
||||
llvm::StructType::get(IntTy, IntTy, PtrTy, PtrTy);
|
||||
|
||||
// Register GPU binary with the CUDA runtime, store returned handle in a
|
||||
// global variable and save a reference in GpuBinaryHandle to be cleaned up
|
||||
@ -813,7 +802,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
||||
// Data.
|
||||
Values.add(FatBinStr);
|
||||
// Unused in fatbin v1.
|
||||
Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
|
||||
Values.add(llvm::ConstantPointerNull::get(PtrTy));
|
||||
llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
|
||||
addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
|
||||
/*constant*/ true);
|
||||
@ -836,9 +825,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
||||
// The name, size, and initialization pattern of this variable is part
|
||||
// of HIP ABI.
|
||||
GpuBinaryHandle = new llvm::GlobalVariable(
|
||||
TheModule, VoidPtrPtrTy, /*isConstant=*/false,
|
||||
Linkage,
|
||||
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
|
||||
TheModule, PtrTy, /*isConstant=*/false, Linkage,
|
||||
/*Initializer=*/llvm::ConstantPointerNull::get(PtrTy),
|
||||
"__hip_gpubin_handle");
|
||||
if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage)
|
||||
GpuBinaryHandle->setComdat(
|
||||
@ -848,7 +836,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
||||
if (Linkage != llvm::GlobalValue::InternalLinkage)
|
||||
GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
|
||||
Address GpuBinaryAddr(
|
||||
GpuBinaryHandle, VoidPtrPtrTy,
|
||||
GpuBinaryHandle, PtrTy,
|
||||
CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
|
||||
{
|
||||
auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
|
||||
@ -880,8 +868,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
||||
llvm::CallInst *RegisterFatbinCall =
|
||||
CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
|
||||
GpuBinaryHandle = new llvm::GlobalVariable(
|
||||
TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
|
||||
llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
|
||||
TheModule, PtrTy, false, llvm::GlobalValue::InternalLinkage,
|
||||
llvm::ConstantPointerNull::get(PtrTy), "__cuda_gpubin_handle");
|
||||
GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
|
||||
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
|
||||
CGM.getPointerAlign());
|
||||
@ -895,7 +883,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
||||
CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
|
||||
// void __cudaRegisterFatBinaryEnd(void **);
|
||||
llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
|
||||
llvm::FunctionType::get(VoidTy, PtrTy, false),
|
||||
"__cudaRegisterFatBinaryEnd");
|
||||
CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
|
||||
}
|
||||
@ -967,7 +955,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
|
||||
|
||||
// void __cudaUnregisterFatBinary(void ** handle);
|
||||
llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
|
||||
llvm::FunctionType::get(VoidTy, PtrTy, false),
|
||||
addUnderscoredPrefixToName("UnregisterFatBinary"));
|
||||
|
||||
llvm::Function *ModuleDtorFunc = llvm::Function::Create(
|
||||
|
Loading…
x
Reference in New Issue
Block a user