diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.td b/llvm/include/llvm/Analysis/TargetLibraryInfo.td index 10b43ad2466f..6b17e70b87f5 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.td +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.td @@ -404,13 +404,6 @@ def dunder_isoc99_scanf : TargetLibCall<"__isoc99_scanf", Int, [Ptr, Ellip]>; def dunder_isoc99_sscanf : TargetLibCall<"__isoc99_sscanf", Int, [Ptr, Ptr, Ellip]>; -/// void* __kmpc_alloc_shared(size_t nbyte); -def __kmpc_alloc_shared : TargetLibCall<"__kmpc_alloc_shared", Ptr, [SizeT]>; - -/// void __kmpc_free_shared(void *ptr, size_t nbyte); -def __kmpc_free_shared - : TargetLibCall<"__kmpc_free_shared", Void, [Ptr, SizeT]>; - /// double __log10_finite(double x); def log10_finite : TargetLibCall<"__log10_finite", Dbl, [Dbl]>; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 5fe7ee899724..ad44625f352f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -524,6 +524,8 @@ __OMP_RTL(__last, false, Void, ) #define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind) #define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N) #define AllocSizeAttr(N, M) Attribute::getWithAllocSizeArgs(Ctx, N, M) +#define AllocKindAttr(Kind) Attribute::getWithAllocKind(Ctx, Kind) +#define AllocFamilyAttr(Name) Attribute::get(Ctx, "alloc-family", Name) #define MemoryAttr(ME) Attribute::getWithMemoryEffects(Ctx, ME) #define NoCaptureAttr Attribute::getWithCaptureInfo(Ctx, CaptureInfo::none()) #define AttributeSet(...) \ @@ -617,11 +619,6 @@ __OMP_ATTRS_SET(ReadOnlyPtrAttrs, NoCaptureAttr) : AttributeSet()) -__OMP_ATTRS_SET(DeviceAllocAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync)) - : AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync))) - #if 0 __OMP_ATTRS_SET(WriteOnlyPtrAttrs, OptimisticAttributes @@ -989,9 +986,16 @@ __OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(), __OMP_RTL_ATTRS(__kmpc_alloc_shared, AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - AllocSizeAttr(0, std::nullopt)), + AllocSizeAttr(0, std::nullopt), + AllocKindAttr(AllocFnKind::Alloc | + AllocFnKind::Uninitialized), + AllocFamilyAttr("__kmpc_alloc_shared")), ReturnPtrAttrs, ParamAttrs(SizeTyExt)) -__OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(), +__OMP_RTL_ATTRS(__kmpc_free_shared, + AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + AllocKindAttr(AllocFnKind::Free), + AllocFamilyAttr("__kmpc_alloc_shared")), + AttributeSet(), ParamAttrs(AttributeSet(NoCaptureAttr, EnumAttr(AllocatedPointer)), SizeTyExt)) @@ -1110,6 +1114,8 @@ __OMP_RTL_ATTRS(__kmpc_is_spmd_exec_mode, AttributeSet(), SExt, ParamAttrs()) #undef EnumAttrInt #undef ParamAttrs #undef AllocSizeAttr +#undef AllocKindAttr +#undef AllocFamilyAttr ///} diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index c7f89de9f8b1..0be03eb7af55 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -74,7 +74,6 @@ enum class MallocFamily { MSVCNew, // new(unsigned int) MSVCArrayNew, // new[](unsigned int) VecMalloc, - KmpcAllocShared, }; StringRef mangledNameForMallocFamily(const MallocFamily &Family) { @@ -95,8 +94,6 @@ StringRef mangledNameForMallocFamily(const MallocFamily &Family) { return "??_U@YAPAXI@Z"; case MallocFamily::VecMalloc: return "vec_malloc"; - case MallocFamily::KmpcAllocShared: - return "__kmpc_alloc_shared"; } llvm_unreachable("missing an alloc family"); } @@ -152,7 +149,6 @@ static const std::pair AllocationFnData[] = { {LibFunc_dunder_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}}, {LibFunc_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}}, {LibFunc_dunder_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}}, - {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1, -1, MallocFamily::KmpcAllocShared}}, }; // clang-format on @@ -478,7 +474,6 @@ static const std::pair FreeFnData[] = { {LibFunc_msvc_delete_array_ptr64_longlong, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, ulonglong) {LibFunc_msvc_delete_array_ptr32_nothrow, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, nothrow) {LibFunc_msvc_delete_array_ptr64_nothrow, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, nothrow) - {LibFunc___kmpc_free_shared, {2, MallocFamily::KmpcAllocShared}}, // OpenMP Offloading RTL free {LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t, {3, MallocFamily::CPPNewAligned}}, // delete(void*, align_val_t, nothrow) {LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t, {3, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, align_val_t, nothrow) {LibFunc_ZdlPvjSt11align_val_t, {3, MallocFamily::CPPNewAligned}}, // delete(void*, unsigned int, align_val_t) diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index d0555d68606b..6b8f86139225 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -142,12 +142,9 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_fputs_unlocked); TLI.setUnavailable(LibFunc_fgets_unlocked); - // There is really no runtime library on AMDGPU, apart from - // __kmpc_alloc/free_shared. + // There is really no runtime library on AMDGPU. if (T.isAMDGPU()) { TLI.disableAllFunctions(); - TLI.setAvailable(llvm::LibFunc___kmpc_alloc_shared); - TLI.setAvailable(llvm::LibFunc___kmpc_free_shared); return; } @@ -785,8 +782,6 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, // Miscellaneous other functions not provided. TLI.setUnavailable(LibFunc_atomic_load); TLI.setUnavailable(LibFunc_atomic_store); - TLI.setUnavailable(LibFunc___kmpc_alloc_shared); - TLI.setUnavailable(LibFunc___kmpc_free_shared); TLI.setUnavailable(LibFunc_dunder_strndup); TLI.setUnavailable(LibFunc_bcmp); TLI.setUnavailable(LibFunc_bcopy); @@ -834,8 +829,6 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setAvailable(LibFunc_putc_unlocked); TLI.setAvailable(LibFunc_putchar_unlocked); - TLI.setUnavailable(LibFunc___kmpc_alloc_shared); - TLI.setUnavailable(LibFunc___kmpc_free_shared); TLI.setUnavailable(LibFunc_dunder_strndup); TLI.setUnavailable(LibFunc_memccpy_chk); TLI.setUnavailable(LibFunc_strlen_chk); @@ -875,8 +868,6 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, // TLI.setAvailable(llvm::LibFunc_memcpy); // TLI.setAvailable(llvm::LibFunc_memset); - TLI.setAvailable(llvm::LibFunc___kmpc_alloc_shared); - TLI.setAvailable(llvm::LibFunc___kmpc_free_shared); } else { TLI.setUnavailable(LibFunc_nvvm_reflect); } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 95c0531c2183..4282fe762363 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -6689,12 +6689,17 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { namespace { struct AAHeapToStackFunction final : public AAHeapToStack { + static bool isGlobalizedLocal(const CallBase &CB) { + Attribute A = CB.getFnAttr("alloc-family"); + return A.isValid() && A.getValueAsString() == "__kmpc_alloc_shared"; + } + struct AllocationInfo { /// The call that allocates the memory. CallBase *const CB; - /// The library function id for the allocation. - LibFunc LibraryFunctionId = NotLibFunc; + /// Whether this allocation is an OpenMP globalized local variable. + bool IsGlobalizedLocal = false; /// The status wrt. a rewrite. enum { @@ -6763,8 +6768,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack { if (nullptr != getInitialValueOfAllocation(CB, TLI, I8Ty)) { AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB}; AllocationInfos[CB] = AI; - if (TLI) - TLI->getLibFunc(*CB, AI->LibraryFunctionId); + AI->IsGlobalizedLocal = isGlobalizedLocal(*CB); } } return true; @@ -6858,13 +6862,11 @@ struct AAHeapToStackFunction final : public AAHeapToStack { << "\n"); auto Remark = [&](OptimizationRemark OR) { - LibFunc IsAllocShared; - if (TLI->getLibFunc(*AI.CB, IsAllocShared)) - if (IsAllocShared == LibFunc___kmpc_alloc_shared) - return OR << "Moving globalized variable to the stack."; + if (AI.IsGlobalizedLocal) + return OR << "Moving globalized variable to the stack."; return OR << "Moving memory allocation from the heap to the stack."; }; - if (AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared) + if (AI.IsGlobalizedLocal) A.emitRemark(AI.CB, "OMP110", Remark); else A.emitRemark(AI.CB, "HeapToStack", Remark); @@ -7111,8 +7113,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { return false; } - // __kmpc_alloc_shared and __kmpc_alloc_free are by construction matched. - if (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared) { + // __kmpc_alloc_shared and __kmpc_free_shared are by construction matched. + if (!AI.IsGlobalizedLocal) { Instruction *CtxI = isa(AI.CB) ? AI.CB : AI.CB->getNextNode(); if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) { LLVM_DEBUG(dbgs() << "[H2S] unique free call might not be executed " @@ -7162,8 +7164,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { A, this, CBIRP, DepClassTy::OPTIONAL, IsKnownNoFree); if (!IsAssumedNoCapture || - (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared && - !IsAssumedNoFree)) { + (!AI.IsGlobalizedLocal && !IsAssumedNoFree)) { AI.HasPotentiallyFreeingUnknownUses |= !IsAssumedNoFree; // Emit a missed remark if this is missed OpenMP globalization. @@ -7174,8 +7175,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { "parameter as `__attribute__((noescape))` to override."; }; - if (ValidUsesOnly && - AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared) + if (ValidUsesOnly && AI.IsGlobalizedLocal) A.emitRemark(CB, "OMP113", Remark); LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n"); @@ -7236,8 +7236,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { } std::optional Size = getSize(A, *this, AI); - if (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared && - MaxHeapToStackSize != -1) { + if (!AI.IsGlobalizedLocal && MaxHeapToStackSize != -1) { if (!Size || Size->ugt(MaxHeapToStackSize)) { LLVM_DEBUG({ if (!Size) @@ -7271,9 +7270,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { // Check if we still think we can move it into the entry block. If the // alloca comes from a converted __kmpc_alloc_shared then we can usually - // ignore the potential compilations associated with loops. - bool IsGlobalizedLocal = - AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared; + // ignore the potential complications associated with loops. + bool IsGlobalizedLocal = AI.IsGlobalizedLocal; if (AI.MoveAllocaIntoEntry && (!Size.has_value() || (!IsGlobalizedLocal && IsInLoop(*AI.CB->getParent())))) diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll index 9a6e0680bb44..a83eeee397f1 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll @@ -305,7 +305,7 @@ define void @test9() { ; CHECK-NEXT: [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4) ; CHECK-NEXT: tail call void @no_sync_func(ptr nofree captures(none) [[I]]) ; CHECK-NEXT: store i32 10, ptr [[I]], align 4 -; CHECK-NEXT: tail call void @foo_nounw(ptr nofree nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: tail call void @foo_nounw(ptr nofree nonnull align 4 dereferenceable(4) [[I]]) #[[ATTR8:[0-9]+]] ; CHECK-NEXT: tail call void @free(ptr nonnull align 4 captures(none) dereferenceable(4) [[I]]) ; CHECK-NEXT: ret void ; @@ -346,7 +346,7 @@ define void @test11() { ; CHECK-LABEL: define {{[^@]+}}@test11() { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4) -; CHECK-NEXT: tail call void @sync_will_return(ptr [[I]]) #[[ATTR6]] +; CHECK-NEXT: tail call void @sync_will_return(ptr [[I]]) #[[ATTR8]] ; CHECK-NEXT: tail call void @free(ptr captures(none) [[I]]) ; CHECK-NEXT: ret void ; @@ -600,7 +600,7 @@ define void @test16c(i8 %v, ptr %P) { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4) ; CHECK-NEXT: store ptr [[I]], ptr [[P]], align 8 -; CHECK-NEXT: tail call void @no_sync_func(ptr nofree captures(none) [[I]]) #[[ATTR6]] +; CHECK-NEXT: tail call void @no_sync_func(ptr nofree captures(none) [[I]]) #[[ATTR8]] ; CHECK-NEXT: tail call void @free(ptr captures(none) [[I]]) ; CHECK-NEXT: ret void ; @@ -626,15 +626,15 @@ bb: ret void } -declare ptr @__kmpc_alloc_shared(i64) -declare void @__kmpc_free_shared(ptr nocapture, i64) +declare ptr @__kmpc_alloc_shared(i64) allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" +declare void @__kmpc_free_shared(ptr allocptr nocapture, i64) allockind("free") "alloc-family"="__kmpc_alloc_shared" define void @test17() { ; CHECK-LABEL: define {{[^@]+}}@test17() { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I_H2S:%.*]] = alloca i8, i64 4, align 1, addrspace(5) ; CHECK-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[I_H2S]] to ptr -; CHECK-NEXT: tail call void @usei8(ptr noalias nofree captures(none) [[MALLOC_CAST]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: tail call void @usei8(ptr noalias nofree captures(none) [[MALLOC_CAST]]) #[[ATTR9:[0-9]+]] ; CHECK-NEXT: ret void ; bb: @@ -648,7 +648,7 @@ define void @test17b() { ; CHECK-LABEL: define {{[^@]+}}@test17b() { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I:%.*]] = tail call noalias ptr @__kmpc_alloc_shared(i64 noundef 4) -; CHECK-NEXT: tail call void @usei8(ptr nofree [[I]]) #[[ATTR7]] +; CHECK-NEXT: tail call void @usei8(ptr nofree [[I]]) #[[ATTR9]] ; CHECK-NEXT: tail call void @__kmpc_free_shared(ptr captures(none) [[I]], i64 noundef 4) ; CHECK-NEXT: ret void ; @@ -666,7 +666,7 @@ define void @move_alloca() { ; CHECK-NEXT: br label [[NOT_ENTRY:%.*]] ; CHECK: not_entry: ; CHECK-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[I_H2S]] to ptr -; CHECK-NEXT: tail call void @usei8(ptr noalias nofree captures(none) [[MALLOC_CAST]]) #[[ATTR7]] +; CHECK-NEXT: tail call void @usei8(ptr noalias nofree captures(none) [[MALLOC_CAST]]) #[[ATTR9]] ; CHECK-NEXT: ret void ; entry: @@ -683,11 +683,11 @@ not_entry: define void @test16e(i8 %v) norecurse { ; CHECK: Function Attrs: norecurse ; CHECK-LABEL: define {{[^@]+}}@test16e -; CHECK-SAME: (i8 [[V:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK-SAME: (i8 [[V:%.*]]) #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I:%.*]] = tail call noalias ptr @__kmpc_alloc_shared(i64 noundef 4) ; CHECK-NEXT: store ptr [[I]], ptr @G, align 8 -; CHECK-NEXT: call void @usei8(ptr nofree captures(none) [[I]]) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @usei8(ptr nofree captures(none) [[I]]) #[[ATTR10:[0-9]+]] ; CHECK-NEXT: tail call void @__kmpc_free_shared(ptr noalias captures(none) [[I]], i64 noundef 4) ; CHECK-NEXT: ret void ; @@ -704,12 +704,12 @@ bb: define void @test16f(i8 %v) norecurse { ; CHECK: Function Attrs: norecurse ; CHECK-LABEL: define {{[^@]+}}@test16f -; CHECK-SAME: (i8 [[V:%.*]]) #[[ATTR5]] { +; CHECK-SAME: (i8 [[V:%.*]]) #[[ATTR7]] { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I_H2S:%.*]] = alloca i8, i64 4, align 1, addrspace(5) ; CHECK-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[I_H2S]] to ptr ; CHECK-NEXT: store ptr [[MALLOC_CAST]], ptr @Gtl, align 8 -; CHECK-NEXT: call void @usei8(ptr nofree captures(none) [[MALLOC_CAST]]) #[[ATTR8]] +; CHECK-NEXT: call void @usei8(ptr nofree captures(none) [[MALLOC_CAST]]) #[[ATTR10]] ; CHECK-NEXT: ret void ; bb: @@ -726,7 +726,7 @@ define void @convert_large_kmpc_alloc_shared() { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I_H2S:%.*]] = alloca i8, i64 256, align 1, addrspace(5) ; CHECK-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[I_H2S]] to ptr -; CHECK-NEXT: tail call void @usei8(ptr noalias nofree captures(none) [[MALLOC_CAST]]) #[[ATTR7]] +; CHECK-NEXT: tail call void @usei8(ptr noalias nofree captures(none) [[MALLOC_CAST]]) #[[ATTR9]] ; CHECK-NEXT: ret void ; bb: @@ -743,10 +743,12 @@ bb: ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nounwind } ; CHECK: attributes #[[ATTR3]] = { noreturn } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; CHECK: attributes #[[ATTR5]] = { norecurse } -; CHECK: attributes #[[ATTR6]] = { nounwind } -; CHECK: attributes #[[ATTR7]] = { nosync nounwind willreturn } -; CHECK: attributes #[[ATTR8]] = { nocallback nosync nounwind willreturn } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; CHECK: attributes #[[ATTR7]] = { norecurse } +; CHECK: attributes #[[ATTR8]] = { nounwind } +; CHECK: attributes #[[ATTR9]] = { nosync nounwind willreturn } +; CHECK: attributes #[[ATTR10]] = { nocallback nosync nounwind willreturn } ;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CGSCC: {{.*}} diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index 2e8f84ca86b2..e5259b7e9141 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -1293,7 +1293,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr) -; CHECK: ; Function Attrs: nosync nounwind allocsize(0) +; CHECK: ; Function Attrs: nosync nounwind allockind("alloc,uninitialized") allocsize(0) ; CHECK-NEXT: declare noalias ptr @__kmpc_alloc_shared(i64) ; CHECK: ; Function Attrs: convergent nounwind @@ -1329,7 +1329,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; CHECK-NOT: Function Attrs ; CHECK: declare void @__kmpc_fork_call_if(ptr, i32, ptr, i32, ptr) -; CHECK: ; Function Attrs: nosync nounwind +; CHECK: ; Function Attrs: nosync nounwind allockind("free") ; CHECK-NEXT: declare void @__kmpc_free_shared(ptr allocptr captures(none), i64) ; CHECK: ; Function Attrs: nounwind @@ -1935,7 +1935,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr) -; OPTIMISTIC: ; Function Attrs: nosync nounwind allocsize(0) +; OPTIMISTIC: ; Function Attrs: nosync nounwind allockind("alloc,uninitialized") allocsize(0) ; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_alloc_shared(i64) ; OPTIMISTIC: ; Function Attrs: convergent nounwind @@ -1971,7 +1971,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare void @__kmpc_fork_call_if(ptr nofree readonly captures(none), i32, ptr nofree readonly captures(none), i32, ptr) -; OPTIMISTIC: ; Function Attrs: nosync nounwind +; OPTIMISTIC: ; Function Attrs: nosync nounwind allockind("free") ; OPTIMISTIC-NEXT: declare void @__kmpc_free_shared(ptr allocptr captures(none), i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) @@ -2593,7 +2593,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; EXT: ; Function Attrs: nounwind ; EXT-NEXT: declare noalias ptr @__kmpc_aligned_alloc(i32 signext, i64, i64, ptr) -; EXT: ; Function Attrs: nosync nounwind allocsize(0) +; EXT: ; Function Attrs: nosync nounwind allockind("alloc,uninitialized") allocsize(0) ; EXT-NEXT: declare noalias ptr @__kmpc_alloc_shared(i64) ; EXT: ; Function Attrs: convergent nounwind @@ -2629,7 +2629,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; EXT-NOT: Function Attrs ; EXT: declare void @__kmpc_fork_call_if(ptr, i32 signext, ptr, i32 signext, ptr) -; EXT: ; Function Attrs: nosync nounwind +; EXT: ; Function Attrs: nosync nounwind allockind("free") ; EXT-NEXT: declare void @__kmpc_free_shared(ptr allocptr captures(none), i64) ; EXT: ; Function Attrs: nounwind diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll index 9dd99dcccd50..7d0cf8ca5c1d 100644 --- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll +++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll @@ -107,7 +107,7 @@ define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]] -; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]] +; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) ; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5) ; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8 @@ -175,7 +175,7 @@ define hidden void @_Z4foo1i(i32 noundef %i1) local_unnamed_addr #1 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]] -; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]] +; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) ; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5) ; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8 @@ -203,7 +203,7 @@ define internal void @__omp_outlined__(ptr noalias nocapture readnone %.global_t ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[CAPTURED_VARS_ADDRS_I]]) ; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]] -; CHECK-NEXT: [[I_I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]] +; CHECK-NEXT: [[I_I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) ; CHECK-NEXT: store i32 [[TMP0]], ptr [[I_I]], align 16 ; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5) ; CHECK-NEXT: store ptr [[I_I]], ptr addrspace(5) [[TMP2]], align 8 @@ -238,7 +238,7 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #5 { ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]]) ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]] -; CHECK-NEXT: [[I_I_I:%.*]] = call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]] +; CHECK-NEXT: [[I_I_I:%.*]] = call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) ; CHECK-NEXT: store i32 [[TMP4]], ptr [[I_I_I]], align 16 ; CHECK-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I_I]] to ptr addrspace(5) ; CHECK-NEXT: store ptr [[I_I_I]], ptr addrspace(5) [[TMP7]], align 8 @@ -331,10 +331,10 @@ declare void @llvm.lifetime.end.p0(ptr nocapture) #11 !5 = !{i32 7, !"openmp-device", i32 50} ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { "kernel" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind allocsize(0) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } ; CHECK: attributes #[[ATTR2]] = { nounwind } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { alwaysinline } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nounwind } ; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ;. diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 419d3d07c2be..d62d514778d9 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -47,7 +47,7 @@ define ptx_kernel void @kernel(ptr %dyn) "kernel" { ; CHECK-NEXT: call void @foo() #[[ATTR1:[0-9]+]] ; CHECK-NEXT: call void @bar() #[[ATTR1]] ; CHECK-NEXT: call void @convert_and_move_alloca() #[[ATTR1]] -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit() ; CHECK-NEXT: ret void ; @@ -58,7 +58,7 @@ define ptx_kernel void @kernel(ptr %dyn) "kernel" { ; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR1:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR1]] ; CHECK-DISABLED-NEXT: call void @convert_and_move_alloca() #[[ATTR1]] -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR4:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR5:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit() ; CHECK-DISABLED-NEXT: ret void ; @@ -97,14 +97,14 @@ define internal void @bar() { ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4 -; CHECK-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG7:![0-9]+]] +; CHECK-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR6:[0-9]+]], !dbg [[DBG7:![0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar ; CHECK-DISABLED-SAME: () #[[ATTR1]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4 -; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG7:![0-9]+]] +; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR6:[0-9]+]], !dbg [[DBG7:![0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -145,8 +145,8 @@ define void @unused() { ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]], !dbg [[DBG10:![0-9]+]] -; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR6]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg [[DBG10:![0-9]+]] +; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR7:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -259,17 +259,19 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: attributes #[[ATTR0]] = { "kernel" } ; CHECK: attributes #[[ATTR1]] = { nosync nounwind } ; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(write) } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind allocsize(0) } -; CHECK: attributes #[[ATTR4]] = { "llvm.assume"="omp_no_openmp" } -; CHECK: attributes #[[ATTR5]] = { nosync nounwind memory(write) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; CHECK: attributes #[[ATTR5]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR6]] = { nosync nounwind memory(write) } ;. ; CHECK-DISABLED: attributes #[[ATTR0]] = { "kernel" } ; CHECK-DISABLED: attributes #[[ATTR1]] = { nosync nounwind } ; CHECK-DISABLED: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(write) } -; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind allocsize(0) } -; CHECK-DISABLED: attributes #[[ATTR4]] = { "llvm.assume"="omp_no_openmp" } -; CHECK-DISABLED: attributes #[[ATTR5]] = { nosync nounwind memory(write) } -; CHECK-DISABLED: attributes #[[ATTR6]] = { nounwind } +; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } +; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; CHECK-DISABLED: attributes #[[ATTR5]] = { "llvm.assume"="omp_no_openmp" } +; CHECK-DISABLED: attributes #[[ATTR6]] = { nosync nounwind memory(write) } +; CHECK-DISABLED: attributes #[[ATTR7]] = { nounwind } ;. ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None) ; CHECK: [[META1]] = !DIFile(filename: "{{.*}}remove_globalization.c", directory: {{.*}}) diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll index 92cfd7504922..324415d02e1c 100644 --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -137,10 +137,10 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @foo_kernel_environment, ptr [[DYN]]) -; CHECK-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) ; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5:[0-9]+]] -; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR7:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR7:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit() ; CHECK-NEXT: ret void ; @@ -152,14 +152,14 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]] ; CHECK: master1: -; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared to ptr)) #[[ATTR7]] +; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared to ptr)) #[[ATTR6]] ; CHECK-NEXT: br label [[NEXT:%.*]] ; CHECK: next: ; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5]] ; CHECK-NEXT: [[B0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[B0]], label [[MASTER2:%.*]], label [[EXIT]] ; CHECK: master2: -; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR7]] +; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit() @@ -173,9 +173,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] ; CHECK: master3: -; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG7:![0-9]+]] -; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR7]] -; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR8]] +; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24), !dbg [[DBG7:![0-9]+]] +; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR7]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit() @@ -197,7 +197,6 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: nosync nounwind allocsize(0) memory(read) ; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared ; CHECK-SAME: (i64 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr @offset, align 4 @@ -212,16 +211,15 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ;. ; CHECK: attributes #[[ATTR0]] = { "kernel" } ; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(write) } -; CHECK: attributes #[[ATTR2]] = { nosync nounwind allocsize(0) memory(read) } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind } +; CHECK: attributes #[[ATTR2]] = { nosync nounwind allockind("alloc,uninitialized") allocsize(0) memory(read) "alloc-family"="__kmpc_alloc_shared" } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR5]] = { "llvm.assume"="omp_no_openmp" } -; CHECK: attributes #[[ATTR6]] = { nounwind memory(read) } -; CHECK: attributes #[[ATTR7]] = { nosync nounwind memory(write) } -; CHECK: attributes #[[ATTR8]] = { nounwind } +; CHECK: attributes #[[ATTR6]] = { nosync nounwind memory(write) } +; CHECK: attributes #[[ATTR7]] = { nounwind } ;. ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None) -; CHECK: [[META1]] = !DIFile(filename: "replace_globalization.c", directory: {{.*}}) +; CHECK: [[META1]] = !DIFile(filename: "{{.*}}replace_globalization.c", directory: {{.*}}) ; CHECK: [[META2]] = !{} ; CHECK: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} ; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index a76444f8c6f5..d5c66f3933a1 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -3154,8 +3154,8 @@ attributes #9 = { alwaysinline } ; AMDGPU: attributes #[[ATTR2]] = { convergent norecurse nounwind } ; AMDGPU: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } ; AMDGPU: attributes #[[ATTR4]] = { nounwind } -; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } -; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } ; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; AMDGPU: attributes #[[ATTR8]] = { convergent } ; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } @@ -3167,8 +3167,8 @@ attributes #9 = { alwaysinline } ; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind } ; NVPTX: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } ; NVPTX: attributes #[[ATTR4]] = { nounwind } -; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } -; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } ; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; NVPTX: attributes #[[ATTR8]] = { convergent } ; NVPTX: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } @@ -3180,8 +3180,8 @@ attributes #9 = { alwaysinline } ; AMDGPU-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind } ; AMDGPU-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } ; AMDGPU-DISABLED1: attributes #[[ATTR4]] = { nounwind } -; AMDGPU-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } -; AMDGPU-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; AMDGPU-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; AMDGPU-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } ; AMDGPU-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; AMDGPU-DISABLED1: attributes #[[ATTR8]] = { convergent } ; AMDGPU-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } @@ -3193,8 +3193,8 @@ attributes #9 = { alwaysinline } ; AMDGPU-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind } ; AMDGPU-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } ; AMDGPU-DISABLED2: attributes #[[ATTR4]] = { nounwind } -; AMDGPU-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } -; AMDGPU-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; AMDGPU-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; AMDGPU-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } ; AMDGPU-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; AMDGPU-DISABLED2: attributes #[[ATTR8]] = { convergent } ; AMDGPU-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } @@ -3205,8 +3205,8 @@ attributes #9 = { alwaysinline } ; NVPTX-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind } ; NVPTX-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } ; NVPTX-DISABLED1: attributes #[[ATTR4]] = { nounwind } -; NVPTX-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } -; NVPTX-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; NVPTX-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; NVPTX-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } ; NVPTX-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; NVPTX-DISABLED1: attributes #[[ATTR8]] = { convergent } ; NVPTX-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } @@ -3218,8 +3218,8 @@ attributes #9 = { alwaysinline } ; NVPTX-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind } ; NVPTX-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } ; NVPTX-DISABLED2: attributes #[[ATTR4]] = { nounwind } -; NVPTX-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } -; NVPTX-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; NVPTX-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; NVPTX-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } ; NVPTX-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; NVPTX-DISABLED2: attributes #[[ATTR8]] = { convergent } ; NVPTX-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll index a5fddbea8aed..d1ef41bec684 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -439,8 +439,8 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame ; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="all" "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } ; CHECK: attributes #[[ATTR6]] = { nounwind } ; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CHECK: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind } @@ -451,8 +451,8 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame ; CHECK-DISABLED: attributes #[[ATTR1:[0-9]+]] = { alwaysinline } ; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="all" "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } -; CHECK-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } +; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } +; CHECK-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } ; CHECK-DISABLED: attributes #[[ATTR6]] = { nounwind } ; CHECK-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CHECK-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind } diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll index cbfa648fa99f..7d47e46b5577 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll @@ -536,7 +536,7 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p ; AMDGPU-NEXT: [[ENTRY:.*]]: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; AMDGPU-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR10]] +; AMDGPU-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) ; AMDGPU-NEXT: br label %[[FOR_COND:.*]] ; AMDGPU: [[FOR_COND]]: ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] @@ -558,7 +558,7 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p ; NVPTX-NEXT: [[ENTRY:.*]]: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5) ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -; NVPTX-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR10]] +; NVPTX-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) ; NVPTX-NEXT: br label %[[FOR_COND:.*]] ; NVPTX: [[FOR_COND]]: ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ] @@ -1116,8 +1116,8 @@ attributes #8 = { nounwind } ; AMDGPU: attributes #[[ATTR1]] = { norecurse } ; AMDGPU: attributes #[[ATTR2]] = { convergent norecurse nounwind } ; AMDGPU: attributes #[[ATTR3]] = { norecurse nounwind } -; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } -; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } ; AMDGPU: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; AMDGPU: attributes #[[ATTR7]] = { convergent } ; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } @@ -1129,8 +1129,8 @@ attributes #8 = { nounwind } ; NVPTX: attributes #[[ATTR1]] = { norecurse } ; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind } ; NVPTX: attributes #[[ATTR3]] = { norecurse nounwind } -; NVPTX: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } -; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; NVPTX: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allockind("free") "alloc-family"="__kmpc_alloc_shared" } +; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="__kmpc_alloc_shared" } ; NVPTX: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; NVPTX: attributes #[[ATTR7]] = { convergent } ; NVPTX: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml index ff2c9ae00bdb..63520a0b9808 100644 --- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml +++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml @@ -54,10 +54,10 @@ ## the exact count first; the two directives should add up to that. ## Yes, this means additions to TLI will fail this test, but the argument ## to -COUNT can't be an expression. -# AVAIL: TLI knows 530 symbols, 295 available +# AVAIL: TLI knows 528 symbols, 295 available # AVAIL-COUNT-295: {{^}} available # AVAIL-NOT: {{^}} available -# UNAVAIL-COUNT-235: not available +# UNAVAIL-COUNT-233: not available # UNAVAIL-NOT: not available ## This is a large file so it's worth telling lit to stop here. diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index 39ed297207e9..afeefd36a11e 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -688,11 +688,7 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare ptr @vec_calloc(i64, i64)\n" "declare ptr @vec_malloc(i64)\n" "declare ptr @vec_realloc(ptr, i64)\n" - "declare void @vec_free(ptr)\n" - - // These functions are OpenMP Offloading allocation / free routines - "declare ptr @__kmpc_alloc_shared(i64)\n" - "declare void @__kmpc_free_shared(ptr, i64)\n"); + "declare void @vec_free(ptr)\n"); for (unsigned FI = LibFunc::Begin_LibFunc; FI != LibFunc::End_LibFunc; ++FI) { LibFunc LF = (LibFunc)FI;